summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHeinz Mauelshagen <heinzm@redhat.com>2016-02-04 14:31:07 +0100
committerHeinz Mauelshagen <heinzm@redhat.com>2016-02-04 14:31:07 +0100
commit271561fea8fca807099ca0d846521ea1a62ffd0a (patch)
tree94ec158ed969d0ab02dca9c43b04ecd517a208d1
parent1263c8b27a66548002044332734d7bf982c046a8 (diff)
parent6a182bf33878542157430c6ffb32bebbeb255d70 (diff)
downloadlvm2-dev-lvmguy-raid-takeover-reshape-resize.tar.gz
Merge branch 'dev-lvmguy-raid-takeover-reshape-resize_work' into dev-lvmguy-raid-takeover-reshape-resizedev-lvmguy-raid-takeover-reshape-resize
-rw-r--r--.gitignore11
-rw-r--r--Makefile.in59
-rw-r--r--README2
-rw-r--r--VERSION2
-rw-r--r--VERSION_DM2
-rw-r--r--WHATS_NEW262
-rw-r--r--WHATS_NEW_DM112
-rw-r--r--acinclude.m44
-rw-r--r--aclocal.m44
-rw-r--r--conf/.gitignore4
-rw-r--r--conf/Makefile.in25
-rw-r--r--conf/cache-mq.profile20
-rw-r--r--conf/cache-smq.profile14
-rw-r--r--conf/command_profile_template.profile.in1
-rw-r--r--conf/example.conf.base23
-rw-r--r--conf/example.conf.in2910
-rw-r--r--conf/lvmlocal.conf.base19
-rw-r--r--conf/lvmlocal.conf.in57
-rw-r--r--conf/metadata_profile_template.profile.in2
-rwxr-xr-xconfigure1790
-rw-r--r--configure.in374
-rw-r--r--daemons/Makefile.in14
-rw-r--r--daemons/clvmd/.gitignore1
-rw-r--r--daemons/clvmd/Makefile.in4
-rw-r--r--daemons/clvmd/clvmd-command.c4
-rw-r--r--daemons/clvmd/clvmd-common.h9
-rw-r--r--daemons/clvmd/clvmd-openais.c2
-rw-r--r--daemons/clvmd/clvmd-singlenode.c4
-rw-r--r--daemons/clvmd/clvmd.c70
-rw-r--r--daemons/clvmd/lvm-functions.c4
-rw-r--r--daemons/cmirrord/.gitignore1
-rw-r--r--daemons/cmirrord/clogd.c62
-rw-r--r--daemons/cmirrord/cluster.c23
-rw-r--r--daemons/cmirrord/functions.c5
-rw-r--r--daemons/dmeventd/.gitignore1
-rw-r--r--daemons/dmeventd/dmeventd.c110
-rw-r--r--daemons/dmeventd/libdevmapper-event.c5
-rw-r--r--daemons/dmeventd/plugins/lvm2/dmeventd_lvm.c3
-rw-r--r--daemons/dmeventd/plugins/mirror/dmeventd_mirror.c13
-rw-r--r--daemons/lvmetad/.gitignore2
-rw-r--r--daemons/lvmetad/Makefile.in9
-rw-r--r--daemons/lvmetad/lvmetactl.c183
-rw-r--r--daemons/lvmetad/lvmetad-core.c605
-rw-r--r--daemons/lvmetad/testclient.c17
-rw-r--r--daemons/lvmlockd/.gitignore2
-rw-r--r--daemons/lvmlockd/Makefile.in66
-rw-r--r--daemons/lvmlockd/lvmlockctl.c751
-rw-r--r--daemons/lvmlockd/lvmlockd-client.h51
-rw-r--r--daemons/lvmlockd/lvmlockd-core.c5896
-rw-r--r--daemons/lvmlockd/lvmlockd-dlm.c662
-rw-r--r--daemons/lvmlockd/lvmlockd-internal.h578
-rw-r--r--daemons/lvmlockd/lvmlockd-sanlock.c1800
-rw-r--r--daemons/lvmpolld/.gitignore1
-rw-r--r--daemons/lvmpolld/Makefile.in48
-rw-r--r--daemons/lvmpolld/lvmpolld-cmd-utils.c144
-rw-r--r--daemons/lvmpolld/lvmpolld-cmd-utils.h25
-rw-r--r--daemons/lvmpolld/lvmpolld-common.h (renamed from lib/misc/timestamp.h)28
-rw-r--r--daemons/lvmpolld/lvmpolld-core.c989
-rw-r--r--daemons/lvmpolld/lvmpolld-data-utils.c391
-rw-r--r--daemons/lvmpolld/lvmpolld-data-utils.h215
-rw-r--r--daemons/lvmpolld/lvmpolld-protocol.h52
-rw-r--r--daemons/lvmpolld/polling_ops.h25
-rw-r--r--doc/caching_foreign_vgs.txt86
-rw-r--r--doc/kernel/cache-policies.txt38
-rw-r--r--doc/kernel/cache.txt129
-rw-r--r--doc/kernel/crypt.txt28
-rw-r--r--doc/kernel/era.txt108
-rw-r--r--doc/kernel/log-writes.txt140
-rw-r--r--doc/kernel/raid.txt2
-rw-r--r--doc/kernel/statistics.txt186
-rw-r--r--doc/kernel/switch.txt138
-rw-r--r--doc/kernel/thin-provisioning.txt62
-rw-r--r--doc/kernel/verity.txt21
-rw-r--r--doc/lvmetad_design.txt11
-rw-r--r--doc/lvmpolld_overview.txt81
-rw-r--r--include/.symlinks.in8
-rw-r--r--include/Makefile.in12
-rw-r--r--lib/Makefile.in20
-rw-r--r--lib/activate/activate.c104
-rw-r--r--lib/activate/activate.h30
-rw-r--r--lib/activate/dev_manager.c266
-rw-r--r--lib/cache/lvmcache.c484
-rw-r--r--lib/cache/lvmcache.h37
-rw-r--r--lib/cache/lvmetad.c708
-rw-r--r--lib/cache/lvmetad.h30
-rw-r--r--lib/cache_segtype/cache.c130
-rw-r--r--lib/commands/toolcontext.c463
-rw-r--r--lib/commands/toolcontext.h152
-rw-r--r--lib/config/config.c543
-rw-r--r--lib/config/config.h109
-rw-r--r--lib/config/config_settings.h1952
-rw-r--r--lib/config/defaults.h53
-rw-r--r--lib/datastruct/str_list.c15
-rw-r--r--lib/datastruct/str_list.h1
-rw-r--r--lib/device/dev-cache.c42
-rw-r--r--lib/device/dev-ext-udev-constants.h52
-rw-r--r--lib/device/dev-ext.c164
-rw-r--r--lib/device/dev-io.c25
-rw-r--r--lib/device/dev-md.c48
-rw-r--r--lib/device/dev-type.c169
-rw-r--r--lib/device/dev-type.h3
-rw-r--r--lib/device/device.h30
-rw-r--r--lib/display/display.c216
-rw-r--r--lib/display/display.h3
-rw-r--r--lib/filters/filter-composite.c15
-rw-r--r--lib/filters/filter-fwraid.c123
-rw-r--r--lib/filters/filter-md.c3
-rw-r--r--lib/filters/filter-mpath.c49
-rw-r--r--lib/filters/filter-partitioned.c34
-rw-r--r--lib/filters/filter-persistent.c10
-rw-r--r--lib/filters/filter-usable.c110
-rw-r--r--lib/filters/filter.h4
-rw-r--r--lib/format1/disk-rep.h2
-rw-r--r--lib/format1/format1.c10
-rw-r--r--lib/format1/import-export.c42
-rw-r--r--lib/format1/import-extents.c4
-rw-r--r--lib/format_pool/format_pool.c2
-rw-r--r--lib/format_pool/import_export.c4
-rw-r--r--lib/format_text/archiver.c2
-rw-r--r--lib/format_text/export.c126
-rw-r--r--lib/format_text/flags.c9
-rw-r--r--lib/format_text/format-text.c149
-rw-r--r--lib/format_text/import-export.h31
-rw-r--r--lib/format_text/import.c96
-rw-r--r--lib/format_text/import_vsn1.c223
-rw-r--r--lib/format_text/layout.h10
-rw-r--r--lib/format_text/tags.c82
-rw-r--r--lib/format_text/text_label.c29
-rw-r--r--lib/label/label.c25
-rw-r--r--lib/locking/locking.c5
-rw-r--r--lib/locking/locking.h7
-rw-r--r--lib/locking/lvmlockd.c2604
-rw-r--r--lib/locking/lvmlockd.h246
-rw-r--r--lib/log/log.c111
-rw-r--r--lib/log/log.h6
-rw-r--r--lib/log/lvm-logging.h1
-rw-r--r--lib/lvmpolld/lvmpolld-client.c356
-rw-r--r--lib/lvmpolld/lvmpolld-client.h52
-rw-r--r--lib/lvmpolld/polldaemon.h (renamed from tools/polldaemon.h)27
-rw-r--r--lib/metadata/cache_manip.c173
-rw-r--r--lib/metadata/lv.c111
-rw-r--r--lib/metadata/lv.h8
-rw-r--r--lib/metadata/lv_alloc.h5
-rw-r--r--lib/metadata/lv_manip.c1840
-rw-r--r--lib/metadata/merge.c71
-rw-r--r--lib/metadata/metadata-exported.h140
-rw-r--r--lib/metadata/metadata.c800
-rw-r--r--lib/metadata/metadata.h18
-rw-r--r--lib/metadata/mirror.c120
-rw-r--r--lib/metadata/pool_manip.c4
-rw-r--r--lib/metadata/pv_manip.c61
-rw-r--r--lib/metadata/raid_manip.c11467
-rw-r--r--lib/metadata/replicator_manip.c6
-rw-r--r--lib/metadata/segtype.c17
-rw-r--r--lib/metadata/segtype.h123
-rw-r--r--lib/metadata/thin_manip.c138
-rw-r--r--lib/metadata/vg.c72
-rw-r--r--lib/metadata/vg.h35
-rw-r--r--lib/mirror/mirrored.c15
-rw-r--r--lib/misc/.gitignore1
-rw-r--r--lib/misc/configure.h.in131
-rw-r--r--lib/misc/lib.h28
-rw-r--r--lib/misc/lvm-exec.c14
-rw-r--r--lib/misc/lvm-file.c10
-rw-r--r--lib/misc/lvm-file.h11
-rw-r--r--lib/misc/lvm-globals.c29
-rw-r--r--lib/misc/lvm-globals.h5
-rw-r--r--lib/misc/lvm-signal.c2
-rw-r--r--lib/misc/lvm-string.c37
-rw-r--r--lib/misc/lvm-string.h4
-rw-r--r--lib/misc/timestamp.c129
-rw-r--r--lib/misc/util.h39
-rw-r--r--lib/mm/memlock.c15
-rw-r--r--lib/mm/xlate.h82
-rw-r--r--lib/properties/prop_common.h10
-rw-r--r--lib/raid/raid.c112
-rw-r--r--lib/replicator/replicator.c2
-rw-r--r--lib/report/columns.h53
-rw-r--r--lib/report/properties.c54
-rw-r--r--lib/report/report.c1618
-rw-r--r--lib/report/report.h57
-rw-r--r--lib/report/values.h28
-rw-r--r--lib/thin/thin.c28
-rw-r--r--lib/uuid/uuid.h1
-rw-r--r--libdaemon/Makefile.in7
-rw-r--r--libdaemon/client/config-util.c78
-rw-r--r--libdaemon/client/config-util.h10
-rw-r--r--libdaemon/client/daemon-client.c24
-rw-r--r--libdaemon/client/daemon-client.h1
-rw-r--r--libdaemon/client/daemon-io.c29
-rw-r--r--libdaemon/client/daemon-io.h6
-rw-r--r--libdaemon/server/daemon-log.c23
-rw-r--r--libdaemon/server/daemon-server.c79
-rw-r--r--libdaemon/server/daemon-server.h20
-rw-r--r--libdm/.exported_symbols11
-rw-r--r--libdm/.exported_symbols.Base287
-rw-r--r--libdm/.exported_symbols.DM_1_02_1002
-rw-r--r--libdm/.exported_symbols.DM_1_02_1012
-rw-r--r--libdm/.exported_symbols.DM_1_02_1037
-rw-r--r--libdm/.exported_symbols.DM_1_02_10477
-rw-r--r--libdm/.exported_symbols.DM_1_02_1054
-rw-r--r--libdm/.exported_symbols.DM_1_02_1061
-rw-r--r--libdm/.exported_symbols.DM_1_02_971
-rw-r--r--libdm/.exported_symbols.DM_1_02_981
-rw-r--r--libdm/.exported_symbols.DM_1_02_991
-rw-r--r--libdm/Makefile.in17
-rw-r--r--libdm/ioctl/libdm-iface.c132
-rw-r--r--libdm/ioctl/libdm-targets.h3
-rw-r--r--libdm/libdevmapper.h966
-rw-r--r--libdm/libdevmapper.pc.in2
-rw-r--r--libdm/libdm-common.c69
-rw-r--r--libdm/libdm-config.c54
-rw-r--r--libdm/libdm-deptree.c265
-rw-r--r--libdm/libdm-file.c4
-rw-r--r--libdm/libdm-report.c1391
-rw-r--r--libdm/libdm-stats.c1390
-rw-r--r--libdm/libdm-string.c157
-rw-r--r--libdm/libdm-timestamp.c178
-rw-r--r--libdm/misc/dm-ioctl.h11
-rw-r--r--libdm/mm/dbg_malloc.c95
-rw-r--r--liblvm/Makefile.in2
-rw-r--r--liblvm/lvm2app.h4
-rw-r--r--liblvm/lvm_base.c3
-rw-r--r--liblvm/lvm_misc.c1
-rw-r--r--liblvm/lvm_prop.c2
-rw-r--r--liblvm/lvm_pv.c9
-rw-r--r--liblvm/lvm_vg.c2
-rw-r--r--make.tmpl.in157
-rw-r--r--man/Makefile.in25
-rw-r--r--man/blkdeactivate.8.in4
-rw-r--r--man/clvmd.8.in2
-rw-r--r--man/cmirrord.8.in8
-rw-r--r--man/dmsetup.8.in23
-rw-r--r--man/dmstats.8.in715
-rw-r--r--man/lvchange.8.in58
-rw-r--r--man/lvconvert.8.in21
-rw-r--r--man/lvcreate.8.in33
-rw-r--r--man/lvdisplay.8.in8
-rw-r--r--man/lvm-config.8.in1
-rw-r--r--man/lvm-dumpconfig.8.in150
-rw-r--r--man/lvm-lvpoll.8.in89
-rw-r--r--man/lvm.8.in93
-rw-r--r--man/lvm.conf.5.in546
-rw-r--r--man/lvmcache.7.in39
-rw-r--r--man/lvmconf.8.in29
-rw-r--r--man/lvmconfig.8.in211
-rw-r--r--man/lvmdump.8.in9
-rw-r--r--man/lvmetad.8.in63
-rw-r--r--man/lvmlockd.8.in781
-rw-r--r--man/lvmpolld.8.in90
-rw-r--r--man/lvmsystemid.7.in352
-rw-r--r--man/lvmthin.7.in328
-rw-r--r--man/lvremove.8.in3
-rw-r--r--man/lvs.8.in1
-rw-r--r--man/pvchange.8.in2
-rw-r--r--man/pvdisplay.8.in8
-rw-r--r--man/pvscan.8.in142
-rw-r--r--man/vgchange.8.in99
-rw-r--r--man/vgcreate.8.in21
-rw-r--r--man/vgdisplay.8.in8
-rw-r--r--man/vgexport.8.in8
-rw-r--r--man/vgimport.8.in8
-rw-r--r--man/vgremove.8.in3
-rw-r--r--man/vgs.8.in2
-rw-r--r--nix/default.nix440
-rw-r--r--po/Makefile.in2
-rw-r--r--python/.gitignore2
-rw-r--r--python/example.py18
-rw-r--r--python/liblvm.c145
-rw-r--r--scripts/.gitignore15
-rw-r--r--scripts/Makefile.in27
-rw-r--r--scripts/blk_availability_systemd_red_hat.service.in2
-rw-r--r--scripts/blkdeactivate.sh.in24
-rw-r--r--scripts/clvmd_init_red_hat.in13
-rw-r--r--scripts/lvm2_activation_generator_systemd_red_hat.c39
-rw-r--r--scripts/lvm2_cluster_activation_red_hat.sh.in10
-rw-r--r--scripts/lvm2_lvmlockd_systemd_red_hat.service.in16
-rw-r--r--scripts/lvm2_lvmlocking_systemd_red_hat.service.in24
-rw-r--r--scripts/lvm2_lvmpolld_init_red_hat.in114
-rw-r--r--scripts/lvm2_lvmpolld_systemd_red_hat.service.in17
-rw-r--r--scripts/lvm2_lvmpolld_systemd_red_hat.socket.in12
-rw-r--r--scripts/lvm2_monitoring_init_red_hat.in17
-rw-r--r--scripts/lvm2_monitoring_systemd_red_hat.service.in2
-rw-r--r--scripts/lvm2_pvscan_systemd_red_hat@.service.in2
-rw-r--r--scripts/lvmconf.sh177
-rwxr-xr-xscripts/lvmdump.sh24
-rwxr-xr-xscripts/vgimportclone.sh6
-rw-r--r--spec/build.inc14
-rw-r--r--spec/packages.inc109
-rw-r--r--spec/source.inc33
-rw-r--r--test/Makefile.in238
-rw-r--r--test/api/percent.sh8
-rw-r--r--test/api/pytest.sh80
-rwxr-xr-xtest/api/python_lvm_unit.py251
-rw-r--r--test/api/thin_percent.sh2
-rw-r--r--test/lib/aux.sh727
-rw-r--r--test/lib/brick-shelltest.h1292
-rw-r--r--test/lib/check.sh17
-rw-r--r--test/lib/flavour-ndev-cluster-lvmpolld.sh2
-rw-r--r--test/lib/flavour-ndev-cluster.sh1
-rw-r--r--test/lib/flavour-ndev-lvmetad-lvmpolld.sh3
-rw-r--r--test/lib/flavour-ndev-lvmetad.sh2
-rw-r--r--test/lib/flavour-ndev-lvmpolld.sh2
-rw-r--r--test/lib/flavour-ndev-vanilla.sh1
-rw-r--r--test/lib/flavour-udev-cluster-lvmpolld.sh3
-rw-r--r--test/lib/flavour-udev-cluster.sh2
-rw-r--r--test/lib/flavour-udev-lvmetad-lvmpolld.sh4
-rw-r--r--test/lib/flavour-udev-lvmetad.sh3
-rw-r--r--test/lib/flavour-udev-lvmlockd-dlm.sh6
-rw-r--r--test/lib/flavour-udev-lvmlockd-sanlock.sh6
-rw-r--r--test/lib/flavour-udev-lvmpolld.sh3
-rw-r--r--test/lib/flavour-udev-vanilla.sh2
-rw-r--r--test/lib/get.sh4
-rw-r--r--test/lib/inittest.sh68
-rw-r--r--test/lib/lvm-wrapper.sh34
-rw-r--r--test/lib/not.c35
-rw-r--r--test/lib/runner.cpp40
-rw-r--r--test/lib/test-corosync-conf19
-rw-r--r--test/lib/test-dlm-conf4
-rw-r--r--test/lib/test-sanlock-conf2
-rw-r--r--test/lib/utils.sh99
-rw-r--r--test/shell/000-basic.sh4
-rw-r--r--test/shell/activate-minor.sh1
-rw-r--r--test/shell/activate-missing-segment.sh2
-rw-r--r--test/shell/activate-missing.sh2
-rw-r--r--test/shell/activate-partial.sh2
-rw-r--r--test/shell/activation-skip.sh2
-rw-r--r--test/shell/clvmd-restart.sh36
-rw-r--r--test/shell/covercmd.sh2
-rw-r--r--test/shell/discards-thin.sh4
-rw-r--r--test/shell/dlm-hello-world.sh27
-rw-r--r--test/shell/dlm-prepare.sh90
-rw-r--r--test/shell/dlm-remove.sh20
-rw-r--r--test/shell/dmeventd-restart.sh9
-rw-r--r--test/shell/dumpconfig.sh2
-rw-r--r--test/shell/error-usage.sh2
-rw-r--r--test/shell/fsadm.sh2
-rw-r--r--test/shell/inconsistent-metadata.sh2
-rw-r--r--test/shell/listings.sh2
-rw-r--r--test/shell/lock-blocking.sh8
-rw-r--r--test/shell/lock-parallel.sh12
-rw-r--r--test/shell/lvchange-cache.sh13
-rw-r--r--test/shell/lvchange-mirror.sh2
-rw-r--r--test/shell/lvchange-partial-raid10.sh2
-rw-r--r--test/shell/lvchange-partial.sh1
-rw-r--r--test/shell/lvchange-raid.sh2
-rw-r--r--test/shell/lvchange-raid10.sh2
-rw-r--r--test/shell/lvchange-raid456.sh2
-rw-r--r--test/shell/lvchange-syncaction-raid.sh33
-rw-r--r--test/shell/lvchange-thin.sh9
-rw-r--r--test/shell/lvconvert-cache-raid.sh4
-rw-r--r--test/shell/lvconvert-cache-smq.sh32
-rw-r--r--test/shell/lvconvert-cache-thin.sh2
-rw-r--r--test/shell/lvconvert-cache.sh2
-rw-r--r--test/shell/lvconvert-mirror-basic.sh4
-rw-r--r--test/shell/lvconvert-mirror-updown.sh1
-rw-r--r--test/shell/lvconvert-mirror.sh13
-rw-r--r--test/shell/lvconvert-raid-allocation.sh2
-rw-r--r--test/shell/lvconvert-raid.sh11
-rw-r--r--test/shell/lvconvert-raid10.sh2
-rw-r--r--test/shell/lvconvert-raid456.sh2
-rw-r--r--test/shell/lvconvert-repair-dmeventd.sh2
-rw-r--r--test/shell/lvconvert-repair-policy.sh4
-rw-r--r--test/shell/lvconvert-repair-raid-dmeventd.sh2
-rw-r--r--test/shell/lvconvert-repair-raid.sh6
-rw-r--r--test/shell/lvconvert-repair-replace.sh6
-rw-r--r--test/shell/lvconvert-repair-snapshot.sh7
-rw-r--r--test/shell/lvconvert-repair-thin.sh11
-rw-r--r--test/shell/lvconvert-repair-transient-dmeventd.sh4
-rw-r--r--test/shell/lvconvert-repair-transient.sh5
-rw-r--r--test/shell/lvconvert-snapshot.sh2
-rw-r--r--test/shell/lvconvert-striped-raid0.sh75
-rw-r--r--test/shell/lvconvert-thin-external.sh6
-rw-r--r--test/shell/lvconvert-thin-raid.sh30
-rw-r--r--test/shell/lvconvert-thin.sh14
-rw-r--r--test/shell/lvcreate-cache.sh5
-rw-r--r--test/shell/lvcreate-large-raid.sh2
-rw-r--r--test/shell/lvcreate-large-raid10.sh2
-rw-r--r--test/shell/lvcreate-large.sh2
-rw-r--r--test/shell/lvcreate-mirror.sh7
-rw-r--r--test/shell/lvcreate-missing.sh2
-rw-r--r--test/shell/lvcreate-operation.sh2
-rw-r--r--test/shell/lvcreate-pvtags.sh6
-rw-r--r--test/shell/lvcreate-raid.sh57
-rw-r--r--test/shell/lvcreate-raid10.sh10
-rw-r--r--test/shell/lvcreate-repair.sh2
-rw-r--r--test/shell/lvcreate-signature-wiping.sh2
-rw-r--r--test/shell/lvcreate-small-snap.sh2
-rw-r--r--test/shell/lvcreate-striped-mirror.sh2
-rw-r--r--test/shell/lvcreate-thin-big.sh67
-rw-r--r--test/shell/lvcreate-thin-external-size.sh92
-rw-r--r--test/shell/lvcreate-thin-external.sh8
-rw-r--r--test/shell/lvcreate-thin-power2.sh4
-rw-r--r--test/shell/lvcreate-thin-snap.sh4
-rw-r--r--test/shell/lvcreate-thin.sh50
-rw-r--r--test/shell/lvcreate-usage.sh6
-rw-r--r--test/shell/lvextend-percent-extents.sh2
-rw-r--r--test/shell/lvextend-snapshot-dmeventd.sh13
-rw-r--r--test/shell/lvextend-snapshot-policy.sh2
-rw-r--r--test/shell/lvextend-thin-metadata-dmeventd.sh15
-rw-r--r--test/shell/lvextend-thin.sh4
-rw-r--r--test/shell/lvm-init.sh2
-rw-r--r--test/shell/lvmcache-exercise.sh2
-rw-r--r--test/shell/lvmetad-ambiguous.sh1
-rw-r--r--test/shell/lvmetad-client-filter.sh2
-rw-r--r--test/shell/lvmetad-disabled.sh2
-rw-r--r--test/shell/lvmetad-dump.sh2
-rw-r--r--test/shell/lvmetad-lvm1.sh2
-rw-r--r--test/shell/lvmetad-lvscan-cache.sh3
-rw-r--r--test/shell/lvmetad-no-cluster.sh2
-rw-r--r--test/shell/lvmetad-override.sh2
-rw-r--r--test/shell/lvmetad-pvs.sh2
-rw-r--r--test/shell/lvmetad-pvscan-cache.sh14
-rw-r--r--test/shell/lvmetad-pvscan-filter.sh5
-rw-r--r--test/shell/lvmetad-pvscan-md.sh64
-rw-r--r--test/shell/lvmetad-pvscan-nomda-bg.sh57
-rw-r--r--test/shell/lvmetad-pvscan-nomda.sh8
-rw-r--r--test/shell/lvmetad-restart.sh2
-rw-r--r--test/shell/lvmetad-test.sh2
-rw-r--r--test/shell/lvmetad-warning.sh2
-rw-r--r--test/shell/lvresize-mirror.sh2
-rw-r--r--test/shell/lvresize-raid.sh2
-rw-r--r--test/shell/lvresize-raid10.sh2
-rw-r--r--test/shell/lvresize-rounding.sh6
-rw-r--r--test/shell/lvresize-thin-external-origin.sh15
-rw-r--r--test/shell/lvresize-thin-metadata.sh4
-rw-r--r--test/shell/lvresize-usage.sh2
-rw-r--r--test/shell/lvs-cache.sh2
-rw-r--r--test/shell/mda-rollback.sh2
-rw-r--r--test/shell/mdata-strings.sh2
-rw-r--r--test/shell/metadata-balance.sh2
-rw-r--r--test/shell/metadata-dirs.sh2
-rw-r--r--test/shell/metadata.sh5
-rw-r--r--test/shell/mirror-names.sh20
-rw-r--r--test/shell/mirror-vgreduce-removemissing.sh5
-rw-r--r--test/shell/name-mangling.sh1
-rw-r--r--test/shell/nomda-missing.sh2
-rw-r--r--test/shell/nomda-restoremissing.sh2
-rw-r--r--test/shell/orphan-ondisk.sh2
-rw-r--r--test/shell/pool-labels.sh1
-rw-r--r--test/shell/process-each-duplicate-pvs.sh135
-rw-r--r--test/shell/process-each-duplicate-vgnames.sh15
-rw-r--r--test/shell/process-each-lv.sh15
-rw-r--r--test/shell/process-each-pv-nomda-all.sh3
-rw-r--r--test/shell/process-each-pv-nomda.sh2
-rw-r--r--test/shell/process-each-pv.sh240
-rw-r--r--test/shell/process-each-pvresize.sh2
-rw-r--r--test/shell/process-each-vg.sh70
-rw-r--r--test/shell/process-each-vgreduce.sh327
-rw-r--r--test/shell/profiles-thin.sh20
-rw-r--r--test/shell/profiles.sh2
-rw-r--r--test/shell/pv-duplicate-uuid.sh4
-rw-r--r--test/shell/pv-duplicate.sh16
-rw-r--r--test/shell/pv-min-size.sh2
-rw-r--r--test/shell/pv-range-overflow.sh2
-rw-r--r--test/shell/pvchange-usage.sh2
-rw-r--r--test/shell/pvcreate-bootloaderarea.sh5
-rw-r--r--test/shell/pvcreate-ff.sh2
-rw-r--r--test/shell/pvcreate-metadata0.sh2
-rw-r--r--test/shell/pvcreate-operation-md.sh143
-rw-r--r--test/shell/pvcreate-operation.sh2
-rw-r--r--test/shell/pvcreate-restore.sh37
-rw-r--r--test/shell/pvcreate-usage.sh14
-rw-r--r--test/shell/pvmove-abort-all.sh81
-rw-r--r--test/shell/pvmove-abort.sh68
-rw-r--r--test/shell/pvmove-all-segtypes.sh2
-rw-r--r--test/shell/pvmove-background.sh4
-rw-r--r--test/shell/pvmove-basic.sh11
-rw-r--r--test/shell/pvmove-restart.sh61
-rw-r--r--test/shell/pvmove-resume-1.sh259
-rw-r--r--test/shell/pvmove-resume-2.sh202
-rw-r--r--test/shell/pvmove-resume-multiseg.sh232
-rw-r--r--test/shell/pvremove-thin.sh2
-rw-r--r--test/shell/pvremove-usage.sh2
-rw-r--r--test/shell/pvremove-warnings.sh7
-rw-r--r--test/shell/read-ahead.sh2
-rw-r--r--test/shell/sanlock-hello-world.sh27
-rw-r--r--test/shell/sanlock-prepare.sh86
-rw-r--r--test/shell/sanlock-remove.sh28
-rw-r--r--test/shell/select-report.sh (renamed from test/shell/report-select.sh)170
-rw-r--r--test/shell/select-tools-thin.sh41
-rw-r--r--test/shell/select-tools.sh278
-rw-r--r--test/shell/snapshot-autoumount-dmeventd.sh19
-rw-r--r--test/shell/snapshot-cluster.sh2
-rw-r--r--test/shell/snapshot-lvm1.sh2
-rw-r--r--test/shell/snapshot-maxsize.sh4
-rw-r--r--test/shell/snapshot-reactivate.sh2
-rw-r--r--test/shell/snapshot-rename.sh2
-rw-r--r--test/shell/snapshot-usage-exa.sh41
-rw-r--r--test/shell/snapshot-usage.sh28
-rw-r--r--test/shell/stray-device-node.sh2
-rw-r--r--test/shell/system_id.sh886
-rw-r--r--test/shell/tags.sh2
-rw-r--r--test/shell/test-partition.sh7
-rw-r--r--test/shell/thin-autoumount-dmeventd.sh4
-rw-r--r--test/shell/thin-defaults.sh4
-rw-r--r--test/shell/thin-merge.sh2
-rw-r--r--test/shell/thin-overprovisioning.sh71
-rw-r--r--test/shell/thin-restore.sh4
-rw-r--r--test/shell/thin-vglock.sh2
-rw-r--r--test/shell/thin-volume-list.sh4
-rw-r--r--test/shell/topology-support.sh55
-rw-r--r--test/shell/unknown-segment.sh2
-rw-r--r--test/shell/unlost-pv.sh12
-rw-r--r--test/shell/vg-name-from-env.sh2
-rw-r--r--test/shell/vgcfgbackup-usage.sh2
-rw-r--r--test/shell/vgchange-many.sh4
-rw-r--r--test/shell/vgchange-maxlv.sh2
-rw-r--r--test/shell/vgchange-partial.sh2
-rw-r--r--test/shell/vgchange-sysinit.sh1
-rw-r--r--test/shell/vgchange-usage.sh8
-rw-r--r--test/shell/vgck.sh2
-rw-r--r--test/shell/vgcreate-many-pvs.sh64
-rw-r--r--test/shell/vgcreate-usage.sh2
-rw-r--r--test/shell/vgextend-restoremissing.sh4
-rw-r--r--test/shell/vgextend-usage.sh2
-rw-r--r--test/shell/vgimportclone.sh2
-rw-r--r--test/shell/vgmerge-operation.sh2
-rw-r--r--test/shell/vgmerge-usage.sh2
-rw-r--r--test/shell/vgreduce-usage.sh2
-rw-r--r--test/shell/vgremove-corrupt-vg.sh2
-rw-r--r--test/shell/vgrename-usage.sh2
-rw-r--r--test/shell/vgsplit-operation.sh17
-rw-r--r--test/shell/vgsplit-raid.sh2
-rw-r--r--test/shell/vgsplit-stacked.sh2
-rw-r--r--test/shell/vgsplit-thin.sh4
-rw-r--r--test/shell/vgsplit-usage.sh2
-rw-r--r--test/shell/zero-usage.sh2
-rw-r--r--tools/.gitignore1
-rw-r--r--tools/Makefile.in13
-rw-r--r--tools/args.h20
-rw-r--r--tools/commands.h238
-rw-r--r--tools/dmsetup.c2654
-rw-r--r--tools/dumpconfig.c110
-rw-r--r--tools/lvchange.c248
-rw-r--r--tools/lvconvert.c955
-rw-r--r--tools/lvconvert_poll.c185
-rw-r--r--tools/lvconvert_poll.h51
-rw-r--r--tools/lvcreate.c198
-rw-r--r--tools/lvdisplay.c5
-rw-r--r--tools/lvm-static.c1
-rw-r--r--tools/lvm.c1
-rw-r--r--tools/lvm2cmdline.h2
-rw-r--r--tools/lvmcmdlib.c3
-rw-r--r--tools/lvmcmdline.c248
-rw-r--r--tools/lvpoll.c114
-rw-r--r--tools/lvremove.c5
-rw-r--r--tools/lvrename.c6
-rw-r--r--tools/lvresize.c6
-rw-r--r--tools/lvscan.c4
-rw-r--r--tools/polldaemon.c524
-rw-r--r--tools/pvchange.c204
-rw-r--r--tools/pvcreate.c6
-rw-r--r--tools/pvdisplay.c6
-rw-r--r--tools/pvmove.c456
-rw-r--r--tools/pvmove_poll.c196
-rw-r--r--tools/pvmove_poll.h30
-rw-r--r--tools/pvremove.c19
-rw-r--r--tools/pvresize.c32
-rw-r--r--tools/pvscan.c37
-rw-r--r--tools/reporter.c498
-rw-r--r--tools/tool.h31
-rw-r--r--tools/toollib.c1212
-rw-r--r--tools/toollib.h108
-rw-r--r--tools/tools.h27
-rw-r--r--tools/vgcfgbackup.c15
-rw-r--r--tools/vgchange.c611
-rw-r--r--tools/vgck.c2
-rw-r--r--tools/vgconvert.c14
-rw-r--r--tools/vgcreate.c55
-rw-r--r--tools/vgdisplay.c6
-rw-r--r--tools/vgexport.c27
-rw-r--r--tools/vgextend.c189
-rw-r--r--tools/vgimport.c24
-rw-r--r--tools/vgmerge.c13
-rw-r--r--tools/vgmknodes.c2
-rw-r--r--tools/vgreduce.c16
-rw-r--r--tools/vgremove.c42
-rw-r--r--tools/vgrename.c24
-rw-r--r--tools/vgscan.c4
-rw-r--r--tools/vgsplit.c26
-rw-r--r--udev/.gitignore5
-rw-r--r--udev/Makefile.in2
583 files changed, 64336 insertions, 11922 deletions
diff --git a/.gitignore b/.gitignore
index 52663d4d1..19181c966 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,13 +1,16 @@
*.5
+*.7
*.8
*.a
*.d
*.o
+*.orig
*.pc
*.pot
+*.rej
*.so
*.so.*
-*.swp
+*.sw*
*~
.export.sym
@@ -17,11 +20,11 @@
Makefile
make.tmpl
-configure.h
-version.h
-
/autom4te.cache/
+/autoscan.log
/config.log
/config.status
+/configure.scan
/cscope.out
+/tags
/tmp/
diff --git a/Makefile.in b/Makefile.in
index 641b83364..35e5d8f47 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -1,6 +1,6 @@
#
# Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
-# Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved.
+# Copyright (C) 2004-2015 Red Hat, Inc. All rights reserved.
#
# This file is part of LVM2.
#
@@ -15,6 +15,8 @@
srcdir = @srcdir@
top_srcdir = @top_srcdir@
top_builddir = @top_builddir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
SUBDIRS = conf daemons include lib libdaemon libdm man scripts tools
@@ -91,10 +93,44 @@ cscope.out:
all: cscope.out
endif
DISTCLEAN_TARGETS += cscope.out
+CLEAN_DIRS += autom4te.cache
-check check_system check_cluster check_local check_lvmetad unit: all
+check check_system check_cluster check_local check_lvmetad check_lvmpolld unit: all
$(MAKE) -C test $(@)
+conf.generate: tools
+
+# how to use parenthesis in makefiles
+leftparen:=(
+LVM_VER := $(firstword $(subst $(leftparen), ,$(LVM_VERSION)))
+VER := LVM2.$(LVM_VER)
+# release file name
+FILE_VER := $(VER).tgz
+CLEAN_TARGETS += $(FILE_VER)
+CLEAN_DIRS += $(rpmbuilddir)
+
+dist:
+ @echo "Generating $(FILE_VER)";\
+ (cd $(top_srcdir); git ls-tree -r HEAD --name-only | xargs tar --transform "s,^,$(VER)/," -c) | gzip >$(FILE_VER)
+
+rpm: dist
+ $(RM) -r $(rpmbuilddir)/SOURCES
+ $(MKDIR_P) $(rpmbuilddir)/SOURCES
+ $(LN_S) -f $(abs_top_builddir)/$(FILE_VER) $(rpmbuilddir)/SOURCES
+ $(LN_S) -f $(abs_top_srcdir)/spec/build.inc $(rpmbuilddir)/SOURCES
+ $(LN_S) -f $(abs_top_srcdir)/spec/macros.inc $(rpmbuilddir)/SOURCES
+ $(LN_S) -f $(abs_top_srcdir)/spec/packages.inc $(rpmbuilddir)/SOURCES
+ DM_VER=$$(cut -d- -f1 $(top_srcdir)/VERSION_DM);\
+ GIT_VER=$$(cd $(top_srcdir); git describe | cut -d- --output-delimiter=. -f2,3 || echo 0);\
+ sed -e "s,\(device_mapper_version\) [0-9.]*$$,\1 $$DM_VER," \
+ -e "s,^\(Version:[^0-9%]*\)[0-9.]*$$,\1 $(LVM_VER)," \
+ -e "s,^\(Release:[^0-9%]*\)[0-9.]\+,\1 $$GIT_VER," \
+ $(top_srcdir)/spec/source.inc >$(rpmbuilddir)/SOURCES/source.inc
+ rpmbuild -v --define "_topdir $(rpmbuilddir)" -ba $(top_srcdir)/spec/lvm2.spec
+
+generate: conf.generate
+ $(MAKE) -C conf generate
+
install_system_dirs:
$(INSTALL_DIR) $(DESTDIR)$(DEFAULT_SYS_DIR)
$(INSTALL_ROOT_DIR) $(DESTDIR)$(DEFAULT_ARCHIVE_DIR)
@@ -122,8 +158,11 @@ endif
install_tmpfiles_configuration:
$(MAKE) -C scripts install_tmpfiles_configuration
-LCOV_TRACES = libdm.info lib.info tools.info \
- daemons/dmeventd.info daemons/clvmd.info
+LCOV_TRACES = libdm.info lib.info liblvm.info tools.info \
+ libdaemon/client.info libdaemon/server.info \
+ daemons/clvmd.info daemons/dmeventd.info \
+ daemons/lvmetad.info
+
CLEAN_TARGETS += $(LCOV_TRACES)
ifneq ("$(LCOV)", "")
@@ -152,7 +191,7 @@ lcov: $(LCOV_TRACES)
$(RM) -r $(LCOV_REPORTS_DIR)
$(MKDIR_P) $(LCOV_REPORTS_DIR)
for i in $(LCOV_TRACES); do \
- test -s $$i && lc="$$lc $$i"; \
+ test -s $$i -a $$(wc -w <$$i) -ge 100 && lc="$$lc $$i"; \
done; \
test -z "$$lc" || $(GENHTML) -p @abs_top_builddir@ \
-o $(LCOV_REPORTS_DIR) $$lc
@@ -184,3 +223,13 @@ memcheck: test-programs
ruby-test:
$(RUBY) report-generators/test/ts.rb
endif
+
+ifneq ($(shell which ctags),)
+.PHONY: tags
+all: tags
+tags:
+ test -z "$(shell find $(top_srcdir) -type f -name '*.[ch]' -newer tags | head -1)" || $(RM) tags
+ test -f tags || find $(top_srcdir) -maxdepth 4 -type f -name '*.[ch]' -exec ctags -a '{}' +
+
+DISTCLEAN_TARGETS += tags
+endif
diff --git a/README b/README
index 6a7e11c3e..a0e3c9e39 100644
--- a/README
+++ b/README
@@ -18,7 +18,7 @@ Mailing list for general discussion related to LVM2:
Mailing lists for LVM2 development, patches and commits:
lvm-devel@redhat.com
- Subscribe from https://www.redhat.com/mailman/listinfo/linux-lvm
+ Subscribe from https://www.redhat.com/mailman/listinfo/lvm-devel
lvm2-commits@lists.fedorahosted.org (Read-only archive of commits)
Subscribe from https://fedorahosted.org/mailman/listinfo/lvm2-commits
diff --git a/VERSION b/VERSION
index 539cce1ca..256a2998d 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2.02.115(2)-git (2014-11-28)
+2.02.129(2)-git (2015-08-17)
diff --git a/VERSION_DM b/VERSION_DM
index e38dae6a8..84f6bdb5e 100644
--- a/VERSION_DM
+++ b/VERSION_DM
@@ -1 +1 @@
-1.02.93-git (2014-11-28)
+1.02.106-git (2015-08-17)
diff --git a/WHATS_NEW b/WHATS_NEW
index cc11a9a30..4eb852cdd 100644
--- a/WHATS_NEW
+++ b/WHATS_NEW
@@ -1,5 +1,263 @@
-Version 2.02.115 -
-=====================================
+Version 2.02.129 -
+===================================
+ Enable usage of --cachepolicy and --cachesetting with lvconvert.
+ Don't allow to reduce size of thin-pool metadata.
+ Fix debug buffer overflows in cmirrord logging.
+ Add --foreground and --help to cmirrord.
+
+Version 2.02.128 - 17th August 2015
+===================================
+ Allocation setting cache_pool_cachemode is replaced by cache_mode.
+ Don't attempt to close config file that couldn't be opened.
+ Check for valid cache mode in validation of cache segment.
+ Change internal interface handling cache mode and policy.
+ When no cache policy specified, prefer smq (if available) over mq.
+ Add demo cache-mq and cache-smq profiles.
+ Add cmd profilable allocation/cache_policy,cache_settings,cache_mode.
+ Require cache_check 0.5.4 for use of --clear-needs-check-flag.
+ Fix lvmetad udev rules to not override SYSTEMD_WANTS, add the service instead.
+
+Version 2.02.127 - 10th August 2015
+===================================
+ Do not init filters, locking, lvmetad, lvmpolld if command doesn't use it.
+ Order fields in struct cmd_context more logically.
+ Add lock_type to lvmcache VG summary and info structs.
+ Fix regression in cache causing some PVs to bypass filters (2.02.105).
+ Make configure --enable-realtime the default now.
+ Update .gitignore and configure.in files to reflect usage of current tree.
+
+Version 2.02.126 - 24th July 2015
+=================================
+ Fix long option hyphen removal. (2.02.122)
+ Fix clvmd freeze if client disappears without first releasing its locks.
+ Fix lvconvert segfaults while performing snapshots merge.
+ Ignore errors during detection if use_blkid_wiping=1 and --force is used.
+ Recognise DM_ABORT_ON_INTERNAL_ERRORS env var override in lvm logging fn.
+ Fix alloc segfault when extending LV with fewer stripes than in first seg.
+ Fix handling of cache policy name.
+ Set cache policy before with the first lvm2 cache pool metadata commit.
+ Fix detection of thin-pool overprovisioning (2.02.124).
+ Fix lvmpolld segfaults on 32 bit architectures.
+ Add lvmlockd lock_args validation to vg_validate.
+ Fix ignored --startstopservices option if running lvmconf with systemd.
+ Hide sanlock LVs when processing LVs in VG unless named or --all used.
+
+Version 2.02.125 - 7th July 2015
+================================
+ Fix getline memory usage in lvmpolld.
+ Add support --clear-needs-check-flag for cache_check of cache pool metadata.
+ Add lvmetactl for developer use only.
+ Rename global/lock_retries to lvmlockd_retries.
+ Replace --enable-lvmlockd by --enable-lockd-sanlock and --enable-lockd-dlm.
+
+Version 2.02.124 - 3rd July 2015
+================================
+ Move sending thin pool messages from resume to suspend phase.
+ Report warning when pool is overprovisioned and not auto resized.
+ Recognize free-form date/time values for lv_time field in selection criteria.
+ Added experimental lvmlockd with configure --enable-lvmlockd.
+ Fix regression in select to match string fields if using synonyms (2.02.123).
+ Fix regression when printing more lv names via display_lvname (2.02.122).
+ Add missing error logging to unlock_vg and sync_local_dev_names callers.
+
+Version 2.02.123 - 30th June 2015
+=================================
+ Add report/time_format lvm.conf option to define time format for report.
+ Fix makefile shell compare == when building lvmetad lvmpolld (2.02.120).
+ Add --type full to lvmconfig for full configuration tree view.
+ Add undocumented environment variables to lvm man page. (2.02.119)
+ Add device synchronization point before activating a new snapshot.
+ Add --withspaces to lvmconfig to add spaces in output for better readability.
+ Add custom main function to libdaemon.
+ Use lvmetad to track out-of-date metadata discovered.
+
+Version 2.02.122 - 20th June 2015
+=================================
+ Flush stdout before printing to stderr.
+ Use pre-allocated buffer for printed LV names in display_lvname.
+ Support thins with size of external origin unaligned with thin pool chunk.
+ Allow extension of reduced thin volumes with external origins.
+ Consider snapshot and origin LV as unusable if component devices suspended.
+ Fix lvmconfig segfault on settings with undefined default value (2.02.120).
+ Add explicit 's' (shared) LV activation mode.
+ Ignore hyphens in long options names (i.e. --long-option == --longoption).
+
+Version 2.02.121 - 12th June 2015
+=================================
+ Distinguish between on-disk and lvmetad versions of text metadata.
+ Remove DL_LIBS from Makefiles for daemons that don't need them.
+ Zero errno in before strtoul call in dmsetup if tested after the call.
+ Zero errno in before strtoul call in lvmpolld.
+ Fix a segfault in pvscan --cache --background command.
+ Fix test for AREA_PV when checking for failed mirrors.
+ Do not use --sysinit in lvm2-activation{-early,-net}.service if lvmpolld used.
+ Maintain outdated PV info in lvmetad till all old metadata is gone from disk.
+ Do not fail polling when poll LV not found (already finished or removed).
+ Replace poll_get_copy_vg/lv fns with vg_read() and find_lv() in polldaemon.
+ Close all device fds only in before sleep call in polldaemon.
+ Simplify Makefile targets that generate exported symbols.
+ Move various -D settings from Makefiles to configure.h.
+
+Version 2.02.120 - 15th May 2015
+================================
+ Make various adjustments to Makefile compilation flags.
+ Add lvmpolld debug message class.
+ Add lvmpolld client mode for querying running server instance for status info.
+ Fix some libdaemon socket creation and reuse error paths.
+ Daemons (libdaemon) support exit on idle also in non-systemd environment.
+ Provide make dist and make rpm targets
+ Configure lvm.conf for use_lvmetad and use_lvmpolld.
+ Add lvpoll for cmdline communication with lvmpolld.
+ Add lvmpolld acting as a free-standing version of polldaemon.
+ Avoid repeated identical lvmetad VG lookups in commands processing all VGs.
+ Handle switches to alternative duplicate PVs efficiently with lvmetad.
+ Properly validate PV size for pvcreate --restorefile.
+ Fix check if pvcreate wiped device (2.02.117).
+ Fix storing of vgid when caching metadata (2.02.118).
+ Fix recursive lvm-config man page. (2.02.119)
+ Refactor polldaemon interfaces to poll every operation by VG/LV couple.
+ Skip wait after testing in _wait_for_single_lv when polling finished.
+ Return 'None' in python for empty string properties instead of crashing.
+ Distinguish signed numerical property type in reports for lvm2app library.
+ Reread raid completion status immediately when progress appears to be zero.
+ lvm2app closes locking on lvm_quit().
+ Configure detects /run or /var/run.
+ Add missing newline in clvmd --help output.
+
+Version 2.02.119 - 2nd May 2015
+===============================
+ New LVM_LOG_FILE_EPOCH, LVM_EXPECTED_EXIT_STATUS env vars. Man page to follow.
+ Remove detailed content from lvm.conf man page: use lvmconfig instead.
+ Generate complete config files with lvmconfig or 'make generate'.
+ Also display info on deprecated config with lvmconfig --withcomments.
+ Display version since which config is deprecated in lvmconfig --withversions.
+ Add --showdeprecated to lvmconfig to also display deprecated settings.
+ Hide deprecated settings in lvmconfig output for all types but current,diff.
+ Introduce support for exit on idle feature in libdaemon
+ Add --showunsupported to lvmconfig to also display unsupported settings.
+ Display unsupported settings for lvmconfig --type current,diff only by default
+ Honour lvmconfig --ignoreunsupported and --ignoreadvanced for all --type.
+ Make python bindings usable with python3 (and compatible with 2.6 & 2.7).
+ Add lvmconfig -l|--list as shortcut for lvmconfig --type list --withsummary.
+ Add lvmconfig --type list to display plain list of configuration settings.
+ Introduce lvmconfig as the preferred form of 'lvm dumpconfig'.
+ Add lv_ancestors and lv_descendants reporting fields.
+ Add --ignorelocal option to dumpconfig to ignore the local section.
+ Close connection to lvmetad after fork.
+ Make lvchange able to resume background pvmove polling again.
+ Split pvmove update metadata fn in an initial one and a subsequent one.
+ Refactor shared pvmove and lvconvert code into new _poll files.
+ Add --unconfigured option to dumpconfig to print strings unconfigured.
+ Add --withsummary option to dumpconfig to print first line - summary comment.
+ Use number of device holders to help choose between duplicate PVs.
+ Try to make lvmetad and non-lvmetad duplicate PV handling as similar as poss.
+ Issue warnings about duplicate PVs discovered by lvmetad.
+ Track alternative devices with matching PVIDs in lvmetad.
+ Check for lvm binary in blkdeactivate and skip LVM processing if not present.
+ Add --enable-halvm and --disable-halvm options to lvmconf script.
+ Add --services, --mirrorservice and --startstopservices option to lvmconf.
+ Use proper default value of global/use_lvmetad when processing lvmconf script.
+ Respect allocation/cling_tag_list during initial contiguous allocation.
+ Add A_PARTITION_BY_TAGS set when allocated areas should not share tags.
+ Make changes persist with python addTag/removeTag.
+ Set correct vgid when updating cache when writing PV metadata.
+ More efficient clvmd singlenode locking emulation.
+ Reject lvcreate -m with raid4/5/6 to avoid unexpected layout.
+ Don't skip invalidation of cached orphans if vg write lck is held (2.02.118).
+ Log relevant PV tags when using cling allocation.
+ Add str_list_add_list() to combine two lists.
+ Fix LV processing with selection to always do the selection on initial state.
+ Add internal LV_REMOVED LV status flag.
+
+Version 2.02.118 - 23rd March 2015
+==================================
+ Store metadata size + checksum in lvmcache and add struct lvmcache_vgsummary.
+ Remove inaccessible clustered PVs from 'pvs -a'.
+ Don't invalidate cached orphan information while global lock is held.
+ Avoid rescan of all devices when requested pvscan for removed device.
+ Measure configuration timestamps with nanoseconds when available.
+ Disable lvchange of major and minor of pool LVs.
+ Fix pvscan --cache to not scan and read ignored metadata areas on PVs.
+ Add After=iscsi-shutdown.service to blk-availability.service systemd unit.
+ Disallow vgconvert from changing metadata format when lvmetad is used.
+ Don't do a full read of VG when creating a new VG with an existing name.
+ Reduce amount of VG metadata parsing when looking for vgname on a PV.
+ Avoid reparsing same metadata when reading same metadata from multiple PVs.
+ Save extra device open/close when scanning device for size.
+ Fix seg_monitor field to report status also for mirrors and thick snapshots.
+ Replace LVM_WRITE with LVM_WRITE_LOCKED flags in metadata if system ID is set.
+ Remove ACCESS_NEEDS_SYSTEM_ID VG status flag. (2.02.117)
+ Enable system ID features.
+
+Version 2.02.117 - 4th March 2015
+=================================
+ Add CFG_DISABLED for new system ID config settings that must not yet be used.
+ Preserve original format type field when processing backup files.
+ Implement status action for lvm2-monitor initscript to display monitored LVs.
+ Allow lvchange -p to change kernel state only if metadata state differs.
+ Fix incorrect persistent .cache after report with label fields only (2.02.106).
+ Reinstate PV tag recognition for pvs if reporting label fields only (2.02.105).
+ Rescan devices before vgimport with lvmetad so exported VG is seen.
+ Fix hang by adjusting cluster mirror regionsize, avoiding CPG msg limit.
+ Do not crash when --cachepolicy is given without --cachesettings.
+ Add NEEDS_FOREIGN_VGS flag to vgimport so --foreign is always supplied.
+ Add --foreign to the 6 display and reporting tools and vgcfgbackup.
+ Install /etc/lvm/lvmlocal.conf template with local section for systemid.
+ Record creation_host_system_id in lvm2 metadata (never set yet).
+ Reinstate recursive config file tag section processing. (2.02.99)
+ Add 'lvm systemid' to display the current system ID (never set yet).
+ Fix configure to properly recognize --with-default-raid10-segtype option.
+ Do not refresh filters/rescan if no signature is wiped during pvcreate.
+ Enforce none external dev info for wiping during pvcreate to avoid races.
+ Add global/system_id_source and system_id_file to lvm.conf (disabled).
+ Add support for VG system_id to control host access to VGs.
+ Update vgextend to use process_each_vg.
+ Add --ignoreskippedcluster to pvchange.
+ Allow pvchange to modify several properties at once.
+ Update pvchange to use process_each_pv.
+ Fix pvs -a used with lvmetad to filter out devices unsuitable for PVs.
+ Fix selection to recognize units for ba_start, vg_free and seg_start fields.
+ Add support for -S/--select to vgexport and vgimport.
+ Add support for -S/--select to vgdisplay, lvdisplay and pvdisplay without -C.
+ Add support for -S/--select to vgremove and lvremove.
+ Add support for -S/--select to vgchange,lvchange and pvchange.
+ Add infrastructure to support selection for non-reporting tools.
+ Add LVM_COMMAND_PROFILE env var to set default command profile name to use.
+ Set CLOEXEC flag on file descriptors originating in libdaemon.
+
+Version 2.02.116 - 30th January 2015
+====================================
+ Deactivate unused thin pools activated with lvm2 pre-2.02.112 versions.
+ Check lock holding LV when lvconverting stacked raid LV in cluster.
+ Support udev external dev info for filters: PV min size, mpath, md, partition.
+ Add fw_raid_component_detection lvm.conf option to enable FW raid detection.
+ Add devices/external_device_info_source lvm.conf option ("none" by default).
+ Scan pools in for_each_sub_lv() and add for_each_sub_lv_except_pools().
+ Fix lvm2app lvm_lv_get_property return value for fields with info/status ioctl.
+ Fix lvm2app regression in lvm_lv_get_attr causing unknown values (2.02.115).
+ Set default cache_mode to writethrough when missing in metadata.
+ Preserve chunk size with repair and metadata swap of a thin pool.
+ Fix raid --splitmirror 1 functionality (2.02.112).
+ Fix tree preload to handle splitting raid images.
+ Do not support unpartitioned DASD devices.
+ Improve config validation to check if setting with string value can be empty.
+
+Version 2.02.115 - 21st January 2015
+====================================
+ Report segment types without monitoring support as undefined.
+ Support lvchange --errorwhenfull for thin pools.
+ Improve the processing and reporting of duplicate PVs.
+ Report lv_health_status and health attribute also for thin pool.
+ Add lv_when_full reporting field.
+ Add support for lvcreate --errorwhenfull y|n for thin pools.
+ Fix lvconvert --repair to honour resilience requirement for segmented RAID LV.
+ Filter out partitioned device-mapper devices as unsuitable for use as PVs.
+ Also notify lvmetad about filtered device if using pvscan --cache DevicePath.
+ Use LVM's own selection instead of awk expressions in clvmd startup scripts.
+ Do not filter out snapshot origin LVs as unusable devices for an LVM stack.
+ Fix incorrect rimage names when converting from mirror to raid1 LV (2.02.112).
+ Introduce pvremove_many to avoid excessive metadata re-reading and messages.
+ Check for cmirror availability during cluster mirror creation and activation.
Add cache_policy and cache_settings reporting fields.
Add missing recognition for --binary option with {pv,vg,lv}display -C.
Fix vgimportclone to notify lvmetad about changes done if lvmetad is used.
diff --git a/WHATS_NEW_DM b/WHATS_NEW_DM
index 02b80454f..363f7bf7e 100644
--- a/WHATS_NEW_DM
+++ b/WHATS_NEW_DM
@@ -1,5 +1,113 @@
-Version 1.02.93 -
-====================================
+Version 1.02.106 -
+===================================
+
+Version 1.02.105 - 17th August 2015
+===================================
+ Fix 'dmstats list -o all' segfault.
+ Separate dmstats statistics fields from region information fields.
+ Add interval and interval_ns fields to dmstats reports.
+ Do not include internal glibc headers in libdm-timestamp.c (1.02.104)
+ Exit immediately if no device is supplied to dmsetup wipe_table.
+ Suppress dmsetup report headings when no data is output. (1.02.104)
+ Adjust dmsetup usage/help output selection to match command invoked.
+ Fix dmsetup -o all to select correct fields in splitname report.
+ Restructure internal dmsetup argument handling across all commands.
+ Add dm_report_is_empty() to indicate there is no data awaiting output.
+ Add more arg validation for dm_tree_node_add_cache_target().
+ Add --alldevices switch to replace use of --force for stats create / delete.
+
+Version 1.02.104 - 10th August 2015
+===================================
+ Add dmstats.8 man page
+ Add dmstats --segments switch to create one region per device segment.
+ Add dmstats --regionid, --allregions to specify a single / all stats regions.
+ Add dmstats --allprograms for stats commands that filter by program ID.
+ Add dmstats --auxdata and --programid args to specify aux data and program ID.
+ Add report stats sub-command to provide repeating stats reports.
+ Add clear, delete, list, and print stats sub-commands.
+ Add create stats sub-command and --start, --length, --areas and --areasize.
+ Recognize 'dmstats' as an alias for 'dmsetup stats' when run with this name.
+ Add a 'stats' command to dmsetup to configure, manage and report stats data.
+ Add statistics fields to dmsetup -o.
+ Add libdm-stats library to allow management of device-mapper statistics.
+ Add --nosuffix to suppress dmsetup unit suffixes in report output.
+ Add --units to control dmsetup report field output units.
+ Add support to redisplay column headings for repeating column reports.
+ Fix report header and row resource leaks.
+ Report timestamps of ioctls with dmsetup -vvv.
+ Recognize report field name variants without any underscores too.
+ Add dmsetup --interval and --count to repeat reports at specified intervals.
+ Add dm_timestamp functions to libdevmapper.
+ Recognise vg/lv name format in dmsetup.
+ Move size display code to libdevmapper as dm_size_to_string.
+
+Version 1.02.103 - 24th July 2015
+=================================
+ Introduce libdevmapper wrappers for all malloc-related functions.
+
+Version 1.02.102 - 7th July 2015
+================================
+ Include tool.h for default non-library use.
+ Introduce format macros with embedded % such as FMTu64.
+
+Version 1.02.101 - 3rd July 2015
+================================
+ Add experimental support to passing messages in suspend tree.
+ Add dm_report_value_cache_{set,get} to support caching during report/select.
+ Add dm_report_reserved_handler to handle report reserved value actions.
+ Support dynamic value in select: DM_REPORT_FIELD_RESERVED_VALUE_DYNAMIC_VALUE.
+ Support fuzzy names in select: DM_REPORT_FIELD_RESERVED_VALUE_FUZZY_NAMES.
+ Thin pool trace messages show a device name and major:minor.
+
+Version 1.02.100 - 30th June 2015
+=================================
+ Add since, after, until and before time operators to be used in selection.
+ Add support for time in reports and selection: DM_REPORT_FIELD_TYPE_TIME.
+ Support report reserved value ranges: DM_REPORT_FIELD_RESERVED_VALUE_RANGE.
+ Support report reserved value names: DM_REPORT_FIELD_RESERVED_VALUE_NAMED.
+ Add DM_CONFIG_VALUE_FMT_{INT_OCTAL,STRING_NO_QUOTES} config value format flag.
+ Add DM_CONFIG_VALUE_FMT_COMMON_{ARRAY,EXTRA_SPACE} config value format flag.
+ Add dm_config_value_{get,set}_format_flags to get and set config value format.
+
+Version 1.02.99 - 20th June 2015
+================================
+ New dm_tree_node_set_thin_pool_read_only(DM_1_02_99) for read-only thin pool.
+ Enhance error message when thin-pool message fails.
+ Fix dmeventd logging to avoid threaded use of static variable.
+ Remove redundant dmeventd SIGALRM code.
+
+Version 1.02.98 - 12th June 2015
+================================
+ Add dm_task_get_errno() to return any unexpected errno from a dm ioctl call.
+ Use copy of errno made after each dm ioctl call in case errno changes later.
+
+Version 1.02.97 - 15th May 2015
+===============================
+ New dm_task_get_info(DM_1_02_97) supports internal_suspend state.
+ New symbols are versioned and comes with versioned symbol name (DM_1_02_97).
+
+Version 1.02.96 - 2nd May 2015
+==============================
+ Fix selection to not match if using reserved value in criteria with >,<,>=,<.
+ Fix selection to not match reserved values for size fields if using >,<,>=,<.
+ Include uuid or device number in log message after ioctl failure.
+ Add DM_INTERNAL_SUSPEND_FLAG to dm-ioctl.h.
+ Install blkdeactivate script and its man page with make install_device-mapper.
+
+Version 1.02.95 - 15th March 2015
+=================================
+ Makefile regenerated.
+
+Version 1.02.94 - 4th March 2015
+================================
+ Add dm_report_object_is_selected for generalized interface for report/select.
+
+Version 1.02.93 - 21st January 2015
+===================================
+ Reduce severity of ioctl error message when dmeventd waitevent is interrupted.
+ Report 'unknown version' when incompatible version numbers were not obtained.
+ Report more info from thin pool status (out of data, metadata-ro, fail).
+ Support error_if_no_space for thin pool target.
Fix segfault while using selection with regex and unbuffered reporting.
Add dm_report_compact_fields to remove empty fields from report output.
Remove unimplemented dm_report_set_output_selection from libdevmapper.h.
diff --git a/acinclude.m4 b/acinclude.m4
index 601ae9dea..b6c9b181c 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -37,6 +37,10 @@ AC_DEFUN([AC_TRY_CCFLAG],
fi
])
+dnl AC_IF_YES([TEST-FOR-YES], [ACTION-IF-TRUE], [ACTION-IF-FALSE])
+dnl AS_IF() abstraction, checks shell variable for 'yes'
+AC_DEFUN([AC_IF_YES], [AS_IF([test $$1 = yes], [$2], [$3])])
+
dnl AC_TRY_LDFLAGS([LDFLAGS], [VAR], [ACTION-IF-WORKS], [ACTION-IF-FAILS])
dnl check if $CC supports given ld flags
diff --git a/aclocal.m4 b/aclocal.m4
index 6f752f28b..05e0ad530 100644
--- a/aclocal.m4
+++ b/aclocal.m4
@@ -1,6 +1,6 @@
-# generated automatically by aclocal 1.13.4 -*- Autoconf -*-
+# generated automatically by aclocal 1.15 -*- Autoconf -*-
-# Copyright (C) 1996-2013 Free Software Foundation, Inc.
+# Copyright (C) 1996-2014 Free Software Foundation, Inc.
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
diff --git a/conf/.gitignore b/conf/.gitignore
new file mode 100644
index 000000000..f505a4950
--- /dev/null
+++ b/conf/.gitignore
@@ -0,0 +1,4 @@
+command_profile_template.profile
+example.conf
+lvmlocal.conf
+metadata_profile_template.profile
diff --git a/conf/Makefile.in b/conf/Makefile.in
index e56b25e41..128395e3f 100644
--- a/conf/Makefile.in
+++ b/conf/Makefile.in
@@ -1,5 +1,5 @@
#
-# Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved.
+# Copyright (C) 2004-2015 Red Hat, Inc. All rights reserved.
#
# This file is part of LVM2.
#
@@ -17,24 +17,41 @@ top_builddir = @top_builddir@
CONFSRC=example.conf
CONFDEST=lvm.conf
+CONFLOCAL=lvmlocal.conf
PROFILE_TEMPLATES=command_profile_template.profile metadata_profile_template.profile
-PROFILES=$(PROFILE_TEMPLATES) $(srcdir)/thin-generic.profile $(srcdir)/thin-performance.profile
+PROFILES=$(PROFILE_TEMPLATES) \
+ $(srcdir)/cache-mq.profile \
+ $(srcdir)/cache-smq.profile \
+ $(srcdir)/thin-generic.profile \
+ $(srcdir)/thin-performance.profile
include $(top_builddir)/make.tmpl
+.PHONY: install_conf install_localconf install_profiles
+
+generate:
+ (cat $(top_srcdir)/conf/example.conf.base && LD_LIBRARY_PATH=$(top_builddir)/libdm:$(LD_LIBRARY_PATH) $(top_builddir)/tools/lvm dumpconfig --type default --unconfigured --withcomments --ignorelocal --withspaces) > example.conf.in
+ (cat $(top_srcdir)/conf/lvmlocal.conf.base && LD_LIBRARY_PATH=$(top_builddir)/libdm:$(LD_LIBRARY_PATH) $(top_builddir)/tools/lvm dumpconfig --type default --unconfigured --withcomments --withspaces local) > lvmlocal.conf.in
+
install_conf: $(CONFSRC)
@if [ ! -e $(confdir)/$(CONFDEST) ]; then \
echo "$(INSTALL_WDATA) -D $< $(confdir)/$(CONFDEST)"; \
$(INSTALL_WDATA) -D $< $(confdir)/$(CONFDEST); \
fi
+install_localconf: $(CONFLOCAL)
+ @if [ ! -e $(confdir)/$(CONFLOCAL) ]; then \
+ echo "$(INSTALL_WDATA) -D $< $(confdir)/$(CONFLOCAL)"; \
+ $(INSTALL_WDATA) -D $< $(confdir)/$(CONFLOCAL); \
+ fi
+
install_profiles: $(PROFILES)
$(INSTALL_DIR) $(DESTDIR)$(DEFAULT_PROFILE_DIR)
$(INSTALL_DATA) $(PROFILES) $(DESTDIR)$(DEFAULT_PROFILE_DIR)/
-install_lvm2: install_conf install_profiles
+install_lvm2: install_conf install_localconf install_profiles
install: install_lvm2
-DISTCLEAN_TARGETS += $(CONFSRC) $(PROFILE_TEMPLATES)
+DISTCLEAN_TARGETS += $(CONFSRC) $(CONFLOCAL) $(PROFILE_TEMPLATES)
diff --git a/conf/cache-mq.profile b/conf/cache-mq.profile
new file mode 100644
index 000000000..3c90331d1
--- /dev/null
+++ b/conf/cache-mq.profile
@@ -0,0 +1,20 @@
+# Demo configuration 'mq' cache policy
+#
+# Note: This policy has been deprecated in favor of the smq policy
+# The keyword "default" means the setting is left at the kernel default.
+#
+
+allocation {
+ cache_pool_chunk_size = 64
+ cache_mode = "writethrough"
+ cache_policy = "mq"
+ cache_settings {
+ mq {
+ sequential_threshold = "default" # #nr_sequential_ios
+ random_threshold = "default" # #nr_random_ios
+ read_promote_adjustment = "default"
+ write_promote_adjustment = "default"
+ discard_promote_adjustment = "default"
+ }
+ }
+}
diff --git a/conf/cache-smq.profile b/conf/cache-smq.profile
new file mode 100644
index 000000000..c0d6266f5
--- /dev/null
+++ b/conf/cache-smq.profile
@@ -0,0 +1,14 @@
+# Demo configuration 'smq' cache policy
+#
+# The stochastic multi-queue (smq) policy addresses some of the problems
+# with the multiqueue (mq) policy and uses less memory.
+#
+
+allocation {
+ cache_pool_chunk_size = 64
+ cache_mode = "writethrough"
+ cache_policy = "smq"
+ cache_settings {
+ # currently no settings for "smq" policy
+ }
+}
diff --git a/conf/command_profile_template.profile.in b/conf/command_profile_template.profile.in
index 0c1e6f089..402e30c9c 100644
--- a/conf/command_profile_template.profile.in
+++ b/conf/command_profile_template.profile.in
@@ -18,6 +18,7 @@ global {
lvdisplay_shows_full_device_path=0
}
report {
+ compact_output=0
aligned=1
buffered=1
headings=1
diff --git a/conf/example.conf.base b/conf/example.conf.base
new file mode 100644
index 000000000..5c498998f
--- /dev/null
+++ b/conf/example.conf.base
@@ -0,0 +1,23 @@
+# This is an example configuration file for the LVM2 system.
+# It contains the default settings that would be used if there was no
+# @DEFAULT_SYS_DIR@/lvm.conf file.
+#
+# Refer to 'man lvm.conf' for further information including the file layout.
+#
+# Refer to 'man lvm.conf' for information about how settings configured in
+# this file are combined with built-in values and command line options to
+# arrive at the final values used by LVM.
+#
+# Refer to 'man lvmconfig' for information about displaying the built-in
+# and configured values used by LVM.
+#
+# If a default value is set in this file (not commented out), then a
+# new version of LVM using this file will continue using that value,
+# even if the new version of LVM changes the built-in default value.
+#
+# To put this file in a different directory and override @DEFAULT_SYS_DIR@ set
+# the environment variable LVM_SYSTEM_DIR before running the tools.
+#
+# N.B. Take care that each setting only appears once if uncommenting
+# example settings in this file.
+
diff --git a/conf/example.conf.in b/conf/example.conf.in
index 70d3e6b24..014aa1ade 100644
--- a/conf/example.conf.in
+++ b/conf/example.conf.in
@@ -4,1297 +4,1699 @@
#
# Refer to 'man lvm.conf' for further information including the file layout.
#
+# Refer to 'man lvm.conf' for information about how settings configured in
+# this file are combined with built-in values and command line options to
+# arrive at the final values used by LVM.
+#
+# Refer to 'man lvmconfig' for information about displaying the built-in
+# and configured values used by LVM.
+#
+# If a default value is set in this file (not commented out), then a
+# new version of LVM using this file will continue using that value,
+# even if the new version of LVM changes the built-in default value.
+#
# To put this file in a different directory and override @DEFAULT_SYS_DIR@ set
# the environment variable LVM_SYSTEM_DIR before running the tools.
#
# N.B. Take care that each setting only appears once if uncommenting
# example settings in this file.
-# This section allows you to set the way the configuration settings are handled.
-config {
- # If enabled, any LVM2 configuration mismatch is reported.
- # This implies checking that the configuration key is understood
- # by LVM2 and that the value of the key is of a proper type.
- # If disabled, any configuration mismatch is ignored and default
- # value is used instead without any warning (a message about the
- # configuration key not being found is issued in verbose mode only).
- checks = 1
-
- # If enabled, any configuration mismatch aborts the LVM2 process.
- abort_on_errors = 0
+# Configuration section config.
+# How LVM configuration settings are handled.
+config {
- # Directory where LVM looks for configuration profiles.
- profile_dir = "@DEFAULT_SYS_DIR@/@DEFAULT_PROFILE_SUBDIR@"
+ # Configuration option config/checks.
+ # If enabled, any LVM configuration mismatch is reported.
+ # This implies checking that the configuration key is understood
+ # by LVM and that the value of the key is the proper type.
+ # If disabled, any configuration mismatch is ignored and the default
+ # value is used without any warning (a message about the
+ # configuration key not being found is issued in verbose mode only).
+ checks = 1
+
+ # Configuration option config/abort_on_errors.
+ # Abort the LVM process if a configuration mismatch is found.
+ abort_on_errors = 0
+
+ # Configuration option config/profile_dir.
+ # Directory where LVM looks for configuration profiles.
+ profile_dir = "@DEFAULT_SYS_DIR@/@DEFAULT_PROFILE_SUBDIR@"
}
-# This section allows you to configure which block devices should
-# be used by the LVM system.
+# Configuration section devices.
+# How LVM uses block devices.
devices {
- # Where do you want your volume groups to appear ?
- dir = "/dev"
-
- # An array of directories that contain the device nodes you wish
- # to use with LVM2.
- scan = [ "/dev" ]
-
- # If set, the cache of block device nodes with all associated symlinks
- # will be constructed out of the existing udev database content.
- # This avoids using and opening any inapplicable non-block devices or
- # subdirectories found in the device directory. This setting is applied
- # to udev-managed device directory only, other directories will be scanned
- # fully. LVM2 needs to be compiled with udev support for this setting to
- # take effect. N.B. Any device node or symlink not managed by udev in
- # udev directory will be ignored with this setting on.
- obtain_device_list_from_udev = 1
-
- # If several entries in the scanned directories correspond to the
- # same block device and the tools need to display a name for device,
- # all the pathnames are matched against each item in the following
- # list of regular expressions in turn and the first match is used.
-
- # By default no preferred names are defined.
- # preferred_names = [ ]
-
- # Try to avoid using undescriptive /dev/dm-N names, if present.
- # preferred_names = [ "^/dev/mpath/", "^/dev/mapper/mpath", "^/dev/[hs]d" ]
-
- # In case no prefererred name matches or if preferred_names are not
- # defined at all, builtin rules are used to determine the preference.
- #
- # The first builtin rule checks path prefixes and it gives preference
- # based on this ordering (where "dev" depends on devices/dev setting):
- # /dev/mapper > /dev/disk > /dev/dm-* > /dev/block
- #
- # If the ordering above cannot be applied, the path with fewer slashes
- # gets preference then.
- #
- # If the number of slashes is the same, a symlink gets preference.
- #
- # Finally, if all the rules mentioned above are not applicable,
- # lexicographical order is used over paths and the smallest one
- # of all gets preference.
-
-
- # A filter that tells LVM2 to only use a restricted set of devices.
- # The filter consists of an array of regular expressions. These
- # expressions can be delimited by a character of your choice, and
- # prefixed with either an 'a' (for accept) or 'r' (for reject).
- # The first expression found to match a device name determines if
- # the device will be accepted or rejected (ignored). Devices that
- # don't match any patterns are accepted.
-
- # Be careful if there there are symbolic links or multiple filesystem
- # entries for the same device as each name is checked separately against
- # the list of patterns. The effect is that if the first pattern in the
- # list to match a name is an 'a' pattern for any of the names, the device
- # is accepted; otherwise if the first pattern in the list to match a name
- # is an 'r' pattern for any of the names it is rejected; otherwise it is
- # accepted.
-
- # Don't have more than one filter line active at once: only one gets used.
-
- # Run vgscan after you change this parameter to ensure that
- # the cache file gets regenerated (see below).
- # If it doesn't do what you expect, check the output of 'vgscan -vvvv'.
-
- # If lvmetad is used, then see "A note about device filtering while
- # lvmetad is used" comment that is attached to global/use_lvmetad setting.
-
- # By default we accept every block device:
- # filter = [ "a/.*/" ]
-
- # Exclude the cdrom drive
- # filter = [ "r|/dev/cdrom|" ]
-
- # When testing I like to work with just loopback devices:
- # filter = [ "a/loop/", "r/.*/" ]
-
- # Or maybe all loops and ide drives except hdc:
- # filter =[ "a|loop|", "r|/dev/hdc|", "a|/dev/ide|", "r|.*|" ]
-
- # Use anchors if you want to be really specific
- # filter = [ "a|^/dev/hda8$|", "r/.*/" ]
-
- # Since "filter" is often overridden from command line, it is not suitable
- # for system-wide device filtering (udev rules, lvmetad). To hide devices
- # from LVM-specific udev processing and/or from lvmetad, you need to set
- # global_filter. The syntax is the same as for normal "filter"
- # above. Devices that fail the global_filter are not even opened by LVM.
-
- # global_filter = []
-
- # The results of the filtering are cached on disk to avoid
- # rescanning dud devices (which can take a very long time).
- # By default this cache is stored in the @DEFAULT_SYS_DIR@/@DEFAULT_CACHE_SUBDIR@ directory
- # in a file called '.cache'.
- # It is safe to delete the contents: the tools regenerate it.
- # (The old setting 'cache' is still respected if neither of
- # these new ones is present.)
- # N.B. If obtain_device_list_from_udev is set to 1 the list of
- # devices is instead obtained from udev and any existing .cache
- # file is removed.
- cache_dir = "@DEFAULT_SYS_DIR@/@DEFAULT_CACHE_SUBDIR@"
- cache_file_prefix = ""
-
- # You can turn off writing this cache file by setting this to 0.
- write_cache_state = 1
-
- # Advanced settings.
-
- # List of pairs of additional acceptable block device types found
- # in /proc/devices with maximum (non-zero) number of partitions.
- # types = [ "fd", 16 ]
-
- # If sysfs is mounted (2.6 kernels) restrict device scanning to
- # the block devices it believes are valid.
- # 1 enables; 0 disables.
- sysfs_scan = 1
-
- # By default, LVM2 will ignore devices used as component paths
- # of device-mapper multipath devices.
- # 1 enables; 0 disables.
- multipath_component_detection = 1
-
- # By default, LVM2 will ignore devices used as components of
- # software RAID (md) devices by looking for md superblocks.
- # 1 enables; 0 disables.
- md_component_detection = 1
-
- # By default, if a PV is placed directly upon an md device, LVM2
- # will align its data blocks with the md device's stripe-width.
- # 1 enables; 0 disables.
- md_chunk_alignment = 1
-
- # Default alignment of the start of a data area in MB. If set to 0,
- # a value of 64KB will be used. Set to 1 for 1MiB, 2 for 2MiB, etc.
- # default_data_alignment = @DEFAULT_DATA_ALIGNMENT@
-
- # By default, the start of a PV's data area will be a multiple of
- # the 'minimum_io_size' or 'optimal_io_size' exposed in sysfs.
- # - minimum_io_size - the smallest request the device can perform
- # w/o incurring a read-modify-write penalty (e.g. MD's chunk size)
- # - optimal_io_size - the device's preferred unit of receiving I/O
- # (e.g. MD's stripe width)
- # minimum_io_size is used if optimal_io_size is undefined (0).
- # If md_chunk_alignment is enabled, that detects the optimal_io_size.
- # This setting takes precedence over md_chunk_alignment.
- # 1 enables; 0 disables.
- data_alignment_detection = 1
-
- # Alignment (in KB) of start of data area when creating a new PV.
- # md_chunk_alignment and data_alignment_detection are disabled if set.
- # Set to 0 for the default alignment (see: data_alignment_default)
- # or page size, if larger.
- data_alignment = 0
-
- # By default, the start of the PV's aligned data area will be shifted by
- # the 'alignment_offset' exposed in sysfs. This offset is often 0 but
- # may be non-zero; e.g.: certain 4KB sector drives that compensate for
- # windows partitioning will have an alignment_offset of 3584 bytes
- # (sector 7 is the lowest aligned logical block, the 4KB sectors start
- # at LBA -1, and consequently sector 63 is aligned on a 4KB boundary).
- # But note that pvcreate --dataalignmentoffset will skip this detection.
- # 1 enables; 0 disables.
- data_alignment_offset_detection = 1
-
- # If, while scanning the system for PVs, LVM2 encounters a device-mapper
- # device that has its I/O suspended, it waits for it to become accessible.
- # Set this to 1 to skip such devices. This should only be needed
- # in recovery situations.
- ignore_suspended_devices = 0
-
- # ignore_lvm_mirrors: Introduced in version 2.02.104
- # This setting determines whether logical volumes of "mirror" segment
- # type are scanned for LVM labels. This affects the ability of
- # mirrors to be used as physical volumes. If 'ignore_lvm_mirrors'
- # is set to '1', it becomes impossible to create volume groups on top
- # of mirror logical volumes - i.e. to stack volume groups on mirrors.
- #
- # Allowing mirror logical volumes to be scanned (setting the value to '0')
- # can potentially cause LVM processes and I/O to the mirror to become
- # blocked. This is due to the way that the "mirror" segment type handles
- # failures. In order for the hang to manifest itself, an LVM command must
- # be run just after a failure and before the automatic LVM repair process
- # takes place OR there must be failures in multiple mirrors in the same
- # volume group at the same time with write failures occurring moments
- # before a scan of the mirror's labels.
- #
- # Note that these scanning limitations do not apply to the LVM RAID
- # types, like "raid1". The RAID segment types handle failures in a
- # different way and are not subject to possible process or I/O blocking.
- #
- # It is encouraged that users set 'ignore_lvm_mirrors' to 1 if they
- # are using the "mirror" segment type. Users that require volume group
- # stacking on mirrored logical volumes should consider using the "raid1"
- # segment type. The "raid1" segment type is not available for
- # active/active clustered volume groups.
- #
- # Set to 1 to disallow stacking and thereby avoid a possible deadlock.
- ignore_lvm_mirrors = 1
-
- # During each LVM operation errors received from each device are counted.
- # If the counter of a particular device exceeds the limit set here, no
- # further I/O is sent to that device for the remainder of the respective
- # operation. Setting the parameter to 0 disables the counters altogether.
- disable_after_error_count = 0
-
- # Allow use of pvcreate --uuid without requiring --restorefile.
- require_restorefile_with_uuid = 1
-
- # Minimum size (in KB) of block devices which can be used as PVs.
- # In a clustered environment all nodes must use the same value.
- # Any value smaller than 512KB is ignored.
-
- # Ignore devices smaller than 2MB such as floppy drives.
- pv_min_size = 2048
-
- # The original built-in setting was 512 up to and including version 2.02.84.
- # pv_min_size = 512
-
- # Issue discards to a logical volumes's underlying physical volume(s) when
- # the logical volume is no longer using the physical volumes' space (e.g.
- # lvremove, lvreduce, etc). Discards inform the storage that a region is
- # no longer in use. Storage that supports discards advertise the protocol
- # specific way discards should be issued by the kernel (TRIM, UNMAP, or
- # WRITE SAME with UNMAP bit set). Not all storage will support or benefit
- # from discards but SSDs and thinly provisioned LUNs generally do. If set
- # to 1, discards will only be issued if both the storage and kernel provide
- # support.
- # 1 enables; 0 disables.
- issue_discards = 0
+ # Configuration option devices/dir.
+ # Directory in which to create volume group device nodes.
+ # Commands also accept this as a prefix on volume group names.
+ # This configuration option is advanced.
+ dir = "/dev"
+
+ # Configuration option devices/scan.
+ # Directories containing device nodes to use with LVM.
+ # This configuration option is advanced.
+ scan = [ "/dev" ]
+
+ # Configuration option devices/obtain_device_list_from_udev.
+ # Obtain the list of available devices from udev.
+ # This avoids opening or using any inapplicable non-block
+ # devices or subdirectories found in the udev directory.
+ # Any device node or symlink not managed by udev in the udev
+ # directory is ignored. This setting applies only to the
+ # udev-managed device directory; other directories will be
+ # scanned fully. LVM needs to be compiled with udev support
+ # for this setting to apply.
+ obtain_device_list_from_udev = 1
+
+ # Configuration option devices/external_device_info_source.
+ # Select an external device information source.
+ # Some information may already be available in the system and
+ # LVM can use this information to determine the exact type
+ # or use of devices it processes. Using an existing external
+ # device information source can speed up device processing
+ # as LVM does not need to run its own native routines to acquire
+ # this information. For example, this information is used to
+ # drive LVM filtering like MD component detection, multipath
+ # component detection, partition detection and others.
+ # Possible options are: none, udev.
+ # none - No external device information source is used.
+ # udev - Reuse existing udev database records. Applicable
+ # only if LVM is compiled with udev support.
+ external_device_info_source = "none"
+
+ # Configuration option devices/preferred_names.
+ # Select which path name to display for a block device.
+ # If multiple path names exist for a block device,
+ # and LVM needs to display a name for the device,
+ # the path names are matched against each item in
+ # this list of regular expressions. The first match is used.
+ # Try to avoid using undescriptive /dev/dm-N names, if present.
+ # If no preferred name matches, or if preferred_names are not
+ # defined, built-in rules are used until one produces a preference.
+ # Rule 1 checks path prefixes and gives preference in this order:
+ # /dev/mapper, /dev/disk, /dev/dm-*, /dev/block (/dev from devices/dev)
+ # Rule 2 prefers the path with the fewest slashes.
+ # Rule 3 prefers a symlink.
+ # Rule 4 prefers the path with least value in lexicographical order.
+ # Example:
+ # preferred_names = [ "^/dev/mpath/", "^/dev/mapper/mpath", "^/dev/[hs]d" ]
+ # This configuration option does not have a default value defined.
+
+ # Configuration option devices/filter.
+ # Limit the block devices that are used by LVM commands.
+ # This is a list of regular expressions used to accept or
+ # reject block device path names. Each regex is delimited
+ # by a vertical bar '|' (or any character) and is preceded
+ # by 'a' to accept the path, or by 'r' to reject the path.
+ # The first regex in the list to match the path is used,
+ # producing the 'a' or 'r' result for the device.
+ # When multiple path names exist for a block device, if any
+ # path name matches an 'a' pattern before an 'r' pattern,
+ # then the device is accepted. If all the path names match
+ # an 'r' pattern first, then the device is rejected.
+ # Unmatching path names do not affect the accept or reject
+ # decision. If no path names for a device match a pattern,
+ # then the device is accepted.
+ # Be careful mixing 'a' and 'r' patterns, as the combination
+ # might produce unexpected results (test any changes.)
+ # Run vgscan after changing the filter to regenerate the cache.
+ # See the use_lvmetad comment for a special case regarding filters.
+ # Example:
+ # Accept every block device.
+ # filter = [ "a|.*/|" ]
+ # Example:
+ # Reject the cdrom drive.
+ # filter = [ "r|/dev/cdrom|" ]
+ # Example:
+ # Work with just loopback devices, e.g. for testing.
+ # filter = [ "a|loop|", "r|.*|" ]
+ # Example:
+ # Accept all loop devices and ide drives except hdc.
+ # filter = [ "a|loop|", "r|/dev/hdc|", "a|/dev/ide|", "r|.*|" ]
+ # Example:
+ # Use anchors to be very specific.
+ # filter = [ "a|^/dev/hda8$|", "r|.*/|" ]
+ # filter = [ "a|.*/|" ]
+
+ # Configuration option devices/global_filter.
+ # Limit the block devices that are used by LVM system components.
+ # Because devices/filter may be overridden from the command line,
+ # it is not suitable for system-wide device filtering, e.g. udev
+ # and lvmetad. Use global_filter to hide devices from these LVM
+ # system components. The syntax is the same as devices/filter.
+ # Devices rejected by global_filter are not opened by LVM.
+ # global_filter = [ "a|.*/|" ]
+
+ # Configuration option devices/cache_dir.
+ # Directory in which to store the device cache file.
+ # The results of filtering are cached on disk to avoid
+ # rescanning dud devices (which can take a very long time).
+ # By default this cache is stored in a file named .cache.
+ # It is safe to delete this file; the tools regenerate it.
+ # If obtain_device_list_from_udev is enabled, the list of devices
+ # is obtained from udev and any existing .cache file is removed.
+ cache_dir = "@DEFAULT_SYS_DIR@/@DEFAULT_CACHE_SUBDIR@"
+
+ # Configuration option devices/cache_file_prefix.
+ # A prefix used before the .cache file name. See devices/cache_dir.
+ cache_file_prefix = ""
+
+ # Configuration option devices/write_cache_state.
+ # Enable/disable writing the cache file. See devices/cache_dir.
+ write_cache_state = 1
+
+ # Configuration option devices/types.
+ # List of additional acceptable block device types.
+ # These are of device type names from /proc/devices,
+ # followed by the maximum number of partitions.
+ # Example:
+ # types = [ "fd", 16 ]
+ # This configuration option is advanced.
+ # This configuration option does not have a default value defined.
+
+ # Configuration option devices/sysfs_scan.
+ # Restrict device scanning to block devices appearing in sysfs.
+ # This is a quick way of filtering out block devices that are
+ # not present on the system. sysfs must be part of the kernel
+ # and mounted.
+ sysfs_scan = 1
+
+ # Configuration option devices/multipath_component_detection.
+ # Ignore devices that are components of DM multipath devices.
+ multipath_component_detection = 1
+
+ # Configuration option devices/md_component_detection.
+ # Ignore devices that are components of software RAID (md) devices.
+ md_component_detection = 1
+
+ # Configuration option devices/fw_raid_component_detection.
+ # Ignore devices that are components of firmware RAID devices.
+ # LVM must use an external_device_info_source other than none
+ # for this detection to execute.
+ fw_raid_component_detection = 0
+
+ # Configuration option devices/md_chunk_alignment.
+ # Align PV data blocks with md device's stripe-width.
+ # This applies if a PV is placed directly on an md device.
+ md_chunk_alignment = 1
+
+ # Configuration option devices/default_data_alignment.
+ # Default alignment of the start of a PV data area in MB.
+ # If set to 0, a value of 64KiB will be used.
+ # Set to 1 for 1MiB, 2 for 2MiB, etc.
+ # default_data_alignment = 1
+
+ # Configuration option devices/data_alignment_detection.
+ # Detect PV data alignment based on sysfs device information.
+ # The start of a PV data area will be a multiple of
+ # minimum_io_size or optimal_io_size exposed in sysfs.
+ # minimum_io_size is the smallest request the device can perform
+ # without incurring a read-modify-write penalty, e.g. MD chunk size.
+ # optimal_io_size is the device's preferred unit of receiving I/O,
+ # e.g. MD stripe width.
+ # minimum_io_size is used if optimal_io_size is undefined (0).
+ # If md_chunk_alignment is enabled, that detects the optimal_io_size.
+ # This setting takes precedence over md_chunk_alignment.
+ data_alignment_detection = 1
+
+ # Configuration option devices/data_alignment.
+ # Alignment of the start of a PV data area in KiB.
+ # If a PV is placed directly on an md device and
+ # md_chunk_alignment or data_alignment_detection are enabled,
+ # then this setting is ignored. Otherwise, md_chunk_alignment
+ # and data_alignment_detection are disabled if this is set.
+ # Set to 0 to use the default alignment or the page size, if larger.
+ data_alignment = 0
+
+ # Configuration option devices/data_alignment_offset_detection.
+ # Detect PV data alignment offset based on sysfs device information.
+ # The start of a PV aligned data area will be shifted by the
+ # alignment_offset exposed in sysfs. This offset is often 0, but
+ # may be non-zero. Certain 4KiB sector drives that compensate for
+ # windows partitioning will have an alignment_offset of 3584 bytes
+ # (sector 7 is the lowest aligned logical block, the 4KiB sectors start
+ # at LBA -1, and consequently sector 63 is aligned on a 4KiB boundary).
+ # pvcreate --dataalignmentoffset will skip this detection.
+ data_alignment_offset_detection = 1
+
+ # Configuration option devices/ignore_suspended_devices.
+ # Ignore DM devices that have I/O suspended while scanning devices.
+ # Otherwise, LVM waits for a suspended device to become accessible.
+ # This should only be needed in recovery situations.
+ ignore_suspended_devices = 0
+
+ # Configuration option devices/ignore_lvm_mirrors.
+ # Do not scan 'mirror' LVs to avoid possible deadlocks.
+ # This avoids possible deadlocks when using the 'mirror'
+ # segment type. This setting determines whether logical volumes
+ # using the 'mirror' segment type are scanned for LVM labels.
+ # This affects the ability of mirrors to be used as physical volumes.
+ # If this setting is enabled, it becomes impossible to create VGs
+ # on top of mirror LVs, i.e. to stack VGs on mirror LVs.
+ # If this setting is disabled, allowing mirror LVs to be scanned,
+ # it may cause LVM processes and I/O to the mirror to become blocked.
+ # This is due to the way that the mirror segment type handles failures.
+ # In order for the hang to occur, an LVM command must be run just after
+ # a failure and before the automatic LVM repair process takes place,
+ # or there must be failures in multiple mirrors in the same VG at the
+ # same time with write failures occurring moments before a scan of the
+ # mirror's labels.
+ # The 'mirror' scanning problems do not apply to LVM RAID types like
+ # 'raid1' which handle failures in a different way, making them a
+ # better choice for VG stacking.
+ ignore_lvm_mirrors = 1
+
+ # Configuration option devices/disable_after_error_count.
+ # Number of I/O errors after which a device is skipped.
+ # During each LVM operation, errors received from each device
+ # are counted. If the counter of a device exceeds the limit set
+ # here, no further I/O is sent to that device for the remainder
+ # of the operation.
+ # Setting this to 0 disables the counters altogether.
+ disable_after_error_count = 0
+
+ # Configuration option devices/require_restorefile_with_uuid.
+ # Allow use of pvcreate --uuid without requiring --restorefile.
+ require_restorefile_with_uuid = 1
+
+ # Configuration option devices/pv_min_size.
+ # Minimum size in KiB of block devices which can be used as PVs.
+ # In a clustered environment all nodes must use the same value.
+ # Any value smaller than 512KiB is ignored. The previous built-in
+ # value was 512.
+ pv_min_size = 2048
+
+ # Configuration option devices/issue_discards.
+ # Issue discards to PVs that are no longer used by an LV.
+ # Discards are sent to an LV's underlying physical volumes when
+ # the LV is no longer using the physical volumes' space, e.g.
+ # lvremove, lvreduce. Discards inform the storage that a region
+ # is no longer used. Storage that supports discards advertise
+ # the protocol-specific way discards should be issued by the
+ # kernel (TRIM, UNMAP, or WRITE SAME with UNMAP bit set).
+ # Not all storage will support or benefit from discards, but SSDs
+ # and thinly provisioned LUNs generally do. If enabled, discards
+ # will only be issued if both the storage and kernel provide support.
+ issue_discards = 0
}
-# This section allows you to configure the way in which LVM selects
-# free space for its Logical Volumes.
+# Configuration section allocation.
+# How LVM selects space and applies properties to LVs.
allocation {
- # When searching for free space to extend an LV, the "cling"
- # allocation policy will choose space on the same PVs as the last
- # segment of the existing LV. If there is insufficient space and a
- # list of tags is defined here, it will check whether any of them are
- # attached to the PVs concerned and then seek to match those PV tags
- # between existing extents and new extents.
- # Use the special tag "@*" as a wildcard to match any PV tag.
-
- # Example: LVs are mirrored between two sites within a single VG.
- # PVs are tagged with either @site1 or @site2 to indicate where
- # they are situated.
-
- # cling_tag_list = [ "@site1", "@site2" ]
- # cling_tag_list = [ "@*" ]
-
- # Changes made in version 2.02.85 extended the reach of the 'cling'
- # policies to detect more situations where data can be grouped
- # onto the same disks. Set this to 0 to revert to the previous
- # algorithm.
- maximise_cling = 1
-
- # Whether to use blkid library instead of native LVM2 code to detect
- # any existing signatures while creating new Physical Volumes and
- # Logical Volumes. LVM2 needs to be compiled with blkid wiping support
- # for this setting to take effect.
- #
- # LVM2 native detection code is currently able to recognize these signatures:
- # - MD device signature
- # - swap signature
- # - LUKS signature
- # To see the list of signatures recognized by blkid, check the output
- # of 'blkid -k' command. The blkid can recognize more signatures than
- # LVM2 native detection code, but due to this higher number of signatures
- # to be recognized, it can take more time to complete the signature scan.
- use_blkid_wiping = 1
-
- # Set to 1 to wipe any signatures found on newly-created Logical Volumes
- # automatically in addition to zeroing of the first KB on the LV
- # (controlled by the -Z/--zero y option).
- # The command line option -W/--wipesignatures takes precedence over this
- # setting.
- # The default is to wipe signatures when zeroing.
- #
- wipe_signatures_when_zeroing_new_lvs = 1
-
- # Set to 1 to guarantee that mirror logs will always be placed on
- # different PVs from the mirror images. This was the default
- # until version 2.02.85.
- mirror_logs_require_separate_pvs = 0
-
- # Set to 1 to guarantee that cache_pool metadata will always be
- # placed on different PVs from the cache_pool data.
- cache_pool_metadata_require_separate_pvs = 0
-
- # Specify the minimal chunk size (in kiB) for cache pool volumes.
- # Using a chunk_size that is too large can result in wasteful use of
- # the cache, where small reads and writes can cause large sections of
- # an LV to be mapped into the cache. However, choosing a chunk_size
- # that is too small can result in more overhead trying to manage the
- # numerous chunks that become mapped into the cache. The former is
- # more of a problem than the latter in most cases, so we default to
- # a value that is on the smaller end of the spectrum. Supported values
- # range from 32(kiB) to 1048576 in multiples of 32.
- # cache_pool_chunk_size = 64
-
- # Specify the default cache mode used for new cache pools.
- # Possible options are:
- # "writethrough" - Data blocks are immediately written from
- # the cache to disk.
- # "writeback" - Data blocks are written from the cache
- # back to disk after some delay to improve
- # performance.
- # cache_pool_cachemode = "writethrough"
-
- # Set to 1 to guarantee that thin pool metadata will always
- # be placed on different PVs from the pool data.
- thin_pool_metadata_require_separate_pvs = 0
-
- # Specify chunk size calculation policy for thin pool volumes.
- # Possible options are:
- # "generic" - if thin_pool_chunk_size is defined, use it.
- # Otherwise, calculate the chunk size based on
- # estimation and device hints exposed in sysfs:
- # the minimum_io_size. The chunk size is always
- # at least 64KiB.
- #
- # "performance" - if thin_pool_chunk_size is defined, use it.
- # Otherwise, calculate the chunk size for
- # performance based on device hints exposed in
- # sysfs: the optimal_io_size. The chunk size is
- # always at least 512KiB.
- # thin_pool_chunk_size_policy = "generic"
-
- # Specify the minimal chunk size (in KB) for thin pool volumes.
- # Use of the larger chunk size may improve performance for plain
- # thin volumes, however using them for snapshot volumes is less efficient,
- # as it consumes more space and takes extra time for copying.
- # When unset, lvm tries to estimate chunk size starting from 64KB
- # Supported values are in range from 64 to 1048576.
- # thin_pool_chunk_size = 64
-
- # Specify discards behaviour of the thin pool volume.
- # Select one of "ignore", "nopassdown", "passdown"
- # thin_pool_discards = "passdown"
-
- # Set to 0, to disable zeroing of thin pool data chunks before their
- # first use.
- # N.B. zeroing larger thin pool chunk size degrades performance.
- # thin_pool_zero = 1
-
- # Default physical extent size to use for newly created VGs (in KB).
- # physical_extent_size = 4096
+ # Configuration option allocation/cling_tag_list.
+ # Advise LVM which PVs to use when searching for new space.
+ # When searching for free space to extend an LV, the 'cling'
+ # allocation policy will choose space on the same PVs as the last
+ # segment of the existing LV. If there is insufficient space and a
+ # list of tags is defined here, it will check whether any of them are
+ # attached to the PVs concerned and then seek to match those PV tags
+ # between existing extents and new extents.
+ # Example:
+ # Use the special tag "@*" as a wildcard to match any PV tag.
+ # cling_tag_list = [ "@*" ]
+ # Example:
+ # LVs are mirrored between two sites within a single VG.
+ # PVs are tagged with either @site1 or @site2 to indicate where
+ # they are situated.
+ # cling_tag_list = [ "@site1", "@site2" ]
+ # This configuration option does not have a default value defined.
+
+ # Configuration option allocation/maximise_cling.
+ # Use a previous allocation algorithm.
+ # Changes made in version 2.02.85 extended the reach of the 'cling'
+ # policies to detect more situations where data can be grouped onto
+ # the same disks. This setting can be used to disable the changes
+ # and revert to the previous algorithm.
+ maximise_cling = 1
+
+ # Configuration option allocation/use_blkid_wiping.
+ # Use blkid to detect existing signatures on new PVs and LVs.
+ # The blkid library can detect more signatures than the
+ # native LVM detection code, but may take longer.
+ # LVM needs to be compiled with blkid wiping support for
+ # this setting to apply.
+ # LVM native detection code is currently able to recognize:
+ # MD device signatures, swap signature, and LUKS signatures.
+ # To see the list of signatures recognized by blkid, check the
+ # output of the 'blkid -k' command.
+ use_blkid_wiping = @DEFAULT_USE_BLKID_WIPING@
+
+ # Configuration option allocation/wipe_signatures_when_zeroing_new_lvs.
+ # Look for and erase any signatures while zeroing a new LV.
+ # Zeroing is controlled by the -Z/--zero option, and if not
+ # specified, zeroing is used by default if possible.
+ # Zeroing simply overwrites the first 4KiB of a new LV
+ # with zeroes and does no signature detection or wiping.
+ # Signature wiping goes beyond zeroing and detects exact
+ # types and positions of signatures within the whole LV.
+ # It provides a cleaner LV after creation as all known
+ # signatures are wiped. The LV is not claimed incorrectly
+ # by other tools because of old signatures from previous use.
+ # The number of signatures that LVM can detect depends on the
+ # detection code that is selected (see use_blkid_wiping.)
+ # Wiping each detected signature must be confirmed.
+ # The command line option -W/--wipesignatures takes precedence
+ # over this setting.
+ # When this setting is disabled, signatures on new LVs are
+ # not detected or erased unless the -W/--wipesignatures y
+ # option is used directly.
+ wipe_signatures_when_zeroing_new_lvs = 1
+
+ # Configuration option allocation/mirror_logs_require_separate_pvs.
+ # Mirror logs and images will always use different PVs.
+ # The default setting changed in version 2.02.85.
+ mirror_logs_require_separate_pvs = 0
+
+ # Configuration option allocation/cache_pool_metadata_require_separate_pvs.
+ # Cache pool metadata and data will always use different PVs.
+ cache_pool_metadata_require_separate_pvs = 0
+
+ # Configuration option allocation/cache_mode.
+ # The default cache mode used for new cache.
+ # Possible options are: writethrough, writeback.
+ # writethrough - Data blocks are immediately written from
+ # the cache to disk.
+ # writeback - Data blocks are written from the cache back
+ # to disk after some delay to improve performance.
+ # This setting replaces allocation/cache_pool_cachemode.
+ # cache_mode = "writethrough"
+
+ # Configuration option allocation/cache_policy.
+ # The default cache policy used for new cache volume.
+ # For the kernel 4.2 and newer the default policy is smq
+ # (Stochastic multiqueue), otherwise the older mq (Multiqueue)
+ # policy is selected.
+ # This configuration option does not have a default value defined.
+
+ # Configuration section allocation/cache_settings.
+ # Individual settings for policies.
+ # See the help for individual policies for more info.
+ # cache_settings {
+ # }
+
+ # Configuration option allocation/cache_pool_chunk_size.
+ # The minimal chunk size in KiB for cache pool volumes.
+ # Using a chunk_size that is too large can result in wasteful
+ # use of the cache, where small reads and writes can cause
+ # large sections of an LV to be mapped into the cache. However,
+ # choosing a chunk_size that is too small can result in more
+ # overhead trying to manage the numerous chunks that become mapped
+ # into the cache. The former is more of a problem than the latter
+ # in most cases, so we default to a value that is on the smaller
+ # end of the spectrum. Supported values range from 32KiB to
+ # 1GiB in multiples of 32.
+ # This configuration option does not have a default value defined.
+
+ # Configuration option allocation/thin_pool_metadata_require_separate_pvs.
+ # Thin pool metadata and data will always use different PVs.
+ thin_pool_metadata_require_separate_pvs = 0
+
+ # Configuration option allocation/thin_pool_zero.
+ # Thin pool data chunks are zeroed before they are first used.
+ # Zeroing with a larger thin pool chunk size reduces performance.
+ # thin_pool_zero = 1
+
+ # Configuration option allocation/thin_pool_discards.
+ # The discards behaviour of thin pool volumes.
+ # Possible options are: ignore, nopassdown, passdown.
+ # thin_pool_discards = "passdown"
+
+ # Configuration option allocation/thin_pool_chunk_size_policy.
+ # The chunk size calculation policy for thin pool volumes.
+ # Possible options are: generic, performance.
+ # generic - If thin_pool_chunk_size is defined, use it.
+ # Otherwise, calculate the chunk size based on estimation and
+ # device hints exposed in sysfs - the minimum_io_size.
+ # The chunk size is always at least 64KiB.
+ # performance - If thin_pool_chunk_size is defined, use it.
+ # Otherwise, calculate the chunk size for performance based on
+ # device hints exposed in sysfs - the optimal_io_size.
+ # The chunk size is always at least 512KiB.
+ # thin_pool_chunk_size_policy = "generic"
+
+ # Configuration option allocation/thin_pool_chunk_size.
+ # The minimal chunk size in KiB for thin pool volumes.
+ # Larger chunk sizes may improve performance for plain
+ # thin volumes, however using them for snapshot volumes
+ # is less efficient, as it consumes more space and takes
+ # extra time for copying. When unset, lvm tries to estimate
+ # chunk size starting from 64KiB. Supported values are in
+ # the range 64KiB to 1GiB.
+ # This configuration option does not have a default value defined.
+
+ # Configuration option allocation/physical_extent_size.
+ # Default physical extent size in KiB to use for new VGs.
+ # physical_extent_size = 4096
}
-# This section that allows you to configure the nature of the
-# information that LVM2 reports.
+# Configuration section log.
+# How LVM log information is reported.
log {
- # Controls the messages sent to stdout or stderr.
- # There are three levels of verbosity, 3 being the most verbose.
- verbose = 0
-
- # Set to 1 to suppress all non-essential messages from stdout.
- # This has the same effect as -qq.
- # When this is set, the following commands still produce output:
- # dumpconfig, lvdisplay, lvmdiskscan, lvs, pvck, pvdisplay,
- # pvs, version, vgcfgrestore -l, vgdisplay, vgs.
- # Non-essential messages are shifted from log level 4 to log level 5
- # for syslog and lvm2_log_fn purposes.
- # Any 'yes' or 'no' questions not overridden by other arguments
- # are suppressed and default to 'no'.
- silent = 0
-
- # Should we send log messages through syslog?
- # 1 is yes; 0 is no.
- syslog = 1
-
- # Should we log error and debug messages to a file?
- # By default there is no log file.
- #file = "/var/log/lvm2.log"
-
- # Should we overwrite the log file each time the program is run?
- # By default we append.
- overwrite = 0
-
- # What level of log messages should we send to the log file and/or syslog?
- # There are 6 syslog-like log levels currently in use - 2 to 7 inclusive.
- # 7 is the most verbose (LOG_DEBUG).
- level = 0
-
- # Format of output messages
- # Whether or not (1 or 0) to indent messages according to their severity
- indent = 1
-
- # Whether or not (1 or 0) to display the command name on each line output
- command_names = 0
-
- # A prefix to use before the message text (but after the command name,
- # if selected). Default is two spaces, so you can see/grep the severity
- # of each message.
- prefix = " "
-
- # To make the messages look similar to the original LVM tools use:
- # indent = 0
- # command_names = 1
- # prefix = " -- "
-
- # Set this if you want log messages during activation.
- # Don't use this in low memory situations (can deadlock).
- # activation = 0
-
- # Some debugging messages are assigned to a class and only appear
- # in debug output if the class is listed here.
- # Classes currently available:
- # memory, devices, activation, allocation, lvmetad, metadata, cache,
- # locking
- # Use "all" to see everything.
- debug_classes = [ "memory", "devices", "activation", "allocation",
- "lvmetad", "metadata", "cache", "locking" ]
+ # Configuration option log/verbose.
+ # Controls the messages sent to stdout or stderr.
+ verbose = 0
+
+ # Configuration option log/silent.
+ # Suppress all non-essential messages from stdout.
+ # This has the same effect as -qq.
+ # When enabled, the following commands still produce output:
+ # dumpconfig, lvdisplay, lvmdiskscan, lvs, pvck, pvdisplay,
+ # pvs, version, vgcfgrestore -l, vgdisplay, vgs.
+ # Non-essential messages are shifted from log level 4 to log level 5
+ # for syslog and lvm2_log_fn purposes.
+ # Any 'yes' or 'no' questions not overridden by other arguments
+ # are suppressed and default to 'no'.
+ silent = 0
+
+ # Configuration option log/syslog.
+ # Send log messages through syslog.
+ syslog = 1
+
+ # Configuration option log/file.
+ # Write error and debug log messages to a file specified here.
+ # This configuration option does not have a default value defined.
+
+ # Configuration option log/overwrite.
+ # Overwrite the log file each time the program is run.
+ overwrite = 0
+
+ # Configuration option log/level.
+ # The level of log messages that are sent to the log file or syslog.
+ # There are 6 syslog-like log levels currently in use: 2 to 7 inclusive.
+ # 7 is the most verbose (LOG_DEBUG).
+ level = 0
+
+ # Configuration option log/indent.
+ # Indent messages according to their severity.
+ indent = 1
+
+ # Configuration option log/command_names.
+ # Display the command name on each line of output.
+ command_names = 0
+
+ # Configuration option log/prefix.
+ # A prefix to use before the log message text.
+ # (After the command name, if selected).
+ # Two spaces allows you to see/grep the severity of each message.
+ # To make the messages look similar to the original LVM tools use:
+ # indent = 0, command_names = 1, prefix = " -- "
+ prefix = " "
+
+ # Configuration option log/activation.
+ # Log messages during activation.
+ # Don't use this in low memory situations (can deadlock).
+ activation = 0
+
+ # Configuration option log/debug_classes.
+ # Select log messages by class.
+ # Some debugging messages are assigned to a class
+ # and only appear in debug output if the class is
+ # listed here. Classes currently available:
+ # memory, devices, activation, allocation,
+ # lvmetad, metadata, cache, locking, lvmpolld.
+ # Use "all" to see everything.
+ debug_classes = [ "memory", "devices", "activation", "allocation", "lvmetad", "metadata", "cache", "locking", "lvmpolld" ]
}
-# Configuration of metadata backups and archiving. In LVM2 when we
-# talk about a 'backup' we mean making a copy of the metadata for the
-# *current* system. The 'archive' contains old metadata configurations.
-# Backups are stored in a human readable text format.
+# Configuration section backup.
+# How LVM metadata is backed up and archived.
+# In LVM, a 'backup' is a copy of the metadata for the
+# current system, and an 'archive' contains old metadata
+# configurations. They are stored in a human readable
+# text format.
backup {
- # Should we maintain a backup of the current metadata configuration ?
- # Use 1 for Yes; 0 for No.
- # Think very hard before turning this off!
- backup = 1
-
- # Where shall we keep it ?
- # Remember to back up this directory regularly!
- backup_dir = "@DEFAULT_SYS_DIR@/@DEFAULT_BACKUP_SUBDIR@"
-
- # Should we maintain an archive of old metadata configurations.
- # Use 1 for Yes; 0 for No.
- # On by default. Think very hard before turning this off.
- archive = 1
-
- # Where should archived files go ?
- # Remember to back up this directory regularly!
- archive_dir = "@DEFAULT_SYS_DIR@/@DEFAULT_ARCHIVE_SUBDIR@"
-
- # What is the minimum number of archive files you wish to keep ?
- retain_min = 10
-
- # What is the minimum time you wish to keep an archive file for ?
- retain_days = 30
+ # Configuration option backup/backup.
+ # Maintain a backup of the current metadata configuration.
+ # Think very hard before turning this off!
+ backup = 1
+
+ # Configuration option backup/backup_dir.
+ # Location of the metadata backup files.
+ # Remember to back up this directory regularly!
+ backup_dir = "@DEFAULT_SYS_DIR@/@DEFAULT_BACKUP_SUBDIR@"
+
+ # Configuration option backup/archive.
+ # Maintain an archive of old metadata configurations.
+ # Think very hard before turning this off.
+ archive = 1
+
+ # Configuration option backup/archive_dir.
+ # Location of the metadata archive files.
+ # Remember to back up this directory regularly!
+ archive_dir = "@DEFAULT_SYS_DIR@/@DEFAULT_ARCHIVE_SUBDIR@"
+
+ # Configuration option backup/retain_min.
+ # Minimum number of archives to keep.
+ retain_min = 10
+
+ # Configuration option backup/retain_days.
+ # Minimum number of days to keep archive files.
+ retain_days = 30
}
-# Settings for the running LVM2 in shell (readline) mode.
+# Configuration section shell.
+# Settings for running LVM in shell (readline) mode.
shell {
- # Number of lines of history to store in ~/.lvm_history
- history_size = 100
+ # Configuration option shell/history_size.
+ # Number of lines of history to store in ~/.lvm_history.
+ history_size = 100
}
-
-# Miscellaneous global LVM2 settings
+# Configuration section global.
+# Miscellaneous global LVM settings.
global {
- # The file creation mask for any files and directories created.
- # Interpreted as octal if the first digit is zero.
- umask = 077
-
- # Allow other users to read the files
- #umask = 022
-
- # Enabling test mode means that no changes to the on disk metadata
- # will be made. Equivalent to having the -t option on every
- # command. Defaults to off.
- test = 0
-
- # Default value for --units argument
- units = "h"
-
- # Since version 2.02.54, the tools distinguish between powers of
- # 1024 bytes (e.g. KiB, MiB, GiB) and powers of 1000 bytes (e.g.
- # KB, MB, GB).
- # If you have scripts that depend on the old behaviour, set this to 0
- # temporarily until you update them.
- si_unit_consistency = 1
-
- # Whether or not to display unit suffix for sizes. This setting has
- # no effect if the units are in human-readable form (global/units="h")
- # in which case the suffix is always displayed.
- suffix = 1
-
- # Whether or not to communicate with the kernel device-mapper.
- # Set to 0 if you want to use the tools to manipulate LVM metadata
- # without activating any logical volumes.
- # If the device-mapper kernel driver is not present in your kernel
- # setting this to 0 should suppress the error messages.
- activation = 1
-
- # If we can't communicate with device-mapper, should we try running
- # the LVM1 tools?
- # This option only applies to 2.4 kernels and is provided to help you
- # switch between device-mapper kernels and LVM1 kernels.
- # The LVM1 tools need to be installed with .lvm1 suffices
- # e.g. vgscan.lvm1 and they will stop working after you start using
- # the new lvm2 on-disk metadata format.
- # The default value is set when the tools are built.
- # fallback_to_lvm1 = 0
-
- # The default metadata format that commands should use - "lvm1" or "lvm2".
- # The command line override is -M1 or -M2.
- # Defaults to "lvm2".
- # format = "lvm2"
-
- # Location of proc filesystem
- proc = "/proc"
-
- # Type of locking to use. Defaults to local file-based locking (1).
- # Turn locking off by setting to 0 (dangerous: risks metadata corruption
- # if LVM2 commands get run concurrently).
- # Type 2 uses the external shared library locking_library.
- # Type 3 uses built-in clustered locking.
- # Type 4 uses read-only locking which forbids any operations that might
- # change metadata.
- # Type 5 offers dummy locking for tools that do not need any locks.
- # You should not need to set this directly: the tools will select when
- # to use it instead of the configured locking_type. Do not use lvmetad or
- # the kernel device-mapper driver with this locking type.
- # It is used by the --readonly option that offers read-only access to
- # Volume Group metadata that cannot be locked safely because it belongs to
- # an inaccessible domain and might be in use, for example a virtual machine
- # image or a disk that is shared by a clustered machine.
- #
- # N.B. Don't use lvmetad with locking type 3 as lvmetad is not yet
- # supported in clustered environment. If use_lvmetad=1 and locking_type=3
- # is set at the same time, LVM always issues a warning message about this
- # and then it automatically disables lvmetad use.
- locking_type = 1
-
- # Set to 0 to fail when a lock request cannot be satisfied immediately.
- wait_for_locks = 1
-
- # If using external locking (type 2) and initialisation fails,
- # with this set to 1 an attempt will be made to use the built-in
- # clustered locking.
- # If you are using a customised locking_library you should set this to 0.
- fallback_to_clustered_locking = 1
-
- # If an attempt to initialise type 2 or type 3 locking failed, perhaps
- # because cluster components such as clvmd are not running, with this set
- # to 1 an attempt will be made to use local file-based locking (type 1).
- # If this succeeds, only commands against local volume groups will proceed.
- # Volume Groups marked as clustered will be ignored.
- fallback_to_local_locking = 1
-
- # Local non-LV directory that holds file-based locks while commands are
- # in progress. A directory like /tmp that may get wiped on reboot is OK.
- locking_dir = "@DEFAULT_LOCK_DIR@"
-
- # Whenever there are competing read-only and read-write access requests for
- # a volume group's metadata, instead of always granting the read-only
- # requests immediately, delay them to allow the read-write requests to be
- # serviced. Without this setting, write access may be stalled by a high
- # volume of read-only requests.
- # NB. This option only affects locking_type = 1 viz. local file-based
- # locking.
- prioritise_write_locks = 1
-
- # Other entries can go here to allow you to load shared libraries
- # e.g. if support for LVM1 metadata was compiled as a shared library use
- # format_libraries = "liblvm2format1.so"
- # Full pathnames can be given.
-
- # Search this directory first for shared libraries.
- # library_dir = "/lib"
-
- # The external locking library to load if locking_type is set to 2.
- # locking_library = "liblvm2clusterlock.so"
-
- # Treat any internal errors as fatal errors, aborting the process that
- # encountered the internal error. Please only enable for debugging.
- abort_on_internal_errors = 0
-
- # Check whether CRC is matching when parsed VG is used multiple times.
- # This is useful to catch unexpected internal cached volume group
- # structure modification. Please only enable for debugging.
- detect_internal_vg_cache_corruption = 0
-
- # If set to 1, no operations that change on-disk metadata will be permitted.
- # Additionally, read-only commands that encounter metadata in need of repair
- # will still be allowed to proceed exactly as if the repair had been
- # performed (except for the unchanged vg_seqno).
- # Inappropriate use could mess up your system, so seek advice first!
- metadata_read_only = 0
-
- # 'mirror_segtype_default' defines which segtype will be used when the
- # shorthand '-m' option is used for mirroring. The possible options are:
- #
- # "mirror" - The original RAID1 implementation provided by LVM2/DM. It is
- # characterized by a flexible log solution (core, disk, mirrored)
- # and by the necessity to block I/O while reconfiguring in the
- # event of a failure.
- #
- # There is an inherent race in the dmeventd failure handling
- # logic with snapshots of devices using this type of RAID1 that
- # in the worst case could cause a deadlock.
- # Ref: https://bugzilla.redhat.com/show_bug.cgi?id=817130#c10
- #
- # "raid1" - This implementation leverages MD's RAID1 personality through
- # device-mapper. It is characterized by a lack of log options.
- # (A log is always allocated for every device and they are placed
- # on the same device as the image - no separate devices are
- # required.) This mirror implementation does not require I/O
- # to be blocked in the kernel in the event of a failure.
- # This mirror implementation is not cluster-aware and cannot be
- # used in a shared (active/active) fashion in a cluster.
- #
- # Specify the '--type <mirror|raid1>' option to override this default
- # setting.
- mirror_segtype_default = "@DEFAULT_MIRROR_SEGTYPE@"
-
- # 'raid10_segtype_default' determines the segment types used by default
- # when the '--stripes/-i' and '--mirrors/-m' arguments are both specified
- # during the creation of a logical volume.
- # Possible settings include:
- #
- # "raid10" - This implementation leverages MD's RAID10 personality through
- # device-mapper.
- #
- # "mirror" - LVM will layer the 'mirror' and 'stripe' segment types. It
- # will do this by creating a mirror on top of striped sub-LVs;
- # effectively creating a RAID 0+1 array. This is suboptimal
- # in terms of providing redundancy and performance. Changing to
- # this setting is not advised.
- # Specify the '--type <raid10|mirror>' option to override this default
- # setting.
- raid10_segtype_default = "@DEFAULT_RAID10_SEGTYPE@"
-
- # 'sparse_segtype_default' defines which segtype will be used when the
- # shorthand '-V and -L' option is used for sparse volume creation.
- #
- # "snapshot" - The original snapshot implementation provided by LVM2/DM.
- # It is using old snashot that mixes data and metadata within
- # a single COW storage volume and has poor performs when
- # the size of stored data passes hundereds of MB.
- #
- # "thin" - Newer implementation leverages thin provisioning target.
- # It has bigger minimal chunk size (64KiB) and uses separate volume
- # for metadata. It has better performance especially in case of
- # bigger data uses. This device type has also full snapshot support.
- #
- # Specify the '--type <snapshot|thin>' option to override this default
- # setting.
- sparse_segtype_default = "@DEFAULT_SPARSE_SEGTYPE@"
-
-
- # The default format for displaying LV names in lvdisplay was changed
- # in version 2.02.89 to show the LV name and path separately.
- # Previously this was always shown as /dev/vgname/lvname even when that
- # was never a valid path in the /dev filesystem.
- # Set to 1 to reinstate the previous format.
- #
- # lvdisplay_shows_full_device_path = 0
-
- # Whether to use (trust) a running instance of lvmetad. If this is set to
- # 0, all commands fall back to the usual scanning mechanisms. When set to 1
- # *and* when lvmetad is running (automatically instantiated by making use of
- # systemd's socket-based service activation or run as an initscripts service
- # or run manually), the volume group metadata and PV state flags are obtained
- # from the lvmetad instance and no scanning is done by the individual
- # commands. In a setup with lvmetad, lvmetad udev rules *must* be set up for
- # LVM to work correctly. Without proper udev rules, all changes in block
- # device configuration will be *ignored* until a manual 'pvscan --cache'
- # is performed. These rules are installed by default.
- #
- # If lvmetad has been running while use_lvmetad was 0, it MUST be stopped
- # before changing use_lvmetad to 1 and started again afterwards.
- #
- # If using lvmetad, volume activation is also switched to automatic
- # event-based mode. In this mode, the volumes are activated based on
- # incoming udev events that automatically inform lvmetad about new PVs that
- # appear in the system. Once a VG is complete (all the PVs are present), it
- # is auto-activated. The activation/auto_activation_volume_list setting
- # controls which volumes are auto-activated (all by default).
-
- # A note about device filtering while lvmetad is used:
-
- # When lvmetad is updated (either automatically based on udev events or
- # directly by a pvscan --cache <device> call), devices/filter is ignored and
- # all devices are scanned by default -- lvmetad always keeps unfiltered
- # information which is then provided to LVM commands and then each LVM
- # command does the filtering based on devices/filter setting itself. This
- # does not apply to non-regexp filters though: component filters such as
- # multipath and MD are checked at pvscan --cache time.
-
- # In order to completely prevent LVM from scanning a device, even when using
- # lvmetad, devices/global_filter must be used.
-
- # N.B. Don't use lvmetad with locking type 3 as lvmetad is not yet
- # supported in clustered environment. If use_lvmetad=1 and locking_type=3
- # is set at the same time, LVM always issues a warning message about this
- # and then it automatically disables use_lvmetad.
-
- use_lvmetad = 0
-
- # Full path of the utility called to check that a thin metadata device
- # is in a state that allows it to be used.
- # Each time a thin pool needs to be activated or after it is deactivated
- # this utility is executed. The activation will only proceed if the utility
- # has an exit status of 0.
- # Set to "" to skip this check. (Not recommended.)
- # The thin tools are available as part of the device-mapper-persistent-data
- # package from https://github.com/jthornber/thin-provisioning-tools.
- #
- # thin_check_executable = "@THIN_CHECK_CMD@"
-
- # Array of string options passed with thin_check command. By default,
- # option "-q" is for quiet output.
- # With thin_check version 2.1 or newer you can add "--ignore-non-fatal-errors"
- # to let it pass through ignorable errors and fix them later.
- # With thin_check version 3.2 or newer you should add
- # "--clear-needs-check-flag".
- #
- # thin_check_options = [ "-q", "--clear-needs-check-flag" ]
-
- # Full path of the utility called to repair a thin metadata device
- # is in a state that allows it to be used.
- # Each time a thin pool needs repair this utility is executed.
- # See thin_check_executable how to obtain binaries.
- #
- # thin_repair_executable = "@THIN_REPAIR_CMD@"
-
- # Array of extra string options passed with thin_repair command.
- # thin_repair_options = [ "" ]
-
- # Full path of the utility called to dump thin metadata content.
- # See thin_check_executable how to obtain binaries.
- #
- # thin_dump_executable = "@THIN_DUMP_CMD@"
-
- # If set, given features are not used by thin driver.
- # This can be helpful not just for testing, but i.e. allows to avoid
- # using problematic implementation of some thin feature.
- # Features:
- # block_size
- # discards
- # discards_non_power_2
- # external_origin
- # metadata_resize
- # external_origin_extend
- #
- # thin_disabled_features = [ "discards", "block_size" ]
-
- # Full path of the utility called to check that a cache metadata device
- # is in a state that allows it to be used.
- # Each time a cached LV needs to be used or after it is deactivated
- # this utility is executed. The activation will only proceed if the utility
- # has an exit status of 0.
- # Set to "" to skip this check. (Not recommended.)
- # The cache tools are available as part of the device-mapper-persistent-data
- # package from https://github.com/jthornber/thin-provisioning-tools.
- #
- # cache_check_executable = "@CACHE_CHECK_CMD@"
-
- # Array of string options passed with cache_check command. By default,
- # option "-q" is for quiet output.
- #
- # cache_check_options = [ "-q" ]
-
- # Full path of the utility called to repair a cache metadata device.
- # Each time a cache metadata needs repair this utility is executed.
- # See cache_check_executable how to obtain binaries.
- #
- # cache_repair_executable = "@CACHE_REPAIR_CMD@"
-
- # Array of extra string options passed with cache_repair command.
- # cache_repair_options = [ "" ]
-
- # Full path of the utility called to dump cache metadata content.
- # See cache_check_executable how to obtain binaries.
- #
- # cache_dump_executable = "@CACHE_DUMP_CMD@"
-}
-activation {
- # Set to 1 to perform internal checks on the operations issued to
- # libdevmapper. Useful for debugging problems with activation.
- # Some of the checks may be expensive, so it's best to use this
- # only when there seems to be a problem.
- checks = 0
-
- # Set to 0 to disable udev synchronisation (if compiled into the binaries).
- # Processes will not wait for notification from udev.
- # They will continue irrespective of any possible udev processing
- # in the background. You should only use this if udev is not running
- # or has rules that ignore the devices LVM2 creates.
- # The command line argument --nodevsync takes precedence over this setting.
- # If set to 1 when udev is not running, and there are LVM2 processes
- # waiting for udev, run 'dmsetup udevcomplete_all' manually to wake them up.
- udev_sync = 1
-
- # Set to 0 to disable the udev rules installed by LVM2 (if built with
- # --enable-udev_rules). LVM2 will then manage the /dev nodes and symlinks
- # for active logical volumes directly itself.
- # N.B. Manual intervention may be required if this setting is changed
- # while any logical volumes are active.
- udev_rules = 1
-
- # Set to 1 for LVM2 to verify operations performed by udev. This turns on
- # additional checks (and if necessary, repairs) on entries in the device
- # directory after udev has completed processing its events.
- # Useful for diagnosing problems with LVM2/udev interactions.
- verify_udev_operations = 0
-
- # If set to 1 and if deactivation of an LV fails, perhaps because
- # a process run from a quick udev rule temporarily opened the device,
- # retry the operation for a few seconds before failing.
- retry_deactivation = 1
-
- # How to fill in missing stripes if activating an incomplete volume.
- # Using "error" will make inaccessible parts of the device return
- # I/O errors on access. You can instead use a device path, in which
- # case, that device will be used to in place of missing stripes.
- # But note that using anything other than "error" with mirrored
- # or snapshotted volumes is likely to result in data corruption.
- missing_stripe_filler = "error"
-
- # The linear target is an optimised version of the striped target
- # that only handles a single stripe. Set this to 0 to disable this
- # optimisation and always use the striped target.
- use_linear_target = 1
-
- # How much stack (in KB) to reserve for use while devices suspended
- # Prior to version 2.02.89 this used to be set to 256KB
- reserved_stack = 64
-
- # How much memory (in KB) to reserve for use while devices suspended
- reserved_memory = 8192
-
- # Nice value used while devices suspended
- process_priority = -18
-
- # If volume_list is defined, each LV is only activated if there is a
- # match against the list.
- #
- # "vgname" and "vgname/lvname" are matched exactly.
- # "@tag" matches any tag set in the LV or VG.
- # "@*" matches if any tag defined on the host is also set in the LV or VG
- #
- # If any host tags exist but volume_list is not defined, a default
- # single-entry list containing "@*" is assumed.
- #
- # volume_list = [ "vg1", "vg2/lvol1", "@tag1", "@*" ]
-
- # If auto_activation_volume_list is defined, each LV that is to be
- # activated with the autoactivation option (--activate ay/-a ay) is
- # first checked against the list. There are two scenarios in which
- # the autoactivation option is used:
- #
- # - automatic activation of volumes based on incoming PVs. If all the
- # PVs making up a VG are present in the system, the autoactivation
- # is triggered. This requires lvmetad (global/use_lvmetad=1) and udev
- # to be running. In this case, "pvscan --cache -aay" is called
- # automatically without any user intervention while processing
- # udev events. Please, make sure you define auto_activation_volume_list
- # properly so only the volumes you want and expect are autoactivated.
- #
- # - direct activation on command line with the autoactivation option.
- # In this case, the user calls "vgchange --activate ay/-a ay" or
- # "lvchange --activate ay/-a ay" directly.
- #
- # By default, the auto_activation_volume_list is not defined and all
- # volumes will be activated either automatically or by using --activate ay/-a ay.
- #
- # N.B. The "activation/volume_list" is still honoured in all cases so even
- # if the VG/LV passes the auto_activation_volume_list, it still needs to
- # pass the volume_list for it to be activated in the end.
-
- # If auto_activation_volume_list is defined but empty, no volumes will be
- # activated automatically and --activate ay/-a ay will do nothing.
- #
- # auto_activation_volume_list = []
-
- # If auto_activation_volume_list is defined and it's not empty, only matching
- # volumes will be activated either automatically or by using --activate ay/-a ay.
- #
- # "vgname" and "vgname/lvname" are matched exactly.
- # "@tag" matches any tag set in the LV or VG.
- # "@*" matches if any tag defined on the host is also set in the LV or VG
- #
- # auto_activation_volume_list = [ "vg1", "vg2/lvol1", "@tag1", "@*" ]
-
- # If read_only_volume_list is defined, each LV that is to be activated
- # is checked against the list, and if it matches, it is activated
- # in read-only mode. (This overrides '--permission rw' stored in the
- # metadata.)
- #
- # "vgname" and "vgname/lvname" are matched exactly.
- # "@tag" matches any tag set in the LV or VG.
- # "@*" matches if any tag defined on the host is also set in the LV or VG
- #
- # read_only_volume_list = [ "vg1", "vg2/lvol1", "@tag1", "@*" ]
-
- # Each LV can have an 'activation skip' flag stored persistently against it.
- # During activation, this flag is used to decide whether such an LV is skipped.
- # The 'activation skip' flag can be set during LV creation and by default it
- # is automatically set for thin snapshot LVs. The 'auto_set_activation_skip'
- # enables or disables this automatic setting of the flag while LVs are created.
- # auto_set_activation_skip = 1
-
- # For RAID or 'mirror' segment types, 'raid_region_size' is the
- # size (in KiB) of each:
- # - synchronization operation when initializing
- # - each copy operation when performing a 'pvmove' (using 'mirror' segtype)
- # This setting has replaced 'mirror_region_size' since version 2.02.99
- raid_region_size = 512
-
- # Setting to use when there is no readahead value stored in the metadata.
- #
- # "none" - Disable readahead.
- # "auto" - Use default value chosen by kernel.
- readahead = "auto"
-
- # 'raid_fault_policy' defines how a device failure in a RAID logical
- # volume is handled. This includes logical volumes that have the following
- # segment types: raid1, raid4, raid5*, and raid6*.
- #
- # In the event of a failure, the following policies will determine what
- # actions are performed during the automated response to failures (when
- # dmeventd is monitoring the RAID logical volume) and when 'lvconvert' is
- # called manually with the options '--repair' and '--use-policies'.
- #
- # "warn" - Use the system log to warn the user that a device in the RAID
- # logical volume has failed. It is left to the user to run
- # 'lvconvert --repair' manually to remove or replace the failed
- # device. As long as the number of failed devices does not
- # exceed the redundancy of the logical volume (1 device for
- # raid4/5, 2 for raid6, etc) the logical volume will remain
- # usable.
- #
- # "allocate" - Attempt to use any extra physical volumes in the volume
- # group as spares and replace faulty devices.
- #
- raid_fault_policy = "warn"
-
- # 'mirror_image_fault_policy' and 'mirror_log_fault_policy' define
- # how a device failure affecting a mirror (of "mirror" segment type) is
- # handled. A mirror is composed of mirror images (copies) and a log.
- # A disk log ensures that a mirror does not need to be re-synced
- # (all copies made the same) every time a machine reboots or crashes.
- #
- # In the event of a failure, the specified policy will be used to determine
- # what happens. This applies to automatic repairs (when the mirror is being
- # monitored by dmeventd) and to manual lvconvert --repair when
- # --use-policies is given.
- #
- # "remove" - Simply remove the faulty device and run without it. If
- # the log device fails, the mirror would convert to using
- # an in-memory log. This means the mirror will not
- # remember its sync status across crashes/reboots and
- # the entire mirror will be re-synced. If a
- # mirror image fails, the mirror will convert to a
- # non-mirrored device if there is only one remaining good
- # copy.
- #
- # "allocate" - Remove the faulty device and try to allocate space on
- # a new device to be a replacement for the failed device.
- # Using this policy for the log is fast and maintains the
- # ability to remember sync state through crashes/reboots.
- # Using this policy for a mirror device is slow, as it
- # requires the mirror to resynchronize the devices, but it
- # will preserve the mirror characteristic of the device.
- # This policy acts like "remove" if no suitable device and
- # space can be allocated for the replacement.
- #
- # "allocate_anywhere" - Not yet implemented. Useful to place the log device
- # temporarily on same physical volume as one of the mirror
- # images. This policy is not recommended for mirror devices
- # since it would break the redundant nature of the mirror. This
- # policy acts like "remove" if no suitable device and space can
- # be allocated for the replacement.
-
- mirror_log_fault_policy = "allocate"
- mirror_image_fault_policy = "remove"
-
- # 'snapshot_autoextend_threshold' and 'snapshot_autoextend_percent' define
- # how to handle automatic snapshot extension. The former defines when the
- # snapshot should be extended: when its space usage exceeds this many
- # percent. The latter defines how much extra space should be allocated for
- # the snapshot, in percent of its current size.
- #
- # For example, if you set snapshot_autoextend_threshold to 70 and
- # snapshot_autoextend_percent to 20, whenever a snapshot exceeds 70% usage,
- # it will be extended by another 20%. For a 1G snapshot, using up 700M will
- # trigger a resize to 1.2G. When the usage exceeds 840M, the snapshot will
- # be extended to 1.44G, and so on.
- #
- # Setting snapshot_autoextend_threshold to 100 disables automatic
- # extensions. The minimum value is 50 (A setting below 50 will be treated
- # as 50).
-
- snapshot_autoextend_threshold = 100
- snapshot_autoextend_percent = 20
-
- # 'thin_pool_autoextend_threshold' and 'thin_pool_autoextend_percent' define
- # how to handle automatic pool extension. The former defines when the
- # pool should be extended: when its space usage exceeds this many
- # percent. The latter defines how much extra space should be allocated for
- # the pool, in percent of its current size.
- #
- # For example, if you set thin_pool_autoextend_threshold to 70 and
- # thin_pool_autoextend_percent to 20, whenever a pool exceeds 70% usage,
- # it will be extended by another 20%. For a 1G pool, using up 700M will
- # trigger a resize to 1.2G. When the usage exceeds 840M, the pool will
- # be extended to 1.44G, and so on.
- #
- # Setting thin_pool_autoextend_threshold to 100 disables automatic
- # extensions. The minimum value is 50 (A setting below 50 will be treated
- # as 50).
-
- thin_pool_autoextend_threshold = 100
- thin_pool_autoextend_percent = 20
-
- # While activating devices, I/O to devices being (re)configured is
- # suspended, and as a precaution against deadlocks, LVM2 needs to pin
- # any memory it is using so it is not paged out. Groups of pages that
- # are known not to be accessed during activation need not be pinned
- # into memory. Each string listed in this setting is compared against
- # each line in /proc/self/maps, and the pages corresponding to any
- # lines that match are not pinned. On some systems locale-archive was
- # found to make up over 80% of the memory used by the process.
- # mlock_filter = [ "locale/locale-archive", "gconv/gconv-modules.cache" ]
-
- # Set to 1 to revert to the default behaviour prior to version 2.02.62
- # which used mlockall() to pin the whole process's memory while activating
- # devices.
- use_mlockall = 0
-
- # Monitoring is enabled by default when activating logical volumes.
- # Set to 0 to disable monitoring or use the --ignoremonitoring option.
- monitoring = 1
-
- # When pvmove or lvconvert must wait for the kernel to finish
- # synchronising or merging data, they check and report progress
- # at intervals of this number of seconds. The default is 15 seconds.
- # If this is set to 0 and there is only one thing to wait for, there
- # are no progress reports, but the process is awoken immediately the
- # operation is complete.
- polling_interval = 15
-
- # 'activation_mode' determines how Logical Volumes are activated if
- # any devices are missing. Possible settings are:
- #
- # "complete" - Only allow activation of an LV if all of the Physical
- # Volumes it uses are present. Other PVs in the Volume
- # Group may be missing.
- #
- # "degraded" - Like "complete", but additionally RAID Logical Volumes of
- # segment type raid1, raid4, raid5, raid6 and raid10 will
- # be activated if there is no data loss, i.e. they have
- # sufficient redundancy to present the entire addressable
- # range of the Logical Volume.
- #
- # "partial" - Allows the activation of any Logical Volume even if
- # a missing or failed PV could cause data loss with a
- # portion of the Logical Volume inaccessible.
- # This setting should not normally be used, but may
- # sometimes assist with data recovery.
- #
- # This setting was introduced in LVM version 2.02.108. It corresponds
- # with the '--activationmode' option for lvchange and vgchange.
- activation_mode = "degraded"
+ # Configuration option global/umask.
+ # The file creation mask for any files and directories created.
+ # Interpreted as octal if the first digit is zero.
+ umask = 077
+
+ # Configuration option global/test.
+ # No on-disk metadata changes will be made in test mode.
+ # Equivalent to having the -t option on every command.
+ test = 0
+
+ # Configuration option global/units.
+ # Default value for --units argument.
+ units = "h"
+
+ # Configuration option global/si_unit_consistency.
+ # Distinguish between powers of 1024 and 1000 bytes.
+ # The LVM commands distinguish between powers of 1024 bytes,
+ # e.g. KiB, MiB, GiB, and powers of 1000 bytes, e.g. KB, MB, GB.
+ # If scripts depend on the old behaviour, disable
+ # this setting temporarily until they are updated.
+ si_unit_consistency = 1
+
+ # Configuration option global/suffix.
+ # Display unit suffix for sizes.
+ # This setting has no effect if the units are in human-readable
+ # form (global/units = "h") in which case the suffix is always
+ # displayed.
+ suffix = 1
+
+ # Configuration option global/activation.
+ # Enable/disable communication with the kernel device-mapper.
+ # Disable to use the tools to manipulate LVM metadata without
+ # activating any logical volumes. If the device-mapper driver
+ # is not present in the kernel, disabling this should suppress
+ # the error messages.
+ activation = 1
+
+ # Configuration option global/fallback_to_lvm1.
+ # Try running LVM1 tools if LVM cannot communicate with DM.
+ # This option only applies to 2.4 kernels and is provided to
+ # help switch between device-mapper kernels and LVM1 kernels.
+ # The LVM1 tools need to be installed with .lvm1 suffices,
+ # e.g. vgscan.lvm1. They will stop working once the lvm2
+ # on-disk metadata format is used.
+ # fallback_to_lvm1 = @DEFAULT_FALLBACK_TO_LVM1@
+
+ # Configuration option global/format.
+ # The default metadata format that commands should use.
+ # "lvm1" or "lvm2".
+ # The command line override is -M1 or -M2.
+ # format = "lvm2"
+
+ # Configuration option global/format_libraries.
+ # Shared libraries that process different metadata formats.
+ # If support for LVM1 metadata was compiled as a shared library use
+ # format_libraries = "liblvm2format1.so"
+ # This configuration option does not have a default value defined.
+
+ # Configuration option global/segment_libraries.
+ # This configuration option does not have a default value defined.
+
+ # Configuration option global/proc.
+ # Location of proc filesystem.
+ # This configuration option is advanced.
+ proc = "/proc"
+
+ # Configuration option global/etc.
+ # Location of /etc system configuration directory.
+ etc = "@CONFDIR@"
+
+ # Configuration option global/locking_type.
+ # Type of locking to use.
+ # Type 0: turns off locking. Warning: this risks metadata
+ # corruption if commands run concurrently.
+ # Type 1: uses local file-based locking, the standard mode.
+ # Type 2: uses the external shared library locking_library.
+ # Type 3: uses built-in clustered locking with clvmd.
+ # This is incompatible with lvmetad. If use_lvmetad is enabled,
+ # lvm prints a warning and disables lvmetad use.
+ # Type 4: uses read-only locking which forbids any operations
+ # that might change metadata.
+ # Type 5: offers dummy locking for tools that do not need any locks.
+ # You should not need to set this directly; the tools will select
+ # when to use it instead of the configured locking_type.
+ # Do not use lvmetad or the kernel device-mapper driver with this
+ # locking type. It is used by the --readonly option that offers
+ # read-only access to Volume Group metadata that cannot be locked
+ # safely because it belongs to an inaccessible domain and might be
+ # in use, for example a virtual machine image or a disk that is
+ # shared by a clustered machine.
+ locking_type = 1
+
+ # Configuration option global/wait_for_locks.
+ # When disabled, fail if a lock request would block.
+ wait_for_locks = 1
+
+ # Configuration option global/fallback_to_clustered_locking.
+ # Attempt to use built-in cluster locking if locking_type 2 fails.
+ # If using external locking (type 2) and initialisation fails,
+ # with this enabled, an attempt will be made to use the built-in
+ # clustered locking.
+ # If you are using a customised locking_library you should disable this.
+ fallback_to_clustered_locking = 1
+
+ # Configuration option global/fallback_to_local_locking.
+ # Use locking_type 1 (local) if locking_type 2 or 3 fail.
+ # If an attempt to initialise type 2 or type 3 locking failed,
+ # perhaps because cluster components such as clvmd are not
+ # running, with this enabled, an attempt will be made to use
+ # local file-based locking (type 1). If this succeeds, only
+ # commands against local volume groups will proceed.
+ # Volume Groups marked as clustered will be ignored.
+ fallback_to_local_locking = 1
+
+ # Configuration option global/locking_dir.
+ # Directory to use for LVM command file locks.
+ # Local non-LV directory that holds file-based locks
+ # while commands are in progress. A directory like
+ # /tmp that may get wiped on reboot is OK.
+ locking_dir = "@DEFAULT_LOCK_DIR@"
+
+ # Configuration option global/prioritise_write_locks.
+ # Allow quicker VG write access during high volume read access.
+ # When there are competing read-only and read-write access
+ # requests for a volume group's metadata, instead of always
+ # granting the read-only requests immediately, delay them to
+ # allow the read-write requests to be serviced. Without this
+ # setting, write access may be stalled by a high volume of
+ # read-only requests.
+ # This option only affects locking_type 1 viz.
+ # local file-based locking.
+ prioritise_write_locks = 1
+
+ # Configuration option global/library_dir.
+ # Search this directory first for shared libraries.
+ # This configuration option does not have a default value defined.
+
+ # Configuration option global/locking_library.
+ # The external locking library to use for locking_type 2.
+ # locking_library = "liblvm2clusterlock.so"
+
+ # Configuration option global/abort_on_internal_errors.
+ # Abort a command that encounters an internal error.
+ # Treat any internal errors as fatal errors, aborting
+ # the process that encountered the internal error.
+ # Please only enable for debugging.
+ abort_on_internal_errors = 0
+
+ # Configuration option global/detect_internal_vg_cache_corruption.
+ # Internal verification of VG structures.
+ # Check if CRC matches when a parsed VG is
+ # used multiple times. This is useful to catch
+ # unexpected changes to cached VG structures.
+ # Please only enable for debugging.
+ detect_internal_vg_cache_corruption = 0
+
+ # Configuration option global/metadata_read_only.
+ # No operations that change on-disk metadata are permitted.
+ # Additionally, read-only commands that encounter metadata
+ # in need of repair will still be allowed to proceed exactly
+ # as if the repair had been performed (except for the unchanged
+ # vg_seqno). Inappropriate use could mess up your system,
+ # so seek advice first!
+ metadata_read_only = 0
+
+ # Configuration option global/mirror_segtype_default.
+ # The segment type used by the short mirroring option -m.
+ # Possible options are: mirror, raid1.
+ # mirror - the original RAID1 implementation from LVM/DM.
+ # It is characterized by a flexible log solution (core,
+ # disk, mirrored), and by the necessity to block I/O while
+ # handling a failure.
+ # There is an inherent race in the dmeventd failure
+ # handling logic with snapshots of devices using this
+ # type of RAID1 that in the worst case could cause a
+ # deadlock. (Also see devices/ignore_lvm_mirrors.)
+ # raid1 - a newer RAID1 implementation using the MD RAID1
+ # personality through device-mapper. It is characterized
+ # by a lack of log options. (A log is always allocated for
+ # every device and they are placed on the same device as the
+ # image - no separate devices are required.) This mirror
+ # implementation does not require I/O to be blocked while
+ # handling a failure. This mirror implementation is not
+ # cluster-aware and cannot be used in a shared (active/active)
+ # fashion in a cluster.
+ # The '--type mirror|raid1' option overrides this setting.
+ mirror_segtype_default = "@DEFAULT_MIRROR_SEGTYPE@"
+
+ # Configuration option global/raid10_segtype_default.
+ # The segment type used by the -i -m combination.
+ # The --stripes/-i and --mirrors/-m options can both
+ # be specified during the creation of a logical volume
+ # to use both striping and mirroring for the LV.
+ # There are two different implementations.
+ # Possible options are: raid10, mirror.
+ # raid10 - LVM uses MD's RAID10 personality through DM.
+ # mirror - LVM layers the 'mirror' and 'stripe' segment types.
+ # The layering is done by creating a mirror LV on top of
+ # striped sub-LVs, effectively creating a RAID 0+1 array.
+ # The layering is suboptimal in terms of providing redundancy
+ # and performance. The 'raid10' option is preferred.
+ # The '--type raid10|mirror' option overrides this setting.
+ raid10_segtype_default = "@DEFAULT_RAID10_SEGTYPE@"
+
+ # Configuration option global/sparse_segtype_default.
+ # The segment type used by the -V -L combination.
+ # The combination of -V and -L options creates a
+ # sparse LV. There are two different implementations.
+ # Possible options are: snapshot, thin.
+ # snapshot - The original snapshot implementation from LVM/DM.
+ # It uses an old snapshot that mixes data and metadata within
+ # a single COW storage volume and performs poorly when the
+ # size of stored data passes hundreds of MB.
+ # thin - A newer implementation that uses thin provisioning.
+ # It has a bigger minimal chunk size (64KiB) and uses a separate
+ # volume for metadata. It has better performance, especially
+ # when more data is used. It also supports full snapshots.
+ # The '--type snapshot|thin' option overrides this setting.
+ sparse_segtype_default = "@DEFAULT_SPARSE_SEGTYPE@"
+
+ # Configuration option global/lvdisplay_shows_full_device_path.
+ # Enable this to reinstate the previous lvdisplay name format.
+ # The default format for displaying LV names in lvdisplay was changed
+ # in version 2.02.89 to show the LV name and path separately.
+ # Previously this was always shown as /dev/vgname/lvname even when that
+ # was never a valid path in the /dev filesystem.
+ # lvdisplay_shows_full_device_path = 0
+
+ # Configuration option global/use_lvmetad.
+ # Use lvmetad to cache metadata and reduce disk scanning.
+ # When enabled (and running), lvmetad provides LVM commands
+ # with VG metadata and PV state. LVM commands then avoid
+ # reading this information from disks which can be slow.
+ # When disabled (or not running), LVM commands fall back to
+ # scanning disks to obtain VG metadata.
+ # lvmetad is kept updated via udev rules which must be set
+ # up for LVM to work correctly. (The udev rules should be
+ # installed by default.) Without a proper udev setup, changes
+ # in the system's block device configuration will be unknown
+ # to LVM, and ignored until a manual 'pvscan --cache' is run.
+ # If lvmetad was running while use_lvmetad was disabled,
+ # it must be stopped, use_lvmetad enabled, and then started.
+ # When using lvmetad, LV activation is switched to an automatic,
+ # event-based mode. In this mode, LVs are activated based on
+ # incoming udev events that inform lvmetad when PVs appear on
+ # the system. When a VG is complete (all PVs present), it is
+ # auto-activated. The auto_activation_volume_list setting
+ # controls which LVs are auto-activated (all by default.)
+ # When lvmetad is updated (automatically by udev events, or
+ # directly by pvscan --cache), devices/filter is ignored and
+ # all devices are scanned by default. lvmetad always keeps
+ # unfiltered information which is provided to LVM commands.
+ # Each LVM command then filters based on devices/filter.
+ # This does not apply to other, non-regexp, filtering settings:
+ # component filters such as multipath and MD are checked
+ # during pvscan --cache.
+ # To filter a device and prevent scanning from the LVM system
+ # entirely, including lvmetad, use devices/global_filter.
+ # lvmetad is not compatible with locking_type 3 (clustering).
+ # LVM prints warnings and ignores lvmetad if this combination
+ # is seen.
+ use_lvmetad = @DEFAULT_USE_LVMETAD@
+
+ # Configuration option global/use_lvmlockd.
+ # Use lvmlockd for locking among hosts using LVM on shared storage.
+ use_lvmlockd = 0
+
+ # Configuration option global/lvmlockd_lock_retries.
+ # Retry lvmlockd lock requests this many times.
+ # lvmlockd_lock_retries = 3
+
+ # Configuration option global/sanlock_lv_extend.
+ # Size in MiB to extend the internal LV holding sanlock locks.
+ # The internal LV holds locks for each LV in the VG, and after
+ # enough LVs have been created, the internal LV needs to be extended.
+ # lvcreate will automatically extend the internal LV when needed by
+ # the amount specified here. Setting this to 0 disables the
+ # automatic extension and can cause lvcreate to fail.
+ # sanlock_lv_extend = 256
+
+ # Configuration option global/thin_check_executable.
+ # The full path to the thin_check command.
+ # LVM uses this command to check that a thin metadata
+ # device is in a usable state.
+ # When a thin pool is activated and after it is deactivated,
+ # this command is run. Activation will only proceed if the
+ # command has an exit status of 0.
+ # Set to "" to skip this check. (Not recommended.)
+ # Also see thin_check_options.
+ # The thin tools are available from the package
+ # device-mapper-persistent-data.
+ # thin_check_executable = "@THIN_CHECK_CMD@"
+
+ # Configuration option global/thin_dump_executable.
+ # The full path to the thin_dump command.
+ # LVM uses this command to dump thin pool metadata.
+ # (For thin tools, see thin_check_executable.)
+ # thin_dump_executable = "@THIN_DUMP_CMD@"
+
+ # Configuration option global/thin_repair_executable.
+ # The full path to the thin_repair command.
+ # LVM uses this command to repair a thin metadata device
+ # if it is in an unusable state.
+ # Also see thin_repair_options.
+ # (For thin tools, see thin_check_executable.)
+ # thin_repair_executable = "@THIN_REPAIR_CMD@"
+
+ # Configuration option global/thin_check_options.
+ # List of options passed to the thin_check command.
+ # With thin_check version 2.1 or newer you can add
+ # --ignore-non-fatal-errors to let it pass through
+ # ignorable errors and fix them later.
+ # With thin_check version 3.2 or newer you should add
+ # --clear-needs-check-flag.
+ # thin_check_options = [ "-q", "--clear-needs-check-flag" ]
+
+ # Configuration option global/thin_repair_options.
+ # List of options passed to the thin_repair command.
+ # thin_repair_options = [ "" ]
+
+ # Configuration option global/thin_disabled_features.
+ # Features to not use in the thin driver.
+ # This can be helpful for testing, or to avoid
+ # using a feature that is causing problems.
+ # Features: block_size, discards, discards_non_power_2,
+ # external_origin, metadata_resize, external_origin_extend,
+ # error_if_no_space.
+ # Example:
+ # thin_disabled_features = [ "discards", "block_size" ]
+ # This configuration option does not have a default value defined.
+
+ # Configuration option global/cache_disabled_features.
+ # Features to not use in the cache driver.
+ # This can be helpful for testing, or to avoid
+ # using a feature that is causing problems.
+ # Features: policy_mq, policy_smq.
+ # Example:
+ # cache_disabled_features = [ "policy_smq" ]
+ # This configuration option does not have a default value defined.
+
+ # Configuration option global/cache_check_executable.
+ # The full path to the cache_check command.
+ # LVM uses this command to check that a cache metadata
+ # device is in a usable state.
+ # When a cached LV is activated and after it is deactivated,
+ # this command is run. Activation will only proceed if the
+ # command has an exit status of 0.
+ # Set to "" to skip this check. (Not recommended.)
+ # Also see cache_check_options.
+ # The cache tools are available from the package
+ # device-mapper-persistent-data.
+ # With cache_check version 5.0 or newer you should add
+ # --clear-needs-check-flag.
+ # cache_check_executable = "@CACHE_CHECK_CMD@"
+
+ # Configuration option global/cache_dump_executable.
+ # The full path to the cache_dump command.
+ # LVM uses this command to dump cache pool metadata.
+ # (For cache tools, see cache_check_executable.)
+ # cache_dump_executable = "@CACHE_DUMP_CMD@"
+
+ # Configuration option global/cache_repair_executable.
+ # The full path to the cache_repair command.
+ # LVM uses this command to repair a cache metadata device
+ # if it is in an unusable state.
+ # Also see cache_repair_options.
+ # (For cache tools, see cache_check_executable.)
+ # cache_repair_executable = "@CACHE_REPAIR_CMD@"
+
+ # Configuration option global/cache_check_options.
+ # List of options passed to the cache_check command.
+ # cache_check_options = [ "-q", "--clear-needs-check-flag" ]
+
+ # Configuration option global/cache_repair_options.
+ # List of options passed to the cache_repair command.
+ # cache_repair_options = [ "" ]
+
+ # Configuration option global/system_id_source.
+ # The method LVM uses to set the local system ID.
+ # Volume Groups can also be given a system ID (by
+ # vgcreate, vgchange, or vgimport.)
+ # A VG on shared storage devices is accessible only
+ # to the host with a matching system ID.
+ # See 'man lvmsystemid' for information on limitations
+ # and correct usage.
+ # Possible options are: none, lvmlocal, uname, machineid, file.
+ # none - The host has no system ID.
+ # lvmlocal - Obtain the system ID from the system_id setting in the
+ # 'local' section of an lvm configuration file, e.g. lvmlocal.conf.
+ # uname - Set the system ID from the hostname (uname) of the system.
+ # System IDs beginning localhost are not permitted.
+ # machineid - Use the contents of the machine-id file to set the
+ # system ID. Some systems create this file at installation time.
+ # See 'man machine-id' and global/etc.
+ # file - Use the contents of another file (system_id_file) to set
+ # the system ID.
+ system_id_source = "none"
+
+ # Configuration option global/system_id_file.
+ # The full path to the file containing a system ID.
+ # This is used when system_id_source is set to 'file'.
+ # Comments starting with the character # are ignored.
+ # This configuration option does not have a default value defined.
+
+ # Configuration option global/use_lvmpolld.
+ # Use lvmpolld to supervise long running LVM commands.
+ # When enabled, control of long running LVM commands is transferred
+ # from the original LVM command to the lvmpolld daemon. This allows
+ # the operation to continue independent of the original LVM command.
+ # After lvmpolld takes over, the LVM command displays the progress
+ # of the ongoing operation. lvmpolld itself runs LVM commands to manage
+ # the progress of ongoing operations. lvmpolld can be used as a native
+ # systemd service, which allows it to be started on demand, and to use
+ # its own control group. When this option is disabled, LVM commands will
+ # supervise long running operations by forking themselves.
+ use_lvmpolld = @DEFAULT_USE_LVMPOLLD@
}
-# Report settings.
-#
-# report {
- # If compact output is enabled, fields which don't have value
- # set for any of the rows reported are skipped on output. Compact
- # output is applicable only if report is buffered (report/buffered=1).
- # compact_output=0
-
- # Align columns on report output.
- # aligned=1
-
- # When buffered reporting is used, the report's content is appended
- # incrementally to include each object being reported until the report
- # is flushed to output which normally happens at the end of command
- # execution. Otherwise, if buffering is not used, each object is
- # reported as soon as its processing is finished.
- # buffered=1
-
- # Show headings for columns on report.
- # headings=1
-
- # A separator to use on report after each field.
- # separator=" "
-
- # A separator to use for list items when reported.
- # list_item_separator=","
-
- # Use a field name prefix for each field reported.
- # prefixes=0
-
- # Quote field values when using field name prefixes.
- # quoted=1
-
- # Output each column as a row. If set, this also implies report/prefixes=1.
- # columns_as_rows=0
-
- # Use binary values "0" or "1" instead of descriptive literal values for
- # columns that have exactly two valid values to report (not counting the
- # "unknown" value which denotes that the value could not be determined).
- #
- # binary_values_as_numeric = 0
-
- # Comma separated list of columns to sort by when reporting 'lvm devtypes' command.
- # See 'lvm devtypes -o help' for the list of possible fields.
- # devtypes_sort="devtype_name"
-
- # Comma separated list of columns to report for 'lvm devtypes' command.
- # See 'lvm devtypes -o help' for the list of possible fields.
- # devtypes_cols="devtype_name,devtype_max_partitions,devtype_description"
-
- # Comma separated list of columns to report for 'lvm devtypes' command in verbose mode.
- # See 'lvm devtypes -o help' for the list of possible fields.
- # devtypes_cols_verbose="devtype_name,devtype_max_partitions,devtype_description"
-
- # Comma separated list of columns to sort by when reporting 'lvs' command.
- # See 'lvs -o help' for the list of possible fields.
- # lvs_sort="vg_name,lv_name"
-
- # Comma separated list of columns to report for 'lvs' command.
- # See 'lvs -o help' for the list of possible fields.
- # lvs_cols="lv_name,vg_name,lv_attr,lv_size,pool_lv,origin,data_percent,metadata_percent,move_pv,mirror_log,copy_percent,convert_lv"
-
- # Comma separated list of columns to report for 'lvs' command in verbose mode.
- # See 'lvs -o help' for the list of possible fields.
- # lvs_cols_verbose="lv_name,vg_name,seg_count,lv_attr,lv_size,lv_major,lv_minor,lv_kernel_major,lv_kernel_minor,pool_lv,origin,data_percent,metadata_percent,move_pv,copy_percent,mirror_log,convert_lv"
-
- # Comma separated list of columns to sort by when reporting 'vgs' command.
- # See 'vgs -o help' for the list of possible fields.
- # vgs_sort="vg_name"
-
- # Comma separated list of columns to report for 'vgs' command.
- # See 'vgs -o help' for the list of possible fields.
- # vgs_cols="vg_name,pv_count,lv_count,snap_count,vg_attr,vg_size,vg_free"
-
- # Comma separated list of columns to report for 'vgs' command in verbose mode.
- # See 'vgs -o help' for the list of possible fields.
- # vgs_cols_verbose="vg_name,vg_attr,vg_extent_size,pv_count,lv_count,snap_count,vg_size,vg_free,vg_uuid,vg_profile"
-
- # Comma separated list of columns to sort by when reporting 'pvs' command.
- # See 'pvs -o help' for the list of possible fields.
- # pvs_sort="pv_name"
-
- # Comma separated list of columns to report for 'pvs' command.
- # See 'pvs -o help' for the list of possible fields.
- # pvs_cols="pv_name,vg_name,pv_fmt,pv_attr,pv_size,pv_free"
-
- # Comma separated list of columns to report for 'pvs' command in verbose mode.
- # See 'pvs -o help' for the list of possible fields.
- # pvs_cols_verbose="pv_name,vg_name,pv_fmt,pv_attr,pv_size,pv_free,dev_size,pv_uuid"
-
- # Comma separated list of columns to sort by when reporting 'lvs --segments' command.
- # See 'lvs --segments -o help' for the list of possible fields.
- # segs_sort="vg_name,lv_name,seg_start"
-
- # Comma separated list of columns to report for 'lvs --segments' command.
- # See 'lvs --segments -o help' for the list of possible fields.
- # segs_cols="lv_name,vg_name,lv_attr,stripes,segtype,seg_size"
-
- # Comma separated list of columns to report for 'lvs --segments' command in verbose mode.
- # See 'lvs --segments -o help' for the list of possible fields.
- # segs_cols_verbose="lv_name,vg_name,lv_attr,seg_start,seg_size,stripes,segtype,stripesize,chunksize"
-
- # Comma separated list of columns to sort by when reporting 'pvs --segments' command.
- # See 'pvs --segments -o help' for the list of possible fields.
- # pvsegs_sort="pv_name,pvseg_start"
+# Configuration section activation.
+activation {
- # Comma separated list of columns to sort by when reporting 'pvs --segments' command.
- # See 'pvs --segments -o help' for the list of possible fields.
- # pvsegs_cols="pv_name,vg_name,pv_fmt,pv_attr,pv_size,pv_free,pvseg_start,pvseg_size"
+ # Configuration option activation/checks.
+ # Perform internal checks of libdevmapper operations.
+ # Useful for debugging problems with activation.
+ # Some of the checks may be expensive, so it's best to use
+ # this only when there seems to be a problem.
+ checks = 0
+
+ # Configuration option activation/udev_sync.
+ # Use udev notifications to synchronize udev and LVM.
+ # When disabled, LVM commands will not wait for notifications
+ # from udev, but continue irrespective of any possible udev
+ # processing in the background. Only use this if udev is not
+ # running or has rules that ignore the devices LVM creates.
+ # If enabled when udev is not running, and LVM processes
+ # are waiting for udev, run 'dmsetup udevcomplete_all' to
+ # wake them up.
+ # The '--noudevsync' option overrides this setting.
+ udev_sync = 1
+
+ # Configuration option activation/udev_rules.
+ # Use udev rules to manage LV device nodes and symlinks.
+ # When disabled, LVM will manage the device nodes and
+ # symlinks for active LVs itself.
+ # Manual intervention may be required if this setting is
+ # changed while LVs are active.
+ udev_rules = 1
+
+ # Configuration option activation/verify_udev_operations.
+ # Use extra checks in LVM to verify udev operations.
+ # This enables additional checks (and if necessary,
+ # repairs) on entries in the device directory after
+ # udev has completed processing its events.
+ # Useful for diagnosing problems with LVM/udev interactions.
+ verify_udev_operations = 0
+
+ # Configuration option activation/retry_deactivation.
+ # Retry failed LV deactivation.
+ # If LV deactivation fails, LVM will retry for a few
+ # seconds before failing. This may happen because a
+ # process run from a quick udev rule temporarily opened
+ # the device.
+ retry_deactivation = 1
+
+ # Configuration option activation/missing_stripe_filler.
+ # Method to fill missing stripes when activating an incomplete LV.
+ # Using 'error' will make inaccessible parts of the device return
+ # I/O errors on access. You can instead use a device path, in which
+ # case, that device will be used in place of missing stripes.
+ # Using anything other than 'error' with mirrored or snapshotted
+ # volumes is likely to result in data corruption.
+ # This configuration option is advanced.
+ missing_stripe_filler = "error"
+
+ # Configuration option activation/use_linear_target.
+ # Use the linear target to optimize single stripe LVs.
+ # When disabled, the striped target is used. The linear
+ # target is an optimised version of the striped target
+ # that only handles a single stripe.
+ use_linear_target = 1
+
+ # Configuration option activation/reserved_stack.
+ # Stack size in KiB to reserve for use while devices are suspended.
+ # Insufficient reserve risks I/O deadlock during device suspension.
+ reserved_stack = 64
+
+ # Configuration option activation/reserved_memory.
+ # Memory size in KiB to reserve for use while devices are suspended.
+ # Insufficient reserve risks I/O deadlock during device suspension.
+ reserved_memory = 8192
+
+ # Configuration option activation/process_priority.
+ # Nice value used while devices are suspended.
+ # Use a high priority so that LVs are suspended
+ # for the shortest possible time.
+ process_priority = -18
+
+ # Configuration option activation/volume_list.
+ # Only LVs selected by this list are activated.
+ # If this list is defined, an LV is only activated
+ # if it matches an entry in this list.
+ # If this list is undefined, it imposes no limits
+ # on LV activation (all are allowed).
+ # Possible options are: vgname, vgname/lvname, @tag, @*
+ # vgname is matched exactly and selects all LVs in the VG.
+ # vgname/lvname is matched exactly and selects the LV.
+ # @tag selects if tag matches a tag set on the LV or VG.
+ # @* selects if a tag defined on the host is also set on
+ # the LV or VG. See tags/hosttags.
+ # If any host tags exist but volume_list is not defined,
+ # a default single-entry list containing '@*' is assumed.
+ # Example:
+ # volume_list = [ "vg1", "vg2/lvol1", "@tag1", "@*" ]
+ # This configuration option does not have a default value defined.
+
+ # Configuration option activation/auto_activation_volume_list.
+ # Only LVs selected by this list are auto-activated.
+ # This list works like volume_list, but it is used
+ # only by auto-activation commands. It does not apply
+ # to direct activation commands.
+ # If this list is defined, an LV is only auto-activated
+ # if it matches an entry in this list.
+ # If this list is undefined, it imposes no limits
+ # on LV auto-activation (all are allowed.)
+ # If this list is defined and empty, i.e. "[]",
+ # then no LVs are selected for auto-activation.
+ # An LV that is selected by this list for
+ # auto-activation, must also be selected by
+ # volume_list (if defined) before it is activated.
+ # Auto-activation is an activation command that
+ # includes the 'a' argument: --activate ay or -a ay,
+ # e.g. vgchange -a ay, or lvchange -a ay vgname/lvname.
+ # The 'a' (auto) argument for auto-activation is
+ # meant to be used by activation commands that are
+ # run automatically by the system, as opposed to
+ # LVM commands run directly by a user. A user may
+ # also use the 'a' flag directly to perform auto-
+ # activation.
+ # An example of a system-generated auto-activation
+ # command is 'pvscan --cache -aay' which is generated
+ # when udev and lvmetad detect a new VG has appeared
+ # on the system, and want LVs in it to be auto-activated.
+ # Possible options are: vgname, vgname/lvname, @tag, @*
+ # See volume_list for how these options are matched to LVs.
+ # This configuration option does not have a default value defined.
+
+ # Configuration option activation/read_only_volume_list.
+ # LVs in this list are activated in read-only mode.
+ # If this list is defined, each LV that is to be activated
+ # is checked against this list, and if it matches, it is
+ # activated in read-only mode.
+ # This overrides the permission setting stored in the
+ # metadata, e.g. from --permission rw.
+ # Possible options are: vgname, vgname/lvname, @tag, @*
+ # See volume_list for how these options are matched to LVs.
+ # This configuration option does not have a default value defined.
+
+ # Configuration option activation/raid_region_size.
+ # Size in KiB of each raid or mirror synchronization region.
+ # For raid or mirror segment types, this is the amount of
+ # data that is copied at once when initializing, or moved
+ # at once by pvmove.
+ raid_region_size = 512
+
+ # Configuration option activation/error_when_full.
+ # Return errors if a thin pool runs out of space.
+ # When enabled, writes to thin LVs immediately return
+ # an error if the thin pool is out of data space.
+ # When disabled, writes to thin LVs are queued if the
+ # thin pool is out of space, and processed when the
+ # thin pool data space is extended.
+ # New thin pools are assigned the behavior defined here.
+ # The '--errorwhenfull y|n' option overrides this setting.
+ # error_when_full = 0
+
+ # Configuration option activation/readahead.
+ # Setting to use when there is no readahead setting in metadata.
+ # Possible options are: none, auto.
+ # none - Disable readahead.
+ # auto - Use default value chosen by kernel.
+ readahead = "auto"
+
+ # Configuration option activation/raid_fault_policy.
+ # Defines how a device failure in a RAID LV is handled.
+ # This includes LVs that have the following segment types:
+ # raid1, raid4, raid5*, and raid6*.
+ # If a device in the LV fails, the policy determines the
+ # steps performed by dmeventd automatically, and the steps
+ # performed by 'lvconvert --repair --use-policies' run manually.
+ # Automatic handling requires dmeventd to be monitoring the LV.
+ # Possible options are: warn, allocate.
+ # warn - Use the system log to warn the user that a device
+ # in the RAID LV has failed. It is left to the user to run
+ # 'lvconvert --repair' manually to remove or replace the failed
+ # device. As long as the number of failed devices does not
+ # exceed the redundancy of the logical volume (1 device for
+ # raid4/5, 2 for raid6, etc) the LV will remain usable.
+ # allocate - Attempt to use any extra physical volumes in the
+ # volume group as spares and replace faulty devices.
+ raid_fault_policy = "warn"
+
+ # Configuration option activation/mirror_image_fault_policy.
+ # Defines how a device failure in a 'mirror' LV is handled.
+ # An LV with the 'mirror' segment type is composed of mirror
+ # images (copies) and a mirror log.
+ # A disk log ensures that a mirror LV does not need to be
+ # re-synced (all copies made the same) every time a machine
+ # reboots or crashes.
+ # If a device in the LV fails, this policy determines the
+ # steps performed by dmeventd automatically, and the steps
+ # performed by 'lvconvert --repair --use-policies' run manually.
+ # Automatic handling requires dmeventd to be monitoring the LV.
+ # Possible options are: remove, allocate, allocate_anywhere.
+ # remove - Simply remove the faulty device and run without it.
+ # If the log device fails, the mirror would convert to using
+ # an in-memory log. This means the mirror will not
+ # remember its sync status across crashes/reboots and
+ # the entire mirror will be re-synced.
+ # If a mirror image fails, the mirror will convert to a
+ # non-mirrored device if there is only one remaining good copy.
+ # allocate - Remove the faulty device and try to allocate space
+ # on a new device to be a replacement for the failed device.
+ # Using this policy for the log is fast and maintains the
+ # ability to remember sync state through crashes/reboots.
+ # Using this policy for a mirror device is slow, as it
+ # requires the mirror to resynchronize the devices, but it
+ # will preserve the mirror characteristic of the device.
+ # This policy acts like 'remove' if no suitable device and
+ # space can be allocated for the replacement.
+ # allocate_anywhere - Not yet implemented. Useful to place
+ # the log device temporarily on the same physical volume as
+ # one of the mirror images. This policy is not recommended
+ # for mirror devices since it would break the redundant nature
+ # of the mirror. This policy acts like 'remove' if no suitable
+ # device and space can be allocated for the replacement.
+ mirror_image_fault_policy = "remove"
+
+ # Configuration option activation/mirror_log_fault_policy.
+ # Defines how a device failure in a 'mirror' log LV is handled.
+ # The mirror_image_fault_policy description for mirrored LVs
+ # also applies to mirrored log LVs.
+ mirror_log_fault_policy = "allocate"
+
+ # Configuration option activation/snapshot_autoextend_threshold.
+ # Auto-extend a snapshot when its usage exceeds this percent.
+ # Setting this to 100 disables automatic extension.
+ # The minimum value is 50 (a smaller value is treated as 50.)
+ # Also see snapshot_autoextend_percent.
+ # Automatic extension requires dmeventd to be monitoring the LV.
+ # Example:
+ # With snapshot_autoextend_threshold 70 and
+ # snapshot_autoextend_percent 20, whenever a snapshot
+ # exceeds 70% usage, it will be extended by another 20%.
+ # For a 1G snapshot, using 700M will trigger a resize to 1.2G.
+ # When the usage exceeds 840M, the snapshot will be extended
+ # to 1.44G, and so on.
+ snapshot_autoextend_threshold = 100
+
+ # Configuration option activation/snapshot_autoextend_percent.
+ # Auto-extending a snapshot adds this percent extra space.
+ # The amount of additional space added to a snapshot is this
+ # percent of its current size.
+ # Also see snapshot_autoextend_threshold.
+ snapshot_autoextend_percent = 20
+
+ # Configuration option activation/thin_pool_autoextend_threshold.
+ # Auto-extend a thin pool when its usage exceeds this percent.
+ # Setting this to 100 disables automatic extension.
+ # The minimum value is 50 (a smaller value is treated as 50.)
+ # Also see thin_pool_autoextend_percent.
+ # Automatic extension requires dmeventd to be monitoring the LV.
+ # Example:
+ # With thin_pool_autoextend_threshold 70 and
+ # thin_pool_autoextend_percent 20, whenever a thin pool
+ # exceeds 70% usage, it will be extended by another 20%.
+ # For a 1G thin pool, using up 700M will trigger a resize to 1.2G.
+ # When the usage exceeds 840M, the thin pool will be extended
+ # to 1.44G, and so on.
+ thin_pool_autoextend_threshold = 100
+
+ # Configuration option activation/thin_pool_autoextend_percent.
+ # Auto-extending a thin pool adds this percent extra space.
+ # The amount of additional space added to a thin pool is this
+ # percent of its current size.
+ thin_pool_autoextend_percent = 20
+
+ # Configuration option activation/mlock_filter.
+ # Do not mlock these memory areas.
+ # While activating devices, I/O to devices being
+ # (re)configured is suspended. As a precaution against
+ # deadlocks, LVM pins memory it is using so it is not
+ # paged out, and will not require I/O to reread.
+ # Groups of pages that are known not to be accessed during
+ # activation do not need to be pinned into memory.
+ # Each string listed in this setting is compared against
+ # each line in /proc/self/maps, and the pages corresponding
+ # to lines that match are not pinned. On some systems,
+ # locale-archive was found to make up over 80% of the memory
+ # used by the process.
+ # Example:
+ # mlock_filter = [ "locale/locale-archive", "gconv/gconv-modules.cache" ]
+ # This configuration option is advanced.
+ # This configuration option does not have a default value defined.
+
+ # Configuration option activation/use_mlockall.
+ # Use the old behavior of mlockall to pin all memory.
+ # Prior to version 2.02.62, LVM used mlockall() to pin
+ # the whole process's memory while activating devices.
+ use_mlockall = 0
+
+ # Configuration option activation/monitoring.
+ # Monitor LVs that are activated.
+ # When enabled, LVM will ask dmeventd to monitor LVs
+ # that are activated.
+ # The '--ignoremonitoring' option overrides this setting.
+ monitoring = 1
+
+ # Configuration option activation/polling_interval.
+ # Check pvmove or lvconvert progress at this interval (seconds).
+ # When pvmove or lvconvert must wait for the kernel to finish
+ # synchronising or merging data, they check and report progress
+ # at intervals of this number of seconds.
+ # If this is set to 0 and there is only one thing to wait for,
+ # there are no progress reports, but the process is awoken
+ # immediately once the operation is complete.
+ polling_interval = 15
+
+ # Configuration option activation/auto_set_activation_skip.
+ # Set the activation skip flag on new thin snapshot LVs.
+ # An LV can have a persistent 'activation skip' flag.
+ # The flag causes the LV to be skipped during normal activation.
+ # The lvchange/vgchange -K option is required to activate LVs
+ # that have the activation skip flag set.
+ # When this setting is enabled, the activation skip flag is
+ # set on new thin snapshot LVs.
+ # The '--setactivationskip y|n' option overrides this setting.
+ # auto_set_activation_skip = 1
+
+ # Configuration option activation/activation_mode.
+ # How LVs with missing devices are activated.
+ # Possible options are: complete, degraded, partial.
+ # complete - Only allow activation of an LV if all of
+ # the Physical Volumes it uses are present. Other PVs
+ # in the Volume Group may be missing.
+ # degraded - Like complete, but additionally RAID LVs of
+ # segment type raid1, raid4, raid5, raid6 and raid10 will
+ # be activated if there is no data loss, i.e. they have
+ # sufficient redundancy to present the entire addressable
+ # range of the Logical Volume.
+ # partial - Allows the activation of any LV even if a
+ # missing or failed PV could cause data loss with a
+ # portion of the Logical Volume inaccessible.
+ # This setting should not normally be used, but may
+ # sometimes assist with data recovery.
+ # The '--activationmode' option overrides this setting.
+ activation_mode = "degraded"
+
+ # Configuration option activation/lock_start_list.
+ # Locking is started only for VGs selected by this list.
+ # The rules are the same as those for LVs in volume_list.
+ # This configuration option does not have a default value defined.
+
+ # Configuration option activation/auto_lock_start_list.
+ # Locking is auto-started only for VGs selected by this list.
+ # The rules are the same as those for LVs in auto_activation_volume_list.
+ # This configuration option does not have a default value defined.
+}
- # Comma separated list of columns to sort by when reporting 'pvs --segments' command in verbose mode.
- # See 'pvs --segments -o help' for the list of possible fields.
- # pvsegs_cols_verbose="pv_name,vg_name,pv_fmt,pv_attr,pv_size,pv_free,pvseg_start,pvseg_size,lv_name,seg_start_pe,segtype,seg_pe_ranges"
-#}
+# Configuration section metadata.
+# metadata {
-####################
-# Advanced section #
-####################
+ # Configuration option metadata/pvmetadatacopies.
+ # Number of copies of metadata to store on each PV.
+ # Possible options are: 0, 1, 2.
+ # If set to 2, two copies of the VG metadata are stored on
+ # the PV, one at the front of the PV, and one at the end.
+ # If set to 1, one copy is stored at the front of the PV.
+ # If set to 0, no copies are stored on the PV. This may
+ # be useful with VGs containing large numbers of PVs.
+ # The '--pvmetadatacopies' option overrides this setting.
+ # This configuration option is advanced.
+ # pvmetadatacopies = 1
+
+ # Configuration option metadata/vgmetadatacopies.
+ # Number of copies of metadata to maintain for each VG.
+ # If set to a non-zero value, LVM automatically chooses which of
+ # the available metadata areas to use to achieve the requested
+ # number of copies of the VG metadata. If you set a value larger
+ # than the total number of metadata areas available, then
+ # metadata is stored in them all.
+ # The value 0 (unmanaged) disables this automatic management
+ # and allows you to control which metadata areas are used at
+ # the individual PV level using 'pvchange --metadataignore y|n'.
+ # The '--vgmetadatacopies' option overrides this setting.
+ # vgmetadatacopies = 0
+
+ # Configuration option metadata/pvmetadatasize.
+ # Approximate number of sectors to use for each metadata copy.
+ # VGs with large numbers of PVs or LVs, or VGs containing
+ # complex LV structures, may need additional space for VG
+ # metadata. The metadata areas are treated as circular buffers,
+ # so unused space becomes filled with an archive of the most
+ # recent previous versions of the metadata.
+ # pvmetadatasize = 255
+
+ # Configuration option metadata/pvmetadataignore.
+ # Ignore metadata areas on a new PV.
+ # If metadata areas on a PV are ignored, LVM will not store
+ # metadata in them.
+ # The '--metadataignore' option overrides this setting.
+ # This configuration option is advanced.
+ # pvmetadataignore = 0
+
+ # Configuration option metadata/stripesize.
+ # This configuration option is advanced.
+ # stripesize = 64
+
+ # Configuration option metadata/dirs.
+ # Directories holding live copies of text format metadata.
+ # These directories must not be on logical volumes!
+ # It's possible to use LVM with a couple of directories here,
+ # preferably on different (non-LV) filesystems, and with no other
+ # on-disk metadata (pvmetadatacopies = 0). Or this can be in
+ # addition to on-disk metadata areas.
+ # The feature was originally added to simplify testing and is not
+ # supported under low memory situations - the machine could lock up.
+ # Never edit any files in these directories by hand unless
+ # you are absolutely sure you know what you are doing! Use
+ # the supplied toolset to make changes (e.g. vgcfgrestore).
+ # Example:
+ # dirs = [ "/etc/lvm/metadata", "/mnt/disk2/lvm/metadata2" ]
+ # This configuration option is advanced.
+ # This configuration option does not have a default value defined.
+# }
+
+# Configuration section report.
+# LVM report command output formatting.
+# report {
-# Metadata settings
-#
-# metadata {
- # Default number of copies of metadata to hold on each PV. 0, 1 or 2.
- # You might want to override it from the command line with 0
- # when running pvcreate on new PVs which are to be added to large VGs.
-
- # pvmetadatacopies = 1
-
- # Default number of copies of metadata to maintain for each VG.
- # If set to a non-zero value, LVM automatically chooses which of
- # the available metadata areas to use to achieve the requested
- # number of copies of the VG metadata. If you set a value larger
- # than the the total number of metadata areas available then
- # metadata is stored in them all.
- # The default value of 0 ("unmanaged") disables this automatic
- # management and allows you to control which metadata areas
- # are used at the individual PV level using 'pvchange
- # --metadataignore y/n'.
-
- # vgmetadatacopies = 0
-
- # Approximate default size of on-disk metadata areas in sectors.
- # You should increase this if you have large volume groups or
- # you want to retain a large on-disk history of your metadata changes.
-
- # pvmetadatasize = 255
-
- # List of directories holding live copies of text format metadata.
- # These directories must not be on logical volumes!
- # It's possible to use LVM2 with a couple of directories here,
- # preferably on different (non-LV) filesystems, and with no other
- # on-disk metadata (pvmetadatacopies = 0). Or this can be in
- # addition to on-disk metadata areas.
- # The feature was originally added to simplify testing and is not
- # supported under low memory situations - the machine could lock up.
- #
- # Never edit any files in these directories by hand unless you
- # you are absolutely sure you know what you are doing! Use
- # the supplied toolset to make changes (e.g. vgcfgrestore).
-
- # dirs = [ "/etc/lvm/metadata", "/mnt/disk2/lvm/metadata2" ]
-#}
-
-# Event daemon
-#
+ # Configuration option report/compact_output.
+ # Do not print empty report fields.
+ # Fields that don't have a value set for any of the rows
+ # reported are skipped and not printed. Compact output is
+ # applicable only if report/buffered is enabled.
+ # compact_output = 0
+
+ # Configuration option report/aligned.
+ # Align columns in report output.
+ # aligned = 1
+
+ # Configuration option report/buffered.
+ # Buffer report output.
+ # When buffered reporting is used, the report's content is appended
+ # incrementally to include each object being reported until the report
+ # is flushed to output which normally happens at the end of command
+ # execution. Otherwise, if buffering is not used, each object is
+ # reported as soon as its processing is finished.
+ # buffered = 1
+
+ # Configuration option report/headings.
+ # Show headings for columns on report.
+ # headings = 1
+
+ # Configuration option report/separator.
+ # A separator to use on report after each field.
+ # separator = " "
+
+ # Configuration option report/list_item_separator.
+ # A separator to use for list items when reported.
+ # list_item_separator = ","
+
+ # Configuration option report/prefixes.
+ # Use a field name prefix for each field reported.
+ # prefixes = 0
+
+ # Configuration option report/quoted.
+ # Quote field values when using field name prefixes.
+ # quoted = 1
+
+ # Configuration option report/colums_as_rows.
+ # Output each column as a row.
+ # If set, this also implies report/prefixes=1.
+ # colums_as_rows = 0
+
+ # Configuration option report/binary_values_as_numeric.
+ # Use binary values 0 or 1 instead of descriptive literal values.
+ # For columns that have exactly two valid values to report
+ # (not counting the 'unknown' value which denotes that the
+ # value could not be determined).
+ # binary_values_as_numeric = 0
+
+ # Configuration option report/time_format.
+ # Set time format for fields reporting time values.
+ # Format specification is a string which may contain special character
+ # sequences and ordinary character sequences. Ordinary character sequences
+ # are copied verbatim. Each special character sequence is introduced by '%'
+ # character and such sequence is then substituted with a value as described below:
+ # %a The abbreviated name of the day of the week according to the
+ # current locale.
+ # %A The full name of the day of the week according to the current locale.
+ # %b The abbreviated month name according to the current locale.
+ # %B The full month name according to the current locale.
+ # %c The preferred date and time representation for the current locale. (alt E)
+ # %C The century number (year/100) as a 2-digit integer. (alt E)
+ # %d The day of the month as a decimal number (range 01 to 31). (alt O)
+ # %D Equivalent to %m/%d/%y. (For Americans only. Americans should
+ # note that in other countries %d/%m/%y is rather common. This means
+ # that in international context this format is ambiguous and should not
+ # be used.)
+ # %e Like %d, the day of the month as a decimal number, but a leading zero
+ # is replaced by a space. (alt O)
+ # %E Modifier: use alternative local-dependent representation if available.
+ # %F Equivalent to %Y-%m-%d (the ISO 8601 date format).
+ # %G The ISO 8601 week-based year with century as a decimal number. The 4-digit
+ # year corresponding to the ISO week number (see %V). This has the same
+ # format and value as %Y, except that if the ISO week number belongs to
+ # the previous or next year, that year is used instead.
+ # %g Like %G, but without century, that is, with a 2-digit year (00-99).
+ # %h Equivalent to %b.
+ # %H The hour as a decimal number using a 24-hour clock (range 00 to 23). (alt O)
+ # %I The hour as a decimal number using a 12-hour clock (range 01 to 12). (alt O)
+ # %j The day of the year as a decimal number (range 001 to 366).
+ # %k The hour (24-hour clock) as a decimal number (range 0 to 23);
+ # single digits are preceded by a blank. (See also %H.)
+ # %l The hour (12-hour clock) as a decimal number (range 1 to 12);
+ # single digits are preceded by a blank. (See also %I.)
+ # %m The month as a decimal number (range 01 to 12). (alt O)
+ # %M The minute as a decimal number (range 00 to 59). (alt O)
+ # %O Modifier: use alternative numeric symbols.
+ # %p Either "AM" or "PM" according to the given time value,
+ # or the corresponding strings for the current locale. Noon is
+ # treated as "PM" and midnight as "AM".
+ # %P Like %p but in lowercase: "am" or "pm" or a corresponding
+ # string for the current locale.
+ # %r The time in a.m. or p.m. notation. In the POSIX locale this is
+ # equivalent to %I:%M:%S %p.
+ # %R The time in 24-hour notation (%H:%M). For a version including
+ # the seconds, see %T below.
+ # %s The number of seconds since the Epoch, 1970-01-01 00:00:00 +0000 (UTC)
+ # %S The second as a decimal number (range 00 to 60).
+ # (The range is up to 60 to allow for occasional leap seconds.) (alt O)
+ # %t A tab character.
+ # %T The time in 24-hour notation (%H:%M:%S).
+ # %u The day of the week as a decimal, range 1 to 7, Monday being 1.
+ # See also %w. (alt O)
+ # %U The week number of the current year as a decimal number,
+ # range 00 to 53, starting with the first Sunday as the first
+ # day of week 01. See also %V and %W. (alt O)
+ # %V The ISO 8601 week number of the current year as a decimal number,
+ # range 01 to 53, where week 1 is the first week that has at least 4 days
+ # in the new year. See also %U and %W. (alt O)
+ # %w The day of the week as a decimal, range 0 to 6, Sunday being 0.
+ # See also %u. (alt O)
+ # %W The week number of the current year as a decimal number, range 00 to 53,
+ # starting with the first Monday as the first day of week 01. (alt O)
+ # %x The preferred date representation for the current locale without the time. (alt E)
+ # %X The preferred time representation for the current locale without the date. (alt E)
+ # %y The year as a decimal number without a century (range 00 to 99). (alt E, alt O)
+ # %Y The year as a decimal number including the century. (alt E)
+ # %z The +hhmm or -hhmm numeric timezone (that is, the hour and minute
+ # offset from UTC).
+ # %Z The timezone name or abbreviation.
+ # %% A literal '%' character.
+ # time_format = "%Y-%m-%d %T %z"
+
+ # Configuration option report/devtypes_sort.
+ # List of columns to sort by when reporting 'lvm devtypes' command.
+ # See 'lvm devtypes -o help' for the list of possible fields.
+ # devtypes_sort = "devtype_name"
+
+ # Configuration option report/devtypes_cols.
+ # List of columns to report for 'lvm devtypes' command.
+ # See 'lvm devtypes -o help' for the list of possible fields.
+ # devtypes_cols = "devtype_name,devtype_max_partitions,devtype_description"
+
+ # Configuration option report/devtypes_cols_verbose.
+ # List of columns to report for 'lvm devtypes' command in verbose mode.
+ # See 'lvm devtypes -o help' for the list of possible fields.
+ # devtypes_cols_verbose = "devtype_name,devtype_max_partitions,devtype_description"
+
+ # Configuration option report/lvs_sort.
+ # List of columns to sort by when reporting 'lvs' command.
+ # See 'lvs -o help' for the list of possible fields.
+ # lvs_sort = "vg_name,lv_name"
+
+ # Configuration option report/lvs_cols.
+ # List of columns to report for 'lvs' command.
+ # See 'lvs -o help' for the list of possible fields.
+ # lvs_cols = "lv_name,vg_name,lv_attr,lv_size,pool_lv,origin,data_percent,metadata_percent,move_pv,mirror_log,copy_percent,convert_lv"
+
+ # Configuration option report/lvs_cols_verbose.
+ # List of columns to report for 'lvs' command in verbose mode.
+ # See 'lvs -o help' for the list of possible fields.
+ # lvs_cols_verbose = "lv_name,vg_name,seg_count,lv_attr,lv_size,lv_major,lv_minor,lv_kernel_major,lv_kernel_minor,pool_lv,origin,data_percent,metadata_percent,move_pv,copy_percent,mirror_log,convert_lv,lv_uuid,lv_profile"
+
+ # Configuration option report/vgs_sort.
+ # List of columns to sort by when reporting 'vgs' command.
+ # See 'vgs -o help' for the list of possible fields.
+ # vgs_sort = "vg_name"
+
+ # Configuration option report/vgs_cols.
+ # List of columns to report for 'vgs' command.
+ # See 'vgs -o help' for the list of possible fields.
+ # vgs_cols = "vg_name,pv_count,lv_count,snap_count,vg_attr,vg_size,vg_free"
+
+ # Configuration option report/vgs_cols_verbose.
+ # List of columns to report for 'vgs' command in verbose mode.
+ # See 'vgs -o help' for the list of possible fields.
+ # vgs_cols_verbose = "vg_name,vg_attr,vg_extent_size,pv_count,lv_count,snap_count,vg_size,vg_free,vg_uuid,vg_profile"
+
+ # Configuration option report/pvs_sort.
+ # List of columns to sort by when reporting 'pvs' command.
+ # See 'pvs -o help' for the list of possible fields.
+ # pvs_sort = "pv_name"
+
+ # Configuration option report/pvs_cols.
+ # List of columns to report for 'pvs' command.
+ # See 'pvs -o help' for the list of possible fields.
+ # pvs_cols = "pv_name,vg_name,pv_fmt,pv_attr,pv_size,pv_free"
+
+ # Configuration option report/pvs_cols_verbose.
+ # List of columns to report for 'pvs' command in verbose mode.
+ # See 'pvs -o help' for the list of possible fields.
+ # pvs_cols_verbose = "pv_name,vg_name,pv_fmt,pv_attr,pv_size,pv_free,dev_size,pv_uuid"
+
+ # Configuration option report/segs_sort.
+ # List of columns to sort by when reporting 'lvs --segments' command.
+ # See 'lvs --segments -o help' for the list of possible fields.
+ # segs_sort = "vg_name,lv_name,seg_start"
+
+ # Configuration option report/segs_cols.
+ # List of columns to report for 'lvs --segments' command.
+ # See 'lvs --segments -o help' for the list of possible fields.
+ # segs_cols = "lv_name,vg_name,lv_attr,stripes,segtype,seg_size"
+
+ # Configuration option report/segs_cols_verbose.
+ # List of columns to report for 'lvs --segments' command in verbose mode.
+ # See 'lvs --segments -o help' for the list of possible fields.
+ # segs_cols_verbose = "lv_name,vg_name,lv_attr,seg_start,seg_size,stripes,segtype,stripesize,chunksize"
+
+ # Configuration option report/pvsegs_sort.
+ # List of columns to sort by when reporting 'pvs --segments' command.
+ # See 'pvs --segments -o help' for the list of possible fields.
+ # pvsegs_sort = "pv_name,pvseg_start"
+
+ # Configuration option report/pvsegs_cols.
+	# List of columns to report for 'pvs --segments' command.
+ # See 'pvs --segments -o help' for the list of possible fields.
+ # pvsegs_cols = "pv_name,vg_name,pv_fmt,pv_attr,pv_size,pv_free,pvseg_start,pvseg_size"
+
+ # Configuration option report/pvsegs_cols_verbose.
+	# List of columns to report for 'pvs --segments' command in verbose mode.
+ # See 'pvs --segments -o help' for the list of possible fields.
+ # pvsegs_cols_verbose = "pv_name,vg_name,pv_fmt,pv_attr,pv_size,pv_free,pvseg_start,pvseg_size,lv_name,seg_start_pe,segtype,seg_pe_ranges"
+# }
+
+# Configuration section dmeventd.
+# Settings for the LVM event daemon.
dmeventd {
- # mirror_library is the library used when monitoring a mirror device.
- #
- # "libdevmapper-event-lvm2mirror.so" attempts to recover from
- # failures. It removes failed devices from a volume group and
- # reconfigures a mirror as necessary. If no mirror library is
- # provided, mirrors are not monitored through dmeventd.
-
- mirror_library = "libdevmapper-event-lvm2mirror.so"
-
- # snapshot_library is the library used when monitoring a snapshot device.
- #
- # "libdevmapper-event-lvm2snapshot.so" monitors the filling of
- # snapshots and emits a warning through syslog when the use of
- # the snapshot exceeds 80%. The warning is repeated when 85%, 90% and
- # 95% of the snapshot is filled.
-
- snapshot_library = "libdevmapper-event-lvm2snapshot.so"
-
- # thin_library is the library used when monitoring a thin device.
- #
- # "libdevmapper-event-lvm2thin.so" monitors the filling of
- # pool and emits a warning through syslog when the use of
- # the pool exceeds 80%. The warning is repeated when 85%, 90% and
- # 95% of the pool is filled.
-
- thin_library = "libdevmapper-event-lvm2thin.so"
-
- # Full path of the dmeventd binary.
- #
- # executable = "@DMEVENTD_PATH@"
+
+ # Configuration option dmeventd/mirror_library.
+ # The library dmeventd uses when monitoring a mirror device.
+ # libdevmapper-event-lvm2mirror.so attempts to recover from
+ # failures. It removes failed devices from a volume group and
+ # reconfigures a mirror as necessary. If no mirror library is
+ # provided, mirrors are not monitored through dmeventd.
+ mirror_library = "libdevmapper-event-lvm2mirror.so"
+
+ # Configuration option dmeventd/raid_library.
+ # raid_library = "libdevmapper-event-lvm2raid.so"
+
+ # Configuration option dmeventd/snapshot_library.
+ # The library dmeventd uses when monitoring a snapshot device.
+ # libdevmapper-event-lvm2snapshot.so monitors the filling of
+ # snapshots and emits a warning through syslog when the usage
+ # exceeds 80%. The warning is repeated when 85%, 90% and
+ # 95% of the snapshot is filled.
+ snapshot_library = "libdevmapper-event-lvm2snapshot.so"
+
+ # Configuration option dmeventd/thin_library.
+ # The library dmeventd uses when monitoring a thin device.
+ # libdevmapper-event-lvm2thin.so monitors the filling of
+ # a pool and emits a warning through syslog when the usage
+ # exceeds 80%. The warning is repeated when 85%, 90% and
+ # 95% of the pool is filled.
+ thin_library = "libdevmapper-event-lvm2thin.so"
+
+ # Configuration option dmeventd/executable.
+ # The full path to the dmeventd binary.
+ # executable = "@DMEVENTD_PATH@"
}
+
+# Configuration section tags.
+# Host tag settings.
+# tags {
+
+ # Configuration option tags/hosttags.
+ # Create a host tag using the machine name.
+ # The machine name is nodename returned by uname(2).
+ # hosttags = 0
+
+ # Configuration section tags/<tag>.
+ # Replace this subsection name with a custom tag name.
+ # Multiple subsections like this can be created.
+ # The '@' prefix for tags is optional.
+ # This subsection can contain host_list, which is a
+ # list of machine names. If the name of the local
+ # machine is found in host_list, then the name of
+ # this subsection is used as a tag and is applied
+ # to the local machine as a 'host tag'.
+ # If this subsection is empty (has no host_list), then
+ # the subsection name is always applied as a 'host tag'.
+ # Example:
+ # The host tag foo is given to all hosts, and the host tag
+ # bar is given to the hosts named machine1 and machine2.
+ # tags { foo { } bar { host_list = [ "machine1", "machine2" ] } }
+	# This configuration section has a variable name.
+ # tag {
+
+ # Configuration option tags/<tag>/host_list.
+ # A list of machine names.
+ # These machine names are compared to the nodename
+ # returned by uname(2). If the local machine name
+ # matches an entry in this list, the name of the
+ # subsection is applied to the machine as a 'host tag'.
+ # This configuration option does not have a default value defined.
+ # }
+# }
diff --git a/conf/lvmlocal.conf.base b/conf/lvmlocal.conf.base
new file mode 100644
index 000000000..e2a9e2fe5
--- /dev/null
+++ b/conf/lvmlocal.conf.base
@@ -0,0 +1,19 @@
+# This is a local configuration file template for the LVM2 system
+# which should be installed as @DEFAULT_SYS_DIR@/lvmlocal.conf .
+#
+# Refer to 'man lvm.conf' for information about the file layout.
+#
+# To put this file in a different directory and override
+# @DEFAULT_SYS_DIR@ set the environment variable LVM_SYSTEM_DIR before
+# running the tools.
+#
+# The lvmlocal.conf file is normally expected to contain only the
+# "local" section which contains settings that should not be shared or
+# repeated among different hosts. (But if other sections are present,
+# they *will* get processed. Settings in this file override equivalent
+# ones in lvm.conf and are in turn overridden by ones in any enabled
+# lvm_<tag>.conf files.)
+#
+# Please take care that each setting only appears once if uncommenting
+# example settings in this file and never copy this file between hosts.
+
diff --git a/conf/lvmlocal.conf.in b/conf/lvmlocal.conf.in
new file mode 100644
index 000000000..c3f6ac158
--- /dev/null
+++ b/conf/lvmlocal.conf.in
@@ -0,0 +1,57 @@
+# This is a local configuration file template for the LVM2 system
+# which should be installed as @DEFAULT_SYS_DIR@/lvmlocal.conf .
+#
+# Refer to 'man lvm.conf' for information about the file layout.
+#
+# To put this file in a different directory and override
+# @DEFAULT_SYS_DIR@ set the environment variable LVM_SYSTEM_DIR before
+# running the tools.
+#
+# The lvmlocal.conf file is normally expected to contain only the
+# "local" section which contains settings that should not be shared or
+# repeated among different hosts. (But if other sections are present,
+# they *will* get processed. Settings in this file override equivalent
+# ones in lvm.conf and are in turn overridden by ones in any enabled
+# lvm_<tag>.conf files.)
+#
+# Please take care that each setting only appears once if uncommenting
+# example settings in this file and never copy this file between hosts.
+
+
+# Configuration section local.
+# LVM settings that are specific to the local host.
+local {
+
+ # Configuration option local/system_id.
+ # Defines the local system ID for lvmlocal mode.
+ # This is used when global/system_id_source is set
+ # to 'lvmlocal' in the main configuration file,
+ # e.g. lvm.conf.
+ # When used, it must be set to a unique value
+ # among all hosts sharing access to the storage,
+ # e.g. a host name.
+ # Example:
+ # Set no system ID.
+ # system_id = ""
+ # Example:
+ # Set the system_id to the string 'host1'.
+ # system_id = "host1"
+ # system_id = ""
+
+ # Configuration option local/extra_system_ids.
+ # A list of extra VG system IDs the local host can access.
+ # VGs with the system IDs listed here (in addition
+ # to the host's own system ID) can be fully accessed
+ # by the local host. (These are system IDs that the
+ # host sees in VGs, not system IDs that identify the
+ # local host, which is determined by system_id_source.)
+ # Use this only after consulting 'man lvmsystemid'
+ # to be certain of correct usage and possible dangers.
+ # This configuration option does not have a default value defined.
+
+ # Configuration option local/host_id.
+ # The lvmlockd sanlock host_id.
+	# This must be unique among all hosts,
+ # and must be between 1 and 2000.
+ # host_id = 0
+}
diff --git a/conf/metadata_profile_template.profile.in b/conf/metadata_profile_template.profile.in
index 81633b237..b08d32c39 100644
--- a/conf/metadata_profile_template.profile.in
+++ b/conf/metadata_profile_template.profile.in
@@ -16,7 +16,7 @@ allocation {
thin_pool_zero=1
thin_pool_discards="passdown"
thin_pool_chunk_size_policy="generic"
-# thin_pool_chunk_size=64
+# thin_pool_chunk_size=128
}
activation {
thin_pool_autoextend_threshold=100
diff --git a/configure b/configure
index 9d1d32f5a..57bec6986 100755
--- a/configure
+++ b/configure
@@ -620,6 +620,7 @@ ac_includes_default="\
#endif"
ac_header_list=
+ac_func_list=
ac_default_prefix=/usr
ac_subst_vars='LTLIBOBJS
usrsbindir
@@ -636,9 +637,12 @@ kerneldir
interface
CMIRRORD_PIDFILE
CLVMD_PIDFILE
+LVMLOCKD_PIDFILE
+LVMPOLLD_PIDFILE
LVMETAD_PIDFILE
DMEVENTD_PIDFILE
WRITE_INSTALL
+VALGRIND_POOL
UDEV_HAS_BUILTIN_BLKID
UDEV_RULE_EXEC_DETECTION
UDEV_SYSTEMD_BACKGROUND_JOBS
@@ -646,6 +650,7 @@ UDEV_SYNC
UDEV_RULES
UDEV_PC
THIN
+TESTSUITE_DATA
TESTING
STATIC_LINK
STATICDIR
@@ -654,11 +659,13 @@ SELINUX_PC
SELINUX_LIBS
REPLICATORS
READLINE_LIBS
+RT_PC
RAID
PYTHON_LIBDIRS
PYTHON_INCDIRS
PYTHON_BINDINGS
PTHREAD_LIBS
+M_LIBS
POOL
PKGCONFIG
OCFDIR
@@ -675,12 +682,11 @@ LVM_LIBAPI
LVM_VERSION
LVM1_FALLBACK
LVM1
-LOCALEDIR
LIB_SUFFIX
LDDEPS
JOBS
-INTL_PACKAGE
INTL
+HAVE_VALGRIND
HAVE_REALTIME
HAVE_LIBDL
BLKDEACTIVATE
@@ -688,15 +694,14 @@ FSADM
ELDFLAGS
DM_LIB_PATCHLEVEL
DM_LIB_VERSION
-DM_IOCTLS
-DM_DEVICE_UID
-DM_DEVICE_MODE
-DM_DEVICE_GID
-DM_COMPAT
DMEVENTD_PATH
DMEVENTD
DL_LIBS
DEVMAPPER
+DEFAULT_USE_LVMLOCKD
+DEFAULT_USE_LVMPOLLD
+DEFAULT_USE_LVMETAD
+DEFAULT_USE_BLKID_WIPING
DEFAULT_SYS_DIR
DEFAULT_SPARSE_SEGTYPE
DEFAULT_RUN_DIR
@@ -704,6 +709,7 @@ DEFAULT_RAID10_SEGTYPE
DEFAULT_PROFILE_SUBDIR
DEFAULT_PID_DIR
DEFAULT_MIRROR_SEGTYPE
+DEFAULT_FALLBACK_TO_LVM1
DEFAULT_LOCK_DIR
DEFAULT_DM_RUN_DIR
DEFAULT_DATA_ALIGNMENT
@@ -722,10 +728,13 @@ CLDWHOLEARCHIVE
CLDNOWHOLEARCHIVE
CLDFLAGS
CACHE
+BUILD_LOCKDDLM
+BUILD_LOCKDSANLOCK
+BUILD_LVMLOCKD
+BUILD_LVMPOLLD
BUILD_LVMETAD
BUILD_DMEVENTD
BUILD_CMIRRORD
-BLKID_WIPING
BLKID_PC
APPLIB
MODPROBE_CMD
@@ -740,7 +749,10 @@ SYSTEMD_LIBS
SYSTEMD_CFLAGS
BLKID_LIBS
BLKID_CFLAGS
-VALGRIND_POOL
+LOCKD_DLM_LIBS
+LOCKD_DLM_CFLAGS
+LOCKD_SANLOCK_LIBS
+LOCKD_SANLOCK_CFLAGS
VALGRIND_LIBS
VALGRIND_CFLAGS
CUNIT_LIBS
@@ -748,6 +760,7 @@ CUNIT_CFLAGS
GENPNG
GENHTML
LCOV
+HAVE_WSYNCNAND
HAVE_WCLOBBERED
HAVE_WJUMP
SACKPT_LIBS
@@ -798,6 +811,9 @@ INSTALL_PROGRAM
EGREP
GREP
CPP
+ac_ct_CXX
+CXXFLAGS
+CXX
OBJEXT
EXEEXT
ac_ct_CC
@@ -876,7 +892,7 @@ with_snapshots
with_mirrors
with_raid
with_default_mirror_segtype
-with_default_raid10r_segtype
+with_default_raid10_segtype
with_replicators
with_default_sparse_segtype
with_thin
@@ -890,6 +906,7 @@ with_cache_check
with_cache_dump
with_cache_repair
with_cache_restore
+enable_cache_check_needs_check
enable_readline
enable_realtime
enable_ocf
@@ -908,7 +925,15 @@ enable_testing
enable_valgrind_pool
enable_devmapper
enable_lvmetad
+enable_lvmpolld
+enable_lockd_sanlock
+enable_lockd_dlm
+enable_use_lvmlockd
+with_lvmlockd_pidfile
+enable_use_lvmetad
with_lvmetad_pidfile
+enable_use_lvmpolld
+with_lvmpolld_pidfile
enable_blkid_wiping
enable_udev_systemd_background_jobs
enable_udev_sync
@@ -956,6 +981,9 @@ CFLAGS
LDFLAGS
LIBS
CPPFLAGS
+CXX
+CXXFLAGS
+CCC
CPP
PKG_CONFIG
PKG_CONFIG_PATH
@@ -984,6 +1012,10 @@ CUNIT_CFLAGS
CUNIT_LIBS
VALGRIND_CFLAGS
VALGRIND_LIBS
+LOCKD_SANLOCK_CFLAGS
+LOCKD_SANLOCK_LIBS
+LOCKD_DLM_CFLAGS
+LOCKD_DLM_LIBS
BLKID_CFLAGS
BLKID_LIBS
SYSTEMD_CFLAGS
@@ -1608,8 +1640,10 @@ Optional Features:
device-mapper is missing from the kernel
--disable-thin_check_needs_check
required if thin_check version is < 0.3.0
+ --disable-cache_check_needs_check
+ required if cache_check version is < 0.5
--disable-readline disable readline support
- --enable-realtime enable realtime clock support
+ --disable-realtime disable realtime clock support
--enable-ocf enable Open Cluster Framework (OCF) compliant
resource agents
--enable-cmirrord enable the cluster mirror log daemon
@@ -1619,6 +1653,12 @@ Optional Features:
--enable-valgrind-pool enable valgrind awareness of pools
--disable-devmapper disable LVM2 device-mapper interaction
--enable-lvmetad enable the LVM Metadata Daemon
+ --enable-lvmpolld enable the LVM Polling Daemon
+ --enable-lockd-sanlock enable the LVM lock daemon using sanlock
+ --enable-lockd-dlm enable the LVM lock daemon using dlm
+ --disable-use-lvmlockd disable usage of LVM lock daemon
+ --disable-use-lvmetad disable usage of LVM Metadata Daemon
+ --disable-use-lvmpolld disable usage of LVM Poll Daemon
--disable-blkid_wiping disable libblkid detection of signatures when wiping
and use native code instead
--disable-udev-systemd-background-jobs
@@ -1690,11 +1730,11 @@ Optional Packages:
--with-ocfdir=DIR install OCF files in
[PREFIX/lib/ocf/resource.d/lvm2]
--with-default-pid-dir=PID_DIR
- Default directory to keep PID files in. [/var/run]
+ Default directory to keep PID files in. [autodetect]
--with-default-dm-run-dir=DM_RUN_DIR
- Default DM run directory. [/var/run]
+ Default DM run directory. [autodetect]
--with-default-run-dir=RUN_DIR
- Default LVM run directory. [/var/run/lvm]
+ Default LVM run directory. [autodetect_run_dir/lvm]
--with-clvmd=TYPE build cluster LVM Daemon
The following cluster manager combinations are valid:
* cman (RHEL5 or equivalent)
@@ -1708,9 +1748,13 @@ Optional Packages:
--with-cmirrord-pidfile=PATH
cmirrord pidfile [PID_DIR/cmirrord.pid]
--with-optimisation=OPT C optimisation flag [OPT=-O2]
+ --with-lvmlockd-pidfile=PATH
+ lvmlockd pidfile [PID_DIR/lvmlockd.pid]
--with-lvmetad-pidfile=PATH
lvmetad pidfile [PID_DIR/lvmetad.pid]
- --with-localedir=DIR translation files in DIR [PREFIX/share/locale]
+ --with-lvmpolld-pidfile=PATH
+ lvmpolld pidfile [PID_DIR/lvmpolld.pid]
+ --with-localedir=DIR locale-dependent data [DATAROOTDIR/locale]
--with-confdir=DIR configuration files in DIR [/etc]
--with-staticdir=DIR static binaries in DIR [EPREFIX/sbin]
--with-usrlibdir=DIR usrlib in DIR [PREFIX/lib]
@@ -1751,6 +1795,8 @@ Some influential environment variables:
LIBS libraries to pass to the linker, e.g. -l<library>
CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I<include dir> if
you have headers in a nonstandard directory <include dir>
+ CXX C++ compiler command
+ CXXFLAGS C++ compiler flags
CPP C preprocessor
PKG_CONFIG path to pkg-config utility
PKG_CONFIG_PATH
@@ -1792,6 +1838,14 @@ Some influential environment variables:
C compiler flags for VALGRIND, overriding pkg-config
VALGRIND_LIBS
linker flags for VALGRIND, overriding pkg-config
+ LOCKD_SANLOCK_CFLAGS
+ C compiler flags for LOCKD_SANLOCK, overriding pkg-config
+ LOCKD_SANLOCK_LIBS
+ linker flags for LOCKD_SANLOCK, overriding pkg-config
+ LOCKD_DLM_CFLAGS
+ C compiler flags for LOCKD_DLM, overriding pkg-config
+ LOCKD_DLM_LIBS
+ linker flags for LOCKD_DLM, overriding pkg-config
BLKID_CFLAGS
C compiler flags for BLKID, overriding pkg-config
BLKID_LIBS linker flags for BLKID, overriding pkg-config
@@ -1920,6 +1974,44 @@ fi
} # ac_fn_c_try_compile
+# ac_fn_cxx_try_compile LINENO
+# ----------------------------
+# Try to compile conftest.$ac_ext, and return whether this succeeded.
+ac_fn_cxx_try_compile ()
+{
+ as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+ rm -f conftest.$ac_objext
+ if { { ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_compile") 2>conftest.err
+ ac_status=$?
+ if test -s conftest.err; then
+ grep -v '^ *+' conftest.err >conftest.er1
+ cat conftest.er1 >&5
+ mv -f conftest.er1 conftest.err
+ fi
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; } && {
+ test -z "$ac_cxx_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then :
+ ac_retval=0
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_retval=1
+fi
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+ as_fn_set_status $ac_retval
+
+} # ac_fn_cxx_try_compile
+
# ac_fn_c_try_cpp LINENO
# ----------------------
# Try to preprocess conftest.$ac_ext, and return whether this succeeded.
@@ -2163,116 +2255,116 @@ $as_echo "$ac_res" >&6; }
} # ac_fn_c_check_header_compile
-# ac_fn_c_check_member LINENO AGGR MEMBER VAR INCLUDES
-# ----------------------------------------------------
-# Tries to find if the field MEMBER exists in type AGGR, after including
-# INCLUDES, setting cache variable VAR accordingly.
-ac_fn_c_check_member ()
+# ac_fn_c_check_type LINENO TYPE VAR INCLUDES
+# -------------------------------------------
+# Tests whether TYPE exists after having included INCLUDES, setting cache
+# variable VAR accordingly.
+ac_fn_c_check_type ()
{
as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2.$3" >&5
-$as_echo_n "checking for $2.$3... " >&6; }
-if eval \${$4+:} false; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if eval \${$3+:} false; then :
$as_echo_n "(cached) " >&6
else
+ eval "$3=no"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
-$5
+$4
int
main ()
{
-static $2 ac_aggr;
-if (ac_aggr.$3)
-return 0;
+if (sizeof ($2))
+ return 0;
;
return 0;
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
- eval "$4=yes"
-else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
-$5
+$4
int
main ()
{
-static $2 ac_aggr;
-if (sizeof ac_aggr.$3)
-return 0;
+if (sizeof (($2)))
+ return 0;
;
return 0;
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
- eval "$4=yes"
+
else
- eval "$4=no"
+ eval "$3=yes"
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
-eval ac_res=\$$4
+eval ac_res=\$$3
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
$as_echo "$ac_res" >&6; }
eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
-} # ac_fn_c_check_member
+} # ac_fn_c_check_type
-# ac_fn_c_check_type LINENO TYPE VAR INCLUDES
-# -------------------------------------------
-# Tests whether TYPE exists after having included INCLUDES, setting cache
-# variable VAR accordingly.
-ac_fn_c_check_type ()
+# ac_fn_c_check_member LINENO AGGR MEMBER VAR INCLUDES
+# ----------------------------------------------------
+# Tries to find if the field MEMBER exists in type AGGR, after including
+# INCLUDES, setting cache variable VAR accordingly.
+ac_fn_c_check_member ()
{
as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
-$as_echo_n "checking for $2... " >&6; }
-if eval \${$3+:} false; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2.$3" >&5
+$as_echo_n "checking for $2.$3... " >&6; }
+if eval \${$4+:} false; then :
$as_echo_n "(cached) " >&6
else
- eval "$3=no"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
-$4
+$5
int
main ()
{
-if (sizeof ($2))
- return 0;
+static $2 ac_aggr;
+if (ac_aggr.$3)
+return 0;
;
return 0;
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
+ eval "$4=yes"
+else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
-$4
+$5
int
main ()
{
-if (sizeof (($2)))
- return 0;
+static $2 ac_aggr;
+if (sizeof ac_aggr.$3)
+return 0;
;
return 0;
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
-
+ eval "$4=yes"
else
- eval "$3=yes"
+ eval "$4=no"
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
-eval ac_res=\$$3
+eval ac_res=\$$4
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
$as_echo "$ac_res" >&6; }
eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
-} # ac_fn_c_check_type
+} # ac_fn_c_check_member
# ac_fn_c_find_intX_t LINENO BITS VAR
# -----------------------------------
@@ -2470,6 +2562,52 @@ $as_echo "$ac_res" >&6; }
eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
} # ac_fn_c_check_func
+
+# ac_fn_c_check_decl LINENO SYMBOL VAR INCLUDES
+# ---------------------------------------------
+# Tests whether SYMBOL is declared in INCLUDES, setting cache variable VAR
+# accordingly.
+ac_fn_c_check_decl ()
+{
+ as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+ as_decl_name=`echo $2|sed 's/ *(.*//'`
+ as_decl_use=`echo $2|sed -e 's/(/((/' -e 's/)/) 0&/' -e 's/,/) 0& (/g'`
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $as_decl_name is declared" >&5
+$as_echo_n "checking whether $as_decl_name is declared... " >&6; }
+if eval \${$3+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+$4
+int
+main ()
+{
+#ifndef $as_decl_name
+#ifdef __cplusplus
+ (void) $as_decl_use;
+#else
+ (void) $as_decl_name;
+#endif
+#endif
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ eval "$3=yes"
+else
+ eval "$3=no"
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+eval ac_res=\$$3
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+
+} # ac_fn_c_check_decl
cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
@@ -2754,8 +2892,10 @@ $as_echo "$as_me: creating cache $cache_file" >&6;}
>$cache_file
fi
-as_fn_append ac_header_list " stdlib.h"
+as_fn_append ac_header_list " sys/time.h"
as_fn_append ac_header_list " unistd.h"
+as_fn_append ac_func_list " alarm"
+as_fn_append ac_header_list " stdlib.h"
as_fn_append ac_header_list " sys/param.h"
# Check that the precious variables saved in the cache have kept the same
# value.
@@ -2972,10 +3112,11 @@ test -n "$target_alias" &&
NONENONEs,x,x, &&
program_prefix=${target_alias}-
+if test -z "$CFLAGS"; then :
+ COPTIMISE_FLAG="-O2"
+fi
case "$host_os" in
linux*)
- CFLAGS="$CFLAGS"
- COPTIMISE_FLAG="-O2"
CLDFLAGS="$CLDFLAGS -Wl,--version-script,.export.sym"
ELDFLAGS="-Wl,--export-dynamic"
# FIXME Generate list and use --dynamic-list=.dlopen.sym
@@ -2985,6 +3126,10 @@ case "$host_os" in
LIB_SUFFIX=so
DEVMAPPER=yes
LVMETAD=no
+ LVMPOLLD=no
+ LVMLOCKD=no
+ LOCKDSANLOCK=no
+ LOCKDDLM=no
ODIRECT=yes
DM_IOCTLS=yes
SELINUX=yes
@@ -2994,7 +3139,6 @@ case "$host_os" in
;;
darwin*)
CFLAGS="$CFLAGS -no-cpp-precomp -fno-common"
- COPTIMISE_FLAG="-O2"
CLDFLAGS="$CLDFLAGS"
ELDFLAGS=
CLDWHOLEARCHIVE="-all_load"
@@ -3122,6 +3266,8 @@ fi
test -n "$AWK" && break
done
+save_CFLAGS=$CFLAGS
+save_CXXFLAGS=$CXXFLAGS
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
@@ -3911,6 +4057,265 @@ ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu
+ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+if test -z "$CXX"; then
+ if test -n "$CCC"; then
+ CXX=$CCC
+ else
+ if test -n "$ac_tool_prefix"; then
+ for ac_prog in g++ c++ gpp aCC CC cxx cc++ cl.exe FCC KCC RCC xlC_r xlC
+ do
+ # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
+set dummy $ac_tool_prefix$ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_CXX+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -n "$CXX"; then
+ ac_cv_prog_CXX="$CXX" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_CXX="$ac_tool_prefix$ac_prog"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+fi
+fi
+CXX=$ac_cv_prog_CXX
+if test -n "$CXX"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CXX" >&5
+$as_echo "$CXX" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+ test -n "$CXX" && break
+ done
+fi
+if test -z "$CXX"; then
+ ac_ct_CXX=$CXX
+ for ac_prog in g++ c++ gpp aCC CC cxx cc++ cl.exe FCC KCC RCC xlC_r xlC
+do
+ # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_CXX+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -n "$ac_ct_CXX"; then
+ ac_cv_prog_ac_ct_CXX="$ac_ct_CXX" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_ac_ct_CXX="$ac_prog"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_CXX=$ac_cv_prog_ac_ct_CXX
+if test -n "$ac_ct_CXX"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CXX" >&5
+$as_echo "$ac_ct_CXX" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+ test -n "$ac_ct_CXX" && break
+done
+
+ if test "x$ac_ct_CXX" = x; then
+ CXX="g++"
+ else
+ case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+ CXX=$ac_ct_CXX
+ fi
+fi
+
+ fi
+fi
+# Provide some information about the compiler.
+$as_echo "$as_me:${as_lineno-$LINENO}: checking for C++ compiler version" >&5
+set X $ac_compile
+ac_compiler=$2
+for ac_option in --version -v -V -qversion; do
+ { { ac_try="$ac_compiler $ac_option >&5"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_compiler $ac_option >&5") 2>conftest.err
+ ac_status=$?
+ if test -s conftest.err; then
+ sed '10a\
+... rest of stderr output deleted ...
+ 10q' conftest.err >conftest.er1
+ cat conftest.er1 >&5
+ fi
+ rm -f conftest.er1 conftest.err
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }
+done
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C++ compiler" >&5
+$as_echo_n "checking whether we are using the GNU C++ compiler... " >&6; }
+if ${ac_cv_cxx_compiler_gnu+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+#ifndef __GNUC__
+ choke me
+#endif
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+ ac_compiler_gnu=yes
+else
+ ac_compiler_gnu=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ac_cv_cxx_compiler_gnu=$ac_compiler_gnu
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_cxx_compiler_gnu" >&5
+$as_echo "$ac_cv_cxx_compiler_gnu" >&6; }
+if test $ac_compiler_gnu = yes; then
+ GXX=yes
+else
+ GXX=
+fi
+ac_test_CXXFLAGS=${CXXFLAGS+set}
+ac_save_CXXFLAGS=$CXXFLAGS
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX accepts -g" >&5
+$as_echo_n "checking whether $CXX accepts -g... " >&6; }
+if ${ac_cv_prog_cxx_g+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_save_cxx_werror_flag=$ac_cxx_werror_flag
+ ac_cxx_werror_flag=yes
+ ac_cv_prog_cxx_g=no
+ CXXFLAGS="-g"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+ ac_cv_prog_cxx_g=yes
+else
+ CXXFLAGS=""
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+
+else
+ ac_cxx_werror_flag=$ac_save_cxx_werror_flag
+ CXXFLAGS="-g"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+ ac_cv_prog_cxx_g=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ ac_cxx_werror_flag=$ac_save_cxx_werror_flag
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cxx_g" >&5
+$as_echo "$ac_cv_prog_cxx_g" >&6; }
+if test "$ac_test_CXXFLAGS" = set; then
+ CXXFLAGS=$ac_save_CXXFLAGS
+elif test $ac_cv_prog_cxx_g = yes; then
+ if test "$GXX" = yes; then
+ CXXFLAGS="-g -O2"
+ else
+ CXXFLAGS="-g"
+ fi
+else
+ if test "$GXX" = yes; then
+ CXXFLAGS="-O2"
+ else
+ CXXFLAGS=
+ fi
+fi
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+CFLAGS=$save_CFLAGS
+CXXFLAGS=$save_CXXFLAGS
ac_ext=c
@@ -5027,6 +5432,99 @@ fi
fi
fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for stdbool.h that conforms to C99" >&5
+$as_echo_n "checking for stdbool.h that conforms to C99... " >&6; }
+if ${ac_cv_header_stdbool_h+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+ #include <stdbool.h>
+ #ifndef bool
+ "error: bool is not defined"
+ #endif
+ #ifndef false
+ "error: false is not defined"
+ #endif
+ #if false
+ "error: false is not 0"
+ #endif
+ #ifndef true
+ "error: true is not defined"
+ #endif
+ #if true != 1
+ "error: true is not 1"
+ #endif
+ #ifndef __bool_true_false_are_defined
+ "error: __bool_true_false_are_defined is not defined"
+ #endif
+
+ struct s { _Bool s: 1; _Bool t; } s;
+
+ char a[true == 1 ? 1 : -1];
+ char b[false == 0 ? 1 : -1];
+ char c[__bool_true_false_are_defined == 1 ? 1 : -1];
+ char d[(bool) 0.5 == true ? 1 : -1];
+ /* See body of main program for 'e'. */
+ char f[(_Bool) 0.0 == false ? 1 : -1];
+ char g[true];
+ char h[sizeof (_Bool)];
+ char i[sizeof s.t];
+ enum { j = false, k = true, l = false * true, m = true * 256 };
+ /* The following fails for
+ HP aC++/ANSI C B3910B A.05.55 [Dec 04 2003]. */
+ _Bool n[m];
+ char o[sizeof n == m * sizeof n[0] ? 1 : -1];
+ char p[-1 - (_Bool) 0 < 0 && -1 - (bool) 0 < 0 ? 1 : -1];
+ /* Catch a bug in an HP-UX C compiler. See
+ http://gcc.gnu.org/ml/gcc-patches/2003-12/msg02303.html
+ http://lists.gnu.org/archive/html/bug-coreutils/2005-11/msg00161.html
+ */
+ _Bool q = true;
+ _Bool *pq = &q;
+
+int
+main ()
+{
+
+ bool e = &s;
+ *pq |= q;
+ *pq |= ! q;
+ /* Refer to every declared value, to avoid compiler optimizations. */
+ return (!a + !b + !c + !d + !e + !f + !g + !h + !i + !!j + !k + !!l
+ + !m + !n + !o + !p + !q + !pq);
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ac_cv_header_stdbool_h=yes
+else
+ ac_cv_header_stdbool_h=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdbool_h" >&5
+$as_echo "$ac_cv_header_stdbool_h" >&6; }
+ ac_fn_c_check_type "$LINENO" "_Bool" "ac_cv_type__Bool" "$ac_includes_default"
+if test "x$ac_cv_type__Bool" = xyes; then :
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE__BOOL 1
+_ACEOF
+
+
+fi
+
+
+if test $ac_cv_header_stdbool_h = yes; then
+
+$as_echo "#define HAVE_STDBOOL_H 1" >>confdefs.h
+
+fi
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5
$as_echo_n "checking for ANSI C header files... " >&6; }
if ${ac_cv_header_stdc+:} false; then :
@@ -5216,9 +5714,12 @@ $as_echo "#define TIME_WITH_SYS_TIME 1" >>confdefs.h
fi
-for ac_header in locale.h stddef.h syslog.h sys/file.h sys/time.h assert.h \
- langinfo.h libgen.h signal.h sys/mman.h sys/resource.h sys/utsname.h \
- sys/wait.h time.h
+for ac_header in assert.h ctype.h dirent.h errno.h fcntl.h float.h \
+ getopt.h inttypes.h langinfo.h libgen.h limits.h locale.h paths.h \
+ signal.h stdarg.h stddef.h stdio.h stdlib.h string.h sys/file.h \
+ sys/ioctl.h syslog.h sys/mman.h sys/param.h sys/resource.h sys/stat.h \
+ sys/time.h sys/types.h sys/utsname.h sys/wait.h time.h \
+ unistd.h
do :
as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default"
@@ -5234,9 +5735,7 @@ fi
done
-case "$host_os" in
- linux*)
- for ac_header in asm/byteorder.h linux/fs.h malloc.h
+for ac_header in termios.h sys/statvfs.h sys/timerfd.h
do :
as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default"
@@ -5245,14 +5744,14 @@ if eval test \"x\$"$as_ac_Header"\" = x"yes"; then :
#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1
_ACEOF
-else
- as_fn_error $? "bailing out" "$LINENO" 5
fi
done
- ;;
- darwin*)
- for ac_header in machine/endian.h sys/disk.h
+
+
+case "$host_os" in
+ linux*)
+ for ac_header in asm/byteorder.h linux/fs.h malloc.h
do :
as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default"
@@ -5267,11 +5766,8 @@ fi
done
;;
-esac
-
-for ac_header in ctype.h dirent.h errno.h fcntl.h getopt.h inttypes.h limits.h \
- stdarg.h stdio.h stdlib.h string.h sys/ioctl.h sys/param.h sys/stat.h \
- sys/types.h unistd.h
+ darwin*)
+ for ac_header in machine/endian.h sys/disk.h
do :
as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default"
@@ -5285,20 +5781,8 @@ else
fi
done
-
-for ac_header in termios.h sys/statvfs.h
-do :
- as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
-ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default"
-if eval test \"x\$"$as_ac_Header"\" = x"yes"; then :
- cat >>confdefs.h <<_ACEOF
-#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1
-_ACEOF
-
-fi
-
-done
-
+ ;;
+esac
################################################################################
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for an ANSI C-conforming const" >&5
@@ -5434,6 +5918,51 @@ _ACEOF
fi
+ac_fn_c_check_type "$LINENO" "ptrdiff_t" "ac_cv_type_ptrdiff_t" "$ac_includes_default"
+if test "x$ac_cv_type_ptrdiff_t" = xyes; then :
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_PTRDIFF_T 1
+_ACEOF
+
+
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether struct tm is in sys/time.h or time.h" >&5
+$as_echo_n "checking whether struct tm is in sys/time.h or time.h... " >&6; }
+if ${ac_cv_struct_tm+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <sys/types.h>
+#include <time.h>
+
+int
+main ()
+{
+struct tm tm;
+ int *p = &tm.tm_sec;
+ return !p;
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ac_cv_struct_tm=time.h
+else
+ ac_cv_struct_tm=sys/time.h
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_struct_tm" >&5
+$as_echo "$ac_cv_struct_tm" >&6; }
+if test $ac_cv_struct_tm = sys/time.h; then
+
+$as_echo "#define TM_IN_SYS_TIME 1" >>confdefs.h
+
+fi
+
ac_fn_c_check_type "$LINENO" "off_t" "ac_cv_type_off_t" "$ac_includes_default"
if test "x$ac_cv_type_off_t" = xyes; then :
@@ -5650,57 +6179,12 @@ _ACEOF
;;
esac
-ac_fn_c_check_member "$LINENO" "struct stat" "st_rdev" "ac_cv_member_struct_stat_st_rdev" "$ac_includes_default"
-if test "x$ac_cv_member_struct_stat_st_rdev" = xyes; then :
-
-cat >>confdefs.h <<_ACEOF
-#define HAVE_STRUCT_STAT_ST_RDEV 1
-_ACEOF
-
-
-fi
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether struct tm is in sys/time.h or time.h" >&5
-$as_echo_n "checking whether struct tm is in sys/time.h or time.h... " >&6; }
-if ${ac_cv_struct_tm+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#include <sys/types.h>
-#include <time.h>
-
-int
-main ()
-{
-struct tm tm;
- int *p = &tm.tm_sec;
- return !p;
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
- ac_cv_struct_tm=time.h
-else
- ac_cv_struct_tm=sys/time.h
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_struct_tm" >&5
-$as_echo "$ac_cv_struct_tm" >&6; }
-if test $ac_cv_struct_tm = sys/time.h; then
-
-$as_echo "#define TM_IN_SYS_TIME 1" >>confdefs.h
-
-fi
-
################################################################################
-for ac_func in ftruncate gethostname getpagesize \
- gettimeofday memset mkdir mkfifo rmdir munmap nl_langinfo setenv setlocale \
- strcasecmp strchr strcspn strspn strdup strncasecmp strerror strrchr \
- strstr strtol strtoul uname
+for ac_func in ftruncate gethostname getpagesize gettimeofday localtime_r \
+ memchr memset mkdir mkfifo munmap nl_langinfo realpath rmdir setenv \
+ setlocale strcasecmp strchr strcspn strdup strerror strncasecmp strndup \
+ strrchr strspn strstr strtol strtoul uname
do :
as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
@@ -6481,6 +6965,250 @@ done
+ for ac_func in $ac_func_list
+do :
+ as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
+ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
+if eval test \"x\$"$as_ac_var"\" = x"yes"; then :
+ cat >>confdefs.h <<_ACEOF
+#define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+done
+
+
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for working mktime" >&5
+$as_echo_n "checking for working mktime... " >&6; }
+if ${ac_cv_func_working_mktime+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test "$cross_compiling" = yes; then :
+ ac_cv_func_working_mktime=no
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+/* Test program from Paul Eggert and Tony Leneis. */
+#ifdef TIME_WITH_SYS_TIME
+# include <sys/time.h>
+# include <time.h>
+#else
+# ifdef HAVE_SYS_TIME_H
+# include <sys/time.h>
+# else
+# include <time.h>
+# endif
+#endif
+
+#include <limits.h>
+#include <stdlib.h>
+
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif
+
+#ifndef HAVE_ALARM
+# define alarm(X) /* empty */
+#endif
+
+/* Work around redefinition to rpl_putenv by other config tests. */
+#undef putenv
+
+static time_t time_t_max;
+static time_t time_t_min;
+
+/* Values we'll use to set the TZ environment variable. */
+static const char *tz_strings[] = {
+ (const char *) 0, "TZ=GMT0", "TZ=JST-9",
+ "TZ=EST+3EDT+2,M10.1.0/00:00:00,M2.3.0/00:00:00"
+};
+#define N_STRINGS (sizeof (tz_strings) / sizeof (tz_strings[0]))
+
+/* Return 0 if mktime fails to convert a date in the spring-forward gap.
+ Based on a problem report from Andreas Jaeger. */
+static int
+spring_forward_gap ()
+{
+ /* glibc (up to about 1998-10-07) failed this test. */
+ struct tm tm;
+
+ /* Use the portable POSIX.1 specification "TZ=PST8PDT,M4.1.0,M10.5.0"
+ instead of "TZ=America/Vancouver" in order to detect the bug even
+ on systems that don't support the Olson extension, or don't have the
+ full zoneinfo tables installed. */
+ putenv ((char*) "TZ=PST8PDT,M4.1.0,M10.5.0");
+
+ tm.tm_year = 98;
+ tm.tm_mon = 3;
+ tm.tm_mday = 5;
+ tm.tm_hour = 2;
+ tm.tm_min = 0;
+ tm.tm_sec = 0;
+ tm.tm_isdst = -1;
+ return mktime (&tm) != (time_t) -1;
+}
+
+static int
+mktime_test1 (time_t now)
+{
+ struct tm *lt;
+ return ! (lt = localtime (&now)) || mktime (lt) == now;
+}
+
+static int
+mktime_test (time_t now)
+{
+ return (mktime_test1 (now)
+ && mktime_test1 ((time_t) (time_t_max - now))
+ && mktime_test1 ((time_t) (time_t_min + now)));
+}
+
+static int
+irix_6_4_bug ()
+{
+ /* Based on code from Ariel Faigon. */
+ struct tm tm;
+ tm.tm_year = 96;
+ tm.tm_mon = 3;
+ tm.tm_mday = 0;
+ tm.tm_hour = 0;
+ tm.tm_min = 0;
+ tm.tm_sec = 0;
+ tm.tm_isdst = -1;
+ mktime (&tm);
+ return tm.tm_mon == 2 && tm.tm_mday == 31;
+}
+
+static int
+bigtime_test (int j)
+{
+ struct tm tm;
+ time_t now;
+ tm.tm_year = tm.tm_mon = tm.tm_mday = tm.tm_hour = tm.tm_min = tm.tm_sec = j;
+ now = mktime (&tm);
+ if (now != (time_t) -1)
+ {
+ struct tm *lt = localtime (&now);
+ if (! (lt
+ && lt->tm_year == tm.tm_year
+ && lt->tm_mon == tm.tm_mon
+ && lt->tm_mday == tm.tm_mday
+ && lt->tm_hour == tm.tm_hour
+ && lt->tm_min == tm.tm_min
+ && lt->tm_sec == tm.tm_sec
+ && lt->tm_yday == tm.tm_yday
+ && lt->tm_wday == tm.tm_wday
+ && ((lt->tm_isdst < 0 ? -1 : 0 < lt->tm_isdst)
+ == (tm.tm_isdst < 0 ? -1 : 0 < tm.tm_isdst))))
+ return 0;
+ }
+ return 1;
+}
+
+static int
+year_2050_test ()
+{
+ /* The correct answer for 2050-02-01 00:00:00 in Pacific time,
+ ignoring leap seconds. */
+ unsigned long int answer = 2527315200UL;
+
+ struct tm tm;
+ time_t t;
+ tm.tm_year = 2050 - 1900;
+ tm.tm_mon = 2 - 1;
+ tm.tm_mday = 1;
+ tm.tm_hour = tm.tm_min = tm.tm_sec = 0;
+ tm.tm_isdst = -1;
+
+ /* Use the portable POSIX.1 specification "TZ=PST8PDT,M4.1.0,M10.5.0"
+ instead of "TZ=America/Vancouver" in order to detect the bug even
+ on systems that don't support the Olson extension, or don't have the
+ full zoneinfo tables installed. */
+ putenv ((char*) "TZ=PST8PDT,M4.1.0,M10.5.0");
+
+ t = mktime (&tm);
+
+ /* Check that the result is either a failure, or close enough
+ to the correct answer that we can assume the discrepancy is
+ due to leap seconds. */
+ return (t == (time_t) -1
+ || (0 < t && answer - 120 <= t && t <= answer + 120));
+}
+
+int
+main ()
+{
+ time_t t, delta;
+ int i, j;
+
+ /* This test makes some buggy mktime implementations loop.
+ Give up after 60 seconds; a mktime slower than that
+ isn't worth using anyway. */
+ alarm (60);
+
+ for (;;)
+ {
+ t = (time_t_max << 1) + 1;
+ if (t <= time_t_max)
+ break;
+ time_t_max = t;
+ }
+ time_t_min = - ((time_t) ~ (time_t) 0 == (time_t) -1) - time_t_max;
+
+ delta = time_t_max / 997; /* a suitable prime number */
+ for (i = 0; i < N_STRINGS; i++)
+ {
+ if (tz_strings[i])
+ putenv ((char*) tz_strings[i]);
+
+ for (t = 0; t <= time_t_max - delta; t += delta)
+ if (! mktime_test (t))
+ return 1;
+ if (! (mktime_test ((time_t) 1)
+ && mktime_test ((time_t) (60 * 60))
+ && mktime_test ((time_t) (60 * 60 * 24))))
+ return 1;
+
+ for (j = 1; ; j <<= 1)
+ if (! bigtime_test (j))
+ return 1;
+ else if (INT_MAX / 2 < j)
+ break;
+ if (! bigtime_test (INT_MAX))
+ return 1;
+ }
+ return ! (irix_6_4_bug () && spring_forward_gap () && year_2050_test ());
+}
+_ACEOF
+if ac_fn_c_try_run "$LINENO"; then :
+ ac_cv_func_working_mktime=yes
+else
+ ac_cv_func_working_mktime=no
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
+ conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_func_working_mktime" >&5
+$as_echo "$ac_cv_func_working_mktime" >&6; }
+if test $ac_cv_func_working_mktime = no; then
+ case " $LIBOBJS " in
+ *" mktime.$ac_objext "* ) ;;
+ *) LIBOBJS="$LIBOBJS mktime.$ac_objext"
+ ;;
+esac
+
+fi
+
+
+
+
+
+
for ac_func in getpagesize
do :
ac_fn_c_check_func "$LINENO" "getpagesize" "ac_cv_func_getpagesize"
@@ -6998,6 +7726,9 @@ $as_echo "$ac_cv_flag_HAVE_FULL_RELRO" >&6; }
################################################################################
+if test "$prefix" = NONE; then
+ datarootdir=${ac_default_prefix}/share
+fi
################################################################################
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking file owner" >&5
@@ -7040,6 +7771,11 @@ fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $DM_DEVICE_UID" >&5
$as_echo "$DM_DEVICE_UID" >&6; }
+cat >>confdefs.h <<_ACEOF
+#define DM_DEVICE_UID $DM_DEVICE_UID
+_ACEOF
+
+
################################################################################
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking device node gid" >&5
$as_echo_n "checking device node gid... " >&6; }
@@ -7055,6 +7791,11 @@ fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $DM_DEVICE_GID" >&5
$as_echo "$DM_DEVICE_GID" >&6; }
+cat >>confdefs.h <<_ACEOF
+#define DM_DEVICE_GID $DM_DEVICE_GID
+_ACEOF
+
+
################################################################################
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking device node mode" >&5
$as_echo_n "checking device node mode... " >&6; }
@@ -7070,6 +7811,11 @@ fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $DM_DEVICE_MODE" >&5
$as_echo "$DM_DEVICE_MODE" >&6; }
+cat >>confdefs.h <<_ACEOF
+#define DM_DEVICE_MODE $DM_DEVICE_MODE
+_ACEOF
+
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking when to create device nodes" >&5
$as_echo_n "checking when to create device nodes... " >&6; }
@@ -7131,11 +7877,19 @@ fi
$as_echo "$LVM1_FALLBACK" >&6; }
if test "$LVM1_FALLBACK" = yes; then
+ DEFAULT_FALLBACK_TO_LVM1=1
$as_echo "#define LVM1_FALLBACK 1" >>confdefs.h
+else
+ DEFAULT_FALLBACK_TO_LVM1=0
fi
+cat >>confdefs.h <<_ACEOF
+#define DEFAULT_FALLBACK_TO_LVM1 $DEFAULT_FALLBACK_TO_LVM1
+_ACEOF
+
+
################################################################################
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to include support for lvm1 metadata" >&5
$as_echo_n "checking whether to include support for lvm1 metadata... " >&6; }
@@ -7268,9 +8022,9 @@ else
fi
-# Check whether --with-default-raid10r-segtype was given.
-if test "${with_default_raid10r_segtype+set}" = set; then :
- withval=$with_default_raid10r_segtype; DEFAULT_RAID10_SEGTYPE=$withval
+# Check whether --with-default-raid10-segtype was given.
+if test "${with_default_raid10_segtype+set}" = set; then :
+ withval=$with_default_raid10_segtype; DEFAULT_RAID10_SEGTYPE=$withval
else
DEFAULT_RAID10_SEGTYPE="raid10"
fi
@@ -7940,6 +8694,14 @@ $as_echo "#define CACHE_INTERNAL 1" >>confdefs.h
*) as_fn_error $? "--with-cache parameter invalid" "$LINENO" 5 ;;
esac
+# Check whether --enable-cache_check_needs_check was given.
+if test "${enable_cache_check_needs_check+set}" = set; then :
+ enableval=$enable_cache_check_needs_check; CACHE_CHECK_NEEDS_CHECK=$enableval
+else
+ CACHE_CHECK_NEEDS_CHECK=yes
+fi
+
+
# Test if necessary cache tools are available
# if not - use plain defaults and warn user
case "$CACHE" in
@@ -8051,6 +8813,30 @@ $as_echo "$as_me: WARNING: cache_check not found in path $PATH" >&2;}
CACHE_CONFIGURE_WARN=y
fi
fi
+ if test "$CACHE_CHECK_NEEDS_CHECK" = yes; then
+ $CACHE_CHECK_CMD -V 2>/dev/null >conftest.tmp
+ read -r CACHE_CHECK_VSN < conftest.tmp
+ IFS=. read -r CACHE_CHECK_VSN_MAJOR CACHE_CHECK_VSN_MINOR CACHE_CHECK_VSN_PATCH < conftest.tmp
+ rm -f conftest.tmp
+
+ # Require version >= 0.5.4 for --clear-needs-check-flag
+ if test -z "$CACHE_CHECK_VSN_MAJOR" \
+ || test -z "$CACHE_CHECK_VSN_MINOR" \
+ || test -z "$CACHE_CHECK_VSN_PATCH"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $CACHE_CHECK_CMD: Bad version \"$CACHE_CHECK_VSN\" found" >&5
+$as_echo "$as_me: WARNING: $CACHE_CHECK_CMD: Bad version \"$CACHE_CHECK_VSN\" found" >&2;}
+ CACHE_CHECK_VERSION_WARN=y
+ CACHE_CHECK_NEEDS_CHECK=no
+ elif test "$CACHE_CHECK_VSN_MAJOR" -eq 0 ; then
+ if test "$CACHE_CHECK_VSN_MINOR" -lt 5 \
+ || test "$CACHE_CHECK_VSN_MINOR" -eq 5 -a "$CACHE_CHECK_VSN_PATCH" -lt 4; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $CACHE_CHECK_CMD: Old version \"$CACHE_CHECK_VSN\" found" >&5
+$as_echo "$as_me: WARNING: $CACHE_CHECK_CMD: Old version \"$CACHE_CHECK_VSN\" found" >&2;}
+ CACHE_CHECK_VERSION_WARN=y
+ CACHE_CHECK_NEEDS_CHECK=no
+ fi
+ fi
+ fi
# Empty means a config way to ignore cache dumping
if test "$CACHE_DUMP_CMD" = "autodetect"; then
if test -n "$ac_tool_prefix"; then
@@ -8372,6 +9158,16 @@ $as_echo "$as_me: WARNING: cache_restore not found in path $PATH" >&2;}
CACHE_CONFIGURE_WARN=y
}
fi
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether cache_check supports the needs-check flag" >&5
+$as_echo_n "checking whether cache_check supports the needs-check flag... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CACHE_CHECK_NEEDS_CHECK" >&5
+$as_echo "$CACHE_CHECK_NEEDS_CHECK" >&6; }
+ if test "$CACHE_CHECK_NEEDS_CHECK" = yes; then
+
+$as_echo "#define CACHE_CHECK_NEEDS_CHECK 1" >>confdefs.h
+
+ fi
;;
esac
@@ -8419,6 +9215,8 @@ $as_echo_n "checking whether to enable realtime support... " >&6; }
# Check whether --enable-realtime was given.
if test "${enable_realtime+set}" = set; then :
enableval=$enable_realtime; REALTIME=$enableval
+else
+ REALTIME=yes
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $REALTIME" >&5
@@ -8646,13 +9444,19 @@ fi
}
################################################################################
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for default run directory" >&5
+$as_echo_n "checking for default run directory... " >&6; }
+RUN_DIR="/run"
+test -d "/run" || RUN_DIR="/var/run"
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $RUN_DIR" >&5
+$as_echo "$RUN_DIR" >&6; }
# Check whether --with-default-pid-dir was given.
if test "${with_default_pid_dir+set}" = set; then :
withval=$with_default_pid_dir; DEFAULT_PID_DIR="$withval"
else
- DEFAULT_PID_DIR="/var/run"
+ DEFAULT_PID_DIR=$RUN_DIR
fi
@@ -8667,7 +9471,7 @@ _ACEOF
if test "${with_default_dm_run_dir+set}" = set; then :
withval=$with_default_dm_run_dir; DEFAULT_DM_RUN_DIR="$withval"
else
- DEFAULT_DM_RUN_DIR="/var/run"
+ DEFAULT_DM_RUN_DIR=$RUN_DIR
fi
@@ -8682,7 +9486,7 @@ _ACEOF
if test "${with_default_run_dir+set}" = set; then :
withval=$with_default_run_dir; DEFAULT_RUN_DIR="$withval"
else
- DEFAULT_RUN_DIR="/var/run/lvm"
+ DEFAULT_RUN_DIR="$RUN_DIR/lvm"
fi
@@ -9910,6 +10714,44 @@ $as_echo "$ac_cv_flag_HAVE_WCLOBBERED" >&6; }
+
+ ac_save_CFLAGS=$CFLAGS
+ CFLAGS=-Wsync-nand
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -Wsync-nand flag" >&5
+$as_echo_n "checking whether $CC accepts -Wsync-nand flag... " >&6; }
+if ${ac_cv_flag_HAVE_WSYNCNAND+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ac_cv_flag_HAVE_WSYNCNAND=yes
+else
+ ac_cv_flag_HAVE_WSYNCNAND=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_flag_HAVE_WSYNCNAND" >&5
+$as_echo "$ac_cv_flag_HAVE_WSYNCNAND" >&6; }
+ CFLAGS=$ac_save_CFLAGS
+ HAVE_WSYNCNAND=$ac_cv_flag_HAVE_WSYNCNAND
+ if test "HAVE_WSYNCNAND" = yes; then
+ :
+ else
+ :
+ fi
+
+
+
################################################################################
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C optimisation flag" >&5
$as_echo_n "checking for C optimisation flag... " >&6; }
@@ -10358,6 +11200,16 @@ fi
fi
################################################################################
+TESTSUITE_DATA='${datarootdir}/lvm2-testsuite'
+# double eval needed ${datarootdir} -> ${prefix}/share -> real path
+
+cat >>confdefs.h <<_ACEOF
+#define TESTSUITE_DATA "$(eval echo $(eval echo $TESTSUITE_DATA))"
+_ACEOF
+
+
+
+################################################################################
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to enable valgrind awareness of pools" >&5
$as_echo_n "checking whether to enable valgrind awareness of pools... " >&6; }
# Check whether --enable-valgrind_pool was given.
@@ -10370,8 +11222,7 @@ fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $VALGRIND_POOL" >&5
$as_echo "$VALGRIND_POOL" >&6; }
-if test "$VALGRIND_POOL" = yes; then
- pkg_config_init
+pkg_config_init
pkg_failed=no
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for VALGRIND" >&5
@@ -10431,22 +11282,29 @@ fi
# Put the nasty error message in config.log where it belongs
echo "$VALGRIND_PKG_ERRORS" >&5
- as_fn_error $? "bailing out" "$LINENO" 5
+ if test x$VALGRIND_POOL = xyes; then as_fn_error $? "bailing out" "$LINENO" 5; fi
elif test $pkg_failed = untried; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
- as_fn_error $? "bailing out" "$LINENO" 5
+ if test x$VALGRIND_POOL = xyes; then as_fn_error $? "bailing out" "$LINENO" 5; fi
else
VALGRIND_CFLAGS=$pkg_cv_VALGRIND_CFLAGS
VALGRIND_LIBS=$pkg_cv_VALGRIND_LIBS
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-
+ HAVE_VALGRIND=yes
fi
-$as_echo "#define VALGRIND_POOL 1" >>confdefs.h
+if test x$HAVE_VALGRIND = xyes; then
+
+$as_echo "#define HAVE_VALGRIND 1" >>confdefs.h
+
+fi
+
+if test x$VALGRIND_POOL = xyes; then
+$as_echo "#define VALGRIND_POOL 1" >>confdefs.h
fi
@@ -10480,7 +11338,281 @@ $as_echo "$LVMETAD" >&6; }
BUILD_LVMETAD=$LVMETAD
+################################################################################
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build lvmpolld" >&5
+$as_echo_n "checking whether to build lvmpolld... " >&6; }
+# Check whether --enable-lvmpolld was given.
+if test "${enable_lvmpolld+set}" = set; then :
+ enableval=$enable_lvmpolld; LVMPOLLD=$enableval
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $LVMPOLLD" >&5
+$as_echo "$LVMPOLLD" >&6; }
+
+BUILD_LVMPOLLD=$LVMPOLLD
+
+################################################################################
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build lockdsanlock" >&5
+$as_echo_n "checking whether to build lockdsanlock... " >&6; }
+# Check whether --enable-lockd-sanlock was given.
+if test "${enable_lockd_sanlock+set}" = set; then :
+ enableval=$enable_lockd_sanlock; LOCKDSANLOCK=$enableval
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $LOCKDSANLOCK" >&5
+$as_echo "$LOCKDSANLOCK" >&6; }
+
+BUILD_LOCKDSANLOCK=$LOCKDSANLOCK
+
+if test "$BUILD_LOCKDSANLOCK" = yes; then
+
+$as_echo "#define LOCKDSANLOCK_SUPPORT 1" >>confdefs.h
+
+fi
+
+################################################################################
+if test "$BUILD_LOCKDSANLOCK" = yes; then
+
+pkg_failed=no
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for LOCKD_SANLOCK" >&5
+$as_echo_n "checking for LOCKD_SANLOCK... " >&6; }
+
+if test -n "$LOCKD_SANLOCK_CFLAGS"; then
+ pkg_cv_LOCKD_SANLOCK_CFLAGS="$LOCKD_SANLOCK_CFLAGS"
+ elif test -n "$PKG_CONFIG"; then
+ if test -n "$PKG_CONFIG" && \
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libsanlock_client\""; } >&5
+ ($PKG_CONFIG --exists --print-errors "libsanlock_client") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then
+ pkg_cv_LOCKD_SANLOCK_CFLAGS=`$PKG_CONFIG --cflags "libsanlock_client" 2>/dev/null`
+ test "x$?" != "x0" && pkg_failed=yes
+else
+ pkg_failed=yes
+fi
+ else
+ pkg_failed=untried
+fi
+if test -n "$LOCKD_SANLOCK_LIBS"; then
+ pkg_cv_LOCKD_SANLOCK_LIBS="$LOCKD_SANLOCK_LIBS"
+ elif test -n "$PKG_CONFIG"; then
+ if test -n "$PKG_CONFIG" && \
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libsanlock_client\""; } >&5
+ ($PKG_CONFIG --exists --print-errors "libsanlock_client") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then
+ pkg_cv_LOCKD_SANLOCK_LIBS=`$PKG_CONFIG --libs "libsanlock_client" 2>/dev/null`
+ test "x$?" != "x0" && pkg_failed=yes
+else
+ pkg_failed=yes
+fi
+ else
+ pkg_failed=untried
+fi
+
+
+
+if test $pkg_failed = yes; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
+ _pkg_short_errors_supported=yes
+else
+ _pkg_short_errors_supported=no
+fi
+ if test $_pkg_short_errors_supported = yes; then
+ LOCKD_SANLOCK_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libsanlock_client" 2>&1`
+ else
+ LOCKD_SANLOCK_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libsanlock_client" 2>&1`
+ fi
+ # Put the nasty error message in config.log where it belongs
+ echo "$LOCKD_SANLOCK_PKG_ERRORS" >&5
+
+ $bailout
+elif test $pkg_failed = untried; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ $bailout
+else
+ LOCKD_SANLOCK_CFLAGS=$pkg_cv_LOCKD_SANLOCK_CFLAGS
+ LOCKD_SANLOCK_LIBS=$pkg_cv_LOCKD_SANLOCK_LIBS
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+ HAVE_LOCKD_SANLOCK=yes
+fi
+ BUILD_LVMLOCKD=yes
+fi
+
+################################################################################
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build lockddlm" >&5
+$as_echo_n "checking whether to build lockddlm... " >&6; }
+# Check whether --enable-lockd-dlm was given.
+if test "${enable_lockd_dlm+set}" = set; then :
+ enableval=$enable_lockd_dlm; LOCKDDLM=$enableval
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $LOCKDDLM" >&5
+$as_echo "$LOCKDDLM" >&6; }
+
+BUILD_LOCKDDLM=$LOCKDDLM
+
+if test "$BUILD_LOCKDDLM" = yes; then
+
+$as_echo "#define LOCKDDLM_SUPPORT 1" >>confdefs.h
+
+fi
+
+################################################################################
+if test "$BUILD_LOCKDDLM" = yes; then
+
+pkg_failed=no
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for LOCKD_DLM" >&5
+$as_echo_n "checking for LOCKD_DLM... " >&6; }
+
+if test -n "$LOCKD_DLM_CFLAGS"; then
+ pkg_cv_LOCKD_DLM_CFLAGS="$LOCKD_DLM_CFLAGS"
+ elif test -n "$PKG_CONFIG"; then
+ if test -n "$PKG_CONFIG" && \
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libdlm\""; } >&5
+ ($PKG_CONFIG --exists --print-errors "libdlm") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then
+ pkg_cv_LOCKD_DLM_CFLAGS=`$PKG_CONFIG --cflags "libdlm" 2>/dev/null`
+ test "x$?" != "x0" && pkg_failed=yes
+else
+ pkg_failed=yes
+fi
+ else
+ pkg_failed=untried
+fi
+if test -n "$LOCKD_DLM_LIBS"; then
+ pkg_cv_LOCKD_DLM_LIBS="$LOCKD_DLM_LIBS"
+ elif test -n "$PKG_CONFIG"; then
+ if test -n "$PKG_CONFIG" && \
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libdlm\""; } >&5
+ ($PKG_CONFIG --exists --print-errors "libdlm") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then
+ pkg_cv_LOCKD_DLM_LIBS=`$PKG_CONFIG --libs "libdlm" 2>/dev/null`
+ test "x$?" != "x0" && pkg_failed=yes
+else
+ pkg_failed=yes
+fi
+ else
+ pkg_failed=untried
+fi
+
+
+
+if test $pkg_failed = yes; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
+ _pkg_short_errors_supported=yes
+else
+ _pkg_short_errors_supported=no
+fi
+ if test $_pkg_short_errors_supported = yes; then
+ LOCKD_DLM_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libdlm" 2>&1`
+ else
+ LOCKD_DLM_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libdlm" 2>&1`
+ fi
+ # Put the nasty error message in config.log where it belongs
+ echo "$LOCKD_DLM_PKG_ERRORS" >&5
+
+ $bailout
+elif test $pkg_failed = untried; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ $bailout
+else
+ LOCKD_DLM_CFLAGS=$pkg_cv_LOCKD_DLM_CFLAGS
+ LOCKD_DLM_LIBS=$pkg_cv_LOCKD_DLM_LIBS
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+ HAVE_LOCKD_DLM=yes
+fi
+ BUILD_LVMLOCKD=yes
+fi
+
+################################################################################
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build lvmlockd" >&5
+$as_echo_n "checking whether to build lvmlockd... " >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $BUILD_LVMLOCKD" >&5
+$as_echo "$BUILD_LVMLOCKD" >&6; }
+
+if test "$BUILD_LVMLOCKD" = yes; then
+ if test -n "$BUILD_LVMPOLLD"; then :
+ BUILD_LVMPOLLD=yes; { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Enabling lvmpolld - required by lvmlockd." >&5
+$as_echo "$as_me: WARNING: Enabling lvmpolld - required by lvmlockd." >&2;}
+fi
+ if test -n "$BUILD_LVMETAD"; then :
+ BUILD_LVMETAD=yes; { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Enabling lvmetad - required by lvmlockd." >&5
+$as_echo "$as_me: WARNING: Enabling lvmetad - required by lvmlockd." >&2;}
+fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking defaults for use_lvmlockd" >&5
+$as_echo_n "checking defaults for use_lvmlockd... " >&6; }
+ # Check whether --enable-use_lvmlockd was given.
+if test "${enable_use_lvmlockd+set}" = set; then :
+ enableval=$enable_use_lvmlockd; case ${enableval} in
+ yes) DEFAULT_USE_LVMLOCKD=1 ;;
+ *) DEFAULT_USE_LVMLOCKD=0 ;;
+ esac
+else
+ DEFAULT_USE_LVMLOCKD=1
+fi
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DEFAULT_USE_LVMLOCKD" >&5
+$as_echo "$DEFAULT_USE_LVMLOCKD" >&6; }
+
+$as_echo "#define LVMLOCKD_SUPPORT 1" >>confdefs.h
+
+
+
+# Check whether --with-lvmlockd-pidfile was given.
+if test "${with_lvmlockd_pidfile+set}" = set; then :
+ withval=$with_lvmlockd_pidfile; LVMLOCKD_PIDFILE=$withval
+else
+ LVMLOCKD_PIDFILE="$DEFAULT_PID_DIR/lvmlockd.pid"
+fi
+
+
+cat >>confdefs.h <<_ACEOF
+#define LVMLOCKD_PIDFILE "$LVMLOCKD_PIDFILE"
+_ACEOF
+
+else
+ DEFAULT_USE_LVMLOCKD=0
+fi
+
+cat >>confdefs.h <<_ACEOF
+#define DEFAULT_USE_LVMLOCKD $DEFAULT_USE_LVMLOCKD
+_ACEOF
+
+
+################################################################################
if test "$BUILD_LVMETAD" = yes; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking defaults for use_lvmetad" >&5
+$as_echo_n "checking defaults for use_lvmetad... " >&6; }
+ # Check whether --enable-use_lvmetad was given.
+if test "${enable_use_lvmetad+set}" = set; then :
+ enableval=$enable_use_lvmetad; case ${enableval} in
+ yes) DEFAULT_USE_LVMETAD=1 ;;
+ *) DEFAULT_USE_LVMETAD=0 ;;
+ esac
+else
+ DEFAULT_USE_LVMETAD=1
+fi
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DEFAULT_USE_LVMETAD" >&5
+$as_echo "$DEFAULT_USE_LVMETAD" >&6; }
$as_echo "#define LVMETAD_SUPPORT 1" >>confdefs.h
@@ -10498,9 +11630,59 @@ cat >>confdefs.h <<_ACEOF
#define LVMETAD_PIDFILE "$LVMETAD_PIDFILE"
_ACEOF
+else
+ DEFAULT_USE_LVMETAD=0
+fi
+
+cat >>confdefs.h <<_ACEOF
+#define DEFAULT_USE_LVMETAD $DEFAULT_USE_LVMETAD
+_ACEOF
+
+
+################################################################################
+if test "$BUILD_LVMPOLLD" = yes; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking defaults for use_lvmpolld" >&5
+$as_echo_n "checking defaults for use_lvmpolld... " >&6; }
+ # Check whether --enable-use_lvmpolld was given.
+if test "${enable_use_lvmpolld+set}" = set; then :
+ enableval=$enable_use_lvmpolld; case ${enableval} in
+ yes) DEFAULT_USE_LVMPOLLD=1 ;;
+ *) DEFAULT_USE_LVMPOLLD=0 ;;
+ esac
+else
+ DEFAULT_USE_LVMPOLLD=1
+fi
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DEFAULT_USE_LVMPOLLD" >&5
+$as_echo "$DEFAULT_USE_LVMPOLLD" >&6; }
+
+$as_echo "#define LVMPOLLD_SUPPORT 1" >>confdefs.h
+
+
+
+# Check whether --with-lvmpolld-pidfile was given.
+if test "${with_lvmpolld_pidfile+set}" = set; then :
+ withval=$with_lvmpolld_pidfile; LVMPOLLD_PIDFILE=$withval
+else
+ LVMPOLLD_PIDFILE="$DEFAULT_PID_DIR/lvmpolld.pid"
+fi
+
+
+cat >>confdefs.h <<_ACEOF
+#define LVMPOLLD_PIDFILE "$LVMPOLLD_PIDFILE"
+_ACEOF
+
+else
+ DEFAULT_USE_LVMPOLLD=0
fi
+cat >>confdefs.h <<_ACEOF
+#define DEFAULT_USE_LVMPOLLD $DEFAULT_USE_LVMPOLLD
+_ACEOF
+
+
################################################################################
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to enable libblkid detection of signatures when wiping" >&5
$as_echo_n "checking whether to enable libblkid detection of signatures when wiping... " >&6; }
# Check whether --enable-blkid_wiping was given.
@@ -10596,12 +11778,22 @@ $as_echo "yes" >&6; }
fi
if test "$BLKID_WIPING" = yes; then
BLKID_PC="blkid"
+ DEFAULT_USE_BLKID_WIPING=1
$as_echo "#define BLKID_WIPING_SUPPORT 1" >>confdefs.h
+ else
+ DEFAULT_USE_BLKID_WIPING=1
fi
+else
+ DEFAULT_USE_BLKID_WIPING=0
fi
+cat >>confdefs.h <<_ACEOF
+#define DEFAULT_USE_BLKID_WIPING $DEFAULT_USE_BLKID_WIPING
+_ACEOF
+
+
################################################################################
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to use udev-systemd protocol for jobs in background" >&5
$as_echo_n "checking whether to use udev-systemd protocol for jobs in background... " >&6; }
@@ -10855,8 +12047,11 @@ else
fi
-if test "$DM_COMPAT" = yes; then
- as_fn_error $? "--enable-compat is not currently supported.
+if test "$DM_COMPAT" = yes; then :
+
+$as_echo "#define DM_COMPAT 1" >>confdefs.h
+
+ as_fn_error $? "--enable-compat is not currently supported.
Since device-mapper version 1.02.66, only one version (4) of the device-mapper
ioctl protocol is supported." "$LINENO" 5
fi
@@ -10882,6 +12077,11 @@ if test "${enable_ioctl+set}" = set; then :
enableval=$enable_ioctl; DM_IOCTLS=$enableval
fi
+if test "$DM_IOCTLS" = yes; then :
+
+$as_echo "#define DM_IOCTLS 1" >>confdefs.h
+
+fi
################################################################################
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to enable O_DIRECT" >&5
@@ -11371,6 +12571,50 @@ if [ \( "$LVM1" = shared -o "$POOL" = shared -o "$CLUSTER" = shared \
fi
################################################################################
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for log10 in -lm" >&5
+$as_echo_n "checking for log10 in -lm... " >&6; }
+if ${ac_cv_lib_m_log10+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-lm $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char log10 ();
+int
+main ()
+{
+return log10 ();
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ ac_cv_lib_m_log10=yes
+else
+ ac_cv_lib_m_log10=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_m_log10" >&5
+$as_echo "$ac_cv_lib_m_log10" >&6; }
+if test "x$ac_cv_lib_m_log10" = xyes; then :
+ M_LIBS="-lm"
+else
+ hard_bailout
+fi
+
+
+################################################################################
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for pthread_mutex_lock in -lpthread" >&5
$as_echo_n "checking for pthread_mutex_lock in -lpthread... " >&6; }
if ${ac_cv_lib_pthread_pthread_mutex_lock+:} false; then :
@@ -11603,10 +12847,47 @@ fi
$as_echo "#define HAVE_REALTIME 1" >>confdefs.h
LIBS="-lrt $LIBS"
+ RT_PC="librt"
else
{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Disabling realtime clock" >&5
$as_echo "$as_me: WARNING: Disabling realtime clock" >&2;}
fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $HAVE_REALTIME" >&5
+$as_echo "$HAVE_REALTIME" >&6; }
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for struct stat has st_ctim." >&5
+$as_echo_n "checking for struct stat has st_ctim.... " >&6; }
+if ${ac_cv_stat_st_ctim+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <sys/stat.h>
+long bar(void) { struct stat s; return (long)(s.st_ctim.tv_sec + s.st_ctim.tv_nsec);}
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ac_cv_stat_st_ctim=yes
+else
+ ac_cv_stat_st_ctim=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_stat_st_ctim" >&5
+$as_echo "$ac_cv_stat_st_ctim" >&6; }
+
+if test $ac_cv_stat_st_ctim = yes; then :
+
+$as_echo "#define HAVE_STAT_ST_CTIM 1" >>confdefs.h
+
fi
################################################################################
@@ -11926,18 +13207,30 @@ else
MSGFMT="$ac_cv_path_MSGFMT"
fi
- if [ -z "$MSGFMT" ]; then
- as_fn_error $? "msgfmt not found in path $PATH" "$LINENO" 5
- fi
+
+ if test -z "$MSGFMT"; then :
+ as_fn_error $? "msgfmt not found in path $PATH" "$LINENO" 5
+fi
# Check whether --with-localedir was given.
if test "${with_localedir+set}" = set; then :
- withval=$with_localedir; LOCALEDIR=$withval
+ withval=$with_localedir; localedir=$withval
else
- LOCALEDIR='${prefix}/share/locale'
+ localedir=${localedir-'${datarootdir}/locale'}
fi
+
+cat >>confdefs.h <<_ACEOF
+#define INTL_PACKAGE "$INTL_PACKAGE"
+_ACEOF
+
+ # double eval needed ${datarootdir} -> ${prefix}/share -> real path
+
+cat >>confdefs.h <<_ACEOF
+#define LOCALEDIR "$(eval echo $(eval echo $localedir))"
+_ACEOF
+
fi
################################################################################
@@ -11950,6 +13243,11 @@ else
fi
+cat >>confdefs.h <<_ACEOF
+#define DEFAULT_ETC_DIR "$CONFDIR"
+_ACEOF
+
+
# Check whether --with-staticdir was given.
if test "${with_staticdir+set}" = set; then :
@@ -12041,6 +13339,147 @@ done
fi
+if test "$BUILD_CMIRRORD" = yes; then
+ for ac_func in atexit
+do :
+ ac_fn_c_check_func "$LINENO" "atexit" "ac_cv_func_atexit"
+if test "x$ac_cv_func_atexit" = xyes; then :
+ cat >>confdefs.h <<_ACEOF
+#define HAVE_ATEXIT 1
+_ACEOF
+
+else
+ hard_bailout
+fi
+done
+
+fi
+
+if test "$BUILD_LVMLOCKD" = yes; then
+ for ac_func in clock_gettime strtoull
+do :
+ as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
+ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
+if eval test \"x\$"$as_ac_var"\" = x"yes"; then :
+ cat >>confdefs.h <<_ACEOF
+#define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+
+else
+ hard_bailout
+fi
+done
+
+fi
+
+if test "$BUILD_LVMPOLLD" = yes; then
+ for ac_func in strpbrk
+do :
+ ac_fn_c_check_func "$LINENO" "strpbrk" "ac_cv_func_strpbrk"
+if test "x$ac_cv_func_strpbrk" = xyes; then :
+ cat >>confdefs.h <<_ACEOF
+#define HAVE_STRPBRK 1
+_ACEOF
+
+else
+ hard_bailout
+fi
+done
+
+ ac_fn_c_check_decl "$LINENO" "strerror_r" "ac_cv_have_decl_strerror_r" "$ac_includes_default"
+if test "x$ac_cv_have_decl_strerror_r" = xyes; then :
+ ac_have_decl=1
+else
+ ac_have_decl=0
+fi
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_DECL_STRERROR_R $ac_have_decl
+_ACEOF
+
+for ac_func in strerror_r
+do :
+ ac_fn_c_check_func "$LINENO" "strerror_r" "ac_cv_func_strerror_r"
+if test "x$ac_cv_func_strerror_r" = xyes; then :
+ cat >>confdefs.h <<_ACEOF
+#define HAVE_STRERROR_R 1
+_ACEOF
+
+fi
+done
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether strerror_r returns char *" >&5
+$as_echo_n "checking whether strerror_r returns char *... " >&6; }
+if ${ac_cv_func_strerror_r_char_p+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+
+ ac_cv_func_strerror_r_char_p=no
+ if test $ac_cv_have_decl_strerror_r = yes; then
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+$ac_includes_default
+int
+main ()
+{
+
+ char buf[100];
+ char x = *strerror_r (0, buf, sizeof buf);
+ char *p = strerror_r (0, buf, sizeof buf);
+ return !p || x;
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ac_cv_func_strerror_r_char_p=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ else
+ # strerror_r is not declared. Choose between
+ # systems that have relatively inaccessible declarations for the
+ # function. BeOS and DEC UNIX 4.0 fall in this category, but the
+ # former has a strerror_r that returns char*, while the latter
+ # has a strerror_r that returns `int'.
+ # This test should segfault on the DEC system.
+ if test "$cross_compiling" = yes; then :
+ :
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+$ac_includes_default
+ extern char *strerror_r ();
+int
+main ()
+{
+char buf[100];
+ char x = *strerror_r (0, buf, sizeof buf);
+ return ! isalpha (x);
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_run "$LINENO"; then :
+ ac_cv_func_strerror_r_char_p=yes
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
+ conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+
+ fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_func_strerror_r_char_p" >&5
+$as_echo "$ac_cv_func_strerror_r_char_p" >&6; }
+if test $ac_cv_func_strerror_r_char_p = yes; then
+
+$as_echo "#define STRERROR_R_CHAR_P 1" >>confdefs.h
+
+fi
+
+fi
+
if test "$CLVMD" != none; then
for ac_header in mntent.h netdb.h netinet/in.h pthread.h search.h sys/mount.h sys/socket.h sys/uio.h sys/un.h utmpx.h
do :
@@ -12751,8 +14190,17 @@ LVM_LIBAPI=`echo "$VER" | $AWK -F '[()]' '{print $2}'`
+
+
+
+
+
+
+
+
+
################################################################################
-ac_config_files="$ac_config_files Makefile make.tmpl daemons/Makefile daemons/clvmd/Makefile daemons/cmirrord/Makefile daemons/dmeventd/Makefile daemons/dmeventd/libdevmapper-event.pc daemons/dmeventd/plugins/Makefile daemons/dmeventd/plugins/lvm2/Makefile daemons/dmeventd/plugins/raid/Makefile daemons/dmeventd/plugins/mirror/Makefile daemons/dmeventd/plugins/snapshot/Makefile daemons/dmeventd/plugins/thin/Makefile daemons/lvmetad/Makefile conf/Makefile conf/example.conf conf/command_profile_template.profile conf/metadata_profile_template.profile include/.symlinks include/Makefile lib/Makefile lib/format1/Makefile lib/format_pool/Makefile lib/locking/Makefile lib/mirror/Makefile lib/replicator/Makefile lib/misc/lvm-version.h lib/raid/Makefile lib/snapshot/Makefile lib/thin/Makefile lib/cache_segtype/Makefile libdaemon/Makefile libdaemon/client/Makefile libdaemon/server/Makefile libdm/Makefile libdm/libdevmapper.pc liblvm/Makefile liblvm/liblvm2app.pc man/Makefile po/Makefile python/Makefile python/setup.py scripts/blkdeactivate.sh scripts/blk_availability_init_red_hat scripts/blk_availability_systemd_red_hat.service scripts/clvmd_init_red_hat scripts/cmirrord_init_red_hat scripts/dm_event_systemd_red_hat.service scripts/dm_event_systemd_red_hat.socket scripts/lvm2_cluster_activation_red_hat.sh scripts/lvm2_cluster_activation_systemd_red_hat.service scripts/lvm2_clvmd_systemd_red_hat.service scripts/lvm2_cmirrord_systemd_red_hat.service scripts/lvm2_lvmetad_init_red_hat scripts/lvm2_lvmetad_systemd_red_hat.service scripts/lvm2_lvmetad_systemd_red_hat.socket scripts/lvm2_monitoring_init_red_hat scripts/lvm2_monitoring_systemd_red_hat.service scripts/lvm2_pvscan_systemd_red_hat@.service scripts/lvm2_tmpfiles_red_hat.conf scripts/Makefile test/Makefile test/api/Makefile test/unit/Makefile tools/Makefile udev/Makefile unit-tests/datastruct/Makefile unit-tests/regex/Makefile unit-tests/mm/Makefile"
+ac_config_files="$ac_config_files Makefile make.tmpl daemons/Makefile daemons/clvmd/Makefile daemons/cmirrord/Makefile daemons/dmeventd/Makefile daemons/dmeventd/libdevmapper-event.pc daemons/dmeventd/plugins/Makefile daemons/dmeventd/plugins/lvm2/Makefile daemons/dmeventd/plugins/raid/Makefile daemons/dmeventd/plugins/mirror/Makefile daemons/dmeventd/plugins/snapshot/Makefile daemons/dmeventd/plugins/thin/Makefile daemons/lvmetad/Makefile daemons/lvmpolld/Makefile daemons/lvmlockd/Makefile conf/Makefile conf/example.conf conf/lvmlocal.conf conf/command_profile_template.profile conf/metadata_profile_template.profile include/.symlinks include/Makefile lib/Makefile lib/format1/Makefile lib/format_pool/Makefile lib/locking/Makefile lib/mirror/Makefile lib/replicator/Makefile lib/misc/lvm-version.h lib/raid/Makefile lib/snapshot/Makefile lib/thin/Makefile lib/cache_segtype/Makefile libdaemon/Makefile libdaemon/client/Makefile libdaemon/server/Makefile libdm/Makefile libdm/libdevmapper.pc liblvm/Makefile liblvm/liblvm2app.pc man/Makefile po/Makefile python/Makefile python/setup.py scripts/blkdeactivate.sh scripts/blk_availability_init_red_hat scripts/blk_availability_systemd_red_hat.service scripts/clvmd_init_red_hat scripts/cmirrord_init_red_hat scripts/dm_event_systemd_red_hat.service scripts/dm_event_systemd_red_hat.socket scripts/lvm2_cluster_activation_red_hat.sh scripts/lvm2_cluster_activation_systemd_red_hat.service scripts/lvm2_clvmd_systemd_red_hat.service scripts/lvm2_cmirrord_systemd_red_hat.service scripts/lvm2_lvmetad_init_red_hat scripts/lvm2_lvmetad_systemd_red_hat.service scripts/lvm2_lvmetad_systemd_red_hat.socket scripts/lvm2_lvmpolld_init_red_hat scripts/lvm2_lvmpolld_systemd_red_hat.service scripts/lvm2_lvmpolld_systemd_red_hat.socket scripts/lvm2_lvmlockd_systemd_red_hat.service scripts/lvm2_lvmlocking_systemd_red_hat.service scripts/lvm2_monitoring_init_red_hat scripts/lvm2_monitoring_systemd_red_hat.service 
scripts/lvm2_pvscan_systemd_red_hat@.service scripts/lvm2_tmpfiles_red_hat.conf scripts/Makefile test/Makefile test/api/Makefile test/unit/Makefile tools/Makefile udev/Makefile unit-tests/datastruct/Makefile unit-tests/regex/Makefile unit-tests/mm/Makefile"
cat >confcache <<\_ACEOF
# This file is a shell script that caches the results of configure
@@ -13461,8 +14909,11 @@ do
"daemons/dmeventd/plugins/snapshot/Makefile") CONFIG_FILES="$CONFIG_FILES daemons/dmeventd/plugins/snapshot/Makefile" ;;
"daemons/dmeventd/plugins/thin/Makefile") CONFIG_FILES="$CONFIG_FILES daemons/dmeventd/plugins/thin/Makefile" ;;
"daemons/lvmetad/Makefile") CONFIG_FILES="$CONFIG_FILES daemons/lvmetad/Makefile" ;;
+ "daemons/lvmpolld/Makefile") CONFIG_FILES="$CONFIG_FILES daemons/lvmpolld/Makefile" ;;
+ "daemons/lvmlockd/Makefile") CONFIG_FILES="$CONFIG_FILES daemons/lvmlockd/Makefile" ;;
"conf/Makefile") CONFIG_FILES="$CONFIG_FILES conf/Makefile" ;;
"conf/example.conf") CONFIG_FILES="$CONFIG_FILES conf/example.conf" ;;
+ "conf/lvmlocal.conf") CONFIG_FILES="$CONFIG_FILES conf/lvmlocal.conf" ;;
"conf/command_profile_template.profile") CONFIG_FILES="$CONFIG_FILES conf/command_profile_template.profile" ;;
"conf/metadata_profile_template.profile") CONFIG_FILES="$CONFIG_FILES conf/metadata_profile_template.profile" ;;
"include/.symlinks") CONFIG_FILES="$CONFIG_FILES include/.symlinks" ;;
@@ -13503,6 +14954,11 @@ do
"scripts/lvm2_lvmetad_init_red_hat") CONFIG_FILES="$CONFIG_FILES scripts/lvm2_lvmetad_init_red_hat" ;;
"scripts/lvm2_lvmetad_systemd_red_hat.service") CONFIG_FILES="$CONFIG_FILES scripts/lvm2_lvmetad_systemd_red_hat.service" ;;
"scripts/lvm2_lvmetad_systemd_red_hat.socket") CONFIG_FILES="$CONFIG_FILES scripts/lvm2_lvmetad_systemd_red_hat.socket" ;;
+ "scripts/lvm2_lvmpolld_init_red_hat") CONFIG_FILES="$CONFIG_FILES scripts/lvm2_lvmpolld_init_red_hat" ;;
+ "scripts/lvm2_lvmpolld_systemd_red_hat.service") CONFIG_FILES="$CONFIG_FILES scripts/lvm2_lvmpolld_systemd_red_hat.service" ;;
+ "scripts/lvm2_lvmpolld_systemd_red_hat.socket") CONFIG_FILES="$CONFIG_FILES scripts/lvm2_lvmpolld_systemd_red_hat.socket" ;;
+ "scripts/lvm2_lvmlockd_systemd_red_hat.service") CONFIG_FILES="$CONFIG_FILES scripts/lvm2_lvmlockd_systemd_red_hat.service" ;;
+ "scripts/lvm2_lvmlocking_systemd_red_hat.service") CONFIG_FILES="$CONFIG_FILES scripts/lvm2_lvmlocking_systemd_red_hat.service" ;;
"scripts/lvm2_monitoring_init_red_hat") CONFIG_FILES="$CONFIG_FILES scripts/lvm2_monitoring_init_red_hat" ;;
"scripts/lvm2_monitoring_systemd_red_hat.service") CONFIG_FILES="$CONFIG_FILES scripts/lvm2_monitoring_systemd_red_hat.service" ;;
"scripts/lvm2_pvscan_systemd_red_hat@.service") CONFIG_FILES="$CONFIG_FILES scripts/lvm2_pvscan_systemd_red_hat@.service" ;;
@@ -14109,14 +15565,22 @@ $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;}
fi
-test -n "$THIN_CONFIGURE_WARN" && { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Support for thin provisioning is limited since some thin provisioning tools are missing!" >&5
+if test -n "$THIN_CONFIGURE_WARN"; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Support for thin provisioning is limited since some thin provisioning tools are missing!" >&5
$as_echo "$as_me: WARNING: Support for thin provisioning is limited since some thin provisioning tools are missing!" >&2;}
+fi
-test -n "$THIN_CHECK_VERSION_WARN" && { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: You should also install thin_check vsn 0.3.2 (or later) to use lvm2 thin provisioning" >&5
+if test -n "$THIN_CHECK_VERSION_WARN"; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: You should also install thin_check vsn 0.3.2 (or later) to use lvm2 thin provisioning" >&5
$as_echo "$as_me: WARNING: You should also install thin_check vsn 0.3.2 (or later) to use lvm2 thin provisioning" >&2;}
+fi
-test -n "$CACHE_CONFIGURE_WARN" && { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Support for cache is limited since some cache tools are missing!" >&5
+if test -n "$CACHE_CONFIGURE_WARN"; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Support for cache is limited since some cache tools are missing!" >&5
$as_echo "$as_me: WARNING: Support for cache is limited since some cache tools are missing!" >&2;}
+fi
-test "$ODIRECT" = yes || { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: O_DIRECT disabled: low-memory pvmove may lock up" >&5
+if test "$ODIRECT" != yes; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: O_DIRECT disabled: low-memory pvmove may lock up" >&5
$as_echo "$as_me: WARNING: O_DIRECT disabled: low-memory pvmove may lock up" >&2;}
+fi
diff --git a/configure.in b/configure.in
index 107bb293b..2c89d8c6a 100644
--- a/configure.in
+++ b/configure.in
@@ -1,6 +1,6 @@
###############################################################################
## Copyright (C) 2000-2004 Sistina Software, Inc. All rights reserved.
-## Copyright (C) 2004-2014 Red Hat, Inc. All rights reserved.
+## Copyright (C) 2004-2015 Red Hat, Inc. All rights reserved.
##
## This copyrighted material is made available to anyone wishing to use,
## modify, copy, or redistribute it subject to the terms and conditions
@@ -26,10 +26,9 @@ AC_CONFIG_AUX_DIR(autoconf)
dnl -- Get system type
AC_CANONICAL_TARGET([])
+AS_IF([test -z "$CFLAGS"], [COPTIMISE_FLAG="-O2"])
case "$host_os" in
linux*)
- CFLAGS="$CFLAGS"
- COPTIMISE_FLAG="-O2"
CLDFLAGS="$CLDFLAGS -Wl,--version-script,.export.sym"
ELDFLAGS="-Wl,--export-dynamic"
# FIXME Generate list and use --dynamic-list=.dlopen.sym
@@ -39,6 +38,10 @@ case "$host_os" in
LIB_SUFFIX=so
DEVMAPPER=yes
LVMETAD=no
+ LVMPOLLD=no
+ LVMLOCKD=no
+ LOCKDSANLOCK=no
+ LOCKDDLM=no
ODIRECT=yes
DM_IOCTLS=yes
SELINUX=yes
@@ -48,7 +51,6 @@ case "$host_os" in
;;
darwin*)
CFLAGS="$CFLAGS -no-cpp-precomp -fno-common"
- COPTIMISE_FLAG="-O2"
CLDFLAGS="$CLDFLAGS"
ELDFLAGS=
CLDWHOLEARCHIVE="-all_load"
@@ -68,7 +70,12 @@ esac
dnl -- Checks for programs.
AC_PROG_SED
AC_PROG_AWK
+save_CFLAGS=$CFLAGS
+save_CXXFLAGS=$CXXFLAGS
AC_PROG_CC
+AC_PROG_CXX
+CFLAGS=$save_CFLAGS
+CXXFLAGS=$save_CXXFLAGS
dnl probably no longer needed in 2008, but...
AC_PROG_GCC_TRADITIONAL
@@ -84,14 +91,19 @@ AC_PATH_TOOL(CSCOPE_CMD, cscope)
dnl -- Check for header files.
AC_HEADER_DIRENT
AC_HEADER_MAJOR
+AC_HEADER_STDBOOL
AC_HEADER_STDC
AC_HEADER_SYS_WAIT
AC_HEADER_TIME
-AC_CHECK_HEADERS([locale.h stddef.h syslog.h sys/file.h sys/time.h assert.h \
- langinfo.h libgen.h signal.h sys/mman.h sys/resource.h sys/utsname.h \
- sys/wait.h time.h], ,
- [AC_MSG_ERROR(bailing out)])
+AC_CHECK_HEADERS([assert.h ctype.h dirent.h errno.h fcntl.h float.h \
+ getopt.h inttypes.h langinfo.h libgen.h limits.h locale.h paths.h \
+ signal.h stdarg.h stddef.h stdio.h stdlib.h string.h sys/file.h \
+ sys/ioctl.h syslog.h sys/mman.h sys/param.h sys/resource.h sys/stat.h \
+ sys/time.h sys/types.h sys/utsname.h sys/wait.h time.h \
+ unistd.h], , [AC_MSG_ERROR(bailing out)])
+
+AC_CHECK_HEADERS(termios.h sys/statvfs.h sys/timerfd.h)
case "$host_os" in
linux*)
@@ -100,16 +112,13 @@ case "$host_os" in
AC_CHECK_HEADERS(machine/endian.h sys/disk.h,,AC_MSG_ERROR(bailing out)) ;;
esac
-AC_CHECK_HEADERS([ctype.h dirent.h errno.h fcntl.h getopt.h inttypes.h limits.h \
- stdarg.h stdio.h stdlib.h string.h sys/ioctl.h sys/param.h sys/stat.h \
- sys/types.h unistd.h], , [AC_MSG_ERROR(bailing out)])
-AC_CHECK_HEADERS(termios.h sys/statvfs.h)
-
################################################################################
dnl -- Check for typedefs, structures, and compiler characteristics.
AC_C_CONST
AC_C_INLINE
AC_CHECK_MEMBERS([struct stat.st_rdev])
+AC_CHECK_TYPES([ptrdiff_t])
+AC_STRUCT_TM
AC_TYPE_OFF_T
AC_TYPE_PID_T
AC_TYPE_SIGNAL
@@ -125,15 +134,13 @@ AC_TYPE_UINT8_T
AC_TYPE_UINT16_T
AC_TYPE_UINT32_T
AC_TYPE_UINT64_T
-AC_CHECK_MEMBERS([struct stat.st_rdev])
-AC_STRUCT_TM
################################################################################
dnl -- Check for functions
-AC_CHECK_FUNCS([ftruncate gethostname getpagesize \
- gettimeofday memset mkdir mkfifo rmdir munmap nl_langinfo setenv setlocale \
- strcasecmp strchr strcspn strspn strdup strncasecmp strerror strrchr \
- strstr strtol strtoul uname], , [AC_MSG_ERROR(bailing out)])
+AC_CHECK_FUNCS([ftruncate gethostname getpagesize gettimeofday localtime_r \
+ memchr memset mkdir mkfifo munmap nl_langinfo realpath rmdir setenv \
+ setlocale strcasecmp strchr strcspn strdup strerror strncasecmp strndup \
+ strrchr strspn strstr strtol strtoul uname], , [AC_MSG_ERROR(bailing out)])
AC_FUNC_ALLOCA
AC_FUNC_CLOSEDIR_VOID
AC_FUNC_CHOWN
@@ -141,6 +148,7 @@ AC_FUNC_FORK
AC_FUNC_LSTAT
AC_FUNC_MALLOC
AC_FUNC_MEMCMP
+AC_FUNC_MKTIME
AC_FUNC_MMAP
AC_FUNC_REALLOC
AC_FUNC_STAT
@@ -167,6 +175,9 @@ AC_SUBST(HAVE_FULL_RELRO)
################################################################################
dnl -- Prefix is /usr by default, the exec_prefix default is setup later
AC_PREFIX_DEFAULT(/usr)
+if test "$prefix" = NONE; then
+ datarootdir=${ac_default_prefix}/share
+fi
################################################################################
dnl -- Setup the ownership of the files
@@ -197,6 +208,7 @@ AC_ARG_WITH(device-uid,
[set the owner used for new device nodes [UID=0]]),
DM_DEVICE_UID=$withval, DM_DEVICE_UID=0)
AC_MSG_RESULT($DM_DEVICE_UID)
+AC_DEFINE_UNQUOTED([DM_DEVICE_UID], [$DM_DEVICE_UID], [Define default owner for device node])
################################################################################
dnl -- Setup device group ownership
@@ -207,6 +219,7 @@ AC_ARG_WITH(device-gid,
[set the group used for new device nodes [GID=0]]),
DM_DEVICE_GID=$withval, DM_DEVICE_GID=0)
AC_MSG_RESULT($DM_DEVICE_GID)
+AC_DEFINE_UNQUOTED([DM_DEVICE_GID], [$DM_DEVICE_GID], [Define default group for device node])
################################################################################
dnl -- Setup device mode
@@ -217,6 +230,7 @@ AC_ARG_WITH(device-mode,
[set the mode used for new device nodes [MODE=0600]]),
DM_DEVICE_MODE=$withval, DM_DEVICE_MODE=0600)
AC_MSG_RESULT($DM_DEVICE_MODE)
+AC_DEFINE_UNQUOTED([DM_DEVICE_MODE], [$DM_DEVICE_MODE], [Define default mode for device node])
AC_MSG_CHECKING(when to create device nodes)
AC_ARG_WITH(device-nodes-on,
@@ -256,8 +270,13 @@ AC_ARG_ENABLE(lvm1_fallback,
AC_MSG_RESULT($LVM1_FALLBACK)
if test "$LVM1_FALLBACK" = yes; then
+ DEFAULT_FALLBACK_TO_LVM1=1
AC_DEFINE([LVM1_FALLBACK], 1, [Define to 1 if 'lvm' should fall back to using LVM1 binaries if device-mapper is missing from the kernel])
+else
+ DEFAULT_FALLBACK_TO_LVM1=0
fi
+AC_DEFINE_UNQUOTED(DEFAULT_FALLBACK_TO_LVM1, [$DEFAULT_FALLBACK_TO_LVM1],
+ [Fall back to LVM1 by default if device-mapper is missing from the kernel.])
################################################################################
dnl -- format1 inclusion type
@@ -353,7 +372,7 @@ AC_ARG_WITH(default-mirror-segtype,
AC_HELP_STRING([--with-default-mirror-segtype=TYPE],
[default mirror segtype: raid1/mirror [raid1]]),
DEFAULT_MIRROR_SEGTYPE=$withval, DEFAULT_MIRROR_SEGTYPE="raid1")
-AC_ARG_WITH(default-raid10r-segtype,
+AC_ARG_WITH(default-raid10-segtype,
AC_HELP_STRING([--with-default-raid10-segtype=TYPE],
[default mirror segtype: raid10/mirror [raid10]]),
DEFAULT_RAID10_SEGTYPE=$withval, DEFAULT_RAID10_SEGTYPE="raid10")
@@ -545,6 +564,12 @@ case "$CACHE" in
*) AC_MSG_ERROR([--with-cache parameter invalid]) ;;
esac
+dnl -- cache_check needs-check flag
+AC_ARG_ENABLE(cache_check_needs_check,
+ AC_HELP_STRING([--disable-cache_check_needs_check],
+ [required if cache_check version is < 0.5]),
+ CACHE_CHECK_NEEDS_CHECK=$enableval, CACHE_CHECK_NEEDS_CHECK=yes)
+
# Test if necessary cache tools are available
# if not - use plain defaults and warn user
case "$CACHE" in
@@ -558,6 +583,28 @@ case "$CACHE" in
CACHE_CONFIGURE_WARN=y
fi
fi
+ if test "$CACHE_CHECK_NEEDS_CHECK" = yes; then
+ $CACHE_CHECK_CMD -V 2>/dev/null >conftest.tmp
+ read -r CACHE_CHECK_VSN < conftest.tmp
+ IFS=. read -r CACHE_CHECK_VSN_MAJOR CACHE_CHECK_VSN_MINOR CACHE_CHECK_VSN_PATCH < conftest.tmp
+ rm -f conftest.tmp
+
+ # Require version >= 0.5.4 for --clear-needs-check-flag
+ if test -z "$CACHE_CHECK_VSN_MAJOR" \
+ || test -z "$CACHE_CHECK_VSN_MINOR" \
+ || test -z "$CACHE_CHECK_VSN_PATCH"; then
+ AC_MSG_WARN([$CACHE_CHECK_CMD: Bad version "$CACHE_CHECK_VSN" found])
+ CACHE_CHECK_VERSION_WARN=y
+ CACHE_CHECK_NEEDS_CHECK=no
+ elif test "$CACHE_CHECK_VSN_MAJOR" -eq 0 ; then
+ if test "$CACHE_CHECK_VSN_MINOR" -lt 5 \
+ || test "$CACHE_CHECK_VSN_MINOR" -eq 5 -a "$CACHE_CHECK_VSN_PATCH" -lt 4; then
+ AC_MSG_WARN([$CACHE_CHECK_CMD: Old version "$CACHE_CHECK_VSN" found])
+ CACHE_CHECK_VERSION_WARN=y
+ CACHE_CHECK_NEEDS_CHECK=no
+ fi
+ fi
+ fi
# Empty means a config way to ignore cache dumping
if test "$CACHE_DUMP_CMD" = "autodetect"; then
AC_PATH_TOOL(CACHE_DUMP_CMD, cache_dump)
@@ -585,6 +632,12 @@ case "$CACHE" in
CACHE_CONFIGURE_WARN=y
}
fi
+
+ AC_MSG_CHECKING([whether cache_check supports the needs-check flag])
+ AC_MSG_RESULT([$CACHE_CHECK_NEEDS_CHECK])
+ if test "$CACHE_CHECK_NEEDS_CHECK" = yes; then
+ AC_DEFINE([CACHE_CHECK_NEEDS_CHECK], 1, [Define to 1 if the external 'cache_check' tool requires the --clear-needs-check-flag option])
+ fi
;;
esac
@@ -613,8 +666,8 @@ AC_MSG_RESULT($READLINE)
dnl -- Disable realtime clock support
AC_MSG_CHECKING(whether to enable realtime support)
AC_ARG_ENABLE(realtime,
- AC_HELP_STRING([--enable-realtime], [enable realtime clock support]),
- REALTIME=$enableval)
+ AC_HELP_STRING([--disable-realtime], [disable realtime clock support]),
+ REALTIME=$enableval, REALTIME=yes)
AC_MSG_RESULT($REALTIME)
################################################################################
@@ -644,28 +697,32 @@ pkg_config_init() {
}
################################################################################
+AC_MSG_CHECKING(for default run directory)
+RUN_DIR="/run"
+test -d "/run" || RUN_DIR="/var/run"
+AC_MSG_RESULT($RUN_DIR)
dnl -- Set up pidfile and run directory
AH_TEMPLATE(DEFAULT_PID_DIR)
AC_ARG_WITH(default-pid-dir,
AC_HELP_STRING([--with-default-pid-dir=PID_DIR],
- [Default directory to keep PID files in. [/var/run]]),
- DEFAULT_PID_DIR="$withval", DEFAULT_PID_DIR="/var/run")
+ [Default directory to keep PID files in. [autodetect]]),
+ DEFAULT_PID_DIR="$withval", DEFAULT_PID_DIR=$RUN_DIR)
AC_DEFINE_UNQUOTED(DEFAULT_PID_DIR, ["$DEFAULT_PID_DIR"],
[Default directory to keep PID files in.])
AH_TEMPLATE(DEFAULT_DM_RUN_DIR, [Name of default DM run directory.])
AC_ARG_WITH(default-dm-run-dir,
AC_HELP_STRING([--with-default-dm-run-dir=DM_RUN_DIR],
- [ Default DM run directory. [/var/run]]),
- DEFAULT_DM_RUN_DIR="$withval", DEFAULT_DM_RUN_DIR="/var/run")
+ [ Default DM run directory. [autodetect]]),
+ DEFAULT_DM_RUN_DIR="$withval", DEFAULT_DM_RUN_DIR=$RUN_DIR)
AC_DEFINE_UNQUOTED(DEFAULT_DM_RUN_DIR, ["$DEFAULT_DM_RUN_DIR"],
[Default DM run directory.])
AH_TEMPLATE(DEFAULT_RUN_DIR, [Name of default LVM run directory.])
AC_ARG_WITH(default-run-dir,
AC_HELP_STRING([--with-default-run-dir=RUN_DIR],
- [Default LVM run directory. [/var/run/lvm]]),
- DEFAULT_RUN_DIR="$withval", DEFAULT_RUN_DIR="/var/run/lvm")
+ [Default LVM run directory. [autodetect_run_dir/lvm]]),
+ DEFAULT_RUN_DIR="$withval", DEFAULT_RUN_DIR="$RUN_DIR/lvm")
AC_DEFINE_UNQUOTED(DEFAULT_RUN_DIR, ["$DEFAULT_RUN_DIR"],
[Default LVM run directory.])
@@ -961,6 +1018,8 @@ AC_TRY_CCFLAG([-Wjump-misses-init], [HAVE_WJUMP], [], [])
AC_SUBST(HAVE_WJUMP)
AC_TRY_CCFLAG([-Wclobbered], [HAVE_WCLOBBERED], [], [])
AC_SUBST(HAVE_WCLOBBERED)
+AC_TRY_CCFLAG([-Wsync-nand], [HAVE_WSYNCNAND], [], [])
+AC_SUBST(HAVE_WSYNCNAND)
################################################################################
dnl -- Override optimisation
@@ -1014,6 +1073,13 @@ if test "$TESTING" = yes; then
fi
################################################################################
+dnl -- Set LVM2 testsuite data
+TESTSUITE_DATA='${datarootdir}/lvm2-testsuite'
+# double eval needed ${datarootdir} -> ${prefix}/share -> real path
+AC_DEFINE_UNQUOTED(TESTSUITE_DATA, ["$(eval echo $(eval echo $TESTSUITE_DATA))"], [Path to testsuite data])
+
+
+################################################################################
dnl -- Enable valgrind awareness of memory pools
AC_MSG_CHECKING(whether to enable valgrind awareness of pools)
AC_ARG_ENABLE(valgrind_pool,
@@ -1022,12 +1088,16 @@ AC_ARG_ENABLE(valgrind_pool,
VALGRIND_POOL=$enableval, VALGRIND_POOL=no)
AC_MSG_RESULT($VALGRIND_POOL)
-if test "$VALGRIND_POOL" = yes; then
- pkg_config_init
- PKG_CHECK_MODULES(VALGRIND, valgrind, [], [AC_MSG_ERROR(bailing out)])
+pkg_config_init
+PKG_CHECK_MODULES(VALGRIND, valgrind, [HAVE_VALGRIND=yes], [if test x$VALGRIND_POOL = xyes; then AC_MSG_ERROR(bailing out); fi])
+AC_SUBST(VALGRIND_CFLAGS)
+
+if test x$HAVE_VALGRIND = xyes; then
+ AC_DEFINE([HAVE_VALGRIND], 1, [valgrind.h found])
+fi
+
+if test x$VALGRIND_POOL = xyes; then
AC_DEFINE([VALGRIND_POOL], 1, [Enable a valgrind aware build of pool])
- AC_SUBST(VALGRIND_POOL)
- AC_SUBST(VALGRIND_CFLAGS)
fi
################################################################################
@@ -1054,7 +1124,106 @@ AC_MSG_RESULT($LVMETAD)
BUILD_LVMETAD=$LVMETAD
+################################################################################
+dnl -- Build lvmpolld
+AC_MSG_CHECKING(whether to build lvmpolld)
+AC_ARG_ENABLE(lvmpolld,
+ AC_HELP_STRING([--enable-lvmpolld],
+ [enable the LVM Polling Daemon]),
+ LVMPOLLD=$enableval)
+AC_MSG_RESULT($LVMPOLLD)
+
+BUILD_LVMPOLLD=$LVMPOLLD
+
+################################################################################
+dnl -- Build lockdsanlock
+AC_MSG_CHECKING(whether to build lockdsanlock)
+AC_ARG_ENABLE(lockd-sanlock,
+ AC_HELP_STRING([--enable-lockd-sanlock],
+ [enable the LVM lock daemon using sanlock]),
+ LOCKDSANLOCK=$enableval)
+AC_MSG_RESULT($LOCKDSANLOCK)
+
+BUILD_LOCKDSANLOCK=$LOCKDSANLOCK
+
+if test "$BUILD_LOCKDSANLOCK" = yes; then
+ AC_DEFINE([LOCKDSANLOCK_SUPPORT], 1, [Define to 1 to include code that uses lvmlockd sanlock option.])
+fi
+
+################################################################################
+dnl -- Look for sanlock libraries
+if test "$BUILD_LOCKDSANLOCK" = yes; then
+ PKG_CHECK_MODULES(LOCKD_SANLOCK, libsanlock_client, [HAVE_LOCKD_SANLOCK=yes], $bailout)
+ BUILD_LVMLOCKD=yes
+fi
+
+################################################################################
+dnl -- Build lockddlm
+AC_MSG_CHECKING(whether to build lockddlm)
+AC_ARG_ENABLE(lockd-dlm,
+ AC_HELP_STRING([--enable-lockd-dlm],
+ [enable the LVM lock daemon using dlm]),
+ LOCKDDLM=$enableval)
+AC_MSG_RESULT($LOCKDDLM)
+
+BUILD_LOCKDDLM=$LOCKDDLM
+
+if test "$BUILD_LOCKDDLM" = yes; then
+ AC_DEFINE([LOCKDDLM_SUPPORT], 1, [Define to 1 to include code that uses lvmlockd dlm option.])
+fi
+
+################################################################################
+dnl -- Look for dlm libraries
+if test "$BUILD_LOCKDDLM" = yes; then
+ PKG_CHECK_MODULES(LOCKD_DLM, libdlm, [HAVE_LOCKD_DLM=yes], $bailout)
+ BUILD_LVMLOCKD=yes
+fi
+
+################################################################################
+dnl -- Build lvmlockd
+
+AC_MSG_CHECKING(whether to build lvmlockd)
+AC_MSG_RESULT($BUILD_LVMLOCKD)
+
+if test "$BUILD_LVMLOCKD" = yes; then
+ AS_IF([test -n "$BUILD_LVMPOLLD"], [BUILD_LVMPOLLD=yes; AC_MSG_WARN([Enabling lvmpolld - required by lvmlockd.])])
+ AS_IF([test -n "$BUILD_LVMETAD"], [BUILD_LVMETAD=yes; AC_MSG_WARN([Enabling lvmetad - required by lvmlockd.])])
+ AC_MSG_CHECKING([defaults for use_lvmlockd])
+ AC_ARG_ENABLE(use_lvmlockd,
+ AC_HELP_STRING([--disable-use-lvmlockd],
+ [disable usage of LVM lock daemon]),
+ [case ${enableval} in
+ yes) DEFAULT_USE_LVMLOCKD=1 ;;
+ *) DEFAULT_USE_LVMLOCKD=0 ;;
+ esac], DEFAULT_USE_LVMLOCKD=1)
+ AC_MSG_RESULT($DEFAULT_USE_LVMLOCKD)
+ AC_DEFINE([LVMLOCKD_SUPPORT], 1, [Define to 1 to include code that uses lvmlockd.])
+
+ AC_ARG_WITH(lvmlockd-pidfile,
+ AC_HELP_STRING([--with-lvmlockd-pidfile=PATH],
+ [lvmlockd pidfile [PID_DIR/lvmlockd.pid]]),
+ LVMLOCKD_PIDFILE=$withval,
+ LVMLOCKD_PIDFILE="$DEFAULT_PID_DIR/lvmlockd.pid")
+ AC_DEFINE_UNQUOTED(LVMLOCKD_PIDFILE, ["$LVMLOCKD_PIDFILE"],
+ [Path to lvmlockd pidfile.])
+else
+ DEFAULT_USE_LVMLOCKD=0
+fi
+AC_DEFINE_UNQUOTED(DEFAULT_USE_LVMLOCKD, [$DEFAULT_USE_LVMLOCKD],
+ [Use lvmlockd by default.])
+
+################################################################################
+dnl -- Check lvmetad
if test "$BUILD_LVMETAD" = yes; then
+ AC_MSG_CHECKING([defaults for use_lvmetad])
+ AC_ARG_ENABLE(use_lvmetad,
+ AC_HELP_STRING([--disable-use-lvmetad],
+ [disable usage of LVM Metadata Daemon]),
+ [case ${enableval} in
+ yes) DEFAULT_USE_LVMETAD=1 ;;
+ *) DEFAULT_USE_LVMETAD=0 ;;
+ esac], DEFAULT_USE_LVMETAD=1)
+ AC_MSG_RESULT($DEFAULT_USE_LVMETAD)
AC_DEFINE([LVMETAD_SUPPORT], 1, [Define to 1 to include code that uses lvmetad.])
AC_ARG_WITH(lvmetad-pidfile,
@@ -1064,9 +1233,41 @@ if test "$BUILD_LVMETAD" = yes; then
LVMETAD_PIDFILE="$DEFAULT_PID_DIR/lvmetad.pid")
AC_DEFINE_UNQUOTED(LVMETAD_PIDFILE, ["$LVMETAD_PIDFILE"],
[Path to lvmetad pidfile.])
+else
+ DEFAULT_USE_LVMETAD=0
fi
+AC_DEFINE_UNQUOTED(DEFAULT_USE_LVMETAD, [$DEFAULT_USE_LVMETAD],
+ [Use lvmetad by default.])
+
+################################################################################
+dnl -- Check lvmpolld
+if test "$BUILD_LVMPOLLD" = yes; then
+ AC_MSG_CHECKING([defaults for use_lvmpolld])
+ AC_ARG_ENABLE(use_lvmpolld,
+ AC_HELP_STRING([--disable-use-lvmpolld],
+ [disable usage of LVM Poll Daemon]),
+ [case ${enableval} in
+ yes) DEFAULT_USE_LVMPOLLD=1 ;;
+ *) DEFAULT_USE_LVMPOLLD=0 ;;
+ esac], DEFAULT_USE_LVMPOLLD=1)
+ AC_MSG_RESULT($DEFAULT_USE_LVMPOLLD)
+ AC_DEFINE([LVMPOLLD_SUPPORT], 1, [Define to 1 to include code that uses lvmpolld.])
+
+ AC_ARG_WITH(lvmpolld-pidfile,
+ AC_HELP_STRING([--with-lvmpolld-pidfile=PATH],
+ [lvmpolld pidfile [PID_DIR/lvmpolld.pid]]),
+ LVMPOLLD_PIDFILE=$withval,
+ LVMPOLLD_PIDFILE="$DEFAULT_PID_DIR/lvmpolld.pid")
+ AC_DEFINE_UNQUOTED(LVMPOLLD_PIDFILE, ["$LVMPOLLD_PIDFILE"],
+ [Path to lvmpolld pidfile.])
+else
+ DEFAULT_USE_LVMPOLLD=0
+fi
+AC_DEFINE_UNQUOTED(DEFAULT_USE_LVMPOLLD, [$DEFAULT_USE_LVMPOLLD],
+ [Use lvmpolld by default.])
################################################################################
+
dnl -- Enable blkid wiping functionality
AC_MSG_CHECKING(whether to enable libblkid detection of signatures when wiping)
AC_ARG_ENABLE(blkid_wiping,
@@ -1086,9 +1287,16 @@ if test "$BLKID_WIPING" != no; then
fi])
if test "$BLKID_WIPING" = yes; then
BLKID_PC="blkid"
+ DEFAULT_USE_BLKID_WIPING=1
AC_DEFINE([BLKID_WIPING_SUPPORT], 1, [Define to 1 to use libblkid detection of signatures when wiping.])
+ else
+		DEFAULT_USE_BLKID_WIPING=0
fi
+else
+ DEFAULT_USE_BLKID_WIPING=0
fi
+AC_DEFINE_UNQUOTED(DEFAULT_USE_BLKID_WIPING, [$DEFAULT_USE_BLKID_WIPING],
+ [Use blkid wiping by default.])
################################################################################
dnl -- Enable udev-systemd protocol to instantiate a service for background jobs
@@ -1161,11 +1369,11 @@ AC_ARG_ENABLE(compat,
[enable support for old device-mapper versions]),
DM_COMPAT=$enableval, DM_COMPAT=no)
-if test "$DM_COMPAT" = yes; then
- AC_MSG_ERROR([--enable-compat is not currently supported.
+AS_IF([test "$DM_COMPAT" = yes],
+ [AC_DEFINE([DM_COMPAT], 1, [Define to enable compat protocol])
+ AC_MSG_ERROR([--enable-compat is not currently supported.
Since device-mapper version 1.02.66, only one version (4) of the device-mapper
-ioctl protocol is supported.])
-fi
+ioctl protocol is supported.])])
################################################################################
dnl -- Compatible units suffix mode
@@ -1185,6 +1393,8 @@ AC_ARG_ENABLE(ioctl,
AC_HELP_STRING([--disable-ioctl],
[disable ioctl calls to device-mapper in the kernel]),
DM_IOCTLS=$enableval)
+AS_IF([test "$DM_IOCTLS" = yes],
+ [AC_DEFINE([DM_IOCTLS], 1, [Define to enable ioctls calls to kernel])])
################################################################################
dnl -- Disable O_DIRECT
@@ -1328,6 +1538,10 @@ if [[ \( "$LVM1" = shared -o "$POOL" = shared -o "$CLUSTER" = shared \
fi
################################################################################
+AC_CHECK_LIB(m, log10,
+ [M_LIBS="-lm"], hard_bailout)
+
+################################################################################
AC_CHECK_LIB([pthread], [pthread_mutex_lock],
[PTHREAD_LIBS="-lpthread"], hard_bailout)
@@ -1367,11 +1581,25 @@ if test "$REALTIME" = yes; then
if test "$HAVE_REALTIME" = yes; then
AC_DEFINE([HAVE_REALTIME], 1, [Define to 1 to include support for realtime clock.])
LIBS="-lrt $LIBS"
+ RT_PC="librt"
else
AC_MSG_WARN(Disabling realtime clock)
fi
+ AC_MSG_RESULT($HAVE_REALTIME)
fi
+dnl Check if the system has struct stat st_ctim.
+AC_CACHE_CHECK([for struct stat has st_ctim.],
+ [ac_cv_stat_st_ctim],
+ [AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
+[#include <sys/stat.h>
+long bar(void) { struct stat s; return (long)(s.st_ctim.tv_sec + s.st_ctim.tv_nsec);}]
+ )], [ac_cv_stat_st_ctim=yes], [ac_cv_stat_st_ctim=no])])
+
+AC_IF_YES(ac_cv_stat_st_ctim,
+ AC_DEFINE(HAVE_STAT_ST_CTIM, 1,
+ [Define if struct stat has a field st_ctim with timespec for ctime]))
+
################################################################################
dnl -- Check for getopt
AC_CHECK_HEADERS(getopt.h, AC_DEFINE([HAVE_GETOPTLONG], 1, [Define to 1 if getopt_long is available.]))
@@ -1431,14 +1659,16 @@ if test "$INTL" = yes; then
# FIXME - Move this - can be device-mapper too
INTL_PACKAGE="lvm2"
AC_PATH_TOOL(MSGFMT, msgfmt)
- if [[ -z "$MSGFMT" ]]; then
- AC_MSG_ERROR([msgfmt not found in path $PATH])
- fi
+
+ AS_IF([test -z "$MSGFMT"], [AC_MSG_ERROR([msgfmt not found in path $PATH])])
AC_ARG_WITH(localedir,
AC_HELP_STRING([--with-localedir=DIR],
- [translation files in DIR [PREFIX/share/locale]]),
- LOCALEDIR=$withval, LOCALEDIR='${prefix}/share/locale')
+ [locale-dependent data [DATAROOTDIR/locale]]),
+ localedir=$withval, localedir=${localedir-'${datarootdir}/locale'})
+ AC_DEFINE_UNQUOTED([INTL_PACKAGE], ["$INTL_PACKAGE"], [Internalization package])
+ # double eval needed ${datarootdir} -> ${prefix}/share -> real path
+ AC_DEFINE_UNQUOTED([LOCALEDIR], ["$(eval echo $(eval echo $localedir))"], [Locale-dependent data])
fi
################################################################################
@@ -1447,6 +1677,8 @@ AC_ARG_WITH(confdir,
AC_HELP_STRING([--with-confdir=DIR],
[configuration files in DIR [/etc]]),
CONFDIR=$withval, CONFDIR='/etc')
+AC_DEFINE_UNQUOTED(DEFAULT_ETC_DIR, ["$CONFDIR"],
+ [Default system configuration directory.])
AC_ARG_WITH(staticdir,
AC_HELP_STRING([--with-staticdir=DIR],
@@ -1501,6 +1733,19 @@ if test "$READLINE" = yes; then
AC_CHECK_HEADERS(readline/readline.h readline/history.h,,hard_bailout)
fi
+if test "$BUILD_CMIRRORD" = yes; then
+ AC_CHECK_FUNCS(atexit,,hard_bailout)
+fi
+
+if test "$BUILD_LVMLOCKD" = yes; then
+ AC_CHECK_FUNCS(clock_gettime strtoull,,hard_bailout)
+fi
+
+if test "$BUILD_LVMPOLLD" = yes; then
+ AC_CHECK_FUNCS(strpbrk,,hard_bailout)
+ AC_FUNC_STRERROR_R
+fi
+
if test "$CLVMD" != none; then
AC_CHECK_HEADERS(mntent.h netdb.h netinet/in.h pthread.h search.h sys/mount.h sys/socket.h sys/uio.h sys/un.h utmpx.h,,AC_MSG_ERROR(bailing out))
AC_CHECK_FUNCS(dup2 getmntent memmove select socket,,hard_bailout)
@@ -1658,10 +1903,13 @@ LVM_LIBAPI=`echo "$VER" | $AWK -F '[[()]]' '{print $2}'`
AC_SUBST(APPLIB)
AC_SUBST(AWK)
AC_SUBST(BLKID_PC)
-AC_SUBST(BLKID_WIPING)
AC_SUBST(BUILD_CMIRRORD)
AC_SUBST(BUILD_DMEVENTD)
AC_SUBST(BUILD_LVMETAD)
+AC_SUBST(BUILD_LVMPOLLD)
+AC_SUBST(BUILD_LVMLOCKD)
+AC_SUBST(BUILD_LOCKDSANLOCK)
+AC_SUBST(BUILD_LOCKDDLM)
AC_SUBST(CACHE)
AC_SUBST(CFLAGS)
AC_SUBST(CFLOW_CMD)
@@ -1691,6 +1939,7 @@ AC_SUBST(DEFAULT_CACHE_SUBDIR)
AC_SUBST(DEFAULT_DATA_ALIGNMENT)
AC_SUBST(DEFAULT_DM_RUN_DIR)
AC_SUBST(DEFAULT_LOCK_DIR)
+AC_SUBST(DEFAULT_FALLBACK_TO_LVM1)
AC_SUBST(DEFAULT_MIRROR_SEGTYPE)
AC_SUBST(DEFAULT_PID_DIR)
AC_SUBST(DEFAULT_PROFILE_SUBDIR)
@@ -1698,17 +1947,16 @@ AC_SUBST(DEFAULT_RAID10_SEGTYPE)
AC_SUBST(DEFAULT_RUN_DIR)
AC_SUBST(DEFAULT_SPARSE_SEGTYPE)
AC_SUBST(DEFAULT_SYS_DIR)
+AC_SUBST(DEFAULT_USE_BLKID_WIPING)
+AC_SUBST(DEFAULT_USE_LVMETAD)
+AC_SUBST(DEFAULT_USE_LVMPOLLD)
+AC_SUBST(DEFAULT_USE_LVMLOCKD)
AC_SUBST(DEVMAPPER)
AC_SUBST(DLM_CFLAGS)
AC_SUBST(DLM_LIBS)
AC_SUBST(DL_LIBS)
AC_SUBST(DMEVENTD)
AC_SUBST(DMEVENTD_PATH)
-AC_SUBST(DM_COMPAT)
-AC_SUBST(DM_DEVICE_GID)
-AC_SUBST(DM_DEVICE_MODE)
-AC_SUBST(DM_DEVICE_UID)
-AC_SUBST(DM_IOCTLS)
AC_SUBST(DM_LIB_VERSION)
AC_SUBST(DM_LIB_PATCHLEVEL)
AC_SUBST(ELDFLAGS)
@@ -1716,13 +1964,12 @@ AC_SUBST(FSADM)
AC_SUBST(BLKDEACTIVATE)
AC_SUBST(HAVE_LIBDL)
AC_SUBST(HAVE_REALTIME)
+AC_SUBST(HAVE_VALGRIND)
AC_SUBST(INTL)
-AC_SUBST(INTL_PACKAGE)
AC_SUBST(JOBS)
AC_SUBST(LDDEPS)
AC_SUBST(LIBS)
AC_SUBST(LIB_SUFFIX)
-AC_SUBST(LOCALEDIR)
AC_SUBST(LVM1)
AC_SUBST(LVM1_FALLBACK)
AC_SUBST(LVM_VERSION)
@@ -1733,6 +1980,7 @@ AC_SUBST(LVM_PATCHLEVEL)
AC_SUBST(LVM_PATH)
AC_SUBST(LVM_RELEASE)
AC_SUBST(LVM_RELEASE_DATE)
+AC_SUBST(localedir)
AC_SUBST(MANGLING)
AC_SUBST(MIRRORS)
AC_SUBST(MSGFMT)
@@ -1740,6 +1988,7 @@ AC_SUBST(OCF)
AC_SUBST(OCFDIR)
AC_SUBST(PKGCONFIG)
AC_SUBST(POOL)
+AC_SUBST(M_LIBS)
AC_SUBST(PTHREAD_LIBS)
AC_SUBST(PYTHON)
AC_SUBST(PYTHON_BINDINGS)
@@ -1748,6 +1997,7 @@ AC_SUBST(PYTHON_LIBDIRS)
AC_SUBST(QUORUM_CFLAGS)
AC_SUBST(QUORUM_LIBS)
AC_SUBST(RAID)
+AC_SUBST(RT_PC)
AC_SUBST(READLINE_LIBS)
AC_SUBST(REPLICATORS)
AC_SUBST(SACKPT_CFLAGS)
@@ -1760,6 +2010,7 @@ AC_SUBST(SNAPSHOTS)
AC_SUBST(STATICDIR)
AC_SUBST(STATIC_LINK)
AC_SUBST(TESTING)
+AC_SUBST(TESTSUITE_DATA)
AC_SUBST(THIN)
AC_SUBST(THIN_CHECK_CMD)
AC_SUBST(THIN_DUMP_CMD)
@@ -1775,9 +2026,12 @@ AC_SUBST(UDEV_SYNC)
AC_SUBST(UDEV_SYSTEMD_BACKGROUND_JOBS)
AC_SUBST(UDEV_RULE_EXEC_DETECTION)
AC_SUBST(UDEV_HAS_BUILTIN_BLKID)
+AC_SUBST(VALGRIND_POOL)
AC_SUBST(WRITE_INSTALL)
AC_SUBST(DMEVENTD_PIDFILE)
AC_SUBST(LVMETAD_PIDFILE)
+AC_SUBST(LVMPOLLD_PIDFILE)
+AC_SUBST(LVMLOCKD_PIDFILE)
AC_SUBST(CLVMD_PIDFILE)
AC_SUBST(CMIRRORD_PIDFILE)
AC_SUBST(interface)
@@ -1811,8 +2065,11 @@ daemons/dmeventd/plugins/mirror/Makefile
daemons/dmeventd/plugins/snapshot/Makefile
daemons/dmeventd/plugins/thin/Makefile
daemons/lvmetad/Makefile
+daemons/lvmpolld/Makefile
+daemons/lvmlockd/Makefile
conf/Makefile
conf/example.conf
+conf/lvmlocal.conf
conf/command_profile_template.profile
conf/metadata_profile_template.profile
include/.symlinks
@@ -1853,6 +2110,11 @@ scripts/lvm2_cmirrord_systemd_red_hat.service
scripts/lvm2_lvmetad_init_red_hat
scripts/lvm2_lvmetad_systemd_red_hat.service
scripts/lvm2_lvmetad_systemd_red_hat.socket
+scripts/lvm2_lvmpolld_init_red_hat
+scripts/lvm2_lvmpolld_systemd_red_hat.service
+scripts/lvm2_lvmpolld_systemd_red_hat.socket
+scripts/lvm2_lvmlockd_systemd_red_hat.service
+scripts/lvm2_lvmlocking_systemd_red_hat.service
scripts/lvm2_monitoring_init_red_hat
scripts/lvm2_monitoring_systemd_red_hat.service
scripts/lvm2_pvscan_systemd_red_hat@.service
@@ -1869,10 +2131,14 @@ unit-tests/mm/Makefile
])
AC_OUTPUT
-test -n "$THIN_CONFIGURE_WARN" && AC_MSG_WARN([Support for thin provisioning is limited since some thin provisioning tools are missing!])
+AS_IF([test -n "$THIN_CONFIGURE_WARN"],
+ [AC_MSG_WARN([Support for thin provisioning is limited since some thin provisioning tools are missing!])])
-test -n "$THIN_CHECK_VERSION_WARN" && AC_MSG_WARN([You should also install thin_check vsn 0.3.2 (or later) to use lvm2 thin provisioning])
+AS_IF([test -n "$THIN_CHECK_VERSION_WARN"],
+ [AC_MSG_WARN([You should also install thin_check vsn 0.3.2 (or later) to use lvm2 thin provisioning])])
-test -n "$CACHE_CONFIGURE_WARN" && AC_MSG_WARN([Support for cache is limited since some cache tools are missing!])
+AS_IF([test -n "$CACHE_CONFIGURE_WARN"],
+ [AC_MSG_WARN([Support for cache is limited since some cache tools are missing!])])
-test "$ODIRECT" = yes || AC_MSG_WARN([O_DIRECT disabled: low-memory pvmove may lock up])
+AS_IF([test "$ODIRECT" != yes],
+ [AC_MSG_WARN([O_DIRECT disabled: low-memory pvmove may lock up])])
diff --git a/daemons/Makefile.in b/daemons/Makefile.in
index 9a7351681..a2e7094cf 100644
--- a/daemons/Makefile.in
+++ b/daemons/Makefile.in
@@ -1,5 +1,5 @@
#
-# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
+# Copyright (C) 2004-2015 Red Hat, Inc. All rights reserved.
#
# This file is part of LVM2.
#
@@ -15,7 +15,7 @@ srcdir = @srcdir@
top_srcdir = @top_srcdir@
top_builddir = @top_builddir@
-.PHONY: dmeventd clvmd cmirrord lvmetad
+.PHONY: dmeventd clvmd cmirrord lvmetad lvmpolld lvmlockd
ifneq ("@CLVMD@", "none")
SUBDIRS += clvmd
@@ -36,8 +36,16 @@ ifeq ("@BUILD_LVMETAD@", "yes")
SUBDIRS += lvmetad
endif
+ifeq ("@BUILD_LVMPOLLD@", "yes")
+ SUBDIRS += lvmpolld
+endif
+
+ifeq ("@BUILD_LVMLOCKD@", "yes")
+ SUBDIRS += lvmlockd
+endif
+
ifeq ($(MAKECMDGOALS),distclean)
- SUBDIRS = clvmd cmirrord dmeventd lvmetad
+ SUBDIRS = clvmd cmirrord dmeventd lvmetad lvmpolld lvmlockd
endif
include $(top_builddir)/make.tmpl
diff --git a/daemons/clvmd/.gitignore b/daemons/clvmd/.gitignore
new file mode 100644
index 000000000..816032f97
--- /dev/null
+++ b/daemons/clvmd/.gitignore
@@ -0,0 +1 @@
+clvmd
diff --git a/daemons/clvmd/Makefile.in b/daemons/clvmd/Makefile.in
index d51ea3d63..38ff1c7b0 100644
--- a/daemons/clvmd/Makefile.in
+++ b/daemons/clvmd/Makefile.in
@@ -36,10 +36,6 @@ SOURCES = \
lvm-functions.c \
refresh_clvmd.c
-ifeq ("@DEBUG@", "yes")
- DEFS += -DDEBUG
-endif
-
ifneq (,$(findstring cman,, "@CLVMD@,"))
SOURCES += clvmd-cman.c
LMLIBS += $(CMAN_LIBS) $(CONFDB_LIBS) $(DLM_LIBS)
diff --git a/daemons/clvmd/clvmd-command.c b/daemons/clvmd/clvmd-command.c
index 9e59e51e0..ff068b0c6 100644
--- a/daemons/clvmd/clvmd-command.c
+++ b/daemons/clvmd/clvmd-command.c
@@ -323,6 +323,7 @@ void cmd_client_cleanup(struct local_client *client)
int lkid;
char *lockname;
+ DEBUGLOG("Client thread cleanup (%p)\n", client);
if (!client->bits.localsock.private)
return;
@@ -331,7 +332,7 @@ void cmd_client_cleanup(struct local_client *client)
dm_hash_iterate(v, lock_hash) {
lkid = (int)(long)dm_hash_get_data(lock_hash, v);
lockname = dm_hash_get_key(lock_hash, v);
- DEBUGLOG("cleanup: Unlocking lock %s %x\n", lockname, lkid);
+ DEBUGLOG("Cleanup (%p): Unlocking lock %s %x\n", client, lockname, lkid);
(void) sync_unlock(lockname, lkid);
}
@@ -339,7 +340,6 @@ void cmd_client_cleanup(struct local_client *client)
client->bits.localsock.private = NULL;
}
-
static int restart_clvmd(void)
{
const char **argv;
diff --git a/daemons/clvmd/clvmd-common.h b/daemons/clvmd/clvmd-common.h
index 3070e495d..6df1eb200 100644
--- a/daemons/clvmd/clvmd-common.h
+++ b/daemons/clvmd/clvmd-common.h
@@ -18,15 +18,10 @@
#ifndef _LVM_CLVMD_COMMON_H
#define _LVM_CLVMD_COMMON_H
-#include "configure.h"
-
#define _REENTRANT
-#define _GNU_SOURCE
-#define _FILE_OFFSET_BITS 64
-#include "libdevmapper.h"
-#include "lvm-logging.h"
+#include "tool.h"
-#include <unistd.h>
+#include "lvm-logging.h"
#endif
diff --git a/daemons/clvmd/clvmd-openais.c b/daemons/clvmd/clvmd-openais.c
index 29a323a1f..bd76214f7 100644
--- a/daemons/clvmd/clvmd-openais.c
+++ b/daemons/clvmd/clvmd-openais.c
@@ -243,7 +243,7 @@ static void openais_cpg_confchg_callback(cpg_handle_t handle,
struct node_info *ninfo;
DEBUGLOG("confchg callback. %" PRIsize_t " joined, "
- "%" PRIsize_t " left, %" PRIsize_t " members\n",
+ FMTsize_t " left, %" PRIsize_t " members\n",
joined_list_entries, left_list_entries, member_list_entries);
for (i=0; i<joined_list_entries; i++) {
diff --git a/daemons/clvmd/clvmd-singlenode.c b/daemons/clvmd/clvmd-singlenode.c
index e6d2b0da0..dc9360dae 100644
--- a/daemons/clvmd/clvmd-singlenode.c
+++ b/daemons/clvmd/clvmd-singlenode.c
@@ -208,8 +208,6 @@ static int _lock_resource(const char *resource, int mode, int flags, int *lockid
pthread_mutex_lock(&_lock_mutex);
retry:
- pthread_cond_broadcast(&_lock_cond); /* to wakeup waiters */
-
if (!(head = dm_hash_lookup(_locks, resource))) {
if (flags & LCKF_CONVERT) {
/* In real DLM, lock is identified only by lockid, resource is not used */
@@ -269,12 +267,14 @@ retry:
dm_list_add(head, &lck->list);
}
out:
+ pthread_cond_broadcast(&_lock_cond); /* to wakeup waiters */
pthread_mutex_unlock(&_lock_mutex);
DEBUGLOG("Locked resource %s, lockid=%d, mode=%s\n",
resource, lck->lockid, _get_mode(lck->mode));
return 0;
bad:
+ pthread_cond_broadcast(&_lock_cond); /* to wakeup waiters */
pthread_mutex_unlock(&_lock_mutex);
DEBUGLOG("Failed to lock resource %s\n", resource);
diff --git a/daemons/clvmd/clvmd.c b/daemons/clvmd/clvmd.c
index 5cd687433..c6d51bda1 100644
--- a/daemons/clvmd/clvmd.c
+++ b/daemons/clvmd/clvmd.c
@@ -172,6 +172,7 @@ static void usage(const char *prog, FILE *file)
#ifdef USE_SINGLENODE
"singlenode "
#endif
+ "\n"
" -R Tell all running clvmds in the cluster to reload their device cache\n"
" -S Restart clvmd, preserving exclusive locks\n"
" -t<secs> Command timeout (default: 60 seconds)\n"
@@ -222,6 +223,7 @@ void debuglog(const char *fmt, ...)
fprintf(stderr, "CLVMD[%x]: %.15s ", (int)pthread_self(), ctime_r(&P, buf_ctime) + 4);
vfprintf(stderr, fmt, ap);
va_end(ap);
+ fflush(stderr);
break;
case DEBUG_SYSLOG:
if (!syslog_init) {
@@ -597,7 +599,9 @@ int main(int argc, char *argv[])
/* This needs to be started after cluster initialisation
as it may need to take out locks */
- DEBUGLOG("starting LVM thread\n");
+ DEBUGLOG("Starting LVM thread\n");
+ DEBUGLOG("Main cluster socket fd %d (%p) with local socket %d (%p)\n",
+ local_client_head.fd, &local_client_head, newfd->fd, newfd);
/* Don't let anyone else to do work until we are started */
pthread_create(&lvm_thread, &stack_attr, lvm_thread_fn, &lvm_params);
@@ -697,7 +701,7 @@ static int local_rendezvous_callback(struct local_client *thisfd, char *buf,
newfd->type = LOCAL_SOCK;
newfd->callback = local_sock_callback;
newfd->bits.localsock.all_success = 1;
- DEBUGLOG("Got new connection on fd %d\n", newfd->fd);
+ DEBUGLOG("Got new connection on fd %d (%p)\n", newfd->fd, newfd);
*new_client = newfd;
}
return 1;
@@ -849,18 +853,48 @@ static void main_loop(int cmd_timeout)
struct local_client *thisfd;
struct timeval tv = { cmd_timeout, 0 };
int quorate = clops->is_quorate();
+ int client_count = 0;
+ int max_fd = 0;
+ struct local_client *lastfd = &local_client_head;
+ struct local_client *nextfd = local_client_head.next;
/* Wait on the cluster FD and all local sockets/pipes */
local_client_head.fd = clops->get_main_cluster_fd();
FD_ZERO(&in);
+
for (thisfd = &local_client_head; thisfd; thisfd = thisfd->next) {
+ client_count++;
+ max_fd = max(max_fd, thisfd->fd);
+ }
+
+ if (max_fd > FD_SETSIZE - 32) {
+ fprintf(stderr, "WARNING: There are too many connections to clvmd. Investigate and take action now!\n");
+ fprintf(stderr, "WARNING: Your cluster may freeze up if the number of clvmd file descriptors (%d) exceeds %d.\n", max_fd + 1, FD_SETSIZE);
+ }
+
+ for (thisfd = &local_client_head; thisfd; thisfd = nextfd, nextfd = thisfd ? thisfd->next : NULL) {
+
+ if (thisfd->removeme && !cleanup_zombie(thisfd)) {
+ struct local_client *free_fd = thisfd;
+ lastfd->next = nextfd;
+ DEBUGLOG("removeme set for %p with %d monitored fds remaining\n", free_fd, client_count - 1);
+
+ /* Queue cleanup, this also frees the client struct */
+ add_to_lvmqueue(free_fd, NULL, 0, NULL);
+
+ continue;
+ }
+
+ lastfd = thisfd;
+
if (thisfd->removeme)
continue;
/* if the cluster is not quorate then don't listen for new requests */
if ((thisfd->type != LOCAL_RENDEZVOUS &&
thisfd->type != LOCAL_SOCK) || quorate)
- FD_SET(thisfd->fd, &in);
+ if (thisfd->fd < FD_SETSIZE)
+ FD_SET(thisfd->fd, &in);
}
select_status = select(FD_SETSIZE, &in, NULL, NULL, &tv);
@@ -876,31 +910,22 @@ static void main_loop(int cmd_timeout)
}
if (select_status > 0) {
- struct local_client *lastfd = NULL;
char csid[MAX_CSID_LEN];
char buf[max_cluster_message];
for (thisfd = &local_client_head; thisfd; thisfd = thisfd->next) {
- if (thisfd->removeme && !cleanup_zombie(thisfd)) {
- struct local_client *free_fd = thisfd;
- lastfd->next = thisfd->next;
- DEBUGLOG("removeme set for fd %d\n", free_fd->fd);
-
- /* Queue cleanup, this also frees the client struct */
- add_to_lvmqueue(free_fd, NULL, 0, NULL);
- break;
- }
-
- if (FD_ISSET(thisfd->fd, &in)) {
+ if (thisfd->fd < FD_SETSIZE && FD_ISSET(thisfd->fd, &in)) {
struct local_client *newfd = NULL;
int ret;
+ /* FIXME Remove from main thread in case it blocks! */
/* Do callback */
ret = thisfd->callback(thisfd, buf, sizeof(buf),
csid, &newfd);
/* Ignore EAGAIN */
- if (ret < 0 && (errno == EAGAIN || errno == EINTR))
+ if (ret < 0 && (errno == EAGAIN || errno == EINTR)) {
continue;
+ }
/* Got error or EOF: Remove it from the list safely */
if (ret <= 0) {
@@ -914,17 +939,16 @@ static void main_loop(int cmd_timeout)
DEBUGLOG("ret == %d, errno = %d. removing client\n",
ret, errno);
thisfd->removeme = 1;
- break;
+ continue;
}
/* New client...simply add it to the list */
if (newfd) {
newfd->next = thisfd->next;
thisfd->next = newfd;
- break;
+ thisfd = newfd;
}
}
- lastfd = thisfd;
}
}
@@ -1417,7 +1441,7 @@ static int read_from_local_sock(struct local_client *thisfd)
thisfd->bits.localsock.in_progress = TRUE;
thisfd->bits.localsock.state = PRE_COMMAND;
thisfd->bits.localsock.cleanup_needed = 1;
- DEBUGLOG("Creating pre&post thread\n");
+ DEBUGLOG("Creating pre&post thread for pipe fd %d (%p)\n", newfd->fd, newfd);
status = pthread_create(&thisfd->bits.localsock.threadid,
&stack_attr, pre_and_post_thread, thisfd);
DEBUGLOG("Created pre&post thread, state = %d\n", status);
@@ -1671,7 +1695,7 @@ static __attribute__ ((noreturn)) void *pre_and_post_thread(void *arg)
sigset_t ss;
int pipe_fd = client->bits.localsock.pipe;
- DEBUGLOG("Pre&post thread (%p), pipe %d\n", client, pipe_fd);
+ DEBUGLOG("Pre&post thread (%p), pipe fd %d\n", client, pipe_fd);
pthread_mutex_lock(&client->bits.localsock.mutex);
/* Ignore SIGUSR1 (handled by master process) but enable
@@ -1691,7 +1715,7 @@ static __attribute__ ((noreturn)) void *pre_and_post_thread(void *arg)
if ((status = do_pre_command(client)))
client->bits.localsock.all_success = 0;
- DEBUGLOG("Pre&post thread (%p) writes status %d down to pipe %d\n",
+ DEBUGLOG("Pre&post thread (%p) writes status %d down to pipe fd %d\n",
client, status, pipe_fd);
/* Tell the parent process we have finished this bit */
@@ -1973,7 +1997,7 @@ static int process_work_item(struct lvm_thread_cmd *cmd)
{
/* If msg is NULL then this is a cleanup request */
if (cmd->msg == NULL) {
- DEBUGLOG("process_work_item: free fd %d\n", cmd->client->fd);
+ DEBUGLOG("process_work_item: free %p\n", cmd->client);
cmd_client_cleanup(cmd->client);
pthread_mutex_destroy(&cmd->client->bits.localsock.mutex);
pthread_cond_destroy(&cmd->client->bits.localsock.cond);
diff --git a/daemons/clvmd/lvm-functions.c b/daemons/clvmd/lvm-functions.c
index dafffcc40..99e731789 100644
--- a/daemons/clvmd/lvm-functions.c
+++ b/daemons/clvmd/lvm-functions.c
@@ -510,7 +510,7 @@ int do_lock_lv(unsigned char command, unsigned char lock_flags, char *resource)
DEBUGLOG("do_lock_lv: resource '%s', cmd = %s, flags = %s, critical_section = %d\n",
resource, decode_locking_cmd(command), decode_flags(lock_flags), critical_section());
- if (!cmd->config_initialized || config_files_changed(cmd)) {
+ if (!cmd->initialized.config || config_files_changed(cmd)) {
/* Reinitialise various settings inc. logging, filters */
if (do_refresh_cache()) {
log_error("Updated config file invalid. Aborting.");
@@ -899,7 +899,7 @@ int init_clvm(struct dm_hash_table *excl_uuid)
if (!get_initial_state(excl_uuid))
log_error("Cannot load initial lock states.");
- if (!(cmd = create_toolcontext(1, NULL, 0, 1))) {
+ if (!(cmd = create_toolcontext(1, NULL, 0, 1, 1, 1))) {
log_error("Failed to allocate command context");
return 0;
}
diff --git a/daemons/cmirrord/.gitignore b/daemons/cmirrord/.gitignore
new file mode 100644
index 000000000..2711e7720
--- /dev/null
+++ b/daemons/cmirrord/.gitignore
@@ -0,0 +1 @@
+cmirrord
diff --git a/daemons/cmirrord/clogd.c b/daemons/cmirrord/clogd.c
index 86d06bda9..6638843d1 100644
--- a/daemons/cmirrord/clogd.c
+++ b/daemons/cmirrord/clogd.c
@@ -15,6 +15,7 @@
#include "link_mon.h"
#include "local.h"
+#include <getopt.h>
#include <errno.h>
#include <fcntl.h>
#include <sys/socket.h>
@@ -32,14 +33,49 @@ static void daemonize(void);
static void init_all(void);
static void cleanup_all(void);
-int main(int argc __attribute__((unused)), char *argv[] __attribute__((unused)))
+static void usage (FILE *dest)
{
- daemonize();
+ fprintf (dest, "Usage: cmirrord [options]\n"
+ " -f, --foreground stay in the foreground, log to the terminal\n"
+ " -h, --help print this help\n");
+}
+
+int main(int argc, char *argv[])
+{
+ int foreground_mode = 0;
+ struct option longopts[] = {
+ { "foreground", no_argument, NULL, 'f' },
+ { "help" , no_argument, NULL, 'h' },
+ { 0, 0, 0, 0 }
+ };
+ int opt;
+
+ while ((opt = getopt_long (argc, argv, "fh", longopts, NULL)) != -1) {
+ switch (opt) {
+ case 'f':
+ foreground_mode = 1;
+ break;
+ case 'h':
+ usage (stdout);
+ exit (0);
+ default:
+ usage (stderr);
+ exit (2);
+ }
+ }
+ if (optind < argc) {
+ usage (stderr);
+ exit (2);
+ }
+
+ if (!foreground_mode)
+ daemonize();
init_all();
/* Parent can now exit, we're ready to handle requests */
- kill(getppid(), SIGTERM);
+ if (!foreground_mode)
+ kill(getppid(), SIGTERM);
LOG_PRINT("Starting cmirrord:");
LOG_PRINT(" Built: "__DATE__" "__TIME__"\n");
@@ -209,6 +245,16 @@ static void daemonize(void)
}
LOG_OPEN("cmirrord", LOG_PID, LOG_DAEMON);
+}
+
+/*
+ * init_all
+ *
+ * Initialize modules. Exit on failure.
+ */
+static void init_all(void)
+{
+ int r;
(void) dm_prepare_selinux_context(CMIRRORD_PIDFILE, S_IFREG);
if (dm_create_lockfile(CMIRRORD_PIDFILE) == 0)
@@ -227,16 +273,6 @@ static void daemonize(void)
signal(SIGUSR2, &sig_handler);
sigemptyset(&signal_mask);
signal_received = 0;
-}
-
-/*
- * init_all
- *
- * Initialize modules. Exit on failure.
- */
-static void init_all(void)
-{
- int r;
if ((r = init_local()) ||
(r = init_cluster())) {
diff --git a/daemons/cmirrord/cluster.c b/daemons/cmirrord/cluster.c
index 3fd5d2371..2850d9b9b 100644
--- a/daemons/cmirrord/cluster.c
+++ b/daemons/cmirrord/cluster.c
@@ -104,10 +104,11 @@ static SaVersionT version = { 'B', 1, 1 };
#endif
#define DEBUGGING_HISTORY 100
+#define DEBUGGING_BUFLEN 128
#define LOG_SPRINT(cc, f, arg...) do { \
cc->idx++; \
cc->idx = cc->idx % DEBUGGING_HISTORY; \
- sprintf(cc->debugging[cc->idx], f, ## arg); \
+ snprintf(cc->debugging[cc->idx], DEBUGGING_BUFLEN, f, ## arg); \
} while (0)
static int log_resp_rec = 0;
@@ -150,7 +151,7 @@ struct clog_cpg {
uint32_t checkpoint_requesters[MAX_CHECKPOINT_REQUESTERS];
struct checkpoint_data *checkpoint_list;
int idx;
- char debugging[DEBUGGING_HISTORY][128];
+ char debugging[DEBUGGING_HISTORY][DEBUGGING_BUFLEN];
};
static struct dm_list clog_cpg_list;
@@ -1294,7 +1295,9 @@ static void cpg_join_callback(struct clog_cpg *match,
uint32_t my_pid = (uint32_t)getpid();
uint32_t lowest = match->lowest_id;
struct clog_request *rq;
- char dbuf[32] = { 0 };
+ char dbuf[64] = { 0 };
+ char *dbuf_p = dbuf;
+ size_t dbuf_rem = sizeof dbuf;
/* Assign my_cluster_id */
if ((my_cluster_id == 0xDEAD) && (joined->pid == my_pid))
@@ -1310,9 +1313,17 @@ static void cpg_join_callback(struct clog_cpg *match,
if (joined->nodeid == my_cluster_id)
goto out;
- for (i = 0; i < member_list_entries - 1; i++)
- sprintf(dbuf+strlen(dbuf), "%u-", member_list[i].nodeid);
- sprintf(dbuf+strlen(dbuf), "(%u)", joined->nodeid);
+ for (i = 0; i < member_list_entries - 1; i++) {
+ int written = snprintf(dbuf_p, dbuf_rem, "%u-", member_list[i].nodeid);
+ if (written < 0) continue; /* impossible */
+ if ((unsigned)written >= dbuf_rem) {
+ dbuf_rem = 0;
+ break;
+ }
+ dbuf_rem -= written;
+ dbuf_p += written;
+ }
+ snprintf(dbuf_p, dbuf_rem, "(%u)", joined->nodeid);
LOG_COND(log_checkpoint, "[%s] Joining node, %u needs checkpoint [%s]",
SHORT_UUID(match->name.value), joined->nodeid, dbuf);
diff --git a/daemons/cmirrord/functions.c b/daemons/cmirrord/functions.c
index f6e09180d..61b3e118f 100644
--- a/daemons/cmirrord/functions.c
+++ b/daemons/cmirrord/functions.c
@@ -32,12 +32,13 @@
#define LOG_OFFSET 2
#define RESYNC_HISTORY 50
+#define RESYNC_BUFLEN 128
//static char resync_history[RESYNC_HISTORY][128];
//static int idx = 0;
#define LOG_SPRINT(_lc, f, arg...) do { \
lc->idx++; \
lc->idx = lc->idx % RESYNC_HISTORY; \
- sprintf(lc->resync_history[lc->idx], f, ## arg); \
+ snprintf(lc->resync_history[lc->idx], RESYNC_BUFLEN, f, ## arg); \
} while (0)
struct log_header {
@@ -88,7 +89,7 @@ struct log_c {
size_t disk_size; /* size of disk_buffer in bytes */
void *disk_buffer; /* aligned memory for O_DIRECT */
int idx;
- char resync_history[RESYNC_HISTORY][128];
+ char resync_history[RESYNC_HISTORY][RESYNC_BUFLEN];
};
struct mark_entry {
diff --git a/daemons/dmeventd/.gitignore b/daemons/dmeventd/.gitignore
new file mode 100644
index 000000000..71cca4c0c
--- /dev/null
+++ b/daemons/dmeventd/.gitignore
@@ -0,0 +1 @@
+dmeventd
diff --git a/daemons/dmeventd/dmeventd.c b/daemons/dmeventd/dmeventd.c
index e8d1e3b68..1ff5bf9ec 100644
--- a/daemons/dmeventd/dmeventd.c
+++ b/daemons/dmeventd/dmeventd.c
@@ -16,26 +16,21 @@
* dmeventd - dm event daemon to monitor active mapped devices
*/
-#define _GNU_SOURCE
-#define _FILE_OFFSET_BITS 64
+#include "tool.h"
-#include "configure.h"
-#include "libdevmapper.h"
-#include "libdevmapper-event.h"
-#include "dmeventd.h"
//#include "libmultilog.h"
#include "dm-logging.h"
-#include <stdarg.h>
+#include "libdevmapper-event.h"
+#include "dmeventd.h"
+
#include <dlfcn.h>
-#include <errno.h>
#include <pthread.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <sys/time.h>
#include <sys/resource.h>
-#include <unistd.h>
#include <signal.h>
#include <arpa/inet.h> /* for htonl, ntohl */
#include <fcntl.h> /* for musl libc */
@@ -133,51 +128,20 @@ void debuglog(const char *fmt, ...)
static const char *decode_cmd(uint32_t cmd)
{
- static char buf[128];
- const char *command;
-
switch (cmd) {
- case DM_EVENT_CMD_ACTIVE:
- command = "ACTIVE";
- break;
- case DM_EVENT_CMD_REGISTER_FOR_EVENT:
- command = "REGISTER_FOR_EVENT";
- break;
- case DM_EVENT_CMD_UNREGISTER_FOR_EVENT:
- command = "UNREGISTER_FOR_EVENT";
- break;
- case DM_EVENT_CMD_GET_REGISTERED_DEVICE:
- command = "GET_REGISTERED_DEVICE";
- break;
- case DM_EVENT_CMD_GET_NEXT_REGISTERED_DEVICE:
- command = "GET_NEXT_REGISTERED_DEVICE";
- break;
- case DM_EVENT_CMD_SET_TIMEOUT:
- command = "SET_TIMEOUT";
- break;
- case DM_EVENT_CMD_GET_TIMEOUT:
- command = "GET_TIMEOUT";
- break;
- case DM_EVENT_CMD_HELLO:
- command = "HELLO";
- break;
- case DM_EVENT_CMD_DIE:
- command = "DIE";
- break;
- case DM_EVENT_CMD_GET_STATUS:
- command = "GET_STATUS";
- break;
- case DM_EVENT_CMD_GET_PARAMETERS:
- command = "GET_PARAMETERS";
- break;
- default:
- command = "unknown";
- break;
+ case DM_EVENT_CMD_ACTIVE: return "ACTIVE";
+ case DM_EVENT_CMD_REGISTER_FOR_EVENT: return "REGISTER_FOR_EVENT";
+ case DM_EVENT_CMD_UNREGISTER_FOR_EVENT: return "UNREGISTER_FOR_EVENT";
+ case DM_EVENT_CMD_GET_REGISTERED_DEVICE: return "GET_REGISTERED_DEVICE";
+ case DM_EVENT_CMD_GET_NEXT_REGISTERED_DEVICE: return "GET_NEXT_REGISTERED_DEVICE";
+ case DM_EVENT_CMD_SET_TIMEOUT: return "SET_TIMEOUT";
+ case DM_EVENT_CMD_GET_TIMEOUT: return "GET_TIMEOUT";
+ case DM_EVENT_CMD_HELLO: return "HELLO";
+ case DM_EVENT_CMD_DIE: return "DIE";
+ case DM_EVENT_CMD_GET_STATUS: return "GET_STATUS";
+ case DM_EVENT_CMD_GET_PARAMETERS: return "GET_PARAMETERS";
+ default: return "unknown";
}
-
- snprintf(buf, sizeof(buf), "%s (0x%x)", command, cmd);
-
- return buf;
}
#else
@@ -710,6 +674,7 @@ static int _event_wait(struct thread_status *thread, struct dm_task **task)
int ret = DM_WAIT_RETRY;
struct dm_task *dmt;
struct dm_info info;
+ int ioctl_errno;
*task = 0;
@@ -739,25 +704,27 @@ static int _event_wait(struct thread_status *thread, struct dm_task **task)
* either for a timeout event, or to cancel the thread.
*/
set = _unblock_sigalrm();
- errno = 0;
if (dm_task_run(dmt)) {
thread->current_events |= DM_EVENT_DEVICE_ERROR;
ret = DM_WAIT_INTR;
if ((ret = dm_task_get_info(dmt, &info)))
thread->event_nr = info.event_nr;
- } else if (thread->events & DM_EVENT_TIMEOUT && errno == EINTR) {
- thread->current_events |= DM_EVENT_TIMEOUT;
- ret = DM_WAIT_INTR;
- } else if (thread->status == DM_THREAD_SHUTDOWN && errno == EINTR) {
- ret = DM_WAIT_FATAL;
} else {
- syslog(LOG_NOTICE, "dm_task_run failed, errno = %d, %s",
- errno, strerror(errno));
- if (errno == ENXIO) {
- syslog(LOG_ERR, "%s disappeared, detaching",
- thread->device.name);
+ ioctl_errno = dm_task_get_errno(dmt);
+ if (thread->events & DM_EVENT_TIMEOUT && ioctl_errno == EINTR) {
+ thread->current_events |= DM_EVENT_TIMEOUT;
+ ret = DM_WAIT_INTR;
+ } else if (thread->status == DM_THREAD_SHUTDOWN && ioctl_errno == EINTR)
ret = DM_WAIT_FATAL;
+ else {
+ syslog(LOG_NOTICE, "dm_task_run failed, errno = %d, %s",
+ ioctl_errno, strerror(ioctl_errno));
+ if (ioctl_errno == ENXIO) {
+ syslog(LOG_ERR, "%s disappeared, detaching",
+ thread->device.name);
+ ret = DM_WAIT_FATAL;
+ }
}
}
DEBUGLOG("Completed waitevent task for %s", thread->device.uuid);
@@ -1595,9 +1562,6 @@ static void _process_request(struct dm_event_fifos *fifos)
{
int die;
struct dm_event_daemon_message msg = { 0 };
-#ifdef DEBUG
- const char *cmd;
-#endif
/*
* Read the request from the client (client_read, client_write
@@ -1606,7 +1570,8 @@ static void _process_request(struct dm_event_fifos *fifos)
if (!_client_read(fifos, &msg))
return;
- DEBUGLOG("%s processing...", cmd = decode_cmd(msg.cmd));
+ DEBUGLOG("%s (0x%x) processing...", decode_cmd(msg.cmd), msg.cmd);
+
die = (msg.cmd == DM_EVENT_CMD_DIE) ? 1 : 0;
/* _do_process_request fills in msg (if memory allows for
@@ -1618,7 +1583,7 @@ static void _process_request(struct dm_event_fifos *fifos)
dm_free(msg.data);
- DEBUGLOG("%s completed.", cmd);
+ DEBUGLOG("%s (0x%x) completed.", decode_cmd(msg.cmd), msg.cmd);
if (die) {
if (unlink(DMEVENTD_PIDFILE))
@@ -1668,10 +1633,8 @@ static void _cleanup_unused_threads(void)
if (ret == ESRCH) {
thread->status = DM_THREAD_DONE;
} else if (ret) {
- syslog(LOG_ERR,
- "Unable to terminate thread: %s\n",
- strerror(-ret));
- stack;
+ syslog(LOG_ERR, "Unable to terminate thread: %s",
+ strerror(ret));
}
break;
}
@@ -1703,8 +1666,7 @@ static void _cleanup_unused_threads(void)
static void _sig_alarm(int signum __attribute__((unused)))
{
- DEBUGLOG("Received SIGALRM.");
- pthread_testcancel();
+ /* empty SIG_IGN */;
}
/* Init thread signal handling. */
diff --git a/daemons/dmeventd/libdevmapper-event.c b/daemons/dmeventd/libdevmapper-event.c
index 6e41b0a12..81f18c9c4 100644
--- a/daemons/dmeventd/libdevmapper-event.c
+++ b/daemons/dmeventd/libdevmapper-event.c
@@ -17,15 +17,10 @@
//#include "libmultilog.h"
#include "dmeventd.h"
-#include <errno.h>
#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
#include <sys/file.h>
#include <sys/types.h>
#include <sys/stat.h>
-#include <unistd.h>
#include <sys/wait.h>
#include <arpa/inet.h> /* for htonl, ntohl */
diff --git a/daemons/dmeventd/plugins/lvm2/dmeventd_lvm.c b/daemons/dmeventd/plugins/lvm2/dmeventd_lvm.c
index 80ed83aa8..9b08bbec8 100644
--- a/daemons/dmeventd/plugins/lvm2/dmeventd_lvm.c
+++ b/daemons/dmeventd/plugins/lvm2/dmeventd_lvm.c
@@ -102,7 +102,8 @@ int dmeventd_lvm2_init(void)
goto out;
if (!_lvm_handle) {
- lvm2_log_fn(_temporary_log_fn);
+ if (!getenv("LVM_LOG_FILE_EPOCH"))
+ lvm2_log_fn(_temporary_log_fn);
if (!(_lvm_handle = lvm2_init())) {
dm_pool_destroy(_mem_pool);
_mem_pool = NULL;
diff --git a/daemons/dmeventd/plugins/mirror/dmeventd_mirror.c b/daemons/dmeventd/plugins/mirror/dmeventd_mirror.c
index 2328089e2..4c5877ed5 100644
--- a/daemons/dmeventd/plugins/mirror/dmeventd_mirror.c
+++ b/daemons/dmeventd/plugins/mirror/dmeventd_mirror.c
@@ -17,6 +17,7 @@
#include "libdevmapper-event.h"
#include "dmeventd_lvm.h"
#include "defaults.h"
+#include "segtype.h"
#include <syslog.h> /* FIXME Replace syslog with multilog */
/* FIXME Missing openlog? */
@@ -136,10 +137,20 @@ static int _remove_failed_devices(const char *device)
char cmd_str[CMD_SIZE];
if (!dmeventd_lvm2_command(dmeventd_lvm2_pool(), cmd_str, sizeof(cmd_str),
+ "lvscan --cache", device))
+ return -1;
+
+ r = dmeventd_lvm2_run(cmd_str);
+
+ if (!r)
+ syslog(LOG_INFO, "Re-scan of mirror device %s failed.", device);
+
+ if (!dmeventd_lvm2_command(dmeventd_lvm2_pool(), cmd_str, sizeof(cmd_str),
"lvconvert --config devices{ignore_suspended_devices=1} "
"--repair --use-policies", device))
return -ENAMETOOLONG; /* FIXME Replace with generic error return - reason for failure has already got logged */
+ /* if repair goes OK, report success even if lvscan has failed */
r = dmeventd_lvm2_run(cmd_str);
syslog(LOG_INFO, "Repair of mirrored device %s %s.", device,
@@ -169,7 +180,7 @@ void process_event(struct dm_task *dmt,
continue;
}
- if (strcmp(target_type, "mirror")) {
+ if (strcmp(target_type, SEG_TYPE_NAME_MIRROR)) {
syslog(LOG_INFO, "%s has unmirrored portion.", device);
continue;
}
diff --git a/daemons/lvmetad/.gitignore b/daemons/lvmetad/.gitignore
new file mode 100644
index 000000000..773097e32
--- /dev/null
+++ b/daemons/lvmetad/.gitignore
@@ -0,0 +1,2 @@
+lvmetad
+lvmetactl
diff --git a/daemons/lvmetad/Makefile.in b/daemons/lvmetad/Makefile.in
index 0a174bb04..090bb7649 100644
--- a/daemons/lvmetad/Makefile.in
+++ b/daemons/lvmetad/Makefile.in
@@ -18,7 +18,7 @@ top_builddir = @top_builddir@
SOURCES = lvmetad-core.c
SOURCES2 = testclient.c
-TARGETS = lvmetad lvmetad-testclient
+TARGETS = lvmetad lvmetactl
.PHONY: install_lvmetad
@@ -39,8 +39,11 @@ CFLAGS += $(EXTRA_EXEC_CFLAGS)
lvmetad: $(OBJECTS) $(top_builddir)/libdaemon/client/libdaemonclient.a \
$(top_builddir)/libdaemon/server/libdaemonserver.a
- $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJECTS) \
- $(DL_LIBS) $(LVMLIBS) $(LIBS) -rdynamic
+ $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJECTS) $(LVMLIBS) $(LIBS)
+
+lvmetactl: lvmetactl.o $(top_builddir)/libdaemon/client/libdaemonclient.a \
+ $(top_builddir)/libdaemon/server/libdaemonserver.a
+ $(CC) $(CFLAGS) $(LDFLAGS) -o $@ lvmetactl.o $(LVMLIBS)
# TODO: No idea. No idea how to test either.
#ifneq ("$(CFLOW_CMD)", "")
diff --git a/daemons/lvmetad/lvmetactl.c b/daemons/lvmetad/lvmetactl.c
new file mode 100644
index 000000000..2c1c9b94d
--- /dev/null
+++ b/daemons/lvmetad/lvmetactl.c
@@ -0,0 +1,183 @@
+/*
+ * Copyright (C) 2014 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ */
+
+#include "tool.h"
+
+#include "lvmetad-client.h"
+
+daemon_handle h;
+
+static void print_reply(daemon_reply reply)
+{
+ const char *a = daemon_reply_str(reply, "response", NULL);
+ const char *b = daemon_reply_str(reply, "status", NULL);
+ const char *c = daemon_reply_str(reply, "reason", NULL);
+
+ printf("response \"%s\" status \"%s\" reason \"%s\"\n",
+ a ? a : "", b ? b : "", c ? c : "");
+}
+
+int main(int argc, char **argv)
+{
+ daemon_reply reply;
+ char *cmd;
+ char *uuid;
+ char *name;
+ int val;
+ int ver;
+
+ if (argc < 2) {
+ printf("lvmeta dump\n");
+ printf("lvmeta pv_list\n");
+ printf("lvmeta vg_list\n");
+ printf("lvmeta vg_lookup_name <name>\n");
+ printf("lvmeta vg_lookup_uuid <uuid>\n");
+ printf("lvmeta pv_lookup_uuid <uuid>\n");
+ printf("lvmeta set_global_invalid 0|1\n");
+ printf("lvmeta get_global_invalid\n");
+ printf("lvmeta set_vg_version <uuid> <version>\n");
+ printf("lvmeta vg_lock_type <uuid>\n");
+ return -1;
+ }
+
+ cmd = argv[1];
+
+ h = lvmetad_open(NULL);
+
+ if (!strcmp(cmd, "dump")) {
+ reply = daemon_send_simple(h, "dump",
+ "token = %s", "skip",
+ NULL);
+ printf("%s\n", reply.buffer.mem);
+
+ } else if (!strcmp(cmd, "pv_list")) {
+ reply = daemon_send_simple(h, "pv_list",
+ "token = %s", "skip",
+ NULL);
+ printf("%s\n", reply.buffer.mem);
+
+ } else if (!strcmp(cmd, "vg_list")) {
+ reply = daemon_send_simple(h, "vg_list",
+ "token = %s", "skip",
+ NULL);
+ printf("%s\n", reply.buffer.mem);
+
+ } else if (!strcmp(cmd, "set_global_invalid")) {
+ if (argc < 3) {
+ printf("set_global_invalid 0|1\n");
+ return -1;
+ }
+ val = atoi(argv[2]);
+
+ reply = daemon_send_simple(h, "set_global_info",
+ "global_invalid = %d", val,
+ "token = %s", "skip",
+ NULL);
+ print_reply(reply);
+
+ } else if (!strcmp(cmd, "get_global_invalid")) {
+ reply = daemon_send_simple(h, "get_global_info",
+ "token = %s", "skip",
+ NULL);
+ printf("%s\n", reply.buffer.mem);
+
+ } else if (!strcmp(cmd, "set_vg_version")) {
+ if (argc < 4) {
+ printf("set_vg_version <uuid> <ver>\n");
+ return -1;
+ }
+ uuid = argv[2];
+ ver = atoi(argv[3]);
+
+ reply = daemon_send_simple(h, "set_vg_info",
+ "uuid = %s", uuid,
+ "version = %d", ver,
+ "token = %s", "skip",
+ NULL);
+ print_reply(reply);
+
+ } else if (!strcmp(cmd, "vg_lookup_name")) {
+ if (argc < 3) {
+ printf("vg_lookup_name <name>\n");
+ return -1;
+ }
+ name = argv[2];
+
+ reply = daemon_send_simple(h, "vg_lookup",
+ "name = %s", name,
+ "token = %s", "skip",
+ NULL);
+ printf("%s\n", reply.buffer.mem);
+
+ } else if (!strcmp(cmd, "vg_lookup_uuid")) {
+ if (argc < 3) {
+ printf("vg_lookup_uuid <uuid>\n");
+ return -1;
+ }
+ uuid = argv[2];
+
+ reply = daemon_send_simple(h, "vg_lookup",
+ "uuid = %s", uuid,
+ "token = %s", "skip",
+ NULL);
+ printf("%s\n", reply.buffer.mem);
+
+ } else if (!strcmp(cmd, "vg_lock_type")) {
+ struct dm_config_node *metadata;
+ const char *lock_type;
+
+ if (argc < 3) {
+ printf("vg_lock_type <uuid>\n");
+ return -1;
+ }
+ uuid = argv[2];
+
+ reply = daemon_send_simple(h, "vg_lookup",
+ "uuid = %s", uuid,
+ "token = %s", "skip",
+ NULL);
+ /* printf("%s\n", reply.buffer.mem); */
+
+ metadata = dm_config_find_node(reply.cft->root, "metadata");
+ if (!metadata) {
+ printf("no metadata\n");
+ goto out;
+ }
+
+ lock_type = dm_config_find_str(metadata, "metadata/lock_type", NULL);
+ if (!lock_type) {
+ printf("no lock_type\n");
+ goto out;
+ }
+ printf("lock_type %s\n", lock_type);
+
+ } else if (!strcmp(cmd, "pv_lookup_uuid")) {
+ if (argc < 3) {
+ printf("pv_lookup_uuid <uuid>\n");
+ return -1;
+ }
+ uuid = argv[2];
+
+ reply = daemon_send_simple(h, "pv_lookup",
+ "uuid = %s", uuid,
+ "token = %s", "skip",
+ NULL);
+ printf("%s\n", reply.buffer.mem);
+
+ } else {
+ printf("unknown command\n");
+ goto out_close;
+ }
+out:
+ daemon_reply_destroy(reply);
+out_close:
+ daemon_close(h);
+ return 0;
+}
diff --git a/daemons/lvmetad/lvmetad-core.c b/daemons/lvmetad/lvmetad-core.c
index 693eb86dc..7b57dc4b9 100644
--- a/daemons/lvmetad/lvmetad-core.c
+++ b/daemons/lvmetad/lvmetad-core.c
@@ -14,23 +14,114 @@
#define _XOPEN_SOURCE 500 /* pthread */
-#include "configure.h"
+#define _REENTRANT
+
+#include "tool.h"
+
#include "daemon-io.h"
-#include "config-util.h"
#include "daemon-server.h"
#include "daemon-log.h"
#include "lvm-version.h"
#include <assert.h>
#include <pthread.h>
-#include <stdint.h>
-#include <unistd.h>
-
-#include <math.h> /* fabs() */
-#include <float.h> /* DBL_EPSILON */
#define LVMETAD_SOCKET DEFAULT_RUN_DIR "/lvmetad.socket"
+/*
+ * valid/invalid state of cached metadata
+ *
+ * Normally when using lvmetad, the state is kept up-to-date through a
+ * combination of notifications from clients and updates triggered by uevents.
+ * When using lvmlockd, the lvmetad state is expected to become out of
+ * date (invalid/stale) when other hosts make changes to the metadata on disk.
+ *
+ * To deal with this, the metadata cached in lvmetad can be flagged as invalid.
+ * This invalid flag is returned along with the metadata when read by a
+ * command. The command can check for the invalid flag and decide that it
+ * should either use the stale metadata (uncommon), or read the latest metadata
+ * from disk rather than using the invalid metadata that was returned. If the
+ * command reads the latest metadata from disk, it can choose to send it to
+ * lvmetad to update the cached copy and clear the invalid flag in lvmetad.
+ * Otherwise, the next command to read the metadata from lvmetad will also
+ * receive the invalid metadata with the invalid flag (and like the previous
+ * command, it too may choose to read the latest metadata from disk and can
+ * then also choose to update the lvmetad copy.)
+ *
+ * For purposes of tracking the invalid state, LVM metadata is considered
+ * to be either VG-specific or global. VG-specific metadata is metadata
+ * that is isolated to a VG, such as the LVs it contains. Global
+ * metadata is metadata that is not isolated to a single VG. Global
+ * metadata includes:
+ * . the VG namespace (which VG names are used)
+ * . the set of orphan PVs (which PVs are in VGs and which are not)
+ * . properties of orphan PVs (the size of an orphan PV)
+ *
+ * If the metadata for a single VG becomes invalid, the VGFL_INVALID
+ * flag can be set in the vg_info struct for that VG. If the global
+ * metadata becomes invalid, the GLFL_INVALID flag can be set in the
+ * lvmetad daemon state.
+ *
+ * If a command reads VG metadata and VGFL_INVALID is set, an
+ * extra config node called "vg_invalid" is added to the config
+ * data returned to the command.
+ *
+ * If a command reads global metadata and GLFL_INVALID is set, an
+ * extra config node called "global_invalid" is added to the
+ * config data returned to the command.
+ *
+ * If a command sees vg_invalid, and wants the latest VG metadata,
+ * it only needs to scan disks of the PVs in that VG.
+ * It can then use vg_update to send the latest metadata to lvmetad
+ * which clears the VGFL_INVALID flag.
+ *
+ * If a command sees global_invalid, and wants the latest metadata,
+ * it should scan all devices to update lvmetad, and then send
+ * lvmetad the "set_global_info global_invalid=0" message to clear
+ * GLFL_INVALID.
+ *
+ * (When rescanning devices to update lvmetad, the command must use
+ * the global filter cmd->lvmetad_filter so that it processes the same
+ * devices that are seen by lvmetad.)
+ *
+ * The lvmetad INVALID flags can be set by sending lvmetad the messages:
+ *
+ * . set_vg_info with the latest VG seqno. If the VG seqno is larger
+ * than the cached VG seqno, VGFL_INVALID is set for the VG.
+ *
+ * . set_global_info with global_invalid=1 sets GLFL_INVALID.
+ *
+ * Different entities could use these functions to invalidate metadata
+ * if/when they detected that the cache is stale. How they detect that
+ * the cache is stale depends on the details of the specific entity.
+ *
+ * In the case of lvmlockd, it embeds values into its locks to keep track
+ * of when other nodes have changed metadata on disk related to those locks.
+ * When acquiring locks it can look at these values and detect that
+ * the metadata associated with the lock has been changed.
+ * When the values change, it uses set_vg_info/set_global_info to
+ * invalidate the lvmetad cache.
+ *
+ * The values that lvmlockd distributes through its locks are the
+ * latest VG seqno in VG locks and a global counter in the global lock.
+ * When a host acquires a VG lock and sees that the embedded seqno is
+ * larger than it was previously, it knows that it should invalidate the
+ * lvmetad cache for the VG. If the host acquires the global lock
+ * and sees that the counter is larger than previously, it knows that
+ * it should invalidate the global info in lvmetad. This invalidation
+ * is done before the lock is returned to the command. This way the
+ * invalid flag will be set on the metadata before the command reads
+ * it from lvmetad.
+ */
+
+struct vg_info {
+ int64_t external_version;
+ uint32_t flags; /* VGFL_ */
+};
+
+#define GLFL_INVALID 0x00000001
+#define VGFL_INVALID 0x00000001
+
typedef struct {
log_state *log; /* convenience */
const char *log_config;
@@ -40,6 +131,8 @@ typedef struct {
struct dm_hash_table *vgid_to_metadata;
struct dm_hash_table *vgid_to_vgname;
+ struct dm_hash_table *vgid_to_outdated_pvs;
+ struct dm_hash_table *vgid_to_info;
struct dm_hash_table *vgname_to_vgid;
struct dm_hash_table *pvid_to_vgid;
struct {
@@ -50,6 +143,7 @@ typedef struct {
pthread_mutex_t pvid_to_vgid;
} lock;
char token[128];
+ uint32_t flags; /* GLFL_ */
pthread_mutex_t token_lock;
} lvmetad_state;
@@ -60,17 +154,19 @@ static void destroy_metadata_hashes(lvmetad_state *s)
dm_hash_iterate(n, s->vgid_to_metadata)
dm_config_destroy(dm_hash_get_data(s->vgid_to_metadata, n));
+ dm_hash_iterate(n, s->vgid_to_outdated_pvs)
+ dm_config_destroy(dm_hash_get_data(s->vgid_to_outdated_pvs, n));
+
dm_hash_iterate(n, s->pvid_to_pvmeta)
dm_config_destroy(dm_hash_get_data(s->pvid_to_pvmeta, n));
dm_hash_destroy(s->pvid_to_pvmeta);
dm_hash_destroy(s->vgid_to_metadata);
dm_hash_destroy(s->vgid_to_vgname);
+ dm_hash_destroy(s->vgid_to_outdated_pvs);
+ dm_hash_destroy(s->vgid_to_info);
dm_hash_destroy(s->vgname_to_vgid);
- dm_hash_iterate(n, s->device_to_pvid)
- dm_free(dm_hash_get_data(s->device_to_pvid, n));
-
dm_hash_destroy(s->device_to_pvid);
dm_hash_destroy(s->pvid_to_vgid);
}
@@ -81,6 +177,8 @@ static void create_metadata_hashes(lvmetad_state *s)
s->device_to_pvid = dm_hash_create(32);
s->vgid_to_metadata = dm_hash_create(32);
s->vgid_to_vgname = dm_hash_create(32);
+ s->vgid_to_outdated_pvs = dm_hash_create(32);
+ s->vgid_to_info = dm_hash_create(32);
s->pvid_to_vgid = dm_hash_create(32);
s->vgname_to_vgid = dm_hash_create(32);
}
@@ -244,6 +342,30 @@ static int update_pv_status(lvmetad_state *s,
return complete;
}
+static struct dm_config_node *add_last_node(struct dm_config_tree *cft, const char *node_name)
+{
+ struct dm_config_node *cn, *last;
+
+ cn = cft->root;
+ last = cn;
+
+ while (cn->sib) {
+ last = cn->sib;
+ cn = last;
+ }
+
+ cn = dm_config_create_node(cft, node_name);
+ if (!cn)
+ return NULL;
+
+ cn->v = NULL;
+ cn->sib = NULL;
+ cn->parent = cft->root;
+ last->sib = cn;
+
+ return cn;
+}
+
static struct dm_config_node *make_pv_node(lvmetad_state *s, const char *pvid,
struct dm_config_tree *cft,
struct dm_config_node *parent,
@@ -307,6 +429,9 @@ static response pv_list(lvmetad_state *s, request r)
cn = make_pv_node(s, id, res.cft, cn_pvs, cn);
}
+ if (s->flags & GLFL_INVALID)
+ add_last_node(res.cft, "global_invalid");
+
unlock_pvid_to_pvmeta(s);
return res;
@@ -351,6 +476,9 @@ static response pv_lookup(lvmetad_state *s, request r)
pv->key = "physical_volume";
unlock_pvid_to_pvmeta(s);
+ if (s->flags & GLFL_INVALID)
+ add_last_node(res.cft, "global_invalid");
+
return res;
}
@@ -419,14 +547,87 @@ static response vg_list(lvmetad_state *s, request r)
}
unlock_vgid_to_metadata(s);
+
+ if (s->flags & GLFL_INVALID)
+ add_last_node(res.cft, "global_invalid");
bad:
return res;
}
+static void mark_outdated_pv(lvmetad_state *s, const char *vgid, const char *pvid)
+{
+ struct dm_config_tree *pvmeta, *outdated_pvs;
+ struct dm_config_node *list, *cft_vgid;
+ struct dm_config_value *v;
+
+ lock_pvid_to_pvmeta(s);
+ pvmeta = dm_hash_lookup(s->pvid_to_pvmeta, pvid);
+ unlock_pvid_to_pvmeta(s);
+
+ /* if the MDA exists and is used, it will have ignore=0 set */
+ if (!pvmeta ||
+ (dm_config_find_int64(pvmeta->root, "pvmeta/mda0/ignore", 1) &&
+ dm_config_find_int64(pvmeta->root, "pvmeta/mda1/ignore", 1)))
+ return;
+
+ WARN(s, "PV %s has outdated metadata", pvid);
+
+ outdated_pvs = dm_hash_lookup(s->vgid_to_outdated_pvs, vgid);
+ if (!outdated_pvs) {
+ if (!(outdated_pvs = dm_config_from_string("outdated_pvs/pv_list = []")) ||
+ !(cft_vgid = make_text_node(outdated_pvs, "vgid", dm_pool_strdup(outdated_pvs->mem, vgid),
+ outdated_pvs->root, NULL)))
+ abort();
+ if(!dm_hash_insert(s->vgid_to_outdated_pvs, cft_vgid->v->v.str, outdated_pvs))
+ abort();
+ DEBUGLOG(s, "created outdated_pvs list for VG %s", vgid);
+ }
+
+ list = dm_config_find_node(outdated_pvs->root, "outdated_pvs/pv_list");
+ v = list->v;
+ while (v) {
+ if (v->type != DM_CFG_EMPTY_ARRAY && !strcmp(v->v.str, pvid))
+ return;
+ v = v->next;
+ }
+ if (!(v = dm_config_create_value(outdated_pvs)))
+ abort();
+ v->type = DM_CFG_STRING;
+ v->v.str = dm_pool_strdup(outdated_pvs->mem, pvid);
+ v->next = list->v;
+ list->v = v;
+}
+
+static void chain_outdated_pvs(lvmetad_state *s, const char *vgid, struct dm_config_tree *metadata_cft, struct dm_config_node *metadata)
+{
+ struct dm_config_tree *cft = dm_hash_lookup(s->vgid_to_outdated_pvs, vgid), *pvmeta;
+ struct dm_config_node *pv, *res, *out_pvs = cft ? dm_config_find_node(cft->root, "outdated_pvs/pv_list") : NULL;
+ struct dm_config_value *pvs_v = out_pvs ? out_pvs->v : NULL;
+ if (!pvs_v)
+ return;
+ if (!(res = make_config_node(metadata_cft, "outdated_pvs", metadata_cft->root, 0)))
+ return; /* oops */
+ res->sib = metadata->child;
+ metadata->child = res;
+ for (; pvs_v && pvs_v->type != DM_CFG_EMPTY_ARRAY; pvs_v = pvs_v->next) {
+ pvmeta = dm_hash_lookup(s->pvid_to_pvmeta, pvs_v->v.str);
+ if (!pvmeta) {
+ WARN(s, "metadata for PV %s not found", pvs_v->v.str);
+ continue;
+ }
+ if (!(pv = dm_config_clone_node(metadata_cft, pvmeta->root, 0)))
+ continue;
+ pv->key = dm_config_find_str(pv, "pvmeta/id", NULL);
+ pv->sib = res->child;
+ res->child = pv;
+ }
+}
+
static response vg_lookup(lvmetad_state *s, request r)
{
struct dm_config_tree *cft;
struct dm_config_node *metadata, *n;
+ struct vg_info *info;
response res = { 0 };
const char *uuid = daemon_request_str(r, "uuid", NULL);
@@ -489,6 +690,17 @@ static response vg_lookup(lvmetad_state *s, request r)
unlock_vg(s, uuid);
update_pv_status(s, res.cft, n, 1); /* FIXME report errors */
+ chain_outdated_pvs(s, uuid, res.cft, n);
+
+ if (s->flags & GLFL_INVALID)
+ add_last_node(res.cft, "global_invalid");
+
+ info = dm_hash_lookup(s->vgid_to_info, uuid);
+ if (info && (info->flags & VGFL_INVALID)) {
+ n = add_last_node(res.cft, "vg_invalid");
+ if (!n)
+ goto bad;
+ }
return res;
bad:
@@ -496,65 +708,13 @@ bad:
return reply_fail("out of memory");
}
-/* Test if the doubles are close enough to be considered equal */
-static int close_enough(double d1, double d2)
-{
- return fabs(d1 - d2) < DBL_EPSILON;
-}
-
-static int compare_value(struct dm_config_value *a, struct dm_config_value *b)
-{
- int r = 0;
-
- if (a->type > b->type)
- return 1;
- if (a->type < b->type)
- return -1;
-
- switch (a->type) {
- case DM_CFG_STRING: r = strcmp(a->v.str, b->v.str); break;
- case DM_CFG_FLOAT: r = close_enough(a->v.f, b->v.f) ? 0 : (a->v.f > b->v.f) ? 1 : -1; break;
- case DM_CFG_INT: r = (a->v.i == b->v.i) ? 0 : (a->v.i > b->v.i) ? 1 : -1; break;
- case DM_CFG_EMPTY_ARRAY: return 0;
- }
-
- if (r == 0 && a->next && b->next)
- r = compare_value(a->next, b->next);
- return r;
-}
-
-static int compare_config(struct dm_config_node *a, struct dm_config_node *b)
-{
- int result = 0;
- if (a->v && b->v)
- result = compare_value(a->v, b->v);
- if (a->v && !b->v)
- result = 1;
- if (!a->v && b->v)
- result = -1;
- if (a->child && b->child)
- result = compare_config(a->child, b->child);
-
- if (result) {
- // DEBUGLOG("config inequality at %s / %s", a->key, b->key);
- return result;
- }
-
- if (a->sib && b->sib)
- result = compare_config(a->sib, b->sib);
- if (a->sib && !b->sib)
- result = 1;
- if (!a->sib && b->sib)
- result = -1;
-
- return result;
-}
-
static int vg_remove_if_missing(lvmetad_state *s, const char *vgid, int update_pvids);
+enum update_pvid_mode { UPDATE_ONLY, REMOVE_EMPTY, MARK_OUTDATED };
+
/* You need to be holding the pvid_to_vgid lock already to call this. */
static int update_pvid_to_vgid(lvmetad_state *s, struct dm_config_tree *vg,
- const char *vgid, int nuke_empty)
+ const char *vgid, int mode)
{
struct dm_config_node *pv;
struct dm_hash_table *to_check;
@@ -574,11 +734,14 @@ static int update_pvid_to_vgid(lvmetad_state *s, struct dm_config_tree *vg,
if (!(pvid = dm_config_find_str(pv->child, "id", NULL)))
continue;
- if (nuke_empty &&
+ if (mode == REMOVE_EMPTY &&
(vgid_old = dm_hash_lookup(s->pvid_to_vgid, pvid)) &&
!dm_hash_insert(to_check, vgid_old, (void*) 1))
goto out;
+ if (mode == MARK_OUTDATED)
+ mark_outdated_pv(s, vgid, pvid);
+
if (!dm_hash_insert(s->pvid_to_vgid, pvid, (void*) vgid))
goto out;
@@ -602,10 +765,11 @@ static int update_pvid_to_vgid(lvmetad_state *s, struct dm_config_tree *vg,
/* A pvid map lock needs to be held if update_pvids = 1. */
static int remove_metadata(lvmetad_state *s, const char *vgid, int update_pvids)
{
- struct dm_config_tree *old;
+ struct dm_config_tree *old, *outdated_pvs;
const char *oldname;
lock_vgid_to_metadata(s);
old = dm_hash_lookup(s->vgid_to_metadata, vgid);
+ outdated_pvs = dm_hash_lookup(s->vgid_to_outdated_pvs, vgid);
oldname = dm_hash_lookup(s->vgid_to_vgname, vgid);
if (!old) {
@@ -619,12 +783,15 @@ static int remove_metadata(lvmetad_state *s, const char *vgid, int update_pvids)
dm_hash_remove(s->vgid_to_metadata, vgid);
dm_hash_remove(s->vgid_to_vgname, vgid);
dm_hash_remove(s->vgname_to_vgid, oldname);
+ dm_hash_remove(s->vgid_to_outdated_pvs, vgid);
unlock_vgid_to_metadata(s);
if (update_pvids)
/* FIXME: What should happen when update fails */
update_pvid_to_vgid(s, old, "#orphan", 0);
dm_config_destroy(old);
+ if (outdated_pvs)
+ dm_config_destroy(outdated_pvs);
return 1;
}
@@ -668,7 +835,7 @@ static int vg_remove_if_missing(lvmetad_state *s, const char *vgid, int update_p
* this function, so they can be safely destroyed after update_metadata returns
* (anything that might have been retained is copied). */
static int update_metadata(lvmetad_state *s, const char *name, const char *_vgid,
- struct dm_config_node *metadata, int64_t *oldseq)
+ struct dm_config_node *metadata, int64_t *oldseq, const char *pvid)
{
struct dm_config_tree *cft = NULL;
struct dm_config_tree *old;
@@ -717,6 +884,10 @@ static int update_metadata(lvmetad_state *s, const char *name, const char *_vgid
if (seq < haveseq) {
DEBUGLOG(s, "Refusing to update metadata for %s (at %d) to %d", _vgid, haveseq, seq);
+
+ if (pvid)
+ mark_outdated_pv(s, dm_config_find_str(old->root, "metadata/id", NULL), pvid);
+
/* TODO: notify the client that their metadata is out of date? */
retval = 1;
goto out;
@@ -739,6 +910,8 @@ static int update_metadata(lvmetad_state *s, const char *name, const char *_vgid
if (haveseq >= 0 && haveseq < seq) {
INFO(s, "Updating metadata for %s at %d to %d", _vgid, haveseq, seq);
+ if (oldseq)
+ update_pvid_to_vgid(s, old, vgid, MARK_OUTDATED);
/* temporarily orphan all of our PVs */
update_pvid_to_vgid(s, old, "#orphan", 0);
}
@@ -773,12 +946,46 @@ out: /* FIXME: We should probably abort() on partial failures. */
return retval;
}
+static dev_t device_remove(lvmetad_state *s, struct dm_config_tree *pvmeta, dev_t device)
+{
+ struct dm_config_node *pvmeta_tmp;
+ struct dm_config_value *v = NULL;
+ dev_t alt_device = 0, prim_device = 0;
+
+ if ((pvmeta_tmp = dm_config_find_node(pvmeta->root, "pvmeta/devices_alternate")))
+ v = pvmeta_tmp->v;
+
+ prim_device = dm_config_find_int64(pvmeta->root, "pvmeta/device", 0);
+
+ /* it is the primary device */
+ if (device > 0 && device == prim_device && pvmeta_tmp && pvmeta_tmp->v)
+ {
+ alt_device = pvmeta_tmp->v->v.i;
+ pvmeta_tmp->v = pvmeta_tmp->v->next;
+ pvmeta_tmp = dm_config_find_node(pvmeta->root, "pvmeta/device");
+ pvmeta_tmp->v->v.i = alt_device;
+ } else if (device != prim_device)
+ alt_device = prim_device;
+
+ /* it is an alternate device */
+ if (device > 0 && v && v->v.i == device)
+ pvmeta_tmp->v = v->next;
+ else while (device > 0 && pvmeta_tmp && v) {
+ if (v->next && v->next->v.i == device)
+ v->next = v->next->next;
+ v = v->next;
+ }
+
+ return alt_device;
+}
+
static response pv_gone(lvmetad_state *s, request r)
{
const char *pvid = daemon_request_str(r, "uuid", NULL);
int64_t device = daemon_request_int(r, "device", 0);
+ int64_t alt_device = 0;
struct dm_config_tree *pvmeta;
- char *pvid_old, *vgid;
+ char *vgid;
DEBUGLOG(s, "pv_gone: %s / %" PRIu64, pvid, device);
@@ -792,15 +999,18 @@ static response pv_gone(lvmetad_state *s, request r)
DEBUGLOG(s, "pv_gone (updated): %s / %" PRIu64, pvid, device);
- pvmeta = dm_hash_lookup(s->pvid_to_pvmeta, pvid);
- pvid_old = dm_hash_lookup_binary(s->device_to_pvid, &device, sizeof(device));
+ if (!(pvmeta = dm_hash_lookup(s->pvid_to_pvmeta, pvid)))
+ return reply_unknown("PVID does not exist");
vgid = dm_hash_lookup(s->pvid_to_vgid, pvid);
dm_hash_remove_binary(s->device_to_pvid, &device, sizeof(device));
- dm_hash_remove(s->pvid_to_pvmeta, pvid);
- unlock_pvid_to_pvmeta(s);
- dm_free(pvid_old);
+ if (!(alt_device = device_remove(s, pvmeta, device)))
+ dm_hash_remove(s->pvid_to_pvmeta, pvid);
+
+ DEBUGLOG(s, "pv_gone alt_device = %" PRIu64, alt_device);
+
+ unlock_pvid_to_pvmeta(s);
if (vgid) {
if (!(vgid = dm_strdup(vgid)))
@@ -812,12 +1022,15 @@ static response pv_gone(lvmetad_state *s, request r)
dm_free(vgid);
}
- if (!pvmeta)
- return reply_unknown("PVID does not exist");
+ if (!alt_device)
+ dm_config_destroy(pvmeta);
- dm_config_destroy(pvmeta);
-
- return daemon_reply_simple("OK", NULL);
+ if (alt_device) {
+ return daemon_reply_simple("OK",
+ "device = %"PRId64, alt_device,
+ NULL);
+ } else
+ return daemon_reply_simple("OK", NULL );
}
static response pv_clear_all(lvmetad_state *s, request r)
@@ -845,11 +1058,11 @@ static response pv_found(lvmetad_state *s, request r)
const char *vgname = daemon_request_str(r, "vgname", NULL);
const char *vgid = daemon_request_str(r, "metadata/id", NULL);
const char *vgid_old = NULL;
- struct dm_config_node *pvmeta = dm_config_find_node(r.cft->root, "pvmeta");
+ struct dm_config_node *pvmeta = dm_config_find_node(r.cft->root, "pvmeta"), *altdev = NULL;
+ struct dm_config_value *altdev_v;
uint64_t device, device_old_pvid = 0;
struct dm_config_tree *cft, *pvmeta_old_dev = NULL, *pvmeta_old_pvid = NULL;
char *old;
- char *pvid_dup;
int complete = 0, orphan = 0;
int64_t seqno = -1, seqno_old = -1, changed = 0;
@@ -861,12 +1074,8 @@ static response pv_found(lvmetad_state *s, request r)
if (!dm_config_get_uint64(pvmeta, "pvmeta/device", &device))
return reply_fail("need PV device number");
- if (!(cft = dm_config_create()) ||
- (!(pvid_dup = dm_strdup(pvid)))) {
- if (cft)
- dm_config_destroy(cft);
+ if (!(cft = dm_config_create()))
return reply_fail("out of memory");
- }
lock_pvid_to_pvmeta(s);
@@ -875,7 +1084,6 @@ static response pv_found(lvmetad_state *s, request r)
if ((old = dm_hash_lookup_binary(s->device_to_pvid, &device, sizeof(device)))) {
pvmeta_old_dev = dm_hash_lookup(s->pvid_to_pvmeta, old);
- dm_hash_remove(s->pvid_to_pvmeta, old);
vgid_old = dm_hash_lookup(s->pvid_to_vgid, old);
}
@@ -885,35 +1093,69 @@ static response pv_found(lvmetad_state *s, request r)
if (!(cft->root = dm_config_clone_node(cft, pvmeta, 0)))
goto out_of_mem;
+ pvid = dm_config_find_str(cft->root, "pvmeta/id", NULL);
+
if (!pvmeta_old_pvid || compare_config(pvmeta_old_pvid->root, cft->root))
changed |= 1;
if (pvmeta_old_pvid && device != device_old_pvid) {
- DEBUGLOG(s, "pv %s no longer on device %" PRIu64, pvid, device_old_pvid);
- dm_free(dm_hash_lookup_binary(s->device_to_pvid, &device_old_pvid, sizeof(device_old_pvid)));
+ DEBUGLOG(s, "PV %s duplicated on device %" PRIu64, pvid, device_old_pvid);
dm_hash_remove_binary(s->device_to_pvid, &device_old_pvid, sizeof(device_old_pvid));
+ if (!dm_hash_insert_binary(s->device_to_pvid, &device_old_pvid,
+ sizeof(device_old_pvid), (void*)pvid))
+ goto out_of_mem;
+ if ((altdev = dm_config_find_node(pvmeta_old_pvid->root, "pvmeta/devices_alternate"))) {
+ altdev = dm_config_clone_node(cft, altdev, 0);
+ chain_node(altdev, cft->root, 0);
+ } else
+ if (!(altdev = make_config_node(cft, "devices_alternate", cft->root, 0)))
+ goto out_of_mem;
+ altdev_v = altdev->v;
+ while (1) {
+ if (altdev_v && altdev_v->v.i == device_old_pvid)
+ break;
+ if (altdev_v)
+ altdev_v = altdev_v->next;
+ if (!altdev_v) {
+ if (!(altdev_v = dm_config_create_value(cft)))
+ goto out_of_mem;
+ altdev_v->next = altdev->v;
+ altdev->v = altdev_v;
+ altdev->v->v.i = device_old_pvid;
+ break;
+ }
+ };
+ altdev_v = altdev->v;
+ while (altdev_v) {
+ if (altdev_v->next && altdev_v->next->v.i == device)
+ altdev_v->next = altdev_v->next->next;
+ altdev_v = altdev_v->next;
+ }
changed |= 1;
}
if (!dm_hash_insert(s->pvid_to_pvmeta, pvid, cft) ||
- !dm_hash_insert_binary(s->device_to_pvid, &device, sizeof(device), (void*)pvid_dup)) {
+ !dm_hash_insert_binary(s->device_to_pvid, &device, sizeof(device), (void*)pvid)) {
dm_hash_remove(s->pvid_to_pvmeta, pvid);
out_of_mem:
unlock_pvid_to_pvmeta(s);
dm_config_destroy(cft);
- dm_free(pvid_dup);
dm_free(old);
return reply_fail("out of memory");
}
unlock_pvid_to_pvmeta(s);
- dm_free(old);
-
if (pvmeta_old_pvid)
dm_config_destroy(pvmeta_old_pvid);
- if (pvmeta_old_dev && pvmeta_old_dev != pvmeta_old_pvid)
- dm_config_destroy(pvmeta_old_dev);
+ if (pvmeta_old_dev && pvmeta_old_dev != pvmeta_old_pvid) {
+ dev_t d = dm_config_find_int64(pvmeta_old_dev->root, "pvmeta/device", 0);
+ WARN(s, "pv_found: stray device %"PRId64, d);
+ if (!device_remove(s, pvmeta_old_dev, device)) {
+ dm_hash_remove(s->pvid_to_pvmeta, old);
+ dm_config_destroy(pvmeta_old_dev);
+ }
+ }
if (metadata) {
if (!vgid)
@@ -924,7 +1166,7 @@ out_of_mem:
if (daemon_request_int(r, "metadata/seqno", -1) < 0)
return reply_fail("need VG seqno");
- if (!update_metadata(s, vgname, vgid, metadata, &seqno_old))
+ if (!update_metadata(s, vgname, vgid, metadata, &seqno_old, pvid))
return reply_fail("metadata update failed");
changed |= (seqno_old != dm_config_find_int(metadata, "metadata/seqno", -1));
} else {
@@ -972,6 +1214,39 @@ out_of_mem:
NULL);
}
+static response vg_clear_outdated_pvs(lvmetad_state *s, request r)
+{
+ struct dm_config_tree *outdated_pvs;
+ const char *vgid = daemon_request_str(r, "vgid", NULL);
+
+ if (!vgid)
+ return reply_fail("need VG UUID");
+
+ if ((outdated_pvs = dm_hash_lookup(s->vgid_to_outdated_pvs, vgid))) {
+ dm_config_destroy(outdated_pvs);
+ dm_hash_remove(s->vgid_to_outdated_pvs, vgid);
+ }
+ return daemon_reply_simple("OK", NULL);
+}
+
+static void vg_info_update(lvmetad_state *s, const char *uuid,
+ struct dm_config_node *metadata)
+{
+ struct vg_info *info;
+ int64_t cache_version;
+
+ cache_version = dm_config_find_int64(metadata, "metadata/seqno", -1);
+ if (cache_version == -1)
+ return;
+
+ info = (struct vg_info *) dm_hash_lookup(s->vgid_to_info, uuid);
+ if (!info)
+ return;
+
+ if (cache_version >= info->external_version)
+ info->flags &= ~VGFL_INVALID;
+}
+
static response vg_update(lvmetad_state *s, request r)
{
struct dm_config_node *metadata = dm_config_find_node(r.cft->root, "metadata");
@@ -987,8 +1262,10 @@ static response vg_update(lvmetad_state *s, request r)
/* TODO defer metadata update here; add a separate vg_commit
* call; if client does not commit, die */
- if (!update_metadata(s, vgname, vgid, metadata, NULL))
+ if (!update_metadata(s, vgname, vgid, metadata, NULL, NULL))
return reply_fail("metadata update failed");
+
+ vg_info_update(s, vgid, metadata);
}
return daemon_reply_simple("OK", NULL);
}
@@ -1009,6 +1286,71 @@ static response vg_remove(lvmetad_state *s, request r)
return daemon_reply_simple("OK", NULL);
}
+static response set_global_info(lvmetad_state *s, request r)
+{
+ const int global_invalid = daemon_request_int(r, "global_invalid", -1);
+
+ if (global_invalid == 1)
+ s->flags |= GLFL_INVALID;
+
+ else if (global_invalid == 0)
+ s->flags &= ~GLFL_INVALID;
+
+ return daemon_reply_simple("OK", NULL);
+}
+
+static response get_global_info(lvmetad_state *s, request r)
+{
+ return daemon_reply_simple("OK", "global_invalid = %d",
+ (s->flags & GLFL_INVALID) ? 1 : 0,
+ NULL);
+}
+
+static response set_vg_info(lvmetad_state *s, request r)
+{
+ struct dm_config_tree *vg;
+ struct vg_info *info;
+ const char *uuid = daemon_request_str(r, "uuid", NULL);
+ const int64_t new_version = daemon_request_int(r, "version", -1);
+ int64_t cache_version;
+
+ if (!uuid)
+ goto out;
+
+ if (new_version == -1)
+ goto out;
+
+ vg = dm_hash_lookup(s->vgid_to_metadata, uuid);
+ if (!vg)
+ goto out;
+
+ if (!new_version)
+ goto inval;
+
+ cache_version = dm_config_find_int64(vg->root, "metadata/seqno", -1);
+
+ if (cache_version != -1 && new_version != -1 && cache_version >= new_version)
+ goto out;
+inval:
+ info = dm_hash_lookup(s->vgid_to_info, uuid);
+ if (!info) {
+ info = malloc(sizeof(struct vg_info));
+ if (!info)
+ goto bad;
+ memset(info, 0, sizeof(struct vg_info));
+ if (!dm_hash_insert(s->vgid_to_info, uuid, (void*)info))
+ goto bad;
+ }
+
+ info->external_version = new_version;
+ info->flags |= VGFL_INVALID;
+
+out:
+ return daemon_reply_simple("OK", NULL);
+bad:
+ return reply_fail("out of memory");
+}
+
static void _dump_cft(struct buffer *buf, struct dm_hash_table *ht, const char *key_addr)
{
struct dm_hash_node *n;
@@ -1046,6 +1388,52 @@ static void _dump_pairs(struct buffer *buf, struct dm_hash_table *ht, const char
buffer_append(buf, "}\n");
}
+static void _dump_info_version(struct buffer *buf, struct dm_hash_table *ht, const char *name, int int_key)
+{
+ char *append;
+ struct dm_hash_node *n = dm_hash_get_first(ht);
+ struct vg_info *info;
+
+ buffer_append(buf, name);
+ buffer_append(buf, " {\n");
+
+ while (n) {
+ const char *key = dm_hash_get_key(ht, n);
+ info = dm_hash_get_data(ht, n);
+ buffer_append(buf, " ");
+ (void) dm_asprintf(&append, "%s = %lld", key, (long long)info->external_version);
+ if (append)
+ buffer_append(buf, append);
+ buffer_append(buf, "\n");
+ dm_free(append);
+ n = dm_hash_get_next(ht, n);
+ }
+ buffer_append(buf, "}\n");
+}
+
+static void _dump_info_flags(struct buffer *buf, struct dm_hash_table *ht, const char *name, int int_key)
+{
+ char *append;
+ struct dm_hash_node *n = dm_hash_get_first(ht);
+ struct vg_info *info;
+
+ buffer_append(buf, name);
+ buffer_append(buf, " {\n");
+
+ while (n) {
+ const char *key = dm_hash_get_key(ht, n);
+ info = dm_hash_get_data(ht, n);
+ buffer_append(buf, " ");
+ (void) dm_asprintf(&append, "%s = %llx", key, (long long)info->flags);
+ if (append)
+ buffer_append(buf, append);
+ buffer_append(buf, "\n");
+ dm_free(append);
+ n = dm_hash_get_next(ht, n);
+ }
+ buffer_append(buf, "}\n");
+}
+
static response dump(lvmetad_state *s)
{
response res = { 0 };
@@ -1068,6 +1456,9 @@ static response dump(lvmetad_state *s)
buffer_append(b, "\n# VGID to VGNAME mapping\n\n");
_dump_pairs(b, s->vgid_to_vgname, "vgid_to_vgname", 0);
+ buffer_append(b, "\n# VGID to outdated PVs mapping\n\n");
+ _dump_cft(b, s->vgid_to_outdated_pvs, "outdated_pvs/vgid");
+
buffer_append(b, "\n# VGNAME to VGID mapping\n\n");
_dump_pairs(b, s->vgname_to_vgid, "vgname_to_vgid", 0);
@@ -1077,6 +1468,12 @@ static response dump(lvmetad_state *s)
buffer_append(b, "\n# DEVICE to PVID mapping\n\n");
_dump_pairs(b, s->device_to_pvid, "device_to_pvid", 1);
+ buffer_append(b, "\n# VGID to INFO version mapping\n\n");
+ _dump_info_version(b, s->vgid_to_info, "vgid_to_info", 0);
+
+ buffer_append(b, "\n# VGID to INFO flags mapping\n\n");
+ _dump_info_flags(b, s->vgid_to_info, "vgid_to_info", 0);
+
unlock_pvid_to_vgid(s);
unlock_pvid_to_pvmeta(s);
unlock_vgid_to_metadata(s);
@@ -1098,7 +1495,7 @@ static response handler(daemon_state s, client_handle h, request r)
return daemon_reply_simple("OK", NULL);
}
- if (strcmp(token, state->token) && strcmp(rq, "dump")) {
+ if (strcmp(token, state->token) && strcmp(rq, "dump") && strcmp(token, "skip")) {
pthread_mutex_unlock(&state->token_lock);
return daemon_reply_simple("token_mismatch",
"expected = %s", state->token,
@@ -1127,6 +1524,9 @@ static response handler(daemon_state s, client_handle h, request r)
if (!strcmp(rq, "vg_update"))
return vg_update(state, r);
+ if (!strcmp(rq, "vg_clear_outdated_pvs"))
+ return vg_clear_outdated_pvs(state, r);
+
if (!strcmp(rq, "vg_remove"))
return vg_remove(state, r);
@@ -1139,6 +1539,15 @@ static response handler(daemon_state s, client_handle h, request r)
if (!strcmp(rq, "vg_list"))
return vg_list(state, r);
+ if (!strcmp(rq, "set_global_info"))
+ return set_global_info(state, r);
+
+ if (!strcmp(rq, "get_global_info"))
+ return get_global_info(state, r);
+
+ if (!strcmp(rq, "set_vg_info"))
+ return set_vg_info(state, r);
+
if (!strcmp(rq, "dump"))
return dump(state);
diff --git a/daemons/lvmetad/testclient.c b/daemons/lvmetad/testclient.c
index 8ea068d09..59aedc690 100644
--- a/daemons/lvmetad/testclient.c
+++ b/daemons/lvmetad/testclient.c
@@ -1,3 +1,18 @@
+/*
+ * Copyright (C) 2011-2014 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include "tool.h"
+
#include "lvmetad-client.h"
#include "label.h"
#include "lvmcache.h"
@@ -109,7 +124,7 @@ int main(int argc, char **argv) {
if (argc > 1) {
int i;
- struct cmd_context *cmd = create_toolcontext(0, NULL, 0, 0);
+ struct cmd_context *cmd = create_toolcontext(0, NULL, 0, 0, 1, 1);
for (i = 1; i < argc; ++i) {
const char *uuid = NULL;
scan(h, argv[i]);
diff --git a/daemons/lvmlockd/.gitignore b/daemons/lvmlockd/.gitignore
new file mode 100644
index 000000000..6ba6109ff
--- /dev/null
+++ b/daemons/lvmlockd/.gitignore
@@ -0,0 +1,2 @@
+lvmlockctl
+lvmlockd
diff --git a/daemons/lvmlockd/Makefile.in b/daemons/lvmlockd/Makefile.in
new file mode 100644
index 000000000..a2aca301b
--- /dev/null
+++ b/daemons/lvmlockd/Makefile.in
@@ -0,0 +1,66 @@
+#
+# Copyright (C) 2014-2015 Red Hat, Inc.
+#
+# This file is part of LVM2.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU Lesser General Public License v.2.1.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+top_builddir = @top_builddir@
+
+SOURCES = lvmlockd-core.c
+
+ifeq ("@BUILD_LOCKDSANLOCK@", "yes")
+ SOURCES += lvmlockd-sanlock.c
+endif
+
+ifeq ("@BUILD_LOCKDDLM@", "yes")
+ SOURCES += lvmlockd-dlm.c
+endif
+
+TARGETS = lvmlockd lvmlockctl
+
+.PHONY: install_lvmlockd
+
+include $(top_builddir)/make.tmpl
+
+INCLUDES += -I$(top_srcdir)/libdaemon/server
+LVMLIBS = -ldaemonserver $(LVMINTERNAL_LIBS) -ldevmapper
+
+LIBS += $(PTHREAD_LIBS)
+
+ifeq ("@BUILD_LOCKDSANLOCK@", "yes")
+ LIBS += -lsanlock_client
+endif
+
+ifeq ("@BUILD_LOCKDDLM@", "yes")
+ LIBS += -ldlm_lt
+endif
+
+LDFLAGS += -L$(top_builddir)/libdaemon/server
+CLDFLAGS += -L$(top_builddir)/libdaemon/server
+
+lvmlockd: $(OBJECTS) $(top_builddir)/libdaemon/client/libdaemonclient.a \
+ $(top_builddir)/libdaemon/server/libdaemonserver.a
+ $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJECTS) $(LVMLIBS) $(LIBS)
+
+lvmlockctl: lvmlockctl.o $(top_builddir)/libdaemon/client/libdaemonclient.a \
+ $(top_builddir)/libdaemon/server/libdaemonserver.a
+ $(CC) $(CFLAGS) $(LDFLAGS) -o $@ lvmlockctl.o $(LVMLIBS)
+
+install_lvmlockd: lvmlockd
+ $(INSTALL_PROGRAM) -D $< $(sbindir)/$(<F)
+
+install_lvmlockctl: lvmlockctl
+ $(INSTALL_PROGRAM) -D $< $(sbindir)/$(<F)
+
+install_lvm2: install_lvmlockd install_lvmlockctl
+
+install: install_lvm2
diff --git a/daemons/lvmlockd/lvmlockctl.c b/daemons/lvmlockd/lvmlockctl.c
new file mode 100644
index 000000000..14be4fc4c
--- /dev/null
+++ b/daemons/lvmlockd/lvmlockctl.c
@@ -0,0 +1,751 @@
+/*
+ * Copyright (C) 2014-2015 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ */
+
+#include "tool.h"
+
+#include "lvmlockd-client.h"
+
+#include <stddef.h>
+#include <getopt.h>
+#include <signal.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <syslog.h>
+#include <sys/wait.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+static int quit = 0;
+static int info = 0;
+static int dump = 0;
+static int wait_opt = 0;
+static int force_opt = 0;
+static int kill_vg = 0;
+static int drop_vg = 0;
+static int gl_enable = 0;
+static int gl_disable = 0;
+static int stop_lockspaces = 0;
+static char *arg_vg_name = NULL;
+
+#define DUMP_SOCKET_NAME "lvmlockd-dump.sock"
+#define DUMP_BUF_SIZE (1024 * 1024)
+static char dump_buf[DUMP_BUF_SIZE+1];
+static int dump_len;
+static struct sockaddr_un dump_addr;
+static socklen_t dump_addrlen;
+
+daemon_handle _lvmlockd;
+
+#define log_error(fmt, args...) \
+do { \
+ printf(fmt "\n", ##args); \
+} while (0)
+
+#define MAX_LINE 512
+
+/* copied from lvmlockd-internal.h */
+#define MAX_NAME 64
+#define MAX_ARGS 64
+
+/*
+ * lvmlockd dumps the client info before the lockspaces,
+ * so we can look up client info when printing lockspace info.
+ */
+
+#define MAX_CLIENTS 100
+
+struct client_info {
+ uint32_t client_id;
+ int pid;
+ char name[MAX_NAME+1];
+};
+
+static struct client_info clients[MAX_CLIENTS];
+static int num_clients;
+
+static void save_client_info(char *line)
+{
+ uint32_t pid = 0;
+ int fd = 0;
+ int pi = 0;
+ uint32_t client_id = 0;
+ char name[MAX_NAME+1] = { 0 };
+
+ sscanf(line, "info=client pid=%u fd=%d pi=%d id=%u name=%s",
+ &pid, &fd, &pi, &client_id, name);
+
+ clients[num_clients].client_id = client_id;
+ clients[num_clients].pid = pid;
+ strcpy(clients[num_clients].name, name);
+ num_clients++;
+}
+
+static void find_client_info(uint32_t client_id, uint32_t *pid, char *cl_name)
+{
+ int i;
+
+ for (i = 0; i < num_clients; i++) {
+ if (clients[i].client_id == client_id) {
+ *pid = clients[i].pid;
+ strcpy(cl_name, clients[i].name);
+ return;
+ }
+ }
+}
+
+static int first_ls = 1;
+
+static void format_info_ls(char *line)
+{
+ char ls_name[MAX_NAME+1] = { 0 };
+ char vg_name[MAX_NAME+1] = { 0 };
+ char vg_uuid[MAX_NAME+1] = { 0 };
+ char vg_sysid[MAX_NAME+1] = { 0 };
+ char lock_args[MAX_ARGS+1] = { 0 };
+ char lock_type[MAX_NAME+1] = { 0 };
+
+ sscanf(line, "info=ls ls_name=%s vg_name=%s vg_uuid=%s vg_sysid=%s vg_args=%s lm_type=%s",
+ ls_name, vg_name, vg_uuid, vg_sysid, lock_args, lock_type);
+
+ if (!first_ls)
+ printf("\n");
+ first_ls = 0;
+
+ printf("VG %s lock_type=%s %s\n", vg_name, lock_type, vg_uuid);
+
+ printf("LS %s %s\n", lock_type, ls_name);
+}
+
+static void format_info_ls_action(char *line)
+{
+ uint32_t client_id = 0;
+ char flags[MAX_NAME+1] = { 0 };
+ char version[MAX_NAME+1] = { 0 };
+ char op[MAX_NAME+1] = { 0 };
+ uint32_t pid = 0;
+ char cl_name[MAX_NAME+1] = { 0 };
+
+ sscanf(line, "info=ls_action client_id=%u %s %s op=%s",
+ &client_id, flags, version, op);
+
+ find_client_info(client_id, &pid, cl_name);
+
+ printf("OP %s pid %u (%s)\n", op, pid, cl_name);
+}
+
+static void format_info_r(char *line, char *r_name_out, char *r_type_out)
+{
+ char r_name[MAX_NAME+1] = { 0 };
+ char r_type[4] = { 0 };
+ char mode[4] = { 0 };
+ char sh_count[MAX_NAME+1] = { 0 };
+ uint32_t ver = 0;
+
+ sscanf(line, "info=r name=%s type=%s mode=%s %s version=%u",
+ r_name, r_type, mode, sh_count, &ver);
+
+ strcpy(r_name_out, r_name);
+ strcpy(r_type_out, r_type);
+
+ /* when mode is not un, wait and print each lk line */
+ if (strcmp(mode, "un"))
+ return;
+
+ /* when mode is un, there will be no lk lines, so print now */
+
+ if (!strcmp(r_type, "gl")) {
+ printf("LK GL un ver %u\n", ver);
+
+ } else if (!strcmp(r_type, "vg")) {
+ printf("LK VG un ver %u\n", ver);
+
+ } else if (!strcmp(r_type, "lv")) {
+ printf("LK LV un %s\n", r_name);
+ }
+}
+
+static void format_info_lk(char *line, char *r_name, char *r_type)
+{
+ char mode[4] = { 0 };
+ uint32_t ver = 0;
+ char flags[MAX_NAME+1] = { 0 };
+ uint32_t client_id = 0;
+ uint32_t pid = 0;
+ char cl_name[MAX_NAME+1] = { 0 };
+
+ if (!r_name[0] || !r_type[0]) {
+ printf("format_info_lk error r_name %s r_type %s\n", r_name, r_type);
+ printf("%s\n", line);
+ return;
+ }
+
+ sscanf(line, "info=lk mode=%s version=%u %s client_id=%u",
+ mode, &ver, flags, &client_id);
+
+ find_client_info(client_id, &pid, cl_name);
+
+ if (!strcmp(r_type, "gl")) {
+ printf("LK GL %s ver %u pid %u (%s)\n", mode, ver, pid, cl_name);
+
+ } else if (!strcmp(r_type, "vg")) {
+ printf("LK VG %s ver %u pid %u (%s)\n", mode, ver, pid, cl_name);
+
+ } else if (!strcmp(r_type, "lv")) {
+ printf("LK LV %s %s\n", mode, r_name);
+ }
+}
+
+static void format_info_r_action(char *line, char *r_name, char *r_type)
+{
+ uint32_t client_id = 0;
+ char flags[MAX_NAME+1] = { 0 };
+ char version[MAX_NAME+1] = { 0 };
+ char op[MAX_NAME+1] = { 0 };
+ char rt[4] = { 0 };
+ char mode[4] = { 0 };
+ char lm[MAX_NAME+1] = { 0 };
+ char result[MAX_NAME+1] = { 0 };
+ char lm_rv[MAX_NAME+1] = { 0 };
+ uint32_t pid = 0;
+ char cl_name[MAX_NAME+1] = { 0 };
+
+ if (!r_name[0] || !r_type[0]) {
+ printf("format_info_r_action error r_name %s r_type %s\n", r_name, r_type);
+ printf("%s\n", line);
+ return;
+ }
+
+ sscanf(line, "info=r_action client_id=%u %s %s op=%s rt=%s mode=%s %s %s %s",
+ &client_id, flags, version, op, rt, mode, lm, result, lm_rv);
+
+ find_client_info(client_id, &pid, cl_name);
+
+ if (strcmp(op, "lock")) {
+ printf("OP %s pid %u (%s)\n", op, pid, cl_name);
+ return;
+ }
+
+ if (!strcmp(r_type, "gl")) {
+ printf("LW GL %s ver %u pid %u (%s)\n", mode, 0, pid, cl_name);
+
+ } else if (!strcmp(r_type, "vg")) {
+ printf("LW VG %s ver %u pid %u (%s)\n", mode, 0, pid, cl_name);
+
+ } else if (!strcmp(r_type, "lv")) {
+ printf("LW LV %s %s\n", mode, r_name);
+ }
+}
+
+static void format_info_line(char *line, char *r_name, char *r_type)
+{
+ if (!strncmp(line, "info=structs ", strlen("info=structs "))) {
+ /* only print this in the raw info dump */
+
+ } else if (!strncmp(line, "info=client ", strlen("info=client "))) {
+ save_client_info(line);
+
+ } else if (!strncmp(line, "info=ls ", strlen("info=ls "))) {
+ format_info_ls(line);
+
+ } else if (!strncmp(line, "info=ls_action ", strlen("info=ls_action "))) {
+ format_info_ls_action(line);
+
+ } else if (!strncmp(line, "info=r ", strlen("info=r "))) {
+ /*
+ * r_name/r_type are reset when a new resource is found.
+ * They are reused for the lock and action lines that
+ * follow a resource line.
+ */
+ memset(r_name, 0, MAX_NAME+1);
+ memset(r_type, 0, MAX_NAME+1);
+ format_info_r(line, r_name, r_type);
+
+ } else if (!strncmp(line, "info=lk ", strlen("info=lk "))) {
+ /* will use info from previous r */
+ format_info_lk(line, r_name, r_type);
+
+ } else if (!strncmp(line, "info=r_action ", strlen("info=r_action "))) {
+ /* will use info from previous r */
+ format_info_r_action(line, r_name, r_type);
+ } else {
+ printf("UN %s\n", line);
+ }
+}
+
+static void format_info(void)
+{
+ char line[MAX_LINE];
+ char r_name[MAX_NAME+1];
+ char r_type[MAX_NAME+1];
+ int i, j;
+
+ j = 0;
+ memset(line, 0, sizeof(line));
+
+ for (i = 0; i < dump_len; i++) {
+ line[j++] = dump_buf[i];
+
+ if ((line[j-1] == '\n') || (line[j-1] == '\0')) {
+ format_info_line(line, r_name, r_type);
+ j = 0;
+ memset(line, 0, sizeof(line));
+ }
+ }
+}
+
+
+static daemon_reply _lvmlockd_send(const char *req_name, ...)
+{
+ va_list ap;
+ daemon_reply repl;
+ daemon_request req;
+
+ req = daemon_request_make(req_name);
+
+ va_start(ap, req_name);
+ daemon_request_extend_v(req, ap);
+ va_end(ap);
+
+ repl = daemon_send(_lvmlockd, req);
+
+ daemon_request_destroy(req);
+
+ return repl;
+}
+
+/* See the same in lib/locking/lvmlockd.c */
+#define NO_LOCKD_RESULT -1000
+
+static int _lvmlockd_result(daemon_reply reply, int *result)
+{
+ int reply_result;
+
+ if (reply.error) {
+ log_error("lvmlockd_result reply error %d", reply.error);
+ return 0;
+ }
+
+ if (strcmp(daemon_reply_str(reply, "response", ""), "OK")) {
+ log_error("lvmlockd_result bad response");
+ return 0;
+ }
+
+ reply_result = daemon_reply_int(reply, "op_result", NO_LOCKD_RESULT);
+ if (reply_result == -1000) {
+ log_error("lvmlockd_result no op_result");
+ return 0;
+ }
+
+ *result = reply_result;
+
+ return 1;
+}
+
+static int do_quit(void)
+{
+ daemon_reply reply;
+ int rv = 0;
+
+ reply = daemon_send_simple(_lvmlockd, "quit", NULL);
+
+ if (reply.error) {
+ log_error("reply error %d", reply.error);
+ rv = reply.error;
+ }
+
+ daemon_reply_destroy(reply);
+ return rv;
+}
+
+static int setup_dump_socket(void)
+{
+ int s, rv;
+
+ s = socket(AF_LOCAL, SOCK_DGRAM, 0);
+ if (s < 0)
+ return s;
+
+ memset(&dump_addr, 0, sizeof(dump_addr));
+ dump_addr.sun_family = AF_LOCAL;
+ strcpy(&dump_addr.sun_path[1], DUMP_SOCKET_NAME);
+ dump_addrlen = sizeof(sa_family_t) + strlen(dump_addr.sun_path+1) + 1;
+
+ rv = bind(s, (struct sockaddr *) &dump_addr, dump_addrlen);
+ if (rv < 0) {
+ rv = -errno;
+ if (!close(s))
+ log_error("failed to close dump socket");
+ return rv;
+ }
+
+ return s;
+}
+
+static int do_dump(const char *req_name)
+{
+ daemon_reply reply;
+ int result;
+ int fd, rv = 0;
+ int count = 0;
+
+ fd = setup_dump_socket();
+ if (fd < 0) {
+ log_error("socket error %d", fd);
+ return fd;
+ }
+
+ reply = daemon_send_simple(_lvmlockd, req_name, NULL);
+
+ if (reply.error) {
+ log_error("reply error %d", reply.error);
+ rv = reply.error;
+ goto out;
+ }
+
+ result = daemon_reply_int(reply, "result", 0);
+ dump_len = daemon_reply_int(reply, "dump_len", 0);
+
+ daemon_reply_destroy(reply);
+
+ if (result < 0) {
+ rv = result;
+ log_error("result %d", result);
+ }
+
+ if (!dump_len)
+ goto out;
+
+ memset(dump_buf, 0, sizeof(dump_buf));
+
+retry:
+ rv = recvfrom(fd, dump_buf + count, dump_len - count, MSG_WAITALL,
+ (struct sockaddr *)&dump_addr, &dump_addrlen);
+ if (rv < 0) {
+ log_error("recvfrom error %d %d", rv, errno);
+ rv = -errno;
+ goto out;
+ }
+ count += rv;
+
+ if (count < dump_len)
+ goto retry;
+
+ rv = 0;
+ if ((info && dump) || !strcmp(req_name, "dump"))
+ printf("%s\n", dump_buf);
+ else
+ format_info();
+out:
+ if (close(fd))
+ log_error("failed to close dump socket %d", fd);
+ return rv;
+}
+
+static int do_able(const char *req_name)
+{
+ daemon_reply reply;
+ int result;
+ int rv;
+
+ reply = _lvmlockd_send(req_name,
+ "cmd = %s", "lvmlockctl",
+ "pid = %d", getpid(),
+ "vg_name = %s", arg_vg_name,
+ NULL);
+
+ if (!_lvmlockd_result(reply, &result)) {
+ log_error("lvmlockd result %d", result);
+ rv = result;
+ } else {
+ rv = 0;
+ }
+
+ daemon_reply_destroy(reply);
+ return rv;
+}
+
+static int do_stop_lockspaces(void)
+{
+ daemon_reply reply;
+ char opts[32];
+ int result;
+ int rv;
+
+ memset(opts, 0, sizeof(opts));
+
+ if (wait_opt)
+ strcat(opts, "wait ");
+ if (force_opt)
+ strcat(opts, "force ");
+
+ reply = _lvmlockd_send("stop_all",
+ "cmd = %s", "lvmlockctl",
+ "pid = %d", getpid(),
+ "opts = %s", opts[0] ? opts : "none",
+ NULL);
+
+ if (!_lvmlockd_result(reply, &result)) {
+ log_error("lvmlockd result %d", result);
+ rv = result;
+ } else {
+ rv = 0;
+ }
+
+ daemon_reply_destroy(reply);
+ return rv;
+}
+
+static int do_kill(void)
+{
+ daemon_reply reply;
+ int result;
+ int rv;
+
+ syslog(LOG_EMERG, "Lost access to sanlock lease storage in VG %s.", arg_vg_name);
+ /* These two lines explain the manual alternative to the FIXME below. */
+ syslog(LOG_EMERG, "Immediately deactivate LVs in VG %s.", arg_vg_name);
+ syslog(LOG_EMERG, "Once VG is unused, run lvmlockctl --drop %s.", arg_vg_name);
+
+ /*
+ * It may not be strictly necessary to notify lvmlockd of the kill, but
+ * lvmlockd can use this information to avoid attempting any new lock
+ * requests in the VG (which would fail anyway), and can return an
+ * error indicating that the VG has been killed.
+ */
+
+ reply = _lvmlockd_send("kill_vg",
+ "cmd = %s", "lvmlockctl",
+ "pid = %d", getpid(),
+ "vg_name = %s", arg_vg_name,
+ NULL);
+
+ if (!_lvmlockd_result(reply, &result)) {
+ log_error("lvmlockd result %d", result);
+ rv = result;
+ } else {
+ rv = 0;
+ }
+
+ daemon_reply_destroy(reply);
+
+ /*
+ * FIXME: here is where we should implement a strong form of
+ * blkdeactivate, and if it completes successfully, automatically call
+ * do_drop() afterward. (The drop step may not always be necessary
+ * if the lvm commands run while shutting things down release all the
+ * leases.)
+ *
+ * run_strong_blkdeactivate();
+ * do_drop();
+ */
+
+ return rv;
+}
+
+static int do_drop(void)
+{
+ daemon_reply reply;
+ int result;
+ int rv;
+
+ syslog(LOG_WARNING, "Dropping locks for VG %s.", arg_vg_name);
+
+ /*
+ * Check for misuse by looking for any active LVs in the VG
+ * and refusing this operation if found? One possible way
+ * to kill LVs (e.g. if fs cannot be unmounted) is to suspend
+ * them, or replace them with the error target. In that
+ * case the LV will still appear to be active, but it is
+ * safe to release the lock.
+ */
+
+ reply = _lvmlockd_send("drop_vg",
+ "cmd = %s", "lvmlockctl",
+ "pid = %d", getpid(),
+ "vg_name = %s", arg_vg_name,
+ NULL);
+
+ if (!_lvmlockd_result(reply, &result)) {
+ log_error("lvmlockd result %d", result);
+ rv = result;
+ } else {
+ rv = 0;
+ }
+
+ daemon_reply_destroy(reply);
+ return rv;
+}
+
+static void print_usage(void)
+{
+ printf("lvmlockctl options\n");
+ printf("Options:\n");
+ printf("--help | -h\n");
+ printf(" Show this help information.\n");
+ printf("--quit | -q\n");
+ printf(" Tell lvmlockd to quit.\n");
+ printf("--info | -i\n");
+ printf(" Print lock state information from lvmlockd.\n");
+ printf("--dump | -d\n");
+ printf(" Print log buffer from lvmlockd.\n");
+ printf("--wait | -w 0|1\n");
+ printf(" Wait option for other commands.\n");
+ printf("--force | -f 0|1>\n");
+ printf(" Force option for other commands.\n");
+ printf("--kill | -k <vg_name>\n");
+ printf(" Kill access to the vg when sanlock cannot renew lease.\n");
+ printf("--drop | -r <vg_name>\n");
+ printf(" Clear locks for the vg after it has been killed and is no longer used.\n");
+ printf("--gl-enable <vg_name>\n");
+ printf(" Tell lvmlockd to enable the global lock in a sanlock vg.\n");
+ printf("--gl-disable <vg_name>\n");
+ printf(" Tell lvmlockd to disable the global lock in a sanlock vg.\n");
+ printf("--stop-lockspaces | -S\n");
+ printf(" Stop all lockspaces.\n");
+}
+
+static int read_options(int argc, char *argv[])
+{
+ int option_index = 0;
+ int c;
+
+ static struct option long_options[] = {
+ {"help", no_argument, 0, 'h' },
+ {"quit", no_argument, 0, 'q' },
+ {"info", no_argument, 0, 'i' },
+ {"dump", no_argument, 0, 'd' },
+ {"wait", required_argument, 0, 'w' },
+ {"force", required_argument, 0, 'f' },
+ {"kill", required_argument, 0, 'k' },
+ {"drop", required_argument, 0, 'r' },
+ {"gl-enable", required_argument, 0, 'E' },
+ {"gl-disable", required_argument, 0, 'D' },
+ {"stop-lockspaces", no_argument, 0, 'S' },
+ {0, 0, 0, 0 }
+ };
+
+ if (argc == 1) {
+ print_usage();
+ exit(0);
+ }
+
+ while (1) {
+ c = getopt_long(argc, argv, "hqidE:D:w:k:r:S", long_options, &option_index);
+ if (c == -1)
+ break;
+
+ switch (c) {
+ case 'h':
+ /* --help */
+ print_usage();
+ exit(0);
+ case 'q':
+ /* --quit */
+ quit = 1;
+ break;
+ case 'i':
+ /* --info */
+ info = 1;
+ break;
+ case 'd':
+ /* --dump */
+ dump = 1;
+ break;
+ case 'w':
+ wait_opt = atoi(optarg);
+ break;
+ case 'k':
+ kill_vg = 1;
+ arg_vg_name = strdup(optarg);
+ break;
+ case 'r':
+ drop_vg = 1;
+ arg_vg_name = strdup(optarg);
+ break;
+ case 'E':
+ gl_enable = 1;
+ arg_vg_name = strdup(optarg);
+ break;
+ case 'D':
+ gl_disable = 1;
+ arg_vg_name = strdup(optarg);
+ break;
+ case 'S':
+ stop_lockspaces = 1;
+ break;
+ default:
+ print_usage();
+ exit(1);
+ }
+ }
+
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ int rv = 0;
+
+ rv = read_options(argc, argv);
+ if (rv < 0)
+ return rv;
+
+ _lvmlockd = lvmlockd_open(NULL);
+
+ if (_lvmlockd.socket_fd < 0 || _lvmlockd.error) {
+ log_error("Cannot connect to lvmlockd.");
+ return -1;
+ }
+
+ if (quit) {
+ rv = do_quit();
+ goto out;
+ }
+
+ if (info) {
+ rv = do_dump("info");
+ goto out;
+ }
+
+ if (dump) {
+ rv = do_dump("dump");
+ goto out;
+ }
+
+ if (kill_vg) {
+ rv = do_kill();
+ goto out;
+ }
+
+ if (drop_vg) {
+ rv = do_drop();
+ goto out;
+ }
+
+ if (gl_enable) {
+ rv = do_able("enable_gl");
+ goto out;
+ }
+
+ if (gl_disable) {
+ rv = do_able("disable_gl");
+ goto out;
+ }
+
+ if (stop_lockspaces) {
+ rv = do_stop_lockspaces();
+ goto out;
+ }
+
+out:
+ lvmlockd_close(_lvmlockd);
+ return rv;
+}
diff --git a/daemons/lvmlockd/lvmlockd-client.h b/daemons/lvmlockd/lvmlockd-client.h
new file mode 100644
index 000000000..0a1424f5e
--- /dev/null
+++ b/daemons/lvmlockd/lvmlockd-client.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2014-2015 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ */
+
+#ifndef _LVM_LVMLOCKD_CLIENT_H
+#define _LVM_LVMLOCKD_CLIENT_H
+
+#include "daemon-client.h"
+
+#define LVMLOCKD_SOCKET DEFAULT_RUN_DIR "/lvmlockd.socket"
+
+/* Wrappers to open/close connection */
+
+static inline daemon_handle lvmlockd_open(const char *sock)
+{
+ daemon_info lvmlockd_info = {
+ .path = "lvmlockd",
+ .socket = sock ?: LVMLOCKD_SOCKET,
+ .protocol = "lvmlockd",
+ .protocol_version = 1,
+ .autostart = 0
+ };
+
+ return daemon_open(lvmlockd_info);
+}
+
+static inline void lvmlockd_close(daemon_handle h)
+{
+ return daemon_close(h);
+}
+
+/*
+ * Errors returned as the lvmlockd result value.
+ */
+#define ENOLS 210 /* lockspace not found */
+#define ESTARTING 211 /* lockspace is starting */
+#define EARGS 212
+#define EHOSTID 213
+#define EMANAGER 214
+#define EPREPARE 215
+#define ELOCKD 216
+#define EVGKILLED 217 /* sanlock lost access to leases and VG is killed. */
+#define ELOCKIO 218 /* sanlock io errors during lock op, may be transient. */
+
+#endif /* _LVM_LVMLOCKD_CLIENT_H */
diff --git a/daemons/lvmlockd/lvmlockd-core.c b/daemons/lvmlockd/lvmlockd-core.c
new file mode 100644
index 000000000..b99cb0bba
--- /dev/null
+++ b/daemons/lvmlockd/lvmlockd-core.c
@@ -0,0 +1,5896 @@
+/*
+ * Copyright (C) 2014-2015 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ */
+
+#define _XOPEN_SOURCE 500 /* pthread */
+#define _ISOC99_SOURCE
+#define _REENTRANT
+
+#include "tool.h"
+
+#include "daemon-io.h"
+#include "daemon-server.h"
+#include "daemon-log.h"
+#include "lvm-version.h"
+#include "lvmetad-client.h"
+#include "lvmlockd-client.h"
+
+/* #include <assert.h> */
+#include <errno.h>
+#include <pthread.h>
+#include <stddef.h>
+#include <poll.h>
+#include <signal.h>
+#include <getopt.h>
+#include <syslog.h>
+#include <dirent.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/utsname.h>
+#include <sys/un.h>
+
+#define EXTERN
+#include "lvmlockd-internal.h"
+
+/*
+ * Basic operation of lvmlockd
+ *
+ * lvmlockd main process runs main_loop() which uses poll().
+ * poll listens for new connections from lvm commands and for
+ * messages from existing connected lvm commands.
+ *
+ * lvm command starts and connects to lvmlockd.
+ *
+ * lvmlockd receives a connection request from command and adds a
+ * 'struct client' to keep track of the connection to the command.
+ * The client's fd is added to the set of fd's in poll().
+ *
+ * lvm command sends a lock request to lvmlockd. The lock request
+ * can be for the global lock, a vg lock, or an lv lock.
+ *
+ * lvmlockd main_loop/poll sees a message from an existing client.
+ * It sets client.recv = 1, then wakes up client_thread_main.
+ *
+ * client_thread_main iterates through client structs (cl), looking
+ * for any that need processing, finds the one with cl->recv set,
+ * and calls client_recv_action(cl).
+ *
+ * client_recv_action(cl) reads the message/request from the client,
+ * allocates a new 'struct action' (act) to represent the request,
+ * sets the act with what is found in the request, then looks at
+ * the specific operation in act->op (LD_OP_FOO) to decide what to
+ * do with the action:
+ *
+ * . If the action is to start a lockspace, create a new thread
+ * to manage that lockspace: add_lockspace(act).
+ *
+ * . If the action is a lock request, pass the act to the thread
+ * that is managing that lockspace: add_lock_action(act).
+ *
+ * . Other misc actions are passed to the worker_thread:
+ * add_work_action(act).
+ *
+ * Once the client_thread has passed the action off to another
+ * thread to process, it goes back to waiting for more client
+ * handling work to do.
+ *
+ * The thread that was given the action by the client_thread
+ * now processes that action according to the operation, act->op.
+ * This is either a lockspace_thread (for lock ops or ops that
+ * add/rem a lockspace), or the worker_thread. See below for
+ * how these ops are processed by these threads. When the
+ * given thread is done processing the action, the result is
+ * set in act->result, and the act struct for the completed action
+ * is passed back to the client_thread (client_results list).
+ *
+ * The client_thread takes completed actions (from client_results
+ * list), and sends the result back to the client that sent the
+ * request represented by the action. The act struct is then freed.
+ *
+ * This completes the cycle of work between lvm commands (clients)
+ * and lvmlockd. In summary:
+ *
+ * - main process polls for new client connections and new requests
+ * from lvm commands
+ * - client_thread reads requests from clients
+ * - client_thread creates an action struct for each request
+ * - client_thread passes the act to another thread for processing
+ * - other threads pass completed act structs back to client_thread
+ * - client_thread sends the act result back to the client and frees the act
+ *
+ *
+ * Lockspace threads:
+ * Each lockd VG has its own lockspace that contains locks for that VG.
+ * Each 'struct lockspace' is managed by a separate lockspace_thread.
+ * When the lockspace_thread is first created, the first thing it does
+ * is join the lockspace in the lock manager. This can take a long time.
+ * If the join fails, the thread exits. After the join, the thread
+ * enters a loop waiting for lock actions to perform in the lockspace.
+ *
+ * The request to remove/leave a lockspace causes a flag to be set in
+ * the lockspace struct. When the lockspace_thread sees this flag
+ * set, it leaves the lockspace, and exits.
+ *
+ * When the client_thread passes a new action to a lockspace_thread,
+ * i.e. a new lock request, the lockspace_thread identifies which resource
+ * is being locked (GL, VG, LV), and gets the 'struct resource' (r) for it.
+ * r->type will be LD_RT_GL, LD_RT_VG, or LD_RT_LV. r->name is the
+ * resource name, and is fixed for GL and VG resources, but is based on
+ * the LV name for LV resources. The act is added to the resource's
+ * list of actions: r->actions, i.e. outstanding lock requests on the
+ * resource.
+ *
+ * The lockspace thread then iterates through each resource in the
+ * lockspace, processing any outstanding actions on each: res_process(ls, r).
+ *
+ * res_process() compares the outstanding actions/requests in r->actions
+ * against any existing locks on the resource in r->locks. If the
+ * action is blocked by existing locks, it's left on r->actions. If not,
+ * the action/request is passed to the lock manager. If the result from
+ * the lock manager is success, a new 'struct lock' is created for the
+ * action and saved on r->locks. The result is set in act->result and
+ * the act is passed back to the client_thread to be returned to the client.
+ */
+
+static const char *lvmlockd_protocol = "lvmlockd";
+static const int lvmlockd_protocol_version = 1;
+static int daemon_quit;
+static int adopt_opt;
+
+static daemon_handle lvmetad_handle;
+static pthread_mutex_t lvmetad_mutex;
+static int lvmetad_connected;
+
+/*
+ * We use a separate socket for dumping daemon info.
+ * This will not interfere with normal operations, and allows
+ * free-form debug data to be dumped instead of the libdaemon
+ * protocol that wants all data in the cft format.
+ * 1MB should fit all the info we need to dump.
+ */
+#define DUMP_SOCKET_NAME "lvmlockd-dump.sock"
+#define DUMP_BUF_SIZE (1024 * 1024)
+static char dump_buf[DUMP_BUF_SIZE];
+static struct sockaddr_un dump_addr;
+static socklen_t dump_addrlen;
+
+/*
+ * Main program polls client connections, adds new clients,
+ * adds work for client thread.
+ *
+ * pollfd_mutex is used for adding vs removing entries,
+ * and for resume vs realloc.
+ */
+#define POLL_FD_UNUSED -1 /* slot is free */
+#define POLL_FD_IGNORE -2 /* slot is used but ignore in poll */
+#define ADD_POLL_SIZE 16 /* increment slots by this amount */
+
+static pthread_mutex_t pollfd_mutex;
+static struct pollfd *pollfd;
+static int pollfd_size;
+static int pollfd_maxi;
+static int listen_pi;
+static int listen_fd;
+static int restart_pi;
+static int restart_fds[2];
+
+/*
+ * Each lockspace has its own thread to do locking.
+ * The lockspace thread makes synchronous lock requests to dlm/sanlock.
+ * Every vg with a lockd type, i.e. "dlm", "sanlock", should be on this list.
+ *
+ * lockspaces_inactive holds old ls structs for vgs that have been
+ * stopped, or for vgs that failed to start. The old ls structs
+ * are removed from the inactive list and freed when a new ls with
+ * the same name is started and added to the standard lockspaces list.
+ * Keeping this bit of "history" for the ls allows us to return a
+ * more informative error message if a vg lock request is made for
+ * an ls that has been stopped or failed to start.
+ */
+static pthread_mutex_t lockspaces_mutex;
+static struct list_head lockspaces;
+static struct list_head lockspaces_inactive;
+
+/*
+ * Client thread reads client requests and writes client results.
+ */
+static pthread_t client_thread;
+static pthread_mutex_t client_mutex;
+static pthread_cond_t client_cond;
+static struct list_head client_list; /* connected clients */
+static struct list_head client_results; /* actions to send back to clients */
+static uint32_t client_ids; /* 0 and ADOPT_CLIENT_ID are skipped */
+static int client_stop; /* stop the thread */
+static int client_work; /* a client on client_list has work to do */
+
+#define ADOPT_CLIENT_ID 0xFFFFFFFF /* special client_id for adopt actions */
+static struct list_head adopt_results; /* special start actions from adopt_locks() */
+
+/*
+ * Worker thread performs misc non-locking actions, e.g. init/free.
+ */
+static pthread_t worker_thread;
+static pthread_mutex_t worker_mutex;
+static pthread_cond_t worker_cond;
+static struct list_head worker_list; /* actions for worker_thread */
+static int worker_stop; /* stop the thread */
+static int worker_wake; /* wake the thread without adding work */
+
+/*
+ * The content of every log_foo() statement is saved in the
+ * circular buffer, which can be dumped to a client and printed.
+ */
+#define LOG_LINE_SIZE 256
+#define LOG_DUMP_SIZE DUMP_BUF_SIZE
+#define LOG_SYSLOG_PRIO LOG_WARNING
+static char log_dump[LOG_DUMP_SIZE];
+static unsigned int log_point;
+static unsigned int log_wrap;
+static pthread_mutex_t log_mutex;
+static int syslog_priority = LOG_SYSLOG_PRIO;
+
+/*
+ * Structure pools to avoid repeated malloc/free.
+ */
+#define MAX_UNUSED_ACTION 64
+#define MAX_UNUSED_CLIENT 64
+#define MAX_UNUSED_RESOURCE 64
+#define MAX_UNUSED_LOCK 64
+static pthread_mutex_t unused_struct_mutex;
+static struct list_head unused_action;
+static struct list_head unused_client;
+static struct list_head unused_resource;
+static struct list_head unused_lock;
+static int unused_action_count;
+static int unused_client_count;
+static int unused_resource_count;
+static int unused_lock_count;
+static int resource_lm_data_size; /* max size of lm_data from sanlock|dlm */
+static int alloc_new_structs; /* used for initializing in setup_structs */
+
+#define DO_STOP 1
+#define NO_STOP 0
+#define DO_FREE 1
+#define NO_FREE 0
+#define DO_FORCE 1
+#define NO_FORCE 0
+
+static int add_lock_action(struct action *act);
+static int str_to_lm(const char *str);
+static int clear_lockspace_inactive(char *name);
+static int setup_dump_socket(void);
+static void send_dump_buf(int fd, int dump_len);
+static int dump_info(int *dump_len);
+static int dump_log(int *dump_len);
+
/*
 * Map a syslog priority name (e.g. "err", "warning") to its LOG_*
 * value. Accepts the aliases "error" and "warn". Unrecognized names
 * fall back to LOG_WARNING.
 */
static int _syslog_name_to_num(const char *name)
{
	static const struct { const char *name; int num; } names[] = {
		{ "emerg",   LOG_EMERG },
		{ "alert",   LOG_ALERT },
		{ "crit",    LOG_CRIT },
		{ "err",     LOG_ERR },
		{ "error",   LOG_ERR },
		{ "warning", LOG_WARNING },
		{ "warn",    LOG_WARNING },
		{ "notice",  LOG_NOTICE },
		{ "info",    LOG_INFO },
		{ "debug",   LOG_DEBUG },
	};
	size_t i;

	for (i = 0; i < sizeof(names) / sizeof(names[0]); i++) {
		if (!strcmp(name, names[i].name))
			return names[i].num;
	}
	return LOG_WARNING;
}
+
/*
 * Map a LOG_* syslog priority value back to its canonical name.
 * Returns "unknown" for values outside the standard set.
 */
static const char *_syslog_num_to_name(int num)
{
	static const struct { int num; const char *name; } names[] = {
		{ LOG_EMERG,   "emerg" },
		{ LOG_ALERT,   "alert" },
		{ LOG_CRIT,    "crit" },
		{ LOG_ERR,     "err" },
		{ LOG_WARNING, "warning" },
		{ LOG_NOTICE,  "notice" },
		{ LOG_INFO,    "info" },
		{ LOG_DEBUG,   "debug" },
	};
	size_t i;

	for (i = 0; i < sizeof(names) / sizeof(names[0]); i++) {
		if (names[i].num == num)
			return names[i].name;
	}
	return "unknown";
}
+
/*
 * Current monotonic time in whole seconds (sub-second part discarded).
 *
 * The original ignored clock_gettime()'s return value and would read
 * an uninitialized struct on failure; return 0 instead in that
 * (practically impossible) case.
 */
static uint64_t monotime(void)
{
	struct timespec ts;

	if (clock_gettime(CLOCK_MONOTONIC, &ts) < 0)
		return 0;

	return ts.tv_sec;
}
+
/*
 * Append one formatted log line to the circular dump buffer.
 *
 * len     - number of bytes of 'line' to store (excludes the NUL)
 * line    - text to append
 * log_buf - circular buffer of LOG_DUMP_SIZE bytes
 * point   - in/out: current write offset into log_buf
 * wrap    - in/out: set to 1 once the write offset has wrapped to 0
 *
 * Caller is expected to hold log_mutex (see log_level()).
 */
static void log_save_line(int len, char *line,
			  char *log_buf, unsigned int *point, unsigned int *wrap)
{
	unsigned int p = *point;
	unsigned int w = *wrap;
	int i;

	/* Fast path: the whole line fits before the end of the buffer. */
	if (len < LOG_DUMP_SIZE - p) {
		memcpy(log_buf + p, line, len);
		p += len;

		if (p == LOG_DUMP_SIZE) {
			p = 0;
			w = 1;
		}
		goto out;
	}

	/* Slow path: copy byte by byte, wrapping at the buffer end. */
	for (i = 0; i < len; i++) {
		log_buf[p++] = line[i];

		if (p == LOG_DUMP_SIZE) {
			p = 0;
			w = 1;
		}
	}
 out:
	*point = p;
	*wrap = w;
}
+
/*
 * Format a log message with a timestamp prefix, save it in the
 * circular dump buffer, and also send it to syslog (when at or above
 * syslog_priority) and to stderr (when daemon_debug is set).
 */
void log_level(int level, const char *fmt, ...)
{
	char line[LOG_LINE_SIZE];
	va_list ap;
	int len = LOG_LINE_SIZE - 1;
	int ret, pos = 0;

	memset(line, 0, sizeof(line));

	/* each saved line begins with a wall-clock timestamp */
	ret = snprintf(line, len, "%llu ", (unsigned long long)time(NULL));
	pos += ret;

	va_start(ap, fmt);
	ret = vsnprintf(line + pos, len - pos, fmt, ap);
	va_end(ap);

	if (ret >= len - pos)
		pos = len - 1;	/* vsnprintf truncated the message */
	else
		pos += ret;

	/* always terminate with newline + NUL; len left room for both */
	line[pos++] = '\n';
	line[pos++] = '\0';

	pthread_mutex_lock(&log_mutex);
	/* pos - 1: store the newline but not the trailing NUL */
	log_save_line(pos - 1, line, log_dump, &log_point, &log_wrap);
	pthread_mutex_unlock(&log_mutex);

	if (level <= syslog_priority)
		syslog(level, "%s", line);

	if (daemon_debug)
		fprintf(stderr, "%s", line);
}
+
/*
 * Copy the circular log buffer into dump_buf in chronological order
 * (oldest data first). Sets *dump_len to the number of bytes copied.
 * Always returns 0.
 */
static int dump_log(int *dump_len)
{
	int tail_len;

	pthread_mutex_lock(&log_mutex);

	if (!log_wrap && !log_point) {
		/* nothing has been logged yet */
		*dump_len = 0;
	} else if (log_wrap) {
		/* wrapped: the oldest bytes start at log_point */
		tail_len = LOG_DUMP_SIZE - log_point;
		memcpy(dump_buf, log_dump+log_point, tail_len);
		if (log_point)
			memcpy(dump_buf+tail_len, log_dump, log_point);
		*dump_len = LOG_DUMP_SIZE;
	} else {
		/* not wrapped; log_point-1 drops the last byte (the
		   trailing newline stored by log_save_line) */
		memcpy(dump_buf, log_dump, log_point-1);
		*dump_len = log_point-1;
	}
	pthread_mutex_unlock(&log_mutex);

	return 0;
}
+
+struct lockspace *alloc_lockspace(void)
+{
+ struct lockspace *ls;
+
+ if (!(ls = malloc(sizeof(struct lockspace)))) {
+ log_error("out of memory for lockspace");
+ return NULL;
+ }
+
+ memset(ls, 0, sizeof(struct lockspace));
+ INIT_LIST_HEAD(&ls->actions);
+ INIT_LIST_HEAD(&ls->resources);
+ pthread_mutex_init(&ls->mutex, NULL);
+ pthread_cond_init(&ls->cond, NULL);
+ return ls;
+}
+
+static struct action *alloc_action(void)
+{
+ struct action *act;
+
+ pthread_mutex_lock(&unused_struct_mutex);
+ if (!unused_action_count || alloc_new_structs) {
+ act = malloc(sizeof(struct action));
+ } else {
+ act = list_first_entry(&unused_action, struct action, list);
+ list_del(&act->list);
+ unused_action_count--;
+ }
+ pthread_mutex_unlock(&unused_struct_mutex);
+ if (act)
+ memset(act, 0, sizeof(struct action));
+ else
+ log_error("out of memory for action");
+ return act;
+}
+
+static struct client *alloc_client(void)
+{
+ struct client *cl;
+
+ pthread_mutex_lock(&unused_struct_mutex);
+ if (!unused_client_count || alloc_new_structs) {
+ cl = malloc(sizeof(struct client));
+ } else {
+ cl = list_first_entry(&unused_client, struct client, list);
+ list_del(&cl->list);
+ unused_client_count--;
+ }
+ pthread_mutex_unlock(&unused_struct_mutex);
+ if (cl)
+ memset(cl, 0, sizeof(struct client));
+ else
+ log_error("out of memory for client");
+ return cl;
+}
+
+static struct resource *alloc_resource(void)
+{
+ struct resource *r;
+
+ pthread_mutex_lock(&unused_struct_mutex);
+ if (!unused_resource_count || alloc_new_structs) {
+ r = malloc(sizeof(struct resource) + resource_lm_data_size);
+ } else {
+ r = list_first_entry(&unused_resource, struct resource, list);
+ list_del(&r->list);
+ unused_resource_count--;
+ }
+ pthread_mutex_unlock(&unused_struct_mutex);
+ if (r) {
+ memset(r, 0, sizeof(struct resource) + resource_lm_data_size);
+ INIT_LIST_HEAD(&r->locks);
+ INIT_LIST_HEAD(&r->actions);
+ } else {
+ log_error("out of memory for resource");
+ }
+ return r;
+}
+
+static struct lock *alloc_lock(void)
+{
+ struct lock *lk;
+
+ pthread_mutex_lock(&unused_struct_mutex);
+ if (!unused_lock_count || alloc_new_structs) {
+ lk = malloc(sizeof(struct lock));
+ } else {
+ lk = list_first_entry(&unused_lock, struct lock, list);
+ list_del(&lk->list);
+ unused_lock_count--;
+ }
+ pthread_mutex_unlock(&unused_struct_mutex);
+ if (lk)
+ memset(lk, 0, sizeof(struct lock));
+ else
+ log_error("out of memory for lock");
+ return lk;
+}
+
+static void free_action(struct action *act)
+{
+ pthread_mutex_lock(&unused_struct_mutex);
+ if (unused_action_count >= MAX_UNUSED_ACTION) {
+ free(act);
+ } else {
+ list_add_tail(&act->list, &unused_action);
+ unused_action_count++;
+ }
+ pthread_mutex_unlock(&unused_struct_mutex);
+}
+
+static void free_client(struct client *cl)
+{
+ pthread_mutex_lock(&unused_struct_mutex);
+ if (unused_client_count >= MAX_UNUSED_CLIENT) {
+ free(cl);
+ } else {
+ list_add_tail(&cl->list, &unused_client);
+ unused_client_count++;
+ }
+ pthread_mutex_unlock(&unused_struct_mutex);
+}
+
+static void free_resource(struct resource *r)
+{
+ pthread_mutex_lock(&unused_struct_mutex);
+ if (unused_resource_count >= MAX_UNUSED_RESOURCE) {
+ free(r);
+ } else {
+ list_add_tail(&r->list, &unused_resource);
+ unused_resource_count++;
+ }
+ pthread_mutex_unlock(&unused_struct_mutex);
+}
+
+static void free_lock(struct lock *lk)
+{
+ pthread_mutex_lock(&unused_struct_mutex);
+ if (unused_lock_count >= MAX_UNUSED_LOCK) {
+ free(lk);
+ } else {
+ list_add_tail(&lk->list, &unused_lock);
+ unused_lock_count++;
+ }
+ pthread_mutex_unlock(&unused_struct_mutex);
+}
+
/*
 * Initialize the unused-struct pools and pre-populate each with half
 * its maximum count (MAX_UNUSED_*/2), so that alloc_*() can usually
 * avoid calling malloc at runtime.
 * Returns 0 on success, -ENOMEM if any allocation fails.
 */
static int setup_structs(void)
{
	struct action *act;
	struct client *cl;
	struct resource *r;
	struct lock *lk;
	int data_san = lm_data_size_sanlock();
	int data_dlm = lm_data_size_dlm();
	int i;

	/* size resource allocations for the larger of the two lock managers */
	resource_lm_data_size = data_san > data_dlm ? data_san : data_dlm;

	pthread_mutex_init(&unused_struct_mutex, NULL);
	INIT_LIST_HEAD(&unused_action);
	INIT_LIST_HEAD(&unused_client);
	INIT_LIST_HEAD(&unused_resource);
	INIT_LIST_HEAD(&unused_lock);

	/*
	 * For setup, force the alloc_ functions to alloc new structs instead
	 * of taking them unused. This allows alloc_struct/free_struct loop to
	 * populate the unused lists.
	 */
	alloc_new_structs = 1;

	for (i = 0; i < MAX_UNUSED_ACTION/2; i++) {
		if (!(act = alloc_action()))
			goto fail;
		free_action(act);
	}

	for (i = 0; i < MAX_UNUSED_CLIENT/2; i++) {
		if (!(cl = alloc_client()))
			goto fail;
		free_client(cl);
	}

	for (i = 0; i < MAX_UNUSED_RESOURCE/2; i++) {
		if (!(r = alloc_resource()))
			goto fail;
		free_resource(r);
	}

	for (i = 0; i < MAX_UNUSED_LOCK/2; i++) {
		if (!(lk = alloc_lock()))
			goto fail;
		free_lock(lk);
	}

	alloc_new_structs = 0;
	return 0;
fail:
	alloc_new_structs = 0;
	return -ENOMEM;
}
+
/*
 * Register fd for POLLIN in the poll array, reusing a POLL_FD_UNUSED
 * slot if one exists, otherwise growing the array by ADD_POLL_SIZE
 * entries. Returns the slot index, or -ENOMEM if growing fails.
 * Takes pollfd_mutex; updates pollfd_maxi as needed.
 */
static int add_pollfd(int fd)
{
	int i, new_size;
	struct pollfd *tmp_pollfd;

	pthread_mutex_lock(&pollfd_mutex);
	/* first look for a free slot in the existing array */
	for (i = 0; i < pollfd_size; i++) {
		if (pollfd[i].fd != POLL_FD_UNUSED)
			continue;

		pollfd[i].fd = fd;
		pollfd[i].events = POLLIN;
		pollfd[i].revents = 0;

		if (i > pollfd_maxi)
			pollfd_maxi = i;

		pthread_mutex_unlock(&pollfd_mutex);
		return i;
	}

	/* no free slot: grow the array (still under pollfd_mutex) */
	new_size = pollfd_size + ADD_POLL_SIZE;

	tmp_pollfd = realloc(pollfd, new_size * sizeof(struct pollfd));
	if (!tmp_pollfd) {
		log_error("can't alloc new size %d for pollfd", new_size);
		pthread_mutex_unlock(&pollfd_mutex);
		return -ENOMEM;
	}
	pollfd = tmp_pollfd;

	/* mark all newly added slots unused */
	for (i = pollfd_size; i < new_size; i++) {
		pollfd[i].fd = POLL_FD_UNUSED;
		pollfd[i].events = 0;
		pollfd[i].revents = 0;
	}

	/* take the first of the new slots for fd */
	i = pollfd_size;
	pollfd[i].fd = fd;
	pollfd[i].events = POLLIN;
	pollfd[i].revents = 0;
	pollfd_maxi = i;

	pollfd_size = new_size;

	pthread_mutex_unlock(&pollfd_mutex);
	return i;
}
+
+static void rem_pollfd(int pi)
+{
+ if (pi < 0) {
+ log_error("rem_pollfd %d", pi);
+ return;
+ }
+ pthread_mutex_lock(&pollfd_mutex);
+ pollfd[pi].fd = POLL_FD_UNUSED;
+ pollfd[pi].events = 0;
+ pollfd[pi].revents = 0;
+ pthread_mutex_unlock(&pollfd_mutex);
+}
+
+static const char *lm_str(int x)
+{
+ switch (x) {
+ case LD_LM_NONE:
+ return "none";
+ case LD_LM_DLM:
+ return "dlm";
+ case LD_LM_SANLOCK:
+ return "sanlock";
+ default:
+ return "lm_unknown";
+ }
+}
+
+static const char *rt_str(int x)
+{
+ switch (x) {
+ case LD_RT_GL:
+ return "gl";
+ case LD_RT_VG:
+ return "vg";
+ case LD_RT_LV:
+ return "lv";
+ default:
+ return ".";
+ };
+}
+
+static const char *op_str(int x)
+{
+ switch (x) {
+ case LD_OP_INIT:
+ return "init";
+ case LD_OP_FREE:
+ return "free";
+ case LD_OP_START:
+ return "start";
+ case LD_OP_STOP:
+ return "stop";
+ case LD_OP_LOCK:
+ return "lock";
+ case LD_OP_UPDATE:
+ return "update";
+ case LD_OP_CLOSE:
+ return "close";
+ case LD_OP_ENABLE:
+ return "enable";
+ case LD_OP_DISABLE:
+ return "disable";
+ case LD_OP_START_WAIT:
+ return "start_wait";
+ case LD_OP_STOP_ALL:
+ return "stop_all";
+ case LD_OP_RENAME_BEFORE:
+ return "rename_before";
+ case LD_OP_RENAME_FINAL:
+ return "rename_final";
+ case LD_OP_RUNNING_LM:
+ return "running_lm";
+ case LD_OP_FIND_FREE_LOCK:
+ return "find_free_lock";
+ case LD_OP_FORGET_VG_NAME:
+ return "forget_vg_name";
+ case LD_OP_KILL_VG:
+ return "kill_vg";
+ case LD_OP_DROP_VG:
+ return "drop_vg";
+ case LD_OP_DUMP_LOG:
+ return "dump_log";
+ case LD_OP_DUMP_INFO:
+ return "dump_info";
+ default:
+ return "op_unknown";
+ };
+}
+
+static const char *mode_str(int x)
+{
+ switch (x) {
+ case LD_LK_IV:
+ return "iv";
+ case LD_LK_UN:
+ return "un";
+ case LD_LK_NL:
+ return "nl";
+ case LD_LK_SH:
+ return "sh";
+ case LD_LK_EX:
+ return "ex";
+ default:
+ return ".";
+ };
+}
+
+int last_string_from_args(char *args_in, char *last)
+{
+ const char *args = args_in;
+ const char *colon, *str = NULL;
+
+ while (1) {
+ if (!args || (*args == '\0'))
+ break;
+ colon = strstr(args, ":");
+ if (!colon)
+ break;
+ str = colon;
+ args = colon + 1;
+ }
+
+ if (str) {
+ snprintf(last, MAX_ARGS, "%s", str + 1);
+ return 0;
+ }
+ return -1;
+}
+
+int version_from_args(char *args, unsigned int *major, unsigned int *minor, unsigned int *patch)
+{
+ char version[MAX_ARGS+1];
+ char *major_str, *minor_str, *patch_str;
+ char *n, *d1, *d2;
+
+ memset(version, 0, sizeof(version));
+ strncpy(version, args, MAX_ARGS);
+ version[MAX_ARGS] = '\0';
+
+ n = strstr(version, ":");
+ if (n)
+ *n = '\0';
+
+ d1 = strstr(version, ".");
+ if (!d1)
+ return -1;
+
+ d2 = strstr(d1 + 1, ".");
+ if (!d2)
+ return -1;
+
+ major_str = version;
+ minor_str = d1 + 1;
+ patch_str = d2 + 1;
+
+ *d1 = '\0';
+ *d2 = '\0';
+
+ if (major)
+ *major = atoi(major_str);
+ if (minor)
+ *minor = atoi(minor_str);
+ if (patch)
+ *patch = atoi(patch_str);
+
+ return 0;
+}
+
+/*
+ * These are few enough that arrays of function pointers can
+ * be avoided.
+ */
+
+static int lm_prepare_lockspace(struct lockspace *ls, struct action *act)
+{
+ int rv;
+
+ if (ls->lm_type == LD_LM_DLM)
+ rv = lm_prepare_lockspace_dlm(ls);
+ else if (ls->lm_type == LD_LM_SANLOCK)
+ rv = lm_prepare_lockspace_sanlock(ls);
+ else
+ return -1;
+
+ if (act)
+ act->lm_rv = rv;
+ return rv;
+}
+
+static int lm_add_lockspace(struct lockspace *ls, struct action *act, int adopt)
+{
+ int rv;
+
+ if (ls->lm_type == LD_LM_DLM)
+ rv = lm_add_lockspace_dlm(ls, adopt);
+ else if (ls->lm_type == LD_LM_SANLOCK)
+ rv = lm_add_lockspace_sanlock(ls, adopt);
+ else
+ return -1;
+
+ if (act)
+ act->lm_rv = rv;
+ return rv;
+}
+
+static int lm_rem_lockspace(struct lockspace *ls, struct action *act, int free_vg)
+{
+ int rv;
+
+ if (ls->lm_type == LD_LM_DLM)
+ rv = lm_rem_lockspace_dlm(ls, free_vg);
+ else if (ls->lm_type == LD_LM_SANLOCK)
+ rv = lm_rem_lockspace_sanlock(ls, free_vg);
+ else
+ return -1;
+
+ if (act)
+ act->lm_rv = rv;
+ return rv;
+}
+
+static int lm_lock(struct lockspace *ls, struct resource *r, int mode, struct action *act,
+ uint32_t *r_version, int *retry, int adopt)
+{
+ int rv;
+
+ if (ls->lm_type == LD_LM_DLM)
+ rv = lm_lock_dlm(ls, r, mode, r_version, adopt);
+ else if (ls->lm_type == LD_LM_SANLOCK)
+ rv = lm_lock_sanlock(ls, r, mode, r_version, retry, adopt);
+ else
+ return -1;
+
+ if (act)
+ act->lm_rv = rv;
+ return rv;
+}
+
+static int lm_convert(struct lockspace *ls, struct resource *r,
+ int mode, struct action *act, uint32_t r_version)
+{
+ int rv;
+
+ if (ls->lm_type == LD_LM_DLM)
+ rv = lm_convert_dlm(ls, r, mode, r_version);
+ else if (ls->lm_type == LD_LM_SANLOCK)
+ rv = lm_convert_sanlock(ls, r, mode, r_version);
+ else
+ return -1;
+
+ if (act)
+ act->lm_rv = rv;
+ return rv;
+}
+
+static int lm_unlock(struct lockspace *ls, struct resource *r, struct action *act,
+ uint32_t r_version, uint32_t lmu_flags)
+{
+ int rv;
+
+ if (ls->lm_type == LD_LM_DLM)
+ rv = lm_unlock_dlm(ls, r, r_version, lmu_flags);
+ else if (ls->lm_type == LD_LM_SANLOCK)
+ rv = lm_unlock_sanlock(ls, r, r_version, lmu_flags);
+ else
+ return -1;
+
+ if (act)
+ act->lm_rv = rv;
+ return rv;
+}
+
+static int lm_hosts(struct lockspace *ls, int notify)
+{
+ if (ls->lm_type == LD_LM_DLM)
+ return 0;
+ else if (ls->lm_type == LD_LM_SANLOCK)
+ return lm_hosts_sanlock(ls, notify);
+ return -1;
+}
+
+static void lm_rem_resource(struct lockspace *ls, struct resource *r)
+{
+ if (ls->lm_type == LD_LM_DLM)
+ lm_rem_resource_dlm(ls, r);
+ else if (ls->lm_type == LD_LM_SANLOCK)
+ lm_rem_resource_sanlock(ls, r);
+}
+
+static int lm_find_free_lock(struct lockspace *ls, uint64_t *free_offset)
+{
+ if (ls->lm_type == LD_LM_DLM)
+ return 0;
+ else if (ls->lm_type == LD_LM_SANLOCK)
+ return lm_find_free_lock_sanlock(ls, free_offset);
+ return -1;
+}
+
+/*
+ * While adopting locks, actions originate from the adopt_locks()
+ * function, not from a client. So, these actions (flagged ADOPT),
+ * should be passed back to the adopt_locks() function through the
+ * adopt_results list, and not be sent back to a client via the
+ * client_list/client_thread.
+ */
+
+static void add_client_result(struct action *act)
+{
+ pthread_mutex_lock(&client_mutex);
+ if (act->flags & LD_AF_ADOPT)
+ list_add_tail(&act->list, &adopt_results);
+ else
+ list_add_tail(&act->list, &client_results);
+ pthread_cond_signal(&client_cond);
+ pthread_mutex_unlock(&client_mutex);
+}
+
+static struct lock *find_lock_client(struct resource *r, uint32_t client_id)
+{
+ struct lock *lk;
+
+ list_for_each_entry(lk, &r->locks, list) {
+ if (lk->client_id == client_id)
+ return lk;
+ }
+ return NULL;
+}
+
+static struct lock *find_lock_persistent(struct resource *r)
+{
+ struct lock *lk;
+
+ list_for_each_entry(lk, &r->locks, list) {
+ if (lk->flags & LD_LF_PERSISTENT)
+ return lk;
+ }
+ return NULL;
+}
+
+static struct action *find_action_client(struct resource *r, uint32_t client_id)
+{
+ struct action *act;
+
+ list_for_each_entry(act, &r->actions, list) {
+ if (act->client_id != client_id)
+ continue;
+ return act;
+ }
+ return NULL;
+}
+
+static void add_work_action(struct action *act)
+{
+ pthread_mutex_lock(&worker_mutex);
+ if (!worker_stop) {
+ list_add_tail(&act->list, &worker_list);
+ pthread_cond_signal(&worker_cond);
+ }
+ pthread_mutex_unlock(&worker_mutex);
+}
+
/*
 * Acquire resource r in lockspace ls in the mode requested by act,
 * or just add another shared holder when r is already held sh and sh
 * is requested. On success a struct lock for the holder is appended
 * to r->locks. Returns 0 on success, -EAGAIN (caller may retry),
 * -ENOMEM, or a negative lock manager error.
 */
static int res_lock(struct lockspace *ls, struct resource *r, struct action *act, int *retry)
{
	struct lock *lk;
	uint32_t r_version = 0;
	int rv;

	if (r->type == LD_RT_LV)
		log_debug("S %s R %s res_lock mode %s (%s)", ls->name, r->name, mode_str(act->mode), act->lv_name);
	else
		log_debug("S %s R %s res_lock mode %s", ls->name, r->name, mode_str(act->mode));

	/* already held sh in the lock manager; just record another sh holder */
	if (r->mode == LD_LK_SH && act->mode == LD_LK_SH)
		goto add_lk;

	if (r->type == LD_RT_LV && act->lv_args[0])
		memcpy(r->lv_args, act->lv_args, MAX_ARGS);

	rv = lm_lock(ls, r, act->mode, act, &r_version, retry, act->flags & LD_AF_ADOPT);
	if (rv == -EAGAIN)
		return rv;
	if (rv < 0) {
		log_error("S %s R %s res_lock lm error %d", ls->name, r->name, rv);
		return rv;
	}

	log_debug("S %s R %s res_lock lm done r_version %u",
		  ls->name, r->name, r_version);

	if (sanlock_gl_dup && ls->sanlock_gl_enabled)
		act->flags |= LD_AF_DUP_GL_LS;

	/* lm_lock() reads new r_version */

	if ((r_version > r->version) || (!r->version && !r->version_zero_valid)) {
		/*
		 * New r_version of the lock: means that another
		 * host has changed data protected by this lock
		 * since the last time we acquired it. We
		 * should invalidate any local cache of the data
		 * protected by this lock and reread it from disk.
		 */
		r->version = r_version;

		/*
		 * When a new global lock is enabled in a new vg,
		 * it will have version zero, and the first time
		 * we use it we need to validate the global cache
		 * since we don't have any version history to know
		 * the state of the cache. The version could remain
		 * zero for a long time if no global state is changed
		 * to cause the GL version to be incremented to 1.
		 */
		r->version_zero_valid = 1;

		/*
		 * r is vglk: tell lvmetad to set the vg invalid
		 * flag, and provide the new r_version. If lvmetad finds
		 * that its cached vg has seqno less than the value
		 * we send here, it will set the vg invalid flag.
		 * lvm commands that read the vg from lvmetad, will
		 * see the invalid flag returned, will reread the
		 * vg from disk, update the lvmetad copy, and go on.
		 *
		 * r is global: tell lvmetad to set the global invalid
		 * flag. When commands see this flag returned from lvmetad,
		 * they will reread metadata from disk, update the lvmetad
		 * caches, and tell lvmetad to set global invalid to 0.
		 */

		if ((r->type == LD_RT_VG) && lvmetad_connected) {
			daemon_reply reply;
			char *uuid;

			log_debug("S %s R %s res_lock set lvmetad vg version %u",
				  ls->name, r->name, r_version);

			/* fall back to the ls name when no real uuid is known */
			if (!ls->vg_uuid[0] || !strcmp(ls->vg_uuid, "none"))
				uuid = ls->name;
			else
				uuid = ls->vg_uuid;

			pthread_mutex_lock(&lvmetad_mutex);
			reply = daemon_send_simple(lvmetad_handle, "set_vg_info",
						   "token = %s", "skip",
						   "uuid = %s", uuid,
						   "version = %d", (int)r_version,
						   NULL);
			pthread_mutex_unlock(&lvmetad_mutex);

			if (reply.error || strcmp(daemon_reply_str(reply, "response", ""), "OK"))
				log_error("set_vg_info in lvmetad failed %d", reply.error);
			daemon_reply_destroy(reply);
		}

		if ((r->type == LD_RT_GL) && lvmetad_connected) {
			daemon_reply reply;

			log_debug("S %s R %s res_lock set lvmetad global invalid",
				  ls->name, r->name);

			pthread_mutex_lock(&lvmetad_mutex);
			reply = daemon_send_simple(lvmetad_handle, "set_global_info",
						   "token = %s", "skip",
						   "global_invalid = %d", 1,
						   NULL);
			pthread_mutex_unlock(&lvmetad_mutex);

			if (reply.error || strcmp(daemon_reply_str(reply, "response", ""), "OK"))
				log_error("set_global_info in lvmetad failed %d", reply.error);
			daemon_reply_destroy(reply);
		}
	}

	r->mode = act->mode;

add_lk:
	if (r->mode == LD_LK_SH)
		r->sh_count++;

	if (!(lk = alloc_lock()))
		return -ENOMEM;

	lk->client_id = act->client_id;
	lk->mode = act->mode;

	/* persistent locks are not owned by any single client */
	if (act->flags & LD_AF_PERSISTENT) {
		lk->flags |= LD_LF_PERSISTENT;
		lk->client_id = 0;
	}

	list_add_tail(&lk->list, &r->locks);

	return 0;
}
+
/*
 * Convert the existing lock lk on resource r to the mode requested in
 * act (sh<->ex only). Returns -EAGAIN when converting sh->ex while
 * other clients still hold the resource shared; otherwise 0 or a
 * negative lock manager error.
 */
static int res_convert(struct lockspace *ls, struct resource *r,
		       struct lock *lk, struct action *act)
{
	uint32_t r_version;
	int rv;

	log_debug("S %s R %s res_convert mode %d", ls->name, r->name, act->mode);

	/* cannot go sh->ex while other shared holders remain */
	if (act->mode == LD_LK_EX && lk->mode == LD_LK_SH && r->sh_count > 1)
		return -EAGAIN;

	/*
	 * lm_convert() writes new version (from ex)
	 * Same as lm_unlock()
	 */

	if ((r->type == LD_RT_GL) && (r->mode == LD_LK_EX)) {
		r->version++;
		lk->version = r->version;
		r_version = r->version;
		log_debug("S %s R %s res_convert r_version inc %u",
			  ls->name, r->name, r_version);

	} else if ((r->type == LD_RT_VG) && (r->mode == LD_LK_EX) && (lk->version > r->version)) {
		r->version = lk->version;
		r_version = r->version;
		log_debug("S %s R %s res_convert r_version new %u", ls->name, r->name, r_version);
	} else {
		r_version = 0;
	}

	rv = lm_convert(ls, r, act->mode, act, r_version);
	if (rv < 0) {
		log_error("S %s R %s res_convert lm error %d", ls->name, r->name, rv);
		return rv;
	}

	log_debug("S %s R %s res_convert lm done", ls->name, r->name);

	/* keep the shared-holder count consistent with the new mode */
	if (lk->mode == LD_LK_EX && act->mode == LD_LK_SH) {
		r->sh_count = 1;
	} else if (lk->mode == LD_LK_SH && act->mode == LD_LK_EX) {
		r->sh_count = 0;
	} else {
		/* should not be possible */
		log_error("S %s R %s res_convert invalid modes %d %d",
			  ls->name, r->name, lk->mode, act->mode);
		return -1;
	}

	r->mode = act->mode;
	lk->mode = act->mode;

	return 0;
}
+
/*
 * Cancel a queued lock request on resource r.  A persistent cancel
 * (LD_AF_PERSISTENT set in act) cancels the first queued persistent
 * request from any client; otherwise only the requesting client's
 * own queued action is cancelled.
 *
 * Returns -ECANCELED when an action was cancelled and requeued to the
 * client results, or -ENOENT when no matching queued action exists.
 */
static int res_cancel(struct lockspace *ls, struct resource *r,
		      struct action *act)
{
	struct action *cact;

	/*
	 * a client can cancel its own non-persistent lock requests,
	 * when could this happen?
	 *
	 * a client can cancel other client's persistent lock requests,
	 * when could this happen?
	 */

	if (act->flags & LD_AF_PERSISTENT) {
		list_for_each_entry(cact, &r->actions, list) {
			if (!(cact->flags & LD_AF_PERSISTENT))
				continue;
			goto do_cancel;
		}
	} else {
		cact = find_action_client(r, act->client_id);
		if (cact)
			goto do_cancel;
	}

	return -ENOENT;

do_cancel:
	log_debug("S %s R %s res_cancel client %d", ls->name, r->name, cact->client_id);
	cact->result = -ECANCELED;
	list_del(&cact->list);
	add_client_result(cact);

	return -ECANCELED;
}
+
+/*
+ * lm_unlock() writes a new r_version (from ex)
+ *
+ * The r_version of the vg resource is incremented if
+ * an "update" was received for the vg lock. The update
+ * contains the new vg seqno from the vg metadata which is
+ * used as the r_version.
+ *
+ * The r_version of the global resource is automatically
+ * incremented when it is unlocked from ex mode.
+ *
+ * r_version is incremented every time a command releases
+ * the global lock from ex.
+ */
+
+/*
+ * persistent locks will not be unlocked for OP_CLOSE/act_close
+ * because act_close->flags does not have the PERSISTENT flag
+ * set, and a persistent lk->client_id is zero, which will not
+ * match the client in act_close->client_id.
+ */
+
/*
 * Release the lock on resource r held for act: the unowned persistent
 * lock when LD_AF_PERSISTENT is set, otherwise the requesting client's
 * own lock.  When the lock being dropped is the last holder in ex mode,
 * a new resource version is written to the lock manager (see the
 * comments above about r_version).
 *
 * Returns 0 on success, -ENOENT if no matching lock is held, or a
 * negative lock manager error.
 */
static int res_unlock(struct lockspace *ls, struct resource *r,
		      struct action *act)
{
	struct lock *lk;
	uint32_t r_version;
	int rv;

	if (act->flags & LD_AF_PERSISTENT) {
		lk = find_lock_persistent(r);
		if (lk)
			goto do_unlock;
	} else {
		lk = find_lock_client(r, act->client_id);
		if (lk)
			goto do_unlock;
	}

	/* holding no lock here is the common case for OP_CLOSE, so don't log it */
	if (act->op != LD_OP_CLOSE)
		log_debug("S %s R %s res_unlock no locks", ls->name, r->name);
	return -ENOENT;

do_unlock:
	if (act->op == LD_OP_CLOSE)
		log_debug("S %s R %s res_unlock from close", ls->name, r->name);
	else if (r->type == LD_RT_LV)
		log_debug("S %s R %s res_unlock (%s)", ls->name, r->name, act->lv_name);
	else
		log_debug("S %s R %s res_unlock", ls->name, r->name);

	/* send unlock to lm when last sh lock is unlocked */
	if (lk->mode == LD_LK_SH) {
		r->sh_count--;
		if (r->sh_count > 0)
			goto rem_lk;
	}

	if ((r->type == LD_RT_GL) && (r->mode == LD_LK_EX)) {
		/* gl version is incremented on every release from ex */
		r->version++;
		lk->version = r->version;
		r_version = r->version;

		log_debug("S %s R %s res_unlock r_version inc %u", ls->name, r->name, r_version);

	} else if ((r->type == LD_RT_VG) && (r->mode == LD_LK_EX) && (lk->version > r->version)) {
		/* vg version comes from the metadata seqno set by an update op */
		r->version = lk->version;
		r_version = r->version;

		log_debug("S %s R %s res_unlock r_version new %u",
			  ls->name, r->name, r_version);
	} else {
		/* 0 tells the lock manager not to write a new version */
		r_version = 0;
	}

	rv = lm_unlock(ls, r, act, r_version, 0);
	if (rv < 0) {
		/* should never happen, retry? */
		log_error("S %s R %s res_unlock lm error %d", ls->name, r->name, rv);
		return rv;
	}

	log_debug("S %s R %s res_unlock lm done", ls->name, r->name);

rem_lk:
	list_del(&lk->list);
	free_lock(lk);

	/* the resource becomes unlocked when its last lock is removed */
	if (list_empty(&r->locks))
		r->mode = LD_LK_UN;

	return 0;
}
+
/*
 * Record a new version on the client's ex lock on r.  The version is
 * not written to the lock manager here; it is written later when the
 * lock is released or converted away from ex.
 *
 * Returns 0 on success, -ENOENT if the client holds no lock on r, or
 * -EINVAL if r is not held ex.
 */
static int res_update(struct lockspace *ls, struct resource *r,
		      struct action *act)
{
	struct lock *lk;

	lk = find_lock_client(r, act->client_id);
	if (!lk) {
		log_error("S %s R %s res_update client %u lock not found",
			  ls->name, r->name, act->client_id);
		return -ENOENT;
	}

	if (r->mode != LD_LK_EX) {
		log_error("S %s R %s res_update version on non-ex lock",
			  ls->name, r->name);
		return -EINVAL;
	}

	/* lk version will be written to lm by unlock */

	if (act->flags & LD_AF_NEXT_VERSION)
		lk->version = r->version + 1;
	else
		lk->version = act->version;

	log_debug("S %s R %s res_update lk version to %u", ls->name, r->name, lk->version);

	return 0;
}
+
+/*
+ * There is nothing to deallocate when freeing a dlm LV, the LV
+ * will simply be unlocked by rem_resource.
+ */
+
+static int free_lv(struct lockspace *ls, struct resource *r)
+{
+ if (ls->lm_type == LD_LM_SANLOCK)
+ return lm_free_lv_sanlock(ls, r);
+ else if (ls->lm_type == LD_LM_DLM)
+ return 0;
+ else
+ return -EINVAL;
+}
+
+/*
+ * NB. we can't do this if sanlock is holding any locks on
+ * the resource; we'd be rewriting the resource from under
+ * sanlock and would confuse or break it badly. We don't
+ * know what another host is doing, so these must be used
+ * very carefully.
+ */
+
+static int res_able(struct lockspace *ls, struct resource *r,
+ struct action *act)
+{
+ int rv;
+
+ if (ls->lm_type != LD_LM_SANLOCK) {
+ log_error("enable/disable only applies to sanlock");
+ return -EINVAL;
+ }
+
+ if (r->type != LD_RT_GL) {
+ log_error("enable/disable only applies to global lock");
+ return -EINVAL;
+ }
+
+ if (r->mode != LD_LK_UN) {
+ log_error("enable/disable only allowed on unlocked resource");
+ return -EINVAL;
+ }
+
+ if (act->op == LD_OP_ENABLE && gl_lsname_sanlock[0]) {
+ log_error("disable global lock in %s before enable in %s",
+ gl_lsname_sanlock, ls->name);
+ return -EINVAL;
+ }
+
+ if ((act->op == LD_OP_DISABLE) && (act->flags & LD_AF_EX_DISABLE)) {
+ rv = lm_ex_disable_gl_sanlock(ls);
+ goto out;
+ }
+
+ rv = lm_able_gl_sanlock(ls, act->op == LD_OP_ENABLE);
+out:
+ return rv;
+}
+
+/*
+ * Go through queued actions, and make lock/unlock calls on the resource
+ * based on the actions and the existing lock state.
+ *
+ * All lock operations sent to the lock manager are non-blocking.
+ * This is because sanlock does not support lock queueing.
+ * Eventually we could enhance this to take advantage of lock
+ * queueing when available (i.e. for the dlm).
+ *
+ * act_close_list: list of CLOSE actions, identifying clients that have
+ * closed/terminated their lvmlockd connection, and whose locks should
+ * be released. Do not remove these actions from act_close_list.
+ *
+ * retry_out: set to 1 if the lock manager said we should retry,
+ * meaning we should call res_process() again in a short while to retry.
+ */
+
/*
 * Work through every action queued on r, in a fixed priority order:
 * version updates, explicit unlocks, implicit unlocks for closed
 * clients, lv frees, enable/disable, then lock requests (satisfied by
 * existing locks, converted, or passed to the lock manager).  Completed
 * actions are moved to the client results list; retryable ones stay on
 * r->actions with *retry_out set.
 */
static void res_process(struct lockspace *ls, struct resource *r,
			struct list_head *act_close_list, int *retry_out)
{
	struct action *act, *safe, *act_close;
	struct lock *lk;
	int lm_retry;
	int rv;

	/*
	 * handle version updates for ex locks
	 * (new version will be written by unlock)
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		if (act->op == LD_OP_UPDATE) {
			rv = res_update(ls, r, act);
			act->result = rv;
			list_del(&act->list);
			add_client_result(act);
		}
	}

	/*
	 * handle explicit unlock actions
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		/* lock requests with invalid/null modes are rejected outright */
		if ((act->op == LD_OP_LOCK) &&
		    (act->mode == LD_LK_IV || act->mode == LD_LK_NL)) {
			act->result = -EINVAL;
			list_del(&act->list);
			add_client_result(act);
		}

		if (act->op == LD_OP_LOCK && act->mode == LD_LK_UN) {
			rv = res_unlock(ls, r, act);

			if (rv == -ENOENT && (act->flags & LD_AF_UNLOCK_CANCEL))
				rv = res_cancel(ls, r, act);

			/*
			 * possible unlock results:
			 * 0: unlock succeeded
			 * -ECANCELED: cancel succeeded
			 * -ENOENT: nothing to unlock or cancel
			 */

			act->result = rv;
			list_del(&act->list);
			add_client_result(act);
		}
	}

	/*
	 * handle implicit unlocks due to client exit,
	 * also clear any outstanding actions for the client
	 */

	list_for_each_entry(act_close, act_close_list, list) {
		/* act_close stays owned by the caller; do not remove it */
		res_unlock(ls, r, act_close);
		res_cancel(ls, r, act_close);
	}

	/*
	 * handle freeing a lock for an lv that has been removed
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		if (act->op == LD_OP_FREE && act->rt == LD_RT_LV) {
			log_debug("S %s R %s free_lv", ls->name, r->name);
			rv = free_lv(ls, r);
			act->result = rv;
			list_del(&act->list);
			add_client_result(act);
			/* the lv is gone; drop the resource entirely */
			goto r_free;

		}
	}

	/*
	 * handle enable/disable
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		if (act->op == LD_OP_ENABLE || act->op == LD_OP_DISABLE) {
			rv = res_able(ls, r, act);
			act->result = rv;
			list_del(&act->list);
			add_client_result(act);

			if (!rv && act->op == LD_OP_DISABLE) {
				log_debug("S %s R %s free disabled", ls->name, r->name);
				goto r_free;
			}
		}
	}

	/*
	 * transient requests on existing transient locks
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		if (act->flags & LD_AF_PERSISTENT)
			continue;

		lk = find_lock_client(r, act->client_id);
		if (!lk)
			continue;

		if (lk->mode != act->mode) {
			/* convert below */
			/*
			act->result = -EEXIST;
			list_del(&act->list);
			add_client_result(act);
			*/
			continue;
		} else {
			/* success */
			act->result = -EALREADY;
			list_del(&act->list);
			add_client_result(act);
		}
	}

	/*
	 * persistent requests on existing persistent locks
	 *
	 * persistent locks are not owned by a client, so any
	 * existing with matching mode satisfies a request.
	 * only one persistent lock is kept on a resource.
	 * a single "unowned" persistent lock satisfies
	 * any/multiple client requests for a persistent lock.
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		if (!(act->flags & LD_AF_PERSISTENT))
			continue;

		lk = find_lock_persistent(r);
		if (!lk)
			continue;

		if (lk->mode != act->mode) {
			/* convert below */
			/*
			act->result = -EEXIST;
			list_del(&act->list);
			add_client_result(act);
			*/
			continue;
		} else {
			/* success */
			act->result = -EALREADY;
			list_del(&act->list);
			add_client_result(act);
		}
	}

	/*
	 * transient requests with existing persistent locks
	 *
	 * Just grant the transient request and do not
	 * keep a record of it.  Assume that the persistent
	 * lock will not go away while the transient lock
	 * is needed.
	 *
	 * This would be used when an ex, persistent lv lock
	 * exists from activation, and then something like
	 * lvextend asks for a transient ex lock to change
	 * the lv.  The lv could not be unlocked by deactivation
	 * while the lvextend was running.
	 *
	 * The logic here for mixing T/P locks is not general
	 * support; there are a number of cases where it will
	 * not work: updating version number (lv locks have
	 * none), ex locks from multiple clients will not
	 * conflict, explicit un of the transient lock will fail.
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		if (act->flags & LD_AF_PERSISTENT)
			continue;

		lk = find_lock_persistent(r);
		if (!lk)
			continue;

		if ((lk->mode == LD_LK_EX) ||
		    (lk->mode == LD_LK_SH && act->mode == LD_LK_SH)) {
			act->result = 0;
			list_del(&act->list);
			add_client_result(act);
		} else {
			/* persistent lock is sh, transient request is ex */
			/* FIXME: can we remove this case? do a convert here? */
			log_debug("res_process %s existing persistent lock new transient", r->name);
			act->result = -EEXIST;
			list_del(&act->list);
			add_client_result(act);
		}
	}

	/*
	 * persistent requests with existing transient locks
	 *
	 * If a client requests a P (persistent) lock for a T (transient)
	 * lock it already holds, we can just change T to P.  Fail if the
	 * same happens for locks from different clients.  Changing
	 * another client's lock from T to P may cause problems
	 * if that client tries to unlock or update version.
	 *
	 * I don't think this P/T combination will be used.
	 * It might be used if a command was able to take a P
	 * vg lock, in which case the T vg lock would already
	 * be held for reading.  If the T lock was sh, it would
	 * be converted to P ex.  If the T/P modes matched, the
	 * lock could just be changed from T to P.
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		if (!(act->flags & LD_AF_PERSISTENT))
			continue;

		lk = find_lock_client(r, act->client_id);
		if (!lk)
			continue;

		if (lk->mode != act->mode) {
			/* FIXME: convert and change to persistent? */
			log_debug("res_process %s existing transient lock new persistent", r->name);
			act->result = -EEXIST;
			list_del(&act->list);
			add_client_result(act);
		} else {
			/* promote the client's transient lock to the unowned persistent lock */
			lk->flags |= LD_LF_PERSISTENT;
			lk->client_id = 0;
			act->result = 0;
			list_del(&act->list);
			add_client_result(act);
		}
	}

	/*
	 * convert mode of existing locks
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		if (act->flags & LD_AF_PERSISTENT)
			lk = find_lock_persistent(r);
		else
			lk = find_lock_client(r, act->client_id);
		if (!lk)
			continue;

		if (lk->mode == act->mode) {
			/* should never happen, should be found above */
			log_error("convert same mode");
			continue;
		}

		/* convert fails immediately, no EAGAIN retry */
		rv = res_convert(ls, r, lk, act);
		act->result = rv;
		list_del(&act->list);
		add_client_result(act);
	}

	/*
	 * Cases above are all requests addressed by existing locks.
	 * Below handles the rest.  Transient and persistent are
	 * handled the same, except
	 * - if mode of existing lock is incompat with requested,
	 *   leave the act on r->actions
	 * - if r mode is EX, any lock action is blocked, just quit
	 *
	 * Retry a lock request that fails due to a lock conflict (-EAGAIN):
	 * if we have not exceeded max retries and lm sets lm_retry (sanlock
	 * transient conflicts from shared lock implementation), or r type
	 * is gl or vg (transient real conflicts we want to hide from command).
	 * lv lock conflicts won't be transient so don't retry them.
	 */

	if (r->mode == LD_LK_EX)
		return;

	/*
	 * r mode is SH or UN, pass lock-sh actions to lm
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		/* grant in order, so break here */
		if (act->op == LD_OP_LOCK && act->mode == LD_LK_EX)
			break;

		if (act->op == LD_OP_LOCK && act->mode == LD_LK_SH) {
			lm_retry = 0;

			rv = res_lock(ls, r, act, &lm_retry);
			if ((rv == -EAGAIN) &&
			    (act->retries <= act->max_retries) &&
			    (lm_retry || (r->type != LD_RT_LV))) {
				/* leave act on list */
				log_debug("S %s R %s res_lock EAGAIN retry", ls->name, r->name);
				act->retries++;
				*retry_out = 1;
			} else {
				act->result = rv;
				list_del(&act->list);
				add_client_result(act);
			}
			if (rv == -EUNATCH)
				goto r_free;
		}
	}

	/*
	 * r mode is SH, any ex lock action is blocked, just quit
	 */

	if (r->mode == LD_LK_SH)
		return;

	/*
	 * r mode is UN, pass lock-ex action to lm
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		if (act->op == LD_OP_LOCK && act->mode == LD_LK_EX) {
			lm_retry = 0;

			rv = res_lock(ls, r, act, &lm_retry);
			if ((rv == -EAGAIN) &&
			    (act->retries <= act->max_retries) &&
			    (lm_retry || (r->type != LD_RT_LV))) {
				/* leave act on list */
				log_debug("S %s R %s res_lock EAGAIN retry", ls->name, r->name);
				act->retries++;
				*retry_out = 1;
			} else {
				act->result = rv;
				list_del(&act->list);
				add_client_result(act);
			}
			if (rv == -EUNATCH)
				goto r_free;
			/* only one ex lock can be granted */
			break;
		}
	}

	return;

r_free:
	/* For the EUNATCH case it may be possible there are queued actions? */
	list_for_each_entry_safe(act, safe, &r->actions, list) {
		log_error("S %s R %s res_process r_free cancel %s client %d",
			  ls->name, r->name, op_str(act->op), act->client_id);
		act->result = -ECANCELED;
		list_del(&act->list);
		add_client_result(act);
	}
	log_debug("S %s R %s res_process free", ls->name, r->name);
	lm_rem_resource(ls, r);
	list_del(&r->list);
	free_resource(r);
}
+
+#define LOCKS_EXIST_ANY 1
+#define LOCKS_EXIST_GL 2
+#define LOCKS_EXIST_VG 3
+#define LOCKS_EXIST_LV 4
+
+static int for_each_lock(struct lockspace *ls, int locks_do)
+{
+ struct resource *r;
+ struct lock *lk;
+
+ list_for_each_entry(r, &ls->resources, list) {
+ list_for_each_entry(lk, &r->locks, list) {
+ if (locks_do == LOCKS_EXIST_ANY)
+ return 1;
+
+ if (locks_do == LOCKS_EXIST_GL && r->type == LD_RT_GL)
+ return 1;
+
+ if (locks_do == LOCKS_EXIST_VG && r->type == LD_RT_VG)
+ return 1;
+
+ if (locks_do == LOCKS_EXIST_LV && r->type == LD_RT_LV)
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
/*
 * Forcibly release every lock in the lockspace before stopping it.
 * free_vg: the vg is being removed, so the lock manager is told to
 * free (unlock-rename) the leases.  drop_vg: vg access was forcibly
 * terminated, so dropping persistent lv locks is expected and not an
 * error.
 *
 * Returns the number of locks cleared.
 */
static int clear_locks(struct lockspace *ls, int free_vg, int drop_vg)
{
	struct resource *r, *r_safe;
	struct lock *lk, *lk_safe;
	struct action *act, *act_safe;
	uint32_t lk_version;
	uint32_t r_version;
	int lk_count = 0;
	int rv;

	list_for_each_entry_safe(r, r_safe, &ls->resources, list) {
		lk_version = 0;

		list_for_each_entry_safe(lk, lk_safe, &r->locks, list) {
			lk_count++;

			/*
			 * Stopping a lockspace shouldn't happen with LV locks
			 * still held, but it will be stopped with GL and VG
			 * locks held.  The drop_vg case may see LV locks.
			 */

			if (lk->flags & LD_LF_PERSISTENT && !drop_vg)
				log_error("S %s R %s clear lock persistent", ls->name, r->name);
			else
				log_debug("S %s R %s clear lock mode %s client %d", ls->name, r->name, mode_str(lk->mode), lk->client_id);

			/* remember the highest pending version so it is written on unlock */
			if (lk->version > lk_version)
				lk_version = lk->version;

			list_del(&lk->list);
			free_lock(lk);
		}

		/* nothing held in the lock manager, just free the resource */
		if (r->mode == LD_LK_UN)
			goto r_free;

		if ((r->type == LD_RT_GL) && (r->mode == LD_LK_EX)) {
			/* gl version is bumped automatically on release from ex */
			r->version++;
			r_version = r->version;
			log_debug("S %s R %s clear_locks r_version inc %u",
				  ls->name, r->name, r_version);

		} else if ((r->type == LD_RT_VG) && (r->mode == LD_LK_EX) && (lk_version > r->version)) {
			/* vg version comes from the seqno recorded by update ops */
			r->version = lk_version;
			r_version = r->version;
			log_debug("S %s R %s clear_locks r_version new %u",
				  ls->name, r->name, r_version);

		} else {
			r_version = 0;
		}

		rv = lm_unlock(ls, r, NULL, r_version, free_vg ? LMUF_FREE_VG : 0);
		if (rv < 0) {
			/* should never happen */
			log_error("S %s R %s clear_locks free %d drop %d lm unlock error %d",
				  ls->name, r->name, free_vg, drop_vg, rv);
		}

		/* fail any actions still queued on this resource */
		list_for_each_entry_safe(act, act_safe, &r->actions, list) {
			log_error("S %s R %s clear_locks cancel %s client %d",
				  ls->name, r->name, op_str(act->op), act->client_id);
			act->result = -ECANCELED;
			list_del(&act->list);
			add_client_result(act);
		}
 r_free:
		log_debug("S %s R %s free", ls->name, r->name);
		lm_rem_resource(ls, r);
		list_del(&r->list);
		free_resource(r);
	}

	return lk_count;
}
+
+/*
+ * find and return the resource that is referenced by the action
+ * - there is a single gl resource per lockspace
+ * - there is a single vg resource per lockspace
+ * - there can be many lv resources per lockspace, compare names
+ */
+
+static struct resource *find_resource_act(struct lockspace *ls,
+ struct action *act,
+ int nocreate)
+{
+ struct resource *r;
+
+ list_for_each_entry(r, &ls->resources, list) {
+ if (r->type != act->rt)
+ continue;
+
+ if (r->type == LD_RT_GL && act->rt == LD_RT_GL)
+ return r;
+
+ if (r->type == LD_RT_VG && act->rt == LD_RT_VG)
+ return r;
+
+ if (r->type == LD_RT_LV && act->rt == LD_RT_LV &&
+ !strcmp(r->name, act->lv_uuid))
+ return r;
+ }
+
+ if (nocreate)
+ return NULL;
+
+ if (!(r = alloc_resource()))
+ return NULL;
+
+ r->type = act->rt;
+
+ r->mode = LD_LK_UN;
+
+ if (r->type == LD_RT_GL)
+ strncpy(r->name, R_NAME_GL, MAX_NAME);
+ else if (r->type == LD_RT_VG)
+ strncpy(r->name, R_NAME_VG, MAX_NAME);
+ else if (r->type == LD_RT_LV)
+ strncpy(r->name, act->lv_uuid, MAX_NAME);
+
+ list_add_tail(&r->list, &ls->resources);
+
+ return r;
+}
+
+static void free_ls_resources(struct lockspace *ls)
+{
+ struct resource *r, *r_safe;
+
+ list_for_each_entry_safe(r, r_safe, &ls->resources, list) {
+ lm_rem_resource(ls, r);
+ list_del(&r->list);
+ free_resource(r);
+ }
+}
+
+/*
+ * ls is the vg being removed that holds the global lock.
+ * check if any other vgs will be left without a global lock.
+ */
+
/*
 * ls_rem is the vg being removed that holds the global lock.
 * check if any other sanlock vgs exist that would be left without
 * a global lock.
 *
 * NOTE(review): the inactive-list scan does not compare names against
 * ls_rem — presumably ls_rem is still on the active list when this is
 * called; confirm with callers.
 */
static int other_sanlock_vgs_exist(struct lockspace *ls_rem)
{
	struct lockspace *ls;

	/* stopped-but-not-freed lockspaces still count */
	list_for_each_entry(ls, &lockspaces_inactive, list) {
		if (ls->lm_type != LD_LM_SANLOCK)
			continue;
		log_debug("other sanlock vg exists inactive %s", ls->name);
		return 1;
	}

	list_for_each_entry(ls, &lockspaces, list) {
		if (ls->lm_type != LD_LM_SANLOCK)
			continue;
		if (!strcmp(ls->name, ls_rem->name))
			continue;
		log_debug("other sanlock vg exists %s", ls->name);
		return 1;
	}

	return 0;
}
+
+/*
+ * LOCK is the main thing we're interested in; the others are unlikely.
+ */
+
+static int process_op_during_kill(struct action *act)
+{
+ if (act->op == LD_OP_LOCK && act->mode == LD_LK_UN)
+ return 1;
+
+ switch (act->op) {
+ case LD_OP_LOCK:
+ case LD_OP_ENABLE:
+ case LD_OP_DISABLE:
+ case LD_OP_UPDATE:
+ case LD_OP_RENAME_BEFORE:
+ case LD_OP_RENAME_FINAL:
+ case LD_OP_FIND_FREE_LOCK:
+ return 0;
+ };
+ return 1;
+}
+
+/*
+ * Process actions queued for this lockspace by
+ * client_recv_action / add_lock_action.
+ *
+ * The lockspace_thread can touch its own ls struct without holding
+ * lockspaces_mutex until it sets ls->thread_done, after which it
+ * cannot touch ls without holding lockspaces_mutex.
+ */
+
+#define LOCK_RETRY_MS 1000 /* milliseconds to delay between retry */
+
/*
 * Thread main for one lockspace: joins the lockspace in the lock
 * manager (reporting start results to the client), then loops
 * processing actions queued on ls->actions until stopped, and finally
 * clears remaining locks, leaves the lockspace, and wakes the worker
 * thread to reap this thread.
 */
static void *lockspace_thread_main(void *arg_in)
{
	struct lockspace *ls = arg_in;
	struct resource *r, *r2;
	struct action *add_act, *act, *safe;
	struct action *act_op_free = NULL;
	struct list_head tmp_act;
	struct list_head act_close;
	int free_vg = 0;
	int drop_vg = 0;
	int error = 0;
	int adopt_flag = 0;
	int wait_flag = 0;
	int retry;
	int rv;

	INIT_LIST_HEAD(&act_close);

	/* first action may be client add */
	pthread_mutex_lock(&ls->mutex);
	act = NULL;
	add_act = NULL;
	if (!list_empty(&ls->actions)) {
		act = list_first_entry(&ls->actions, struct action, list);
		if (act->op == LD_OP_START) {
			add_act = act;
			list_del(&add_act->list);

			if (add_act->flags & LD_AF_WAIT)
				wait_flag = 1;
			if (add_act->flags & LD_AF_ADOPT)
				adopt_flag = 1;
		}
	}
	pthread_mutex_unlock(&ls->mutex);

	log_debug("S %s lm_add_lockspace %s wait %d adopt %d",
		  ls->name, lm_str(ls->lm_type), wait_flag, adopt_flag);

	/*
	 * The prepare step does not wait for anything and is quick;
	 * it tells us if the parameters are valid and the lm is running.
	 */
	error = lm_prepare_lockspace(ls, add_act);

	if (add_act && (!wait_flag || error)) {
		/* send initial join result back to client */
		add_act->result = error;
		add_client_result(add_act);
		add_act = NULL;
	}

	/*
	 * The actual lockspace join can take a while.
	 */
	if (!error) {
		error = lm_add_lockspace(ls, add_act, adopt_flag);

		log_debug("S %s lm_add_lockspace done %d", ls->name, error);

		/* a second sanlock gl besides the configured one is a problem */
		if (ls->sanlock_gl_enabled && gl_lsname_sanlock[0] &&
		    strcmp(ls->name, gl_lsname_sanlock))
			sanlock_gl_dup = 1;

		if (add_act) {
			/* send final join result back to client */
			add_act->result = error;
			add_client_result(add_act);
		}
	}

	pthread_mutex_lock(&ls->mutex);
	if (error) {
		ls->thread_stop = 1;
		ls->create_fail = 1;
	} else {
		ls->create_done = 1;
	}
	pthread_mutex_unlock(&ls->mutex);

	if (error)
		goto out_act;

	while (1) {
		pthread_mutex_lock(&ls->mutex);
		while (!ls->thread_work) {
			if (ls->thread_stop) {
				pthread_mutex_unlock(&ls->mutex);
				goto out_rem;
			}
			pthread_cond_wait(&ls->cond, &ls->mutex);
		}

		/*
		 * Process all the actions queued for this lockspace.
		 * The client thread queues actions on ls->actions.
		 *
		 * Here, take all the actions off of ls->actions, and:
		 *
		 * - For lock operations, move the act to r->actions.
		 *   These lock actions/operations processed by res_process().
		 *
		 * - For non-lock operations, e.g. related to managing
		 *   the lockspace, process them in this loop.
		 */

		while (1) {
			if (list_empty(&ls->actions)) {
				ls->thread_work = 0;
				break;
			}

			act = list_first_entry(&ls->actions, struct action, list);

			if (act->op == LD_OP_KILL_VG && act->rt == LD_RT_VG) {
				/* Continue processing until DROP_VG arrives. */
				log_debug("S %s kill_vg", ls->name);
				ls->kill_vg = 1;
				list_del(&act->list);
				act->result = 0;
				add_client_result(act);
				continue;
			}

			if (ls->kill_vg && !process_op_during_kill(act)) {
				log_debug("S %s disallow op %s after kill_vg", ls->name, op_str(act->op));
				list_del(&act->list);
				act->result = -EVGKILLED;
				add_client_result(act);
				continue;
			}

			if (act->op == LD_OP_DROP_VG && act->rt == LD_RT_VG) {
				/*
				 * If leases are released after i/o errors begin
				 * but before lvmlockctl --kill, then the VG is not
				 * killed, but drop is still needed to clean up the
				 * VG, so in that case there would be a drop op without
				 * a preceding kill op.
				 */
				if (!ls->kill_vg)
					log_debug("S %s received drop without kill", ls->name);
				log_debug("S %s drop_vg", ls->name);
				ls->thread_work = 0;
				ls->thread_stop = 1;
				drop_vg = 1;
				break;
			}

			if (act->op == LD_OP_STOP) {
				/* thread_stop is already set */
				ls->thread_work = 0;
				break;
			}

			if (act->op == LD_OP_FREE && act->rt == LD_RT_VG) {
				/* vgremove */
				log_debug("S %s checking for lockspace hosts", ls->name);
				rv = lm_hosts(ls, 1);
				if (rv) {
					/*
					 * Checking for hosts here in addition to after the
					 * main loop allows vgremove to fail and be rerun
					 * after the ls is stopped on other hosts.
					 */
					log_error("S %s lockspace hosts %d", ls->name, rv);
					list_del(&act->list);
					act->result = -EBUSY;
					add_client_result(act);
					continue;
				}
				ls->thread_work = 0;
				ls->thread_stop = 1;
				free_vg = 1;
				break;
			}

			if (act->op == LD_OP_RENAME_BEFORE && act->rt == LD_RT_VG) {
				/* vgrename */
				log_debug("S %s checking for lockspace hosts", ls->name);
				rv = lm_hosts(ls, 1);
				if (rv) {
					log_error("S %s lockspace hosts %d", ls->name, rv);
					list_del(&act->list);
					act->result = -EBUSY;
					add_client_result(act);
					continue;
				}
				ls->thread_work = 0;
				ls->thread_stop = 1;
				/* Do we want to check hosts again below like vgremove? */
				break;
			}

			if (act->op == LD_OP_FIND_FREE_LOCK && act->rt == LD_RT_VG) {
				uint64_t free_offset = 0;
				log_debug("S %s find free lock", ls->name);
				rv = lm_find_free_lock(ls, &free_offset);
				log_debug("S %s find free lock %d offset %llu",
					  ls->name, rv, (unsigned long long)free_offset);
				ls->free_lock_offset = free_offset;
				list_del(&act->list);
				act->result = rv;
				add_client_result(act);
				continue;
			}

			list_del(&act->list);

			/* applies to all resources */
			if (act->op == LD_OP_CLOSE) {
				list_add(&act->list, &act_close);
				continue;
			}

			/*
			 * All the other op's are for locking.
			 * Find the specific resource that the lock op is for,
			 * and add the act to the resource's list of lock ops.
			 *
			 * (This creates a new resource if the one named in
			 * the act is not found.)
			 */

			r = find_resource_act(ls, act, (act->op == LD_OP_FREE) ? 1 : 0);
			if (!r) {
				act->result = (act->op == LD_OP_FREE) ? -ENOENT : -ENOMEM;
				add_client_result(act);
				continue;
			}

			list_add_tail(&act->list, &r->actions);

			log_debug("S %s R %s action %s %s", ls->name, r->name,
				  op_str(act->op), mode_str(act->mode));
		}
		pthread_mutex_unlock(&ls->mutex);

		/*
		 * Process the lock operations that have been queued for each
		 * resource.
		 */

		retry = 0;

		list_for_each_entry_safe(r, r2, &ls->resources, list)
			res_process(ls, r, &act_close, &retry);

		list_for_each_entry_safe(act, safe, &act_close, list) {
			list_del(&act->list);
			free_action(act);
		}

		/* rerun res_process() shortly for EAGAIN lock requests */
		if (retry) {
			ls->thread_work = 1;
			usleep(LOCK_RETRY_MS * 1000);
		}
	}

out_rem:
	log_debug("S %s stopping", ls->name);

	/*
	 * For sanlock, we need to unlock any existing locks
	 * before removing the lockspace, otherwise the sanlock
	 * daemon will kill us when the lockspace goes away.
	 * For dlm, we leave with force, so all locks will
	 * automatically be dropped when we leave the lockspace,
	 * so unlocking all before leaving could be skipped.
	 *
	 * Blindly dropping all existing locks must only be
	 * allowed in emergency/force situations, otherwise it's
	 * obviously dangerous, since the lock holders are still
	 * operating under the assumption that they hold the lock.
	 * drop_vg drops all existing locks, but should only
	 * happen when the VG access has been forcibly and
	 * successfully terminated.
	 *
	 * For vgremove of a sanlock vg, the vg lock will be held,
	 * and possibly the gl lock if this vg holds the gl.
	 * sanlock vgremove wants to unlock-rename these locks.
	 */

	log_debug("S %s clearing locks", ls->name);

	rv = clear_locks(ls, free_vg, drop_vg);

	/*
	 * Tell any other hosts in the lockspace to leave it
	 * before we remove it (for vgremove).  We do this
	 * before leaving the lockspace ourself because we
	 * need to be in the lockspace to see others.
	 */

	if (free_vg) {
		log_debug("S %s checking for lockspace hosts", ls->name);
		rv = lm_hosts(ls, 1);
		if (rv)
			log_error("S %s other lockspace hosts %d", ls->name, rv);
	}

	/*
	 * Leave the lockspace.
	 */

	rv = lm_rem_lockspace(ls, NULL, free_vg);

	log_debug("S %s rem_lockspace done %d", ls->name, rv);

out_act:
	/*
	 * Move remaining actions to results; this will usually (always?)
	 * be only the stop action.
	 */
	INIT_LIST_HEAD(&tmp_act);

	pthread_mutex_lock(&ls->mutex);
	list_for_each_entry_safe(act, safe, &ls->actions, list) {
		if (act->op == LD_OP_FREE) {
			act_op_free = act;
			act->result = 0;
		} else if (act->op == LD_OP_STOP)
			act->result = 0;
		else if (act->op == LD_OP_DROP_VG)
			act->result = 0;
		else if (act->op == LD_OP_RENAME_BEFORE)
			act->result = 0;
		else
			act->result = -ENOLS;
		list_del(&act->list);
		list_add_tail(&act->list, &tmp_act);
	}
	pthread_mutex_unlock(&ls->mutex);

	/*
	 * If this freed a sanlock vg that had gl enabled, and other sanlock
	 * vgs exist, return a flag so the command can warn that the gl has
	 * been removed and may need to be enabled in another sanlock vg.
	 */

	if (free_vg && ls->sanlock_gl_enabled && act_op_free) {
		pthread_mutex_lock(&lockspaces_mutex);
		if (other_sanlock_vgs_exist(ls))
			act_op_free->flags |= LD_AF_WARN_GL_REMOVED;
		pthread_mutex_unlock(&lockspaces_mutex);
	}

	pthread_mutex_lock(&client_mutex);
	list_for_each_entry_safe(act, safe, &tmp_act, list) {
		list_del(&act->list);
		list_add_tail(&act->list, &client_results);
	}
	pthread_cond_signal(&client_cond);
	pthread_mutex_unlock(&client_mutex);

	/* after thread_done is set, ls may only be touched under lockspaces_mutex */
	pthread_mutex_lock(&lockspaces_mutex);
	ls->thread_done = 1;
	ls->free_vg = free_vg;
	ls->drop_vg = drop_vg;
	pthread_mutex_unlock(&lockspaces_mutex);

	/*
	 * worker_thread will join this thread, and free the
	 * ls or move it to lockspaces_inactive.
	 */
	pthread_mutex_lock(&worker_mutex);
	worker_wake = 1;
	pthread_cond_signal(&worker_cond);
	pthread_mutex_unlock(&worker_mutex);

	return NULL;
}
+
+int lockspaces_empty(void)
+{
+ int rv;
+ pthread_mutex_lock(&lockspaces_mutex);
+ rv = list_empty(&lockspaces);
+ pthread_mutex_unlock(&lockspaces_mutex);
+ return rv;
+}
+
/*
 * lockspaces_mutex is locked
 *
 * When duplicate sanlock global locks have been seen,
 * this function has a secondary job of counting the
 * number of lockspaces that exist with the gl enabled,
 * with the side effect of setting sanlock_gl_dup back to
 * zero when the duplicates have been removed/disabled.
 */

/* Return the lockspace named ls_name from the active list, or NULL. */
static struct lockspace *find_lockspace_name(char *ls_name)
{
	struct lockspace *ls_found = NULL;
	struct lockspace *ls;
	int gl_count = 0;

	list_for_each_entry(ls, &lockspaces, list) {
		if (!strcmp(ls->name, ls_name))
			ls_found = ls;

		/* no dup bookkeeping pending: return on the first match */
		if (!sanlock_gl_dup && ls_found)
			return ls_found;

		/* dup mode: keep walking the whole list to count enabled gls */
		if (sanlock_gl_dup && ls->sanlock_gl_enabled)
			gl_count++;
	}

	/* this is the side effect we want from this function */
	if (sanlock_gl_dup && gl_count < 2)
		sanlock_gl_dup = 0;

	return ls_found;
}
+
+/*
+ * If lvm_<vg_name> is longer than max lockspace name (64) we just ignore the
+ * extra characters. For sanlock vgs, the name is shortened further to 48 in
+ * the sanlock code.
+ */
+
+static int vg_ls_name(const char *vg_name, char *ls_name)
+{
+ if (strlen(vg_name) + 4 > MAX_NAME) {
+ log_error("vg name too long %s", vg_name);
+ return -1;
+ }
+
+ snprintf(ls_name, MAX_NAME, "%s%s", LVM_LS_PREFIX, vg_name);
+ return 0;
+}
+
+/* FIXME: add mutex for gl_lsname_ ? */
+
+static void gl_ls_name(char *ls_name)
+{
+ if (gl_use_dlm)
+ memcpy(ls_name, gl_lsname_dlm, MAX_NAME);
+ else if (gl_use_sanlock)
+ memcpy(ls_name, gl_lsname_sanlock, MAX_NAME);
+ else
+ memset(ls_name, 0, MAX_NAME);
+}
+
+/*
+ * When this function returns an error, the caller needs to deal
+ * with act (in the cases where act exists).
+ */
+
+static int add_lockspace_thread(const char *ls_name,
+ const char *vg_name,
+ const char *vg_uuid,
+ int lm_type, const char *vg_args,
+ struct action *act)
+{
+ struct lockspace *ls, *ls2;
+ struct resource *r;
+ uint32_t version = 0;
+ int rv;
+
+ if (act)
+ version = act->version;
+
+ log_debug("add_lockspace_thread %s %s version %u",
+ lm_str(lm_type), ls_name, version);
+
+ if (!(ls = alloc_lockspace()))
+ return -ENOMEM;
+
+ strncpy(ls->name, ls_name, MAX_NAME);
+ ls->lm_type = lm_type;
+
+ if (act)
+ ls->start_client_id = act->client_id;
+
+ if (vg_uuid)
+ strncpy(ls->vg_uuid, vg_uuid, 64);
+
+ if (vg_name)
+ strncpy(ls->vg_name, vg_name, MAX_NAME);
+
+ if (vg_args)
+ strncpy(ls->vg_args, vg_args, MAX_ARGS);
+
+ if (act)
+ ls->host_id = act->host_id;
+
+ if (!(r = alloc_resource())) {
+ free(ls);
+ return -ENOMEM;
+ }
+
+ r->type = LD_RT_VG;
+ r->mode = LD_LK_UN;
+ r->version = version;
+ strncpy(r->name, R_NAME_VG, MAX_NAME);
+ list_add_tail(&r->list, &ls->resources);
+
+ pthread_mutex_lock(&lockspaces_mutex);
+ ls2 = find_lockspace_name(ls->name);
+ if (ls2) {
+ if (ls2->thread_stop)
+ rv = -EAGAIN;
+ else
+ rv = -EEXIST;
+ pthread_mutex_unlock(&lockspaces_mutex);
+ free_resource(r);
+ free(ls);
+ return rv;
+ }
+
+ /*
+ * act will be null when this lockspace is added automatically/internally
+ * and not by an explicit client action that wants a result.
+ */
+ if (act)
+ list_add(&act->list, &ls->actions);
+
+ clear_lockspace_inactive(ls->name);
+
+ list_add_tail(&ls->list, &lockspaces);
+ pthread_mutex_unlock(&lockspaces_mutex);
+
+ rv = pthread_create(&ls->thread, NULL, lockspace_thread_main, ls);
+ if (rv < 0) {
+ pthread_mutex_lock(&lockspaces_mutex);
+ list_del(&ls->list);
+ pthread_mutex_unlock(&lockspaces_mutex);
+ free_resource(r);
+ free(ls);
+ return rv;
+ }
+
+ return 0;
+}
+
+/*
+ * There is no add_sanlock_global_lockspace or
+ * rem_sanlock_global_lockspace because with sanlock,
+ * the global lockspace is one of the vg lockspaces.
+ */
+
+static int add_dlm_global_lockspace(struct action *act)
+{
+ int rv;
+
+ if (gl_running_dlm)
+ return -EEXIST;
+ gl_running_dlm = 1;
+
+ /*
+ * There's a short period after which a previous gl lockspace thread
+ * has set gl_running_dlm = 0, but before its ls struct has been
+ * deleted, during which this add_lockspace_thread() can fail with
+ * -EAGAIN.
+ */
+
+ rv = add_lockspace_thread(gl_lsname_dlm, NULL, NULL, LD_LM_DLM, NULL, act);
+ if (rv < 0) {
+ log_error("add_dlm_global_lockspace add_lockspace_thread %d", rv);
+ gl_running_dlm = 0;
+ }
+
+ return rv;
+}
+
+/*
+ * If dlm gl lockspace is the only one left, then stop it.
+ * This is not used for an explicit rem_lockspace action from
+ * the client, only for auto remove.
+ */
+
+static int rem_dlm_global_lockspace(void)
+{
+ struct lockspace *ls, *ls_gl = NULL;
+ int others = 0;
+ int rv = 0;
+
+ pthread_mutex_lock(&lockspaces_mutex);
+ list_for_each_entry(ls, &lockspaces, list) {
+ if (!strcmp(ls->name, gl_lsname_dlm)) {
+ ls_gl = ls;
+ continue;
+ }
+ if (ls->thread_stop)
+ continue;
+ others++;
+ break;
+ }
+
+ if (others) {
+ rv = -EAGAIN;
+ goto out;
+ }
+
+ if (!ls_gl) {
+ rv = -ENOENT;
+ goto out;
+ }
+
+ ls = ls_gl;
+ pthread_mutex_lock(&ls->mutex);
+ ls->thread_stop = 1;
+ ls->thread_work = 1;
+ pthread_cond_signal(&ls->cond);
+ pthread_mutex_unlock(&ls->mutex);
+ rv = 0;
+out:
+ pthread_mutex_unlock(&lockspaces_mutex);
+ return rv;
+}
+
+/*
+ * When the first dlm lockspace is added for a vg, automatically add a separate
+ * dlm lockspace for the global lock.
+ *
+ * For sanlock, a separate lockspace is not used for the global lock, but the
+ * gl lock lives in a vg lockspace, (although it's recommended to create a
+ * special vg dedicated to holding the gl).
+ */
+
+static int add_lockspace(struct action *act)
+{
+ char ls_name[MAX_NAME+1];
+ int rv;
+
+ memset(ls_name, 0, sizeof(ls_name));
+
+ /*
+ * FIXME: I don't think this is used any more.
+ * Remove it, or add the ability to start the global
+ * dlm lockspace using lvmlockctl?
+ */
+ if (act->rt == LD_RT_GL) {
+ if (gl_use_dlm) {
+ rv = add_dlm_global_lockspace(act);
+ return rv;
+ } else {
+ return -EINVAL;
+ }
+ }
+
+ if (act->rt == LD_RT_VG) {
+ if (gl_use_dlm) {
+ rv = add_dlm_global_lockspace(NULL);
+ if (rv < 0 && rv != -EEXIST)
+ return rv;
+ }
+
+ vg_ls_name(act->vg_name, ls_name);
+
+ rv = add_lockspace_thread(ls_name, act->vg_name, act->vg_uuid,
+ act->lm_type, act->vg_args,
+ act);
+
+ if (rv)
+ log_error("add_lockspace %s add_lockspace_thread %d", ls_name, rv);
+ return rv;
+ }
+
+ log_error("add_lockspace bad type %d", act->rt);
+ return -1;
+}
+
/*
 * vgchange --lock-stop vgname will lock the vg ex, then send a stop,
 * so we expect to find the ex vg lock held here, and will automatically
 * unlock it when stopping.
 *
 * Should we attempt to stop the lockspace containing the gl last?
 */

/*
 * Queue a stop for the lockspace named by act (gl or vg).
 * Returns 0 on success; -ENOLS if no such lockspace is running,
 * -ESTALE if it is already stopping, -EBUSY if LV locks remain
 * and LD_AF_FORCE is not set.
 */
static int rem_lockspace(struct action *act)
{
	struct lockspace *ls;
	char ls_name[MAX_NAME+1];
	int force = act->flags & LD_AF_FORCE;
	int rt = act->rt;

	/* only the dlm has a removable global lockspace */
	if (act->rt == LD_RT_GL && act->lm_type != LD_LM_DLM)
		return -EINVAL;

	memset(ls_name, 0, sizeof(ls_name));

	if (act->rt == LD_RT_GL)
		gl_ls_name(ls_name);
	else
		vg_ls_name(act->vg_name, ls_name);

	/* lock order: lockspaces_mutex first, then ls->mutex */
	pthread_mutex_lock(&lockspaces_mutex);
	ls = find_lockspace_name(ls_name);
	if (!ls) {
		pthread_mutex_unlock(&lockspaces_mutex);
		return -ENOLS;
	}

	pthread_mutex_lock(&ls->mutex);
	if (ls->thread_stop) {
		pthread_mutex_unlock(&ls->mutex);
		pthread_mutex_unlock(&lockspaces_mutex);
		return -ESTALE;
	}

	if (!force && for_each_lock(ls, LOCKS_EXIST_LV)) {
		pthread_mutex_unlock(&ls->mutex);
		pthread_mutex_unlock(&lockspaces_mutex);
		return -EBUSY;
	}
	/* hand act to the lockspace thread; it completes the stop */
	ls->thread_work = 1;
	ls->thread_stop = 1;
	list_add_tail(&act->list, &ls->actions);
	pthread_cond_signal(&ls->cond);
	pthread_mutex_unlock(&ls->mutex);
	pthread_mutex_unlock(&lockspaces_mutex);

	/*
	 * The dlm global lockspace was automatically added when
	 * the first dlm vg lockspace was added, now reverse that
	 * by automatically removing the dlm global lockspace when
	 * the last dlm vg lockspace is removed.
	 */

	if (rt == LD_RT_VG && gl_use_dlm)
		rem_dlm_global_lockspace();

	return 0;
}
+
+/*
+ * count how many lockspaces started by this client are still starting;
+ * the client will use this to wait for all its start operations to finish
+ * (START_WAIT).
+ */
+
+static int count_lockspace_starting(uint32_t client_id)
+{
+ struct lockspace *ls;
+ int count = 0;
+ int done = 0;
+ int fail = 0;
+
+ pthread_mutex_lock(&lockspaces_mutex);
+ list_for_each_entry(ls, &lockspaces, list) {
+ if (ls->start_client_id != client_id)
+ continue;
+
+ if (!ls->create_done && !ls->create_fail) {
+ count++;
+ continue;
+ }
+
+ if (ls->create_done)
+ done++;
+ if (ls->create_fail)
+ fail++;
+ }
+ pthread_mutex_unlock(&lockspaces_mutex);
+
+ log_debug("count_lockspace_starting client %u count %d done %d fail %d",
+ client_id, count, done, fail);
+
+ return count;
+}
+
/* lockspaces_mutex is held */

/* Return the inactive (stopped) lockspace named ls_name, or NULL. */
static struct lockspace *find_lockspace_inactive(char *ls_name)
{
	struct lockspace *ls;

	list_for_each_entry(ls, &lockspaces_inactive, list) {
		if (!strcmp(ls->name, ls_name))
			return ls;
	}

	return NULL;
}
+
+/* lockspaces_mutex is held */
+static int clear_lockspace_inactive(char *ls_name)
+{
+ struct lockspace *ls;
+
+ ls = find_lockspace_inactive(ls_name);
+ if (ls) {
+ list_del(&ls->list);
+ free(ls);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int forget_lockspace_inactive(char *vg_name)
+{
+ char ls_name[MAX_NAME+1];
+ int found;
+
+ memset(ls_name, 0, sizeof(ls_name));
+ vg_ls_name(vg_name, ls_name);
+
+ log_debug("forget_lockspace_inactive %s", ls_name);
+
+ pthread_mutex_lock(&lockspaces_mutex);
+ found = clear_lockspace_inactive(ls_name);
+ pthread_mutex_unlock(&lockspaces_mutex);
+
+ if (found)
+ return 0;
+ return -ENOENT;
+}
+
+static void free_lockspaces_inactive(void)
+{
+ struct lockspace *ls, *safe;
+
+ pthread_mutex_lock(&lockspaces_mutex);
+ list_for_each_entry_safe(ls, safe, &lockspaces_inactive, list) {
+ list_del(&ls->list);
+ free(ls);
+ }
+ pthread_mutex_unlock(&lockspaces_mutex);
+}
+
/*
 * Loop through all lockspaces, and:
 * - if do_stop is set, stop any that are not stopped
 * - if do_free is set, join any that are done stopping (and free ls)
 *
 * do_stop will not stop an ls with lv locks unless force is set.
 *
 * This function does not block or wait for anything.
 *
 * do_stop (no do_free):
 * returns count of lockspaces that need stop (have locks and no force)
 *
 * do_free (no do_stop):
 * returns count of lockspaces that are stopped and need freeing
 *
 * do_stop and do_free:
 * returns sum of the previous two
 */

static int for_each_lockspace(int do_stop, int do_free, int do_force)
{
	struct lockspace *ls, *safe;
	int need_stop = 0;
	int need_free = 0;
	int stop_count = 0;
	int free_count = 0;
	int done;
	int stop;

	pthread_mutex_lock(&lockspaces_mutex);

	if (do_stop) {
		list_for_each_entry(ls, &lockspaces, list) {

			pthread_mutex_lock(&ls->mutex);
			if (ls->thread_stop) {
				/* already stopping; nothing to do */
				pthread_mutex_unlock(&ls->mutex);
				continue;
			}

			if (!do_force && for_each_lock(ls, LOCKS_EXIST_ANY)) {
				/* locks still held; caller must retry or force */
				need_stop++;
			} else {
				ls->thread_work = 1;
				ls->thread_stop = 1;
				pthread_cond_signal(&ls->cond);
				stop_count++;
			}
			pthread_mutex_unlock(&ls->mutex);
		}
	}

	if (do_free) {
		list_for_each_entry_safe(ls, safe, &lockspaces, list) {

			pthread_mutex_lock(&ls->mutex);
			done = ls->thread_done;
			stop = ls->thread_stop;
			pthread_mutex_unlock(&ls->mutex);

			/* This ls has locks and force is not set. */
			if (!stop)
				continue;

			/*
			 * Once thread_done is set, we know that the lockspace_thread
			 * will not be using/touching the ls struct. Any other
			 * thread touches the ls struct under lockspaces_mutex.
			 */
			if (done) {
				pthread_join(ls->thread, NULL);
				list_del(&ls->list);

				/* In future we may need to free ls->actions here */
				free_ls_resources(ls);

				/* free_vg: the vg was deleted, so forget it entirely;
				   otherwise remember it on the inactive list */
				if (ls->free_vg)
					free(ls);
				else
					list_add(&ls->list, &lockspaces_inactive);
				free_count++;
			} else {
				need_free++;
			}
		}
	}

	if (list_empty(&lockspaces)) {
		/* no lockspaces left: reset the gl choice unless pinned by config */
		if (!gl_type_static) {
			gl_use_dlm = 0;
			gl_use_sanlock = 0;
		}
	}
	pthread_mutex_unlock(&lockspaces_mutex);

	if (stop_count || free_count || need_stop || need_free) {
		log_debug("for_each_lockspace do_stop %d do_free %d "
			  "stop_count %d free_count %d need_stop %d need_free %d",
			  do_stop, do_free, stop_count, free_count, need_stop, need_free);
	}

	return need_stop + need_free;
}
+
/*
 * This is only called when the daemon is exiting so the sleep/retry
 * loop doesn't have any adverse impact.
 */

static void for_each_lockspace_retry(int do_stop, int do_free, int do_force)
{
	int remaining;

	for (;;) {
		remaining = for_each_lockspace(do_stop, do_free, do_force);
		if (!remaining)
			return;

		log_debug("for_each_lockspace_retry remaining %d", remaining);
		sleep(1);
	}
}
+
+static int work_init_vg(struct action *act)
+{
+ struct lockspace *ls;
+ char ls_name[MAX_NAME+1];
+ int rv = 0;
+
+ memset(ls_name, 0, sizeof(ls_name));
+
+ vg_ls_name(act->vg_name, ls_name);
+
+ /*
+ * The max dlm ls name is 64 and the max sanlock ls name is 48. So,
+ * after the "lvm_" prefix, only the first 60/44 characters of the VG
+ * name are used for the lockspace name. This will cause a collision
+ * in the lock manager if two different VG names have the first 60/44
+ * chars in common. At the time of vgcreate (here), check if any other
+ * VG's are known that would collide. If the collision is not detected
+ * at vgcreate time, it will be detected at start time and add_lockspace
+ * will fail for the second of the two matching ls names.
+ */
+ pthread_mutex_lock(&lockspaces_mutex);
+ list_for_each_entry(ls, &lockspaces, list) {
+ if ((ls->lm_type == LD_LM_SANLOCK) && !strncmp(ls->name, ls_name, 48)) {
+ rv = -EEXIST;
+ break;
+ }
+ if ((ls->lm_type == LD_LM_DLM) && !strcmp(ls->name, ls_name)) {
+ rv = -EEXIST;
+ break;
+ }
+ }
+ pthread_mutex_unlock(&lockspaces_mutex);
+
+ if (rv == -EEXIST) {
+ log_error("Existing lockspace name %s matches new %s VG names %s %s",
+ ls->name, ls_name, ls->vg_name, act->vg_name);
+ return rv;
+ }
+
+ if (act->lm_type == LD_LM_SANLOCK)
+ rv = lm_init_vg_sanlock(ls_name, act->vg_name, act->flags, act->vg_args);
+ else if (act->lm_type == LD_LM_DLM)
+ rv = lm_init_vg_dlm(ls_name, act->vg_name, act->flags, act->vg_args);
+ else
+ rv = -EINVAL;
+
+ return rv;
+}
+
+static int work_rename_vg(struct action *act)
+{
+ char ls_name[MAX_NAME+1];
+ int rv = 0;
+
+ memset(ls_name, 0, sizeof(ls_name));
+
+ vg_ls_name(act->vg_name, ls_name);
+
+ if (act->lm_type == LD_LM_SANLOCK)
+ rv = lm_rename_vg_sanlock(ls_name, act->vg_name, act->flags, act->vg_args);
+ else if (act->lm_type == LD_LM_DLM)
+ return 0;
+ else
+ rv = -EINVAL;
+
+ return rv;
+}
+
+static void work_test_gl(void)
+{
+ struct lockspace *ls;
+ int is_enabled = 0;
+
+ pthread_mutex_lock(&lockspaces_mutex);
+ list_for_each_entry(ls, &lockspaces, list) {
+ if (ls->lm_type != LD_LM_SANLOCK)
+ continue;
+
+ pthread_mutex_lock(&ls->mutex);
+ if (ls->create_done && !ls->thread_stop) {
+ is_enabled = lm_gl_is_enabled(ls);
+ if (is_enabled) {
+ log_debug("S %s worker found gl_is_enabled", ls->name);
+ strncpy(gl_lsname_sanlock, ls->name, MAX_NAME);
+ }
+ }
+ pthread_mutex_unlock(&ls->mutex);
+
+ if (is_enabled)
+ break;
+ }
+
+ if (!is_enabled)
+ log_debug("worker found no gl_is_enabled");
+ pthread_mutex_unlock(&lockspaces_mutex);
+}
+
+static int work_init_lv(struct action *act)
+{
+ struct lockspace *ls;
+ char ls_name[MAX_NAME+1];
+ char vg_args[MAX_ARGS+1];
+ char lv_args[MAX_ARGS+1];
+ uint64_t free_offset = 0;
+ int lm_type = 0;
+ int rv = 0;
+
+ memset(ls_name, 0, sizeof(ls_name));
+ memset(vg_args, 0, sizeof(vg_args));
+ memset(lv_args, 0, sizeof(lv_args));
+
+ vg_ls_name(act->vg_name, ls_name);
+
+ pthread_mutex_lock(&lockspaces_mutex);
+ ls = find_lockspace_name(ls_name);
+ if (ls) {
+ lm_type = ls->lm_type;
+ memcpy(vg_args, ls->vg_args, MAX_ARGS);
+ free_offset = ls->free_lock_offset;
+ ls->free_lock_offset = 0;
+ }
+ pthread_mutex_unlock(&lockspaces_mutex);
+
+ if (!ls) {
+ lm_type = act->lm_type;
+ memcpy(vg_args, act->vg_args, MAX_ARGS);
+ }
+
+ if (act->lm_type != lm_type) {
+ log_error("init_lv ls_name %s wrong lm_type %d %d",
+ ls_name, act->lm_type, lm_type);
+ return -EINVAL;
+ }
+
+ if (lm_type == LD_LM_SANLOCK) {
+ rv = lm_init_lv_sanlock(ls_name, act->vg_name, act->lv_uuid,
+ vg_args, lv_args, free_offset);
+
+ memcpy(act->lv_args, lv_args, MAX_ARGS);
+ return rv;
+
+ } else if (act->lm_type == LD_LM_DLM) {
+ return 0;
+ } else {
+ log_error("init_lv ls_name %s bad lm_type %d", ls_name, act->lm_type);
+ return -EINVAL;
+ }
+}
+
/*
 * When an action is queued for the worker_thread, it is processed right away.
 * After processing, some actions need to be retried again in a short while.
 * These actions are put on the delayed_list, and the worker_thread will
 * process these delayed actions again in SHORT_DELAY_PERIOD.
 */

#define SHORT_DELAY_PERIOD 2
#define LONG_DELAY_PERIOD 60

/*
 * Worker thread event loop: wait (with timeout) for actions queued on
 * worker_list, dispatch each by op, retry delayed actions periodically,
 * and reap stopped lockspaces via for_each_lockspace().  Exits when
 * worker_stop is set, freeing any remaining queued/delayed actions.
 */
static void *worker_thread_main(void *arg_in)
{
	struct list_head delayed_list;
	struct timespec ts;
	struct action *act, *safe;
	uint64_t last_delayed_time = 0;
	int delay_sec = LONG_DELAY_PERIOD;
	int rv;

	INIT_LIST_HEAD(&delayed_list);

	while (1) {
		pthread_mutex_lock(&worker_mutex);
		clock_gettime(CLOCK_REALTIME, &ts);
		ts.tv_sec += delay_sec;
		rv = 0;
		act = NULL;

		/* rv becomes non-zero (e.g. ETIMEDOUT) when the wait times out */
		while (list_empty(&worker_list) && !worker_stop && !worker_wake && !rv) {
			rv = pthread_cond_timedwait(&worker_cond, &worker_mutex, &ts);
		}
		worker_wake = 0;

		if (worker_stop) {
			pthread_mutex_unlock(&worker_mutex);
			goto out;
		}

		if (!list_empty(&worker_list)) {
			act = list_first_entry(&worker_list, struct action, list);
			list_del(&act->list);
		}
		pthread_mutex_unlock(&worker_mutex);

		/*
		 * Do new work actions before processing delayed work actions.
		 */

		if (!act)
			goto delayed_work;

		if (act->op == LD_OP_RUNNING_LM) {
			/* report which lock manager is running: both, none, or one */
			int run_sanlock = lm_is_running_sanlock();
			int run_dlm = lm_is_running_dlm();

			if (run_sanlock && run_dlm)
				act->result = -EXFULL;
			else if (!run_sanlock && !run_dlm)
				act->result = -ENOLCK;
			else if (run_sanlock)
				act->result = LD_LM_SANLOCK;
			else if (run_dlm)
				act->result = LD_LM_DLM;
			add_client_result(act);

		} else if ((act->op == LD_OP_LOCK) && (act->flags & LD_AF_SEARCH_LS)) {
			/*
			 * worker_thread used as a helper to search existing
			 * sanlock vgs for an enabled gl.
			 */
			log_debug("work search for gl");
			work_test_gl();

			/* try again to find a gl lockspace for this act */
			rv = add_lock_action(act);
			if (rv < 0) {
				act->result = rv;
				add_client_result(act);
			}

		} else if ((act->op == LD_OP_INIT) && (act->rt == LD_RT_VG)) {
			log_debug("work init_vg %s", act->vg_name);
			act->result = work_init_vg(act);
			add_client_result(act);

		} else if ((act->op == LD_OP_INIT) && (act->rt == LD_RT_LV)) {
			log_debug("work init_lv %s/%s uuid %s", act->vg_name, act->lv_name, act->lv_uuid);
			act->result = work_init_lv(act);
			add_client_result(act);

		} else if ((act->op == LD_OP_RENAME_FINAL) && (act->rt == LD_RT_VG)) {
			log_debug("work rename_vg %s", act->vg_name);
			act->result = work_rename_vg(act);
			add_client_result(act);

		} else if (act->op == LD_OP_START_WAIT) {
			/* result is the number of lockspaces still starting;
			   retry later until it reaches zero */
			act->result = count_lockspace_starting(act->client_id);
			if (!act->result)
				add_client_result(act);
			else
				list_add(&act->list, &delayed_list);

		} else if (act->op == LD_OP_STOP_ALL) {
			act->result = for_each_lockspace(DO_STOP, DO_FREE, (act->flags & LD_AF_FORCE) ? DO_FORCE : NO_FORCE);
			if (!act->result || !(act->flags & LD_AF_WAIT))
				add_client_result(act);
			else
				list_add(&act->list, &delayed_list);

		} else {
			log_error("work unknown op %d", act->op);
			act->result = -EINVAL;
			add_client_result(act);
		}

 delayed_work:
		/*
		 * We may want to track retry times per action so that
		 * we can delay different actions by different amounts.
		 */

		if (monotime() - last_delayed_time < SHORT_DELAY_PERIOD) {
			delay_sec = 1;
			continue;
		}
		last_delayed_time = monotime();

		list_for_each_entry_safe(act, safe, &delayed_list, list) {
			if (act->op == LD_OP_START_WAIT) {
				log_debug("work delayed start_wait for client %u", act->client_id);
				act->result = count_lockspace_starting(act->client_id);
				if (!act->result) {
					list_del(&act->list);
					add_client_result(act);
				}

			} else if (act->op == LD_OP_STOP_ALL) {
				log_debug("work delayed stop_all");
				act->result = for_each_lockspace(DO_STOP, DO_FREE, (act->flags & LD_AF_FORCE) ? DO_FORCE : NO_FORCE);
				if (!act->result) {
					list_del(&act->list);
					act->result = 0;
					add_client_result(act);
				}
			}
		}

		/*
		 * This is not explicitly queued work, and not delayed work,
		 * but lockspace thread cleanup that's needed when a
		 * lockspace has been stopped/removed or failed to start.
		 */

		for_each_lockspace(NO_STOP, DO_FREE, NO_FORCE);

		/* poll slowly when idle, quickly while retries are pending */
		if (list_empty(&delayed_list))
			delay_sec = LONG_DELAY_PERIOD;
		else
			delay_sec = 1;
	}
out:
	/* daemon shutdown: drop all pending actions */
	list_for_each_entry_safe(act, safe, &delayed_list, list) {
		list_del(&act->list);
		free_action(act);
	}

	pthread_mutex_lock(&worker_mutex);
	list_for_each_entry_safe(act, safe, &worker_list, list) {
		list_del(&act->list);
		free_action(act);
	}
	pthread_mutex_unlock(&worker_mutex);
	return NULL;
}
+
+static int setup_worker_thread(void)
+{
+ int rv;
+
+ INIT_LIST_HEAD(&worker_list);
+
+ pthread_mutex_init(&worker_mutex, NULL);
+ pthread_cond_init(&worker_cond, NULL);
+
+ rv = pthread_create(&worker_thread, NULL, worker_thread_main, NULL);
+ if (rv)
+ return -1;
+ return 0;
+}
+
/* Signal the worker thread to stop and wait for it to exit. */
static void close_worker_thread(void)
{
	pthread_mutex_lock(&worker_mutex);
	worker_stop = 1;
	pthread_cond_signal(&worker_cond);
	pthread_mutex_unlock(&worker_mutex);
	pthread_join(worker_thread, NULL);
}
+
+/* client_mutex is locked */
+static struct client *find_client_work(void)
+{
+ struct client *cl;
+
+ list_for_each_entry(cl, &client_list, list) {
+ if (cl->recv || cl->dead)
+ return cl;
+ }
+ return NULL;
+}
+
+/* client_mutex is locked */
+static struct client *find_client_id(uint32_t id)
+{
+ struct client *cl;
+
+ list_for_each_entry(cl, &client_list, list) {
+ if (cl->id == id)
+ return cl;
+ }
+ return NULL;
+}
+
+/* client_mutex is locked */
+static struct client *find_client_pi(int pi)
+{
+ struct client *cl;
+
+ list_for_each_entry(cl, &client_list, list) {
+ if (cl->pi == pi)
+ return cl;
+ }
+ return NULL;
+}
+
+/*
+ * wake up poll() because we have added an fd
+ * back into pollfd and poll() needs to be restarted
+ * to recognize it.
+ */
+static void restart_poll(void)
+{
+ int rv;
+ rv = write(restart_fds[1], "w", 1);
+ if (!rv || rv < 0)
+ log_debug("restart_poll write %d", errno);
+}
+
/* poll will take requests from client again, cl->mutex must be held */

/* Re-arm cl's fd in the poll set after its request has been answered. */
static void client_resume(struct client *cl)
{
	if (cl->dead)
		return;

	if (!cl->poll_ignore || cl->fd == -1 || cl->pi == -1) {
		/* shouldn't happen */
		log_error("client_resume %d bad state ig %d fd %d pi %d",
			  cl->id, cl->poll_ignore, cl->fd, cl->pi);
		return;
	}

	pthread_mutex_lock(&pollfd_mutex);
	if (pollfd[cl->pi].fd != POLL_FD_IGNORE) {
		log_error("client_resume %d pi %d fd %d not IGNORE",
			  cl->id, cl->pi, cl->fd);
	}
	/* restore the real fd and poll for new requests again */
	pollfd[cl->pi].fd = cl->fd;
	pollfd[cl->pi].events = POLLIN;
	pthread_mutex_unlock(&pollfd_mutex);

	restart_poll();
}
+
+/* called from client_thread, cl->mutex is held */
+static void client_send_result(struct client *cl, struct action *act)
+{
+ response res;
+ char result_flags[128];
+ int dump_len = 0;
+ int dump_fd = -1;
+
+ if (cl->dead) {
+ log_debug("client send %d skip dead", cl->id);
+ return;
+ }
+
+ memset(result_flags, 0, sizeof(result_flags));
+
+ buffer_init(&res.buffer);
+
+ /*
+ * EUNATCH is returned when the global lock existed,
+ * but had been disabled when we tried to lock it,
+ * so we removed it, and no longer have a gl to lock.
+ */
+
+ if (act->result == -EUNATCH)
+ act->result = -ENOLS;
+
+ /*
+ * init_vg with dlm|sanlock returns vg_args
+ * init_lv with sanlock returns lv_args
+ */
+
+ if (act->result == -ENOLS) {
+ /*
+ * The lockspace could not be found, in which case
+ * the caller may want to know if any lockspaces exist
+ * or if lockspaces exist, but not one with the global lock.
+ * Given this detail, it may be able to procede without
+ * the lock.
+ */
+ pthread_mutex_lock(&lockspaces_mutex);
+ if (list_empty(&lockspaces))
+ strcat(result_flags, "NO_LOCKSPACES,");
+ pthread_mutex_unlock(&lockspaces_mutex);
+
+ if (gl_use_sanlock && !gl_lsname_sanlock[0])
+ strcat(result_flags, "NO_GL_LS,");
+ else if (gl_use_dlm && !gl_lsname_dlm[0])
+ strcat(result_flags, "NO_GL_LS,");
+ else
+ strcat(result_flags, "NO_GL_LS,");
+ }
+
+ if (act->flags & LD_AF_DUP_GL_LS)
+ strcat(result_flags, "DUP_GL_LS,");
+
+ if (act->flags & LD_AF_INACTIVE_LS)
+ strcat(result_flags, "INACTIVE_LS,");
+
+ if (act->flags & LD_AF_ADD_LS_ERROR)
+ strcat(result_flags, "ADD_LS_ERROR,");
+
+ if (act->flags & LD_AF_WARN_GL_REMOVED)
+ strcat(result_flags, "WARN_GL_REMOVED,");
+
+ if (act->op == LD_OP_INIT) {
+ /*
+ * init is a special case where lock args need
+ * to be passed back to the client.
+ */
+ const char *vg_args = "none";
+ const char *lv_args = "none";
+
+ if (act->vg_args[0])
+ vg_args = act->vg_args;
+
+ if (act->lv_args[0])
+ lv_args = act->lv_args;
+
+ log_debug("send %s[%d.%u] %s %s rv %d vg_args %s lv_args %s",
+ cl->name[0] ? cl->name : "client", cl->pid, cl->id,
+ op_str(act->op), rt_str(act->rt),
+ act->result, vg_args ? vg_args : "", lv_args ? lv_args : "");
+
+ res = daemon_reply_simple("OK",
+ "op = %d", act->op,
+ "op_result = %d", act->result,
+ "lm_result = %d", act->lm_rv,
+ "vg_lock_args = %s", vg_args,
+ "lv_lock_args = %s", lv_args,
+ "result_flags = %s", result_flags[0] ? result_flags : "none",
+ NULL);
+
+ } else if (act->op == LD_OP_DUMP_LOG || act->op == LD_OP_DUMP_INFO) {
+ /*
+ * lvmlockctl creates the unix socket then asks us to write to it.
+ * FIXME: move processing this to a new dedicated query thread to
+ * avoid having a large data dump interfere with normal operation
+ * of the client thread?
+ */
+
+ dump_fd = setup_dump_socket();
+ if (dump_fd < 0)
+ act->result = dump_fd;
+ else if (act->op == LD_OP_DUMP_LOG)
+ act->result = dump_log(&dump_len);
+ else if (act->op == LD_OP_DUMP_INFO)
+ act->result = dump_info(&dump_len);
+ else
+ act->result = -EINVAL;
+
+ log_debug("send %s[%d.%u] dump result %d dump_len %d",
+ cl->name[0] ? cl->name : "client", cl->pid, cl->id,
+ act->result, dump_len);
+
+ res = daemon_reply_simple("OK",
+ "result = %d", act->result,
+ "dump_len = %d", dump_len,
+ NULL);
+ } else {
+ /*
+ * A normal reply.
+ */
+
+ log_debug("send %s[%d.%u] %s %s rv %d %s %s",
+ cl->name[0] ? cl->name : "client", cl->pid, cl->id,
+ op_str(act->op), rt_str(act->rt),
+ act->result, (act->result == -ENOLS) ? "ENOLS" : "", result_flags);
+
+ res = daemon_reply_simple("OK",
+ "op = %d", act->op,
+ "lock_type = %s", lm_str(act->lm_type),
+ "op_result = %d", act->result,
+ "lm_result = %d", act->lm_rv,
+ "result_flags = %s", result_flags[0] ? result_flags : "none",
+ NULL);
+ }
+
+ buffer_write(cl->fd, &res.buffer);
+ buffer_destroy(&res.buffer);
+
+ client_resume(cl);
+
+ if (dump_fd >= 0) {
+ /* To avoid deadlock, send data here after the reply. */
+ send_dump_buf(dump_fd, dump_len);
+ close(dump_fd);
+ }
+}
+
/* called from client_thread */

/*
 * Queue an LD_OP_CLOSE action in every running lockspace so any locks
 * held by the disconnected client cl are released.
 */
static void client_purge(struct client *cl)
{
	struct lockspace *ls;
	struct action *act;

	/*
	 * If the client made no lock requests, there can be
	 * no locks to release for it.
	 */
	if (!cl->lock_ops)
		return;

	pthread_mutex_lock(&lockspaces_mutex);
	list_for_each_entry(ls, &lockspaces, list) {
		/* best effort: on allocation failure this lockspace is skipped */
		if (!(act = alloc_action()))
			continue;

		act->op = LD_OP_CLOSE;
		act->client_id = cl->id;

		pthread_mutex_lock(&ls->mutex);
		if (!ls->thread_stop) {
			list_add_tail(&act->list, &ls->actions);
			ls->thread_work = 1;
			pthread_cond_signal(&ls->cond);
		} else {
			/* stopping lockspace will release everything anyway */
			free_action(act);
		}
		pthread_mutex_unlock(&ls->mutex);
	}
	pthread_mutex_unlock(&lockspaces_mutex);
}
+
/*
 * Route a lock/unlock/enable/disable action to the lockspace it belongs
 * to and wake that lockspace's thread.  Returns 0 when queued, or a
 * negative error (-ENOLS, -ESTALE, -ESTARTING, -EINVAL); for a gl
 * request with no known gl lockspace, the act may instead be handed to
 * the worker thread to search for one (returns 0 in that case too).
 */
static int add_lock_action(struct action *act)
{
	struct lockspace *ls = NULL;
	char ls_name[MAX_NAME+1];

	memset(ls_name, 0, sizeof(ls_name));

	/* Determine which lockspace this action is for, and set ls_name. */

	if (act->rt == LD_RT_GL && gl_use_sanlock &&
	    (act->op == LD_OP_ENABLE || act->op == LD_OP_DISABLE))
		vg_ls_name(act->vg_name, ls_name);
	else if (act->rt == LD_RT_GL)
		gl_ls_name(ls_name);
	else
		vg_ls_name(act->vg_name, ls_name);

 retry:
	pthread_mutex_lock(&lockspaces_mutex);
	if (ls_name[0])
		ls = find_lockspace_name(ls_name);
	if (!ls) {
		int ls_inactive = 0;
		int ls_create_fail = 0;

		/* remember whether a stopped instance of this ls exists */
		if (ls_name[0])
			ls = find_lockspace_inactive(ls_name);
		if (ls) {
			ls_inactive = 1;
			ls_create_fail = ls->create_fail;
			ls = NULL;
		}
		pthread_mutex_unlock(&lockspaces_mutex);

		if (act->op == LD_OP_UPDATE && act->rt == LD_RT_VG) {
			log_debug("lockspace not found ignored for vg update");
			return -ENOLS;

		} else if (act->flags & LD_AF_SEARCH_LS) {
			/* fail if we've already tried searching for the ls */
			log_debug("lockspace search repeated %s", ls_name);
			return -ENOLS;

		} else if (act->op == LD_OP_LOCK && act->rt == LD_RT_GL && gl_use_sanlock) {
			/* gl may have been enabled in an existing vg */
			log_debug("gl lockspace not found check sanlock vgs");
			act->flags |= LD_AF_SEARCH_LS;
			add_work_action(act);
			return 0;

		} else if (act->op == LD_OP_LOCK && act->rt == LD_RT_GL && gl_use_dlm) {
			log_debug("gl lockspace not found add dlm global");
			act->flags |= LD_AF_SEARCH_LS;
			act->flags |= LD_AF_WAIT_STARTING;
			add_dlm_global_lockspace(NULL);
			gl_ls_name(ls_name);
			goto retry;

		} else if (act->op == LD_OP_LOCK && act->mode == LD_LK_UN) {
			log_debug("lockspace not found ignored for unlock");
			return -ENOLS;

		} else if (act->op == LD_OP_LOCK && act->rt == LD_RT_VG && ls_inactive) {
			/* ls has been stopped or previously failed to start */
			log_debug("lockspace inactive create_fail %d %s",
				  ls_create_fail, ls_name);
			act->flags |= LD_AF_INACTIVE_LS;
			if (ls_create_fail)
				act->flags |= LD_AF_ADD_LS_ERROR;
			return -ENOLS;
		} else {
			log_debug("lockspace not found %s", ls_name);
			return -ENOLS;
		}
	}

	if (act->lm_type == LD_LM_NONE) {
		/* return to the command the type we are using */
		act->lm_type = ls->lm_type;
	} else if (act->lm_type != ls->lm_type) {
		/* should not happen */
		log_error("S %s add_lock_action bad lm_type %d ls %d",
			  ls_name, act->lm_type, ls->lm_type);
		pthread_mutex_unlock(&lockspaces_mutex);
		return -EINVAL;
	}

	pthread_mutex_lock(&ls->mutex);
	if (ls->thread_stop && ls->thread_done) {
		/* finished thread not yet reaped by worker: clean it up here
		   and look the name up again */
		log_debug("lockspace is done finish cleanup %s", ls_name);
		pthread_join(ls->thread, NULL);
		list_del(&ls->list);
		pthread_mutex_unlock(&ls->mutex);
		free_ls_resources(ls);
		free(ls);
		pthread_mutex_unlock(&lockspaces_mutex);
		goto retry;
	}

	if (ls->thread_stop) {
		pthread_mutex_unlock(&ls->mutex);
		pthread_mutex_unlock(&lockspaces_mutex);
		log_error("lockspace is stopping %s", ls_name);
		return -ESTALE;
	}

	if (!ls->create_fail && !ls->create_done && !(act->flags & LD_AF_WAIT_STARTING)) {
		pthread_mutex_unlock(&ls->mutex);
		pthread_mutex_unlock(&lockspaces_mutex);
		log_debug("lockspace is starting %s", ls_name);
		return -ESTARTING;
	}

	list_add_tail(&act->list, &ls->actions);
	ls->thread_work = 1;
	pthread_cond_signal(&ls->cond);
	pthread_mutex_unlock(&ls->mutex);
	pthread_mutex_unlock(&lockspaces_mutex);

	/* lockspace_thread_main / res_process take it from here */

	return 0;
}
+
+static int str_to_op_rt(const char *req_name, int *op, int *rt)
+{
+ if (!req_name)
+ goto out;
+
+ if (!strcmp(req_name, "hello")) {
+ *op = LD_OP_HELLO;
+ *rt = 0;
+ return 0;
+ }
+ if (!strcmp(req_name, "quit")) {
+ *op = LD_OP_QUIT;
+ *rt = 0;
+ return 0;
+ }
+ if (!strcmp(req_name, "info")) {
+ *op = LD_OP_DUMP_INFO;
+ *rt = 0;
+ return 0;
+ }
+ if (!strcmp(req_name, "dump")) {
+ *op = LD_OP_DUMP_LOG;
+ *rt = 0;
+ return 0;
+ }
+ if (!strcmp(req_name, "init_vg")) {
+ *op = LD_OP_INIT;
+ *rt = LD_RT_VG;
+ return 0;
+ }
+ if (!strcmp(req_name, "init_lv")) {
+ *op = LD_OP_INIT;
+ *rt = LD_RT_LV;
+ return 0;
+ }
+ if (!strcmp(req_name, "free_vg")) {
+ *op = LD_OP_FREE;
+ *rt = LD_RT_VG;
+ return 0;
+ }
+ if (!strcmp(req_name, "free_lv")) {
+ *op = LD_OP_FREE;
+ *rt = LD_RT_LV;
+ return 0;
+ }
+ if (!strcmp(req_name, "start_vg")) {
+ *op = LD_OP_START;
+ *rt = LD_RT_VG;
+ return 0;
+ }
+ if (!strcmp(req_name, "stop_vg")) {
+ *op = LD_OP_STOP;
+ *rt = LD_RT_VG;
+ return 0;
+ }
+ if (!strcmp(req_name, "start_wait")) {
+ *op = LD_OP_START_WAIT;
+ *rt = 0;
+ return 0;
+ }
+ if (!strcmp(req_name, "stop_all")) {
+ *op = LD_OP_STOP_ALL;
+ *rt = 0;
+ return 0;
+ }
+ if (!strcmp(req_name, "lock_gl")) {
+ *op = LD_OP_LOCK;
+ *rt = LD_RT_GL;
+ return 0;
+ }
+ if (!strcmp(req_name, "lock_vg")) {
+ *op = LD_OP_LOCK;
+ *rt = LD_RT_VG;
+ return 0;
+ }
+ if (!strcmp(req_name, "lock_lv")) {
+ *op = LD_OP_LOCK;
+ *rt = LD_RT_LV;
+ return 0;
+ }
+ if (!strcmp(req_name, "vg_update")) {
+ *op = LD_OP_UPDATE;
+ *rt = LD_RT_VG;
+ return 0;
+ }
+ if (!strcmp(req_name, "enable_gl")) {
+ *op = LD_OP_ENABLE;
+ *rt = LD_RT_GL;
+ return 0;
+ }
+ if (!strcmp(req_name, "disable_gl")) {
+ *op = LD_OP_DISABLE;
+ *rt = LD_RT_GL;
+ return 0;
+ }
+ if (!strcmp(req_name, "rename_vg_before")) {
+ *op = LD_OP_RENAME_BEFORE;
+ *rt = LD_RT_VG;
+ return 0;
+ }
+ if (!strcmp(req_name, "rename_vg_final")) {
+ *op = LD_OP_RENAME_FINAL;
+ *rt = LD_RT_VG;
+ return 0;
+ }
+ if (!strcmp(req_name, "running_lm")) {
+ *op = LD_OP_RUNNING_LM;
+ *rt = 0;
+ return 0;
+ }
+ if (!strcmp(req_name, "find_free_lock")) {
+ *op = LD_OP_FIND_FREE_LOCK;
+ *rt = LD_RT_VG;
+ return 0;
+ }
+ if (!strcmp(req_name, "forget_vg_name")) {
+ *op = LD_OP_FORGET_VG_NAME;
+ *rt = LD_RT_VG;
+ return 0;
+ }
+ if (!strcmp(req_name, "kill_vg")) {
+ *op = LD_OP_KILL_VG;
+ *rt = LD_RT_VG;
+ return 0;
+ }
+ if (!strcmp(req_name, "drop_vg")) {
+ *op = LD_OP_DROP_VG;
+ *rt = LD_RT_VG;
+ return 0;
+ }
+out:
+ return -1;
+}
+
+static int str_to_mode(const char *str)
+{
+ if (!str)
+ goto out;
+ if (!strcmp(str, "un"))
+ return LD_LK_UN;
+ if (!strcmp(str, "nl"))
+ return LD_LK_NL;
+ if (!strcmp(str, "sh"))
+ return LD_LK_SH;
+ if (!strcmp(str, "ex"))
+ return LD_LK_EX;
+out:
+ return LD_LK_IV;
+}
+
+static int str_to_lm(const char *str)
+{
+ if (!str || !strcmp(str, "none"))
+ return LD_LM_NONE;
+ if (!strcmp(str, "sanlock"))
+ return LD_LM_SANLOCK;
+ if (!strcmp(str, "dlm"))
+ return LD_LM_DLM;
+ return -2;
+}
+
+static uint32_t str_to_opts(const char *str)
+{
+ uint32_t flags = 0;
+
+ if (!str)
+ goto out;
+ if (strstr(str, "persistent"))
+ flags |= LD_AF_PERSISTENT;
+ if (strstr(str, "unlock_cancel"))
+ flags |= LD_AF_UNLOCK_CANCEL;
+ if (strstr(str, "next_version"))
+ flags |= LD_AF_NEXT_VERSION;
+ if (strstr(str, "wait"))
+ flags |= LD_AF_WAIT;
+ if (strstr(str, "force"))
+ flags |= LD_AF_FORCE;
+ if (strstr(str, "ex_disable"))
+ flags |= LD_AF_EX_DISABLE;
+ if (strstr(str, "enable"))
+ flags |= LD_AF_ENABLE;
+ if (strstr(str, "disable"))
+ flags |= LD_AF_DISABLE;
+out:
+ return flags;
+}
+
+/*
+ * dump info
+ * client_list: each client struct
+ * lockspaces: each lockspace struct
+ * lockspace actions: each action struct
+ * lockspace resources: each resource struct
+ * lockspace resource actions: each action struct
+ * lockspace resource locks: each lock struct
+ */
+
+static int setup_dump_socket(void)
+{
+ int s;
+
+ s = socket(AF_LOCAL, SOCK_DGRAM, 0);
+ if (s < 0)
+ return s;
+
+ memset(&dump_addr, 0, sizeof(dump_addr));
+ dump_addr.sun_family = AF_LOCAL;
+ strcpy(&dump_addr.sun_path[1], DUMP_SOCKET_NAME);
+ dump_addrlen = sizeof(sa_family_t) + strlen(dump_addr.sun_path+1) + 1;
+
+ return s;
+}
+
#define MAX_SEND_LEN 65536		/* largest chunk passed to one sendto() */
#define RESEND_DELAY_US 1000		/* sleep between EAGAIN/EINTR retries */
#define RESEND_DELAY_US_MAX 500000	/* total retry budget in microseconds */

/*
 * Send dump_len bytes of the global dump_buf to the dump socket in
 * chunks of at most MAX_SEND_LEN.  The send is non-blocking
 * (MSG_DONTWAIT): EAGAIN/EINTR are retried after a RESEND_DELAY_US
 * sleep until the cumulative delay reaches RESEND_DELAY_US_MAX; any
 * other errno, or exhausting the budget, abandons the dump with an
 * error message.  MSG_NOSIGNAL prevents SIGPIPE if the receiver is gone.
 */
static void send_dump_buf(int fd, int dump_len)
{
	int pos = 0;		/* bytes successfully sent so far */
	int ret;
	int send_len;		/* size of the current chunk */
	int delay = 0;		/* accumulated retry sleep (usec) */

	if (!dump_len)
		return;
repeat:
	if (dump_len - pos < MAX_SEND_LEN)
		send_len = dump_len - pos;
	else
		send_len = MAX_SEND_LEN;

	ret = sendto(fd, dump_buf + pos, send_len, MSG_NOSIGNAL | MSG_DONTWAIT,
		     (struct sockaddr *)&dump_addr, dump_addrlen);
	if (ret < 0) {
		if ((errno == EAGAIN || errno == EINTR) && (delay < RESEND_DELAY_US_MAX)) {
			usleep(RESEND_DELAY_US);
			delay += RESEND_DELAY_US;
			goto repeat;
		}
		log_error("send_dump_buf delay %d errno %d", delay, errno);
		return;
	}

	/* a short send is possible; loop until everything has gone out */
	pos += ret;

	if (pos < dump_len)
		goto repeat;

	log_debug("send_dump_buf delay %d total %d", delay, pos);
}
+
/*
 * Append one "info=structs ..." line with the unused-struct cache counts
 * to dump_buf at offset pos.  Returns the snprintf() result; the caller
 * checks it against (len - pos) to detect truncation.
 */
static int print_structs(const char *prefix, int pos, int len)
{
	return snprintf(dump_buf + pos, len - pos,
			"info=%s "
			"unused_action_count=%d "
			"unused_client_count=%d "
			"unused_resource_count=%d "
			"unused_lock_count=%d\n",
			prefix,
			unused_action_count,
			unused_client_count,
			unused_resource_count,
			unused_lock_count);
}
+
/*
 * Append one line describing a connected client to dump_buf at offset
 * pos.  A client with no recorded command name is shown as ".".
 * Returns the snprintf() result (caller checks for truncation).
 */
static int print_client(struct client *cl, const char *prefix, int pos, int len)
{
	return snprintf(dump_buf + pos, len - pos,
			"info=%s "
			"pid=%d "
			"fd=%d "
			"pi=%d "
			"id=%u "
			"name=%s\n",
			prefix,
			cl->pid,
			cl->fd,
			cl->pi,
			cl->id,
			cl->name[0] ? cl->name : ".");
}
+
/*
 * Append one line describing a lockspace (VG identity, lock manager type,
 * thread/create state flags) to dump_buf at offset pos.  Boolean fields
 * are normalized to 0/1.  Returns the snprintf() result (caller checks
 * for truncation).
 */
static int print_lockspace(struct lockspace *ls, const char *prefix, int pos, int len)
{
	return snprintf(dump_buf + pos, len - pos,
			"info=%s "
			"ls_name=%s "
			"vg_name=%s "
			"vg_uuid=%s "
			"vg_sysid=%s "
			"vg_args=%s "
			"lm_type=%s "
			"host_id=%llu "
			"create_fail=%d "
			"create_done=%d "
			"thread_work=%d "
			"thread_stop=%d "
			"thread_done=%d "
			"kill_vg=%d "
			"drop_vg=%d "
			"sanlock_gl_enabled=%d\n",
			prefix,
			ls->name,
			ls->vg_name,
			ls->vg_uuid,
			ls->vg_sysid[0] ? ls->vg_sysid : ".",
			ls->vg_args,
			lm_str(ls->lm_type),
			(unsigned long long)ls->host_id,
			ls->create_fail ? 1 : 0,
			ls->create_done ? 1 : 0,
			ls->thread_work ? 1 : 0,
			ls->thread_stop ? 1 : 0,
			ls->thread_done ? 1 : 0,
			ls->kill_vg,
			ls->drop_vg,
			ls->sanlock_gl_enabled ? 1 : 0);
}
+
/*
 * Append one line describing a pending or completed action (op, resource
 * type, mode, result) to dump_buf at offset pos.  Returns the snprintf()
 * result (caller checks for truncation).
 */
static int print_action(struct action *act, const char *prefix, int pos, int len)
{
	return snprintf(dump_buf + pos, len - pos,
			"info=%s "
			"client_id=%u "
			"flags=0x%x "
			"version=%u "
			"op=%s "
			"rt=%s "
			"mode=%s "
			"lm_type=%s "
			"result=%d "
			"lm_rv=%d\n",
			prefix,
			act->client_id,
			act->flags,
			act->version,
			op_str(act->op),
			rt_str(act->rt),
			mode_str(act->mode),
			lm_str(act->lm_type),
			act->result,
			act->lm_rv);
}
+
/*
 * Append one line describing a lockable resource (name, type, current
 * mode, shared-lock count) to dump_buf at offset pos.  Returns the
 * snprintf() result (caller checks for truncation).
 */
static int print_resource(struct resource *r, const char *prefix, int pos, int len)
{
	return snprintf(dump_buf + pos, len - pos,
			"info=%s "
			"name=%s "
			"type=%s "
			"mode=%s "
			"sh_count=%d "
			"version=%u\n",
			prefix,
			r->name,
			rt_str(r->type),
			mode_str(r->mode),
			r->sh_count,
			r->version);
}
+
/*
 * Append one line describing a granted lock on a resource to dump_buf
 * at offset pos.  Returns the snprintf() result (caller checks for
 * truncation).
 */
static int print_lock(struct lock *lk, const char *prefix, int pos, int len)
{
	return snprintf(dump_buf + pos, len - pos,
			"info=%s "
			"mode=%s "
			"version=%u "
			"flags=0x%x "
			"client_id=%u\n",
			prefix,
			mode_str(lk->mode),
			lk->version,
			lk->flags,
			lk->client_id);
}
+
/*
 * Build a complete text dump of daemon state in the global dump_buf:
 * struct-cache counts, then every client, then every lockspace with its
 * queued actions, resources, locks and per-resource actions.  Each
 * section is serialized under its own mutex (unused_struct_mutex,
 * client_mutex, lockspaces_mutex), taken one at a time, never nested.
 *
 * On success returns 0 and sets *dump_len to the number of bytes
 * written.  Returns -ENOSPC when dump_buf fills up; note that on the
 * early -ENOSPC paths (structs/clients sections) *dump_len is not set.
 */
static int dump_info(int *dump_len)
{
	struct client *cl;
	struct lockspace *ls;
	struct resource *r;
	struct lock *lk;
	struct action *act;
	int len, pos, ret;
	int rv = 0;

	memset(dump_buf, 0, sizeof(dump_buf));
	len = sizeof(dump_buf);
	pos = 0;

	/*
	 * memory
	 */

	pthread_mutex_lock(&unused_struct_mutex);
	ret = print_structs("structs", pos, len);
	if (ret >= len - pos) {
		pthread_mutex_unlock(&unused_struct_mutex);
		return -ENOSPC;
	}
	pos += ret;
	pthread_mutex_unlock(&unused_struct_mutex);

	/*
	 * clients
	 */

	pthread_mutex_lock(&client_mutex);
	list_for_each_entry(cl, &client_list, list) {
		ret = print_client(cl, "client", pos, len);
		if (ret >= len - pos) {
			rv = -ENOSPC;
			break;
		}
		pos += ret;
	}
	pthread_mutex_unlock(&client_mutex);

	if (rv < 0)
		return rv;

	/*
	 * lockspaces with their action/resource/lock info
	 */

	pthread_mutex_lock(&lockspaces_mutex);
	list_for_each_entry(ls, &lockspaces, list) {

		ret = print_lockspace(ls, "ls", pos, len);
		if (ret >= len - pos) {
			rv = -ENOSPC;
			goto out;
		}
		pos += ret;

		/* actions queued to the lockspace, not yet tied to a resource */
		list_for_each_entry(act, &ls->actions, list) {
			ret = print_action(act, "ls_action", pos, len);
			if (ret >= len - pos) {
				rv = -ENOSPC;
				goto out;
			}
			pos += ret;
		}

		list_for_each_entry(r, &ls->resources, list) {
			ret = print_resource(r, "r", pos, len);
			if (ret >= len - pos) {
				rv = -ENOSPC;
				goto out;
			}
			pos += ret;

			/* granted locks on this resource */
			list_for_each_entry(lk, &r->locks, list) {
				ret = print_lock(lk, "lk", pos, len);
				if (ret >= len - pos) {
					rv = -ENOSPC;
					goto out;
				}
				pos += ret;
			}

			/* actions waiting on this resource */
			list_for_each_entry(act, &r->actions, list) {
				ret = print_action(act, "r_action", pos, len);
				if (ret >= len - pos) {
					rv = -ENOSPC;
					goto out;
				}
				pos += ret;
			}
		}
	}
out:
	pthread_mutex_unlock(&lockspaces_mutex);

	*dump_len = pos;

	return rv;
}
+
/* called from client_thread, cl->mutex is held */

/*
 * Read one request from a client connection, translate it into a
 * struct action and route the action to the appropriate handler:
 *
 *   - hello/quit are answered inline, without creating an action;
 *   - dump/info and forget_vg_name complete immediately and queue the
 *     action as a result for the client thread to send back;
 *   - start/stop go through add_lockspace()/rem_lockspace();
 *   - everything else is queued to a worker (add_work_action) or the
 *     owning lockspace thread (add_lock_action).
 *
 * On any error after the action is allocated, the error result is
 * queued back to the client via add_client_result().  Failures before
 * that point (bad read, bad config, unknown request, no memory) just
 * log and resume polling the client.
 */
static void client_recv_action(struct client *cl)
{
	request req;
	response res;
	struct action *act;
	const char *cl_name;
	const char *vg_name;
	const char *vg_uuid;
	const char *vg_sysid;
	const char *str;
	int64_t val;
	uint32_t opts = 0;
	int result = 0;
	int cl_pid;
	int op, rt, lm, mode;
	int rv;

	buffer_init(&req.buffer);

	rv = buffer_read(cl->fd, &req.buffer);
	if (!rv) {
		if (errno == ECONNRESET) {
			/* normal disconnect; client_thread_main cleans up */
			log_debug("client recv %d ECONNRESET", cl->id);
			cl->dead = 1;
		} else {
			log_error("client recv %d buffer_read error %d", cl->id, errno);
		}
		buffer_destroy(&req.buffer);
		client_resume(cl);
		return;
	}

	req.cft = dm_config_from_string(req.buffer.mem);
	if (!req.cft) {
		log_error("client recv %d config_from_string error", cl->id);
		buffer_destroy(&req.buffer);
		client_resume(cl);
		return;
	}

	str = daemon_request_str(req, "request", NULL);
	rv = str_to_op_rt(str, &op, &rt);
	if (rv < 0) {
		log_error("client recv %d bad request name \"%s\"", cl->id, str ? str : "");
		dm_config_destroy(req.cft);
		buffer_destroy(&req.buffer);
		client_resume(cl);
		return;
	}

	if (op == LD_OP_HELLO || op == LD_OP_QUIT) {

		/*
		 * FIXME: add the client command name to the hello messages
		 * so it can be saved in cl->name here.
		 */

		result = 0;

		if (op == LD_OP_QUIT) {
			/* refuse to quit while any lockspace is still running */
			log_debug("op quit");
			pthread_mutex_lock(&lockspaces_mutex);
			if (list_empty(&lockspaces))
				daemon_quit = 1;
			else
				result = -EBUSY;
			pthread_mutex_unlock(&lockspaces_mutex);
		}

		buffer_init(&res.buffer);

		res = daemon_reply_simple("OK",
					  "result = %d", result,
					  "protocol = %s", lvmlockd_protocol,
					  "version = %d", lvmlockd_protocol_version,
					  NULL);
		buffer_write(cl->fd, &res.buffer);
		buffer_destroy(&res.buffer);
		dm_config_destroy(req.cft);
		buffer_destroy(&req.buffer);
		client_resume(cl);
		return;
	}

	/* extract the common request fields */
	cl_name = daemon_request_str(req, "cmd", NULL);
	cl_pid = daemon_request_int(req, "pid", 0);
	vg_name = daemon_request_str(req, "vg_name", NULL);
	vg_uuid = daemon_request_str(req, "vg_uuid", NULL);
	vg_sysid = daemon_request_str(req, "vg_sysid", NULL);
	str = daemon_request_str(req, "mode", NULL);
	mode = str_to_mode(str);
	str = daemon_request_str(req, "opts", NULL);
	opts = str_to_opts(str);
	str = daemon_request_str(req, "vg_lock_type", NULL);
	lm = str_to_lm(str);

	if (cl_pid && cl_pid != cl->pid)
		log_error("client recv bad message pid %d client %d", cl_pid, cl->pid);

	/* FIXME: do this in hello message instead */
	/* NOTE(review): strncpy may leave cl->name unterminated if cl_name is
	   MAX_NAME or longer; presumably cl->name is MAX_NAME+1 and zeroed —
	   confirm against the struct definition. */
	if (!cl->name[0] && cl_name)
		strncpy(cl->name, cl_name, MAX_NAME);

	/* first lockd-type request seen decides which global lock manager to use */
	if (!gl_use_dlm && !gl_use_sanlock && (lm > 0)) {
		if (lm == LD_LM_DLM && lm_support_dlm())
			gl_use_dlm = 1;
		else if (lm == LD_LM_SANLOCK && lm_support_sanlock())
			gl_use_sanlock = 1;

		log_debug("set gl_use_%s", lm_str(lm));
	}

	if (!(act = alloc_action())) {
		log_error("No memory for action");
		dm_config_destroy(req.cft);
		buffer_destroy(&req.buffer);
		client_resume(cl);
		return;
	}

	act->client_id = cl->id;
	act->op = op;
	act->rt = rt;
	act->mode = mode;
	act->flags = opts;
	act->lm_type = lm;

	/* "none" is the wire encoding for an absent value */
	if (vg_name && strcmp(vg_name, "none"))
		strncpy(act->vg_name, vg_name, MAX_NAME);

	if (vg_uuid && strcmp(vg_uuid, "none"))
		strncpy(act->vg_uuid, vg_uuid, 64);

	if (vg_sysid && strcmp(vg_sysid, "none"))
		strncpy(act->vg_sysid, vg_sysid, MAX_NAME);

	str = daemon_request_str(req, "lv_name", NULL);
	if (str && strcmp(str, "none"))
		strncpy(act->lv_name, str, MAX_NAME);

	str = daemon_request_str(req, "lv_uuid", NULL);
	if (str && strcmp(str, "none"))
		strncpy(act->lv_uuid, str, MAX_NAME);

	val = daemon_request_int(req, "version", 0);
	if (val)
		act->version = (uint32_t)val;

	str = daemon_request_str(req, "vg_lock_args", NULL);
	if (str && strcmp(str, "none"))
		strncpy(act->vg_args, str, MAX_ARGS);

	str = daemon_request_str(req, "lv_lock_args", NULL);
	if (str && strcmp(str, "none"))
		strncpy(act->lv_args, str, MAX_ARGS);

	/* start_vg will include lvmlocal.conf local/host_id here */
	val = daemon_request_int(req, "host_id", 0);
	if (val)
		act->host_id = val;

	act->max_retries = daemon_request_int(req, "max_retries", DEFAULT_MAX_RETRIES);

	dm_config_destroy(req.cft);
	buffer_destroy(&req.buffer);

	log_debug("recv %s[%d.%u] %s %s \"%s\" mode %s flags %x",
		  cl->name[0] ? cl->name : "client", cl->pid, cl->id,
		  op_str(act->op), rt_str(act->rt), act->vg_name, mode_str(act->mode), opts);

	if (lm == LD_LM_DLM && !lm_support_dlm()) {
		log_debug("dlm not supported");
		rv = -EPROTONOSUPPORT;
		goto out;
	}

	if (lm == LD_LM_SANLOCK && !lm_support_sanlock()) {
		log_debug("sanlock not supported");
		rv = -EPROTONOSUPPORT;
		goto out;
	}

	/* remember that this client holds locks, for cleanup on disconnect */
	if (act->op == LD_OP_LOCK && act->mode != LD_LK_UN)
		cl->lock_ops = 1;

	/* route the action to its handler */
	switch (act->op) {
	case LD_OP_START:
		rv = add_lockspace(act);
		break;
	case LD_OP_STOP:
		rv = rem_lockspace(act);
		break;
	case LD_OP_DUMP_LOG:
	case LD_OP_DUMP_INFO:
		/* The client thread reply will copy and send the dump. */
		add_client_result(act);
		rv = 0;
		break;
	case LD_OP_INIT:
	case LD_OP_START_WAIT:
	case LD_OP_STOP_ALL:
	case LD_OP_RENAME_FINAL:
	case LD_OP_RUNNING_LM:
		add_work_action(act);
		rv = 0;
		break;
	case LD_OP_LOCK:
	case LD_OP_UPDATE:
	case LD_OP_ENABLE:
	case LD_OP_DISABLE:
	case LD_OP_FREE:
	case LD_OP_RENAME_BEFORE:
	case LD_OP_FIND_FREE_LOCK:
	case LD_OP_KILL_VG:
	case LD_OP_DROP_VG:
		rv = add_lock_action(act);
		break;
	case LD_OP_FORGET_VG_NAME:
		act->result = forget_lockspace_inactive(act->vg_name);
		add_client_result(act);
		break;
	default:
		rv = -EINVAL;
	};

out:
	/* queue the failure back to the client; on success the handler
	   above has taken ownership of act */
	if (rv < 0) {
		act->result = rv;
		add_client_result(act);
	}
}
+
/*
 * Main loop of the client thread.  Sleeps on client_cond until there is
 * work, then alternates between two jobs:
 *
 *   1. draining client_results: completed actions are sent back to the
 *      owning client (under cl->mutex) and freed;
 *   2. servicing clients flagged by the poll loop (cl->recv): the
 *      request is read and dispatched via client_recv_action(), and
 *      dead clients are closed, unlinked and freed.
 *
 * Locking: client_mutex protects client_list/client_results/client_work
 * and is never held while cl->mutex is taken, except that find_client_*
 * lookups happen under client_mutex before it is dropped.
 * Exits when client_stop is set (by close_client_thread) and both work
 * sources are empty.
 */
static void *client_thread_main(void *arg_in)
{
	struct client *cl;
	struct action *act;

	while (1) {
		pthread_mutex_lock(&client_mutex);
		while (!client_work && list_empty(&client_results)) {
			if (client_stop) {
				pthread_mutex_unlock(&client_mutex);
				goto out;
			}
			pthread_cond_wait(&client_cond, &client_mutex);
		}

		/*
		 * Send outgoing results back to clients
		 */

		if (!list_empty(&client_results)) {
			act = list_first_entry(&client_results, struct action, list);
			list_del(&act->list);
			cl = find_client_id(act->client_id);
			pthread_mutex_unlock(&client_mutex);

			if (cl) {
				pthread_mutex_lock(&cl->mutex);
				client_send_result(cl, act);
				pthread_mutex_unlock(&cl->mutex);
			} else {
				/* client disconnected before its result arrived */
				log_debug("no client for result");
			}
			free_action(act);
			continue;
		}

		/*
		 * Queue incoming actions for lockspace threads
		 */

		if (client_work) {
			cl = find_client_work();
			if (!cl)
				client_work = 0;
			pthread_mutex_unlock(&client_mutex);

			if (!cl)
				continue;

			pthread_mutex_lock(&cl->mutex);

			if (cl->recv) {
				cl->recv = 0;
				client_recv_action(cl);
			}

			if (cl->dead) {
				/*
				log_debug("client rem %d pi %d fd %d ig %d",
					  cl->id, cl->pi, cl->fd, cl->poll_ignore);
				*/
				/*
				 * If cl->dead was set in main_loop, then the
				 * fd has already been closed and the pollfd
				 * entry is already unused.
				 * main_loop set dead=1, ignore=0, pi=-1, fd=-1
				 *
				 * if cl->dead was not set in main_loop, but
				 * set in client_recv_action, then the main_loop
				 * should be ignoring this client fd.
				 * main_loop set ignore=1
				 */

				if (cl->poll_ignore) {
					log_debug("client close %d pi %d fd %d",
						  cl->id, cl->pi, cl->fd);
					/* assert cl->pi != -1 */
					/* assert pollfd[pi].fd == FD_IGNORE */
					if (close(cl->fd))
						log_error("client close %d pi %d fd %d failed",
							  cl->id, cl->pi, cl->fd);
					rem_pollfd(cl->pi);
					cl->pi = -1;
					cl->fd = -1;
					cl->poll_ignore = 0;
				} else {
					/* main thread should have closed */
					if (cl->pi != -1 || cl->fd != -1) {
						log_error("client %d bad state pi %d fd %d",
							  cl->id, cl->pi, cl->fd);
					}
				}
				pthread_mutex_unlock(&cl->mutex);

				/* unlink under client_mutex, then release resources */
				pthread_mutex_lock(&client_mutex);
				list_del(&cl->list);
				pthread_mutex_unlock(&client_mutex);

				client_purge(cl);

				free_client(cl);
			} else {
				pthread_mutex_unlock(&cl->mutex);
			}
		}
		pthread_mutex_unlock(&client_mutex);
	}
out:
	return NULL;
}
+
+static int setup_client_thread(void)
+{
+ int rv;
+
+ INIT_LIST_HEAD(&client_list);
+ INIT_LIST_HEAD(&client_results);
+
+ pthread_mutex_init(&client_mutex, NULL);
+ pthread_cond_init(&client_cond, NULL);
+
+ rv = pthread_create(&client_thread, NULL, client_thread_main, NULL);
+ if (rv)
+ return -1;
+ return 0;
+}
+
/*
 * Tell the client thread to exit and wait for it.  client_stop is
 * written under client_mutex and the condition is signaled so that
 * client_thread_main wakes up, observes the flag and returns.
 */
static void close_client_thread(void)
{
	pthread_mutex_lock(&client_mutex);
	client_stop = 1;
	pthread_cond_signal(&client_cond);
	pthread_mutex_unlock(&client_mutex);
	pthread_join(client_thread, NULL);
}
+
+/*
+ * Get a list of all VGs with a lockd type (sanlock|dlm) from lvmetad.
+ * We'll match this list against a list of existing lockspaces that are
+ * found in the lock manager.
+ *
+ * For each of these VGs, also create a struct resource on ls->resources to
+ * represent each LV in the VG that uses a lock. For each of these LVs
+ * that are active, we'll attempt to adopt a lock.
+ */
+
+static int get_lockd_vgs(struct list_head *vg_lockd)
+{
+ struct list_head update_vgs;
+ daemon_reply reply;
+ struct dm_config_node *cn;
+ struct dm_config_node *metadata;
+ struct dm_config_node *md_cn;
+ struct dm_config_node *lv_cn;
+ struct lockspace *ls, *safe;
+ struct resource *r;
+ const char *vg_name;
+ const char *vg_uuid;
+ const char *lv_uuid;
+ const char *lock_type;
+ const char *lock_args;
+ char find_str_path[PATH_MAX];
+ int mutex_unlocked = 0;
+ int rv = 0;
+
+ INIT_LIST_HEAD(&update_vgs);
+
+ pthread_mutex_lock(&lvmetad_mutex);
+ reply = daemon_send_simple(lvmetad_handle, "vg_list",
+ "token = %s", "skip",
+ NULL);
+
+ if (reply.error || strcmp(daemon_reply_str(reply, "response", ""), "OK")) {
+ log_error("vg_list from lvmetad failed %d", reply.error);
+ rv = -EINVAL;
+ goto destroy;
+ }
+
+ if (!(cn = dm_config_find_node(reply.cft->root, "volume_groups"))) {
+ log_error("get_lockd_vgs no vgs");
+ rv = -EINVAL;
+ goto destroy;
+ }
+
+ /* create an update_vgs list of all vg uuids */
+
+ for (cn = cn->child; cn; cn = cn->sib) {
+ vg_uuid = cn->key;
+
+ if (!(ls = alloc_lockspace())) {
+ rv = -ENOMEM;
+ break;
+ }
+
+ strncpy(ls->vg_uuid, vg_uuid, 64);
+ list_add_tail(&ls->list, &update_vgs);
+ log_debug("get_lockd_vgs %s", vg_uuid);
+ }
+ destroy:
+ daemon_reply_destroy(reply);
+
+ if (rv < 0)
+ goto out;
+
+ /* get vg_name and lock_type for each vg uuid entry in update_vgs */
+
+ list_for_each_entry(ls, &update_vgs, list) {
+ reply = daemon_send_simple(lvmetad_handle, "vg_lookup",
+ "token = %s", "skip",
+ "uuid = %s", ls->vg_uuid,
+ NULL);
+
+ if (reply.error || strcmp(daemon_reply_str(reply, "response", ""), "OK")) {
+ log_error("vg_lookup from lvmetad failed %d", reply.error);
+ rv = -EINVAL;
+ goto next;
+ }
+
+ vg_name = daemon_reply_str(reply, "name", NULL);
+ if (!vg_name) {
+ log_error("get_lockd_vgs %s no name", ls->vg_uuid);
+ rv = -EINVAL;
+ goto next;
+ }
+
+ strncpy(ls->vg_name, vg_name, MAX_NAME);
+
+ metadata = dm_config_find_node(reply.cft->root, "metadata");
+ if (!metadata) {
+ log_error("get_lockd_vgs %s name %s no metadata",
+ ls->vg_uuid, ls->vg_name);
+ rv = -EINVAL;
+ goto next;
+ }
+
+ lock_type = dm_config_find_str(metadata, "metadata/lock_type", NULL);
+ ls->lm_type = str_to_lm(lock_type);
+
+ if ((ls->lm_type != LD_LM_SANLOCK) && (ls->lm_type != LD_LM_DLM)) {
+ log_debug("get_lockd_vgs %s not lockd type", ls->vg_name);
+ continue;
+ }
+
+ lock_args = dm_config_find_str(metadata, "metadata/lock_args", NULL);
+ if (lock_args)
+ strncpy(ls->vg_args, lock_args, MAX_ARGS);
+
+ log_debug("get_lockd_vgs %s lock_type %s lock_args %s",
+ ls->vg_name, lock_type, lock_args ?: "none");
+
+ /*
+ * Make a record (struct resource) of each lv that uses a lock.
+ * For any lv that uses a lock, we'll check if the lv is active
+ * and if so try to adopt a lock for it.
+ */
+
+ for (md_cn = metadata->child; md_cn; md_cn = md_cn->sib) {
+ if (strcmp(md_cn->key, "logical_volumes"))
+ continue;
+
+ for (lv_cn = md_cn->child; lv_cn; lv_cn = lv_cn->sib) {
+ snprintf(find_str_path, PATH_MAX, "%s/lock_type", lv_cn->key);
+ lock_type = dm_config_find_str(lv_cn, find_str_path, NULL);
+
+ if (!lock_type)
+ continue;
+
+ snprintf(find_str_path, PATH_MAX, "%s/lock_args", lv_cn->key);
+ lock_args = dm_config_find_str(lv_cn, find_str_path, NULL);
+
+ snprintf(find_str_path, PATH_MAX, "%s/id", lv_cn->key);
+ lv_uuid = dm_config_find_str(lv_cn, find_str_path, NULL);
+
+ if (!lv_uuid) {
+ log_error("get_lock_vgs no lv id for name %s", lv_cn->key);
+ continue;
+ }
+
+ if (!(r = alloc_resource())) {
+ rv = -ENOMEM;
+ goto next;
+ }
+
+ r->type = LD_RT_LV;
+ strncpy(r->name, lv_uuid, MAX_NAME);
+ if (lock_args)
+ strncpy(r->lv_args, lock_args, MAX_ARGS);
+ list_add_tail(&r->list, &ls->resources);
+ log_debug("get_lockd_vgs %s lv %s %s (name %s)",
+ ls->vg_name, r->name, lock_args ? lock_args : "", lv_cn->key);
+ }
+ }
+ next:
+ daemon_reply_destroy(reply);
+
+ if (rv < 0)
+ break;
+ }
+ pthread_mutex_unlock(&lvmetad_mutex);
+ mutex_unlocked = 1;
+out:
+ /* Return lockd VG's on the vg_lockd list. */
+
+ list_for_each_entry_safe(ls, safe, &update_vgs, list) {
+ list_del(&ls->list);
+
+ if ((ls->lm_type == LD_LM_SANLOCK) || (ls->lm_type == LD_LM_DLM))
+ list_add_tail(&ls->list, vg_lockd);
+ else
+ free(ls);
+ }
+
+ if (!mutex_unlocked)
+ pthread_mutex_unlock(&lvmetad_mutex);
+
+ return rv;
+}
+
+static char _dm_uuid[64];
+
+static char *get_dm_uuid(char *dm_name)
+{
+ struct dm_info info;
+ struct dm_task *dmt;
+ const char *uuid;
+
+ if (!(dmt = dm_task_create(DM_DEVICE_INFO)))
+ goto fail_out;
+
+ if (!dm_task_set_name(dmt, dm_name))
+ goto fail;
+
+ if (!dm_task_run(dmt))
+ goto fail;
+
+ if (!dm_task_get_info(dmt, &info))
+ goto fail;
+
+ if (!info.exists)
+ goto fail;
+
+ uuid = dm_task_get_uuid(dmt);
+ if (!uuid) {
+ log_error("Failed to get uuid for device %s", dm_name);
+ goto fail;
+ }
+
+ if (strncmp(uuid, "LVM", 3)) {
+ log_debug("dm device %s is not from LVM", dm_name);
+ goto fail;
+ }
+
+ memset(_dm_uuid, 0, sizeof(_dm_uuid));
+ strncpy(_dm_uuid, uuid, sizeof(_dm_uuid)-1);
+ dm_task_destroy(dmt);
+ return _dm_uuid;
+
+fail:
+ dm_task_destroy(dmt);
+fail_out:
+ return NULL;
+}
+
+/*
+ * dm reports the LV uuid as:
+ * LVM-ydpRIdDWBDX25upmj2k0D4deat6oxH8er03T0f4xM8rPIV8XqIhwv3h8Y7xRWjMr
+ *
+ * the lock name for the LV is:
+ * r03T0f-4xM8-rPIV-8XqI-hwv3-h8Y7-xRWjMr
+ *
+ * This function formats both as:
+ * r03T0f4xM8rPIV8XqIhwv3h8Y7xRWjMr
+ *
+ * and returns 1 if they match.
+ */
+
/*
 * Compare the LV id embedded in a dm uuid against an LV lock name
 * (see the format examples in the comment above).  Both are reduced to
 * the bare 32-char id: the lock name by stripping '-' separators, the
 * dm uuid by taking the fixed span at offsets 36..67 ("LVM-" + 32-char
 * vg id precede it).  Returns 1 on match, 0 otherwise.
 *
 * Hardened against malformed input: a dm uuid shorter than the full 68
 * chars (e.g. truncated) previously caused an out-of-bounds read of the
 * fixed-offset copy; it now simply fails to match.  An over-long lock
 * name can no longer overflow buf1.
 */
static int match_dm_uuid(char *dm_uuid, char *lv_lock_uuid)
{
	char buf1[64];
	char buf2[64];
	size_t len = strlen(lv_lock_uuid);	/* hoisted out of the loop */
	size_t i, j;

	memset(buf1, 0, sizeof(buf1));
	memset(buf2, 0, sizeof(buf2));

	/* need "LVM-" + 64 id chars before the fixed-offset copy below */
	if (strlen(dm_uuid) < 68)
		return 0;

	for (i = 0, j = 0; i < len; i++) {
		if (lv_lock_uuid[i] == '-')
			continue;
		if (j == sizeof(buf1) - 1)
			return 0;	/* too long to be a valid lock name */
		buf1[j++] = lv_lock_uuid[i];
	}

	/* the LV portion of the dm uuid: offsets 36..67 */
	for (i = 36, j = 0; i < 68; i++)
		buf2[j++] = dm_uuid[i];

	return !strcmp(buf1, buf2);
}
+
+/*
+ * All LVs with a lock_type are on ls->resources.
+ * Remove any that are not active. The remaining
+ * will have locks adopted.
+ */
+
+static int remove_inactive_lvs(struct list_head *vg_lockd)
+{
+ struct lockspace *ls;
+ struct resource *r, *rsafe;
+ struct dm_names *names;
+ struct dm_task *dmt;
+ char *dm_uuid;
+ char *vgname, *lvname, *layer;
+ char namebuf[MAX_NAME+1];
+ unsigned next = 0;
+ int rv = 0;
+
+ if (!(dmt = dm_task_create(DM_DEVICE_LIST)))
+ return -1;
+
+ if (!dm_task_run(dmt)) {
+ log_error("Failed to get dm devices");
+ rv = -1;
+ goto ret;
+ }
+
+ if (!(names = dm_task_get_names(dmt))) {
+ log_error("Failed to get dm names");
+ rv = -1;
+ goto ret;
+ }
+
+ if (!names->dev) {
+ log_debug("dm names none found");
+ goto out;
+ }
+
+ /*
+ * For each dm name, compare it to each lv in each lockd vg.
+ */
+
+ do {
+ names = (struct dm_names *)((char *) names + next);
+
+ dm_uuid = get_dm_uuid(names->name);
+ if (!dm_uuid)
+ goto next_dmname;
+
+ vgname = NULL;
+ lvname = NULL;
+ layer = NULL;
+
+ memset(namebuf, 0, sizeof(namebuf));
+ strncpy(namebuf, names->name, MAX_NAME);
+ vgname = namebuf;
+
+ if (!dm_split_lvm_name(NULL, namebuf, &vgname, &lvname, &layer)) {
+ log_error("failed to split dm name %s", namebuf);
+ goto next_dmname;
+ }
+
+ log_debug("adopt remove_inactive dm name %s dm uuid %s vgname %s lvname %s",
+ names->name, dm_uuid, vgname, lvname);
+
+ if (!vgname || !lvname) {
+ log_debug("dm name %s invalid split vg %s lv %s layer %s",
+ names->name, vgname ? vgname : "", lvname ? lvname : "", layer ? layer : "");
+ goto next_dmname;
+ }
+
+ list_for_each_entry(ls, vg_lockd, list) {
+ if (strcmp(vgname, ls->vg_name))
+ continue;
+
+ if (!strcmp(lvname, "lvmlock"))
+ continue;
+
+ list_for_each_entry(r, &ls->resources, list) {
+ if (!match_dm_uuid(dm_uuid, r->name))
+ continue;
+
+ /* Found an active LV in a lockd VG. */
+ log_debug("dm device %s adopt in vg %s lv %s",
+ names->name, ls->vg_name, r->name);
+ r->adopt = 1;
+ goto next_dmname;
+ }
+ }
+next_dmname:
+ next = names->next;
+ } while (next);
+
+out:
+ /* Remove any struct resources that do not need locks adopted. */
+ list_for_each_entry(ls, vg_lockd, list) {
+ list_for_each_entry_safe(r, rsafe, &ls->resources, list) {
+ if (r->adopt) {
+ r->adopt = 0;
+ } else {
+ log_debug("lockd vg %s remove inactive lv %s", ls->vg_name, r->name);
+ list_del(&r->list);
+ free_resource(r);
+ }
+ }
+ }
+ret:
+ dm_task_destroy(dmt);
+ return rv;
+}
+
+static void adopt_locks(void)
+{
+ struct list_head ls_found;
+ struct list_head vg_lockd;
+ struct list_head to_unlock;
+ struct lockspace *ls, *lsafe;
+ struct lockspace *ls1, *l1safe;
+ struct lockspace *ls2, *l2safe;
+ struct resource *r, *rsafe;
+ struct action *act, *asafe;
+ int count_start = 0, count_start_done = 0, count_start_fail = 0;
+ int count_adopt = 0, count_adopt_done = 0, count_adopt_fail = 0;
+ int found, rv;
+
+ INIT_LIST_HEAD(&adopt_results);
+
+ INIT_LIST_HEAD(&ls_found);
+ INIT_LIST_HEAD(&vg_lockd);
+ INIT_LIST_HEAD(&to_unlock);
+
+ /*
+ * Get list of lockspaces from lock managers.
+ * Get list of VGs from lvmetad with a lockd type.
+ * Get list of active lockd type LVs from /dev.
+ *
+ * ECONNREFUSED means the lock manager is not running.
+ * This is expected for at least one of them.
+ */
+
+ rv = lm_get_lockspaces_dlm(&ls_found);
+ if ((rv < 0) && (rv != -ECONNREFUSED))
+ goto fail;
+
+ rv = lm_get_lockspaces_sanlock(&ls_found);
+ if ((rv < 0) && (rv != -ECONNREFUSED))
+ goto fail;
+
+ if (list_empty(&ls_found)) {
+ log_debug("No lockspaces found to adopt");
+ return;
+ }
+
+ /*
+ * Adds a struct lockspace to vg_lockd for each lockd VG.
+ * Adds a struct resource to ls->resources for each LV.
+ */
+ rv = get_lockd_vgs(&vg_lockd);
+ if (rv < 0) {
+ log_error("adopt_locks get_lockd_vgs failed");
+ goto fail;
+ }
+
+ /*
+ * For each resource on each lockspace, check if the
+ * corresponding LV is active. If so, leave the
+ * resource struct, if not free the resource struct.
+ * The remain entries need to have locks adopted.
+ */
+ rv = remove_inactive_lvs(&vg_lockd);
+ if (rv < 0) {
+ log_error("adopt_locks remove_inactive_lvs failed");
+ goto fail;
+ }
+
+ list_for_each_entry(ls, &ls_found, list) {
+ if (ls->lm_type == LD_LM_DLM)
+ gl_use_dlm = 1;
+
+ log_debug("adopt %s lockspace %s vg %s",
+ lm_str(ls->lm_type), ls->name, ls->vg_name);
+ }
+
+ if (!gl_use_dlm)
+ gl_use_sanlock = 1;
+
+ list_for_each_entry(ls, &vg_lockd, list) {
+ log_debug("adopt lvmetad vg %s lock_type %s lock_args %s",
+ ls->vg_name, lm_str(ls->lm_type), ls->vg_args);
+
+ list_for_each_entry(r, &ls->resources, list)
+ log_debug("adopt lv %s %s", ls->vg_name, r->name);
+ }
+
+ /*
+ * Compare and merge the list of lockspaces in ls_found
+ * and the list of lockd VGs in vg_lockd.
+ *
+ * An ls from ls_found may not have had any active lvs when
+ * previous lvmlockd died, but the ls should still be joined,
+ * and checked for GL/VG locks.
+ *
+ * An ls from vg_lockd with active lvs should be in ls_found.
+ * If it's not then we might want to join the ls and acquire locks
+ * for the active lvs (as opposed to adopting orphans for them.)
+ * The orphan lock in the ls should have prevented the ls in
+ * the lock manager from going away.
+ *
+ * If an ls in vg_lockd has no active lvs and does not have
+ * a matching entry in ls_found, then skip it.
+ *
+ * An ls in ls_found should always have a matching ls in
+ * vg_lockd. If it doesn't, then maybe the vg has been
+ * removed even though the lockspace for the vg is still
+ * in the lock manager. Just leave the ls in the lm
+ * alone, and skip the ls_found entry.
+ */
+
+ list_for_each_entry_safe(ls1, l1safe, &ls_found, list) {
+
+ /* The dlm global lockspace is special and doesn't match a VG. */
+ if (!strcmp(ls1->name, gl_lsname_dlm)) {
+ list_del(&ls1->list);
+ free(ls1);
+ continue;
+ }
+
+ found = 0;
+
+ list_for_each_entry_safe(ls2, l2safe, &vg_lockd, list) {
+ if (strcmp(ls1->vg_name, ls2->vg_name))
+ continue;
+
+ /*
+ * LS in both ls_found and vg_lockd.
+ */
+ log_debug("ls %s matches vg %s", ls1->name, ls2->vg_name);
+ memcpy(ls1->vg_uuid, ls2->vg_uuid, 64);
+ memcpy(ls1->vg_args, ls2->vg_args, MAX_ARGS);
+ list_for_each_entry_safe(r, rsafe, &ls2->resources, list) {
+ list_del(&r->list);
+ list_add(&r->list, &ls1->resources);
+ }
+ list_del(&ls2->list);
+ free(ls2);
+ found = 1;
+ break;
+ }
+
+ /*
+ * LS in ls_found, not in vg_lockd.
+ * An lvm lockspace found in the lock manager has no
+ * corresponding VG in lvmetad. This shouldn't usually
+ * happen, but it's possible the VG could have been removed
+ * while the orphaned lockspace from it was still around.
+ * Report an error and leave the ls in the lm alone.
+ */
+ if (!found) {
+ log_error("No VG %s found for lockspace %s %s",
+ ls1->vg_name, ls1->name, lm_str(ls1->lm_type));
+ list_del(&ls1->list);
+ free(ls1);
+ }
+ }
+
+ /*
+ * LS in vg_lockd, not in ls_found.
+ * lockd vgs from lvmetad that do not have an existing lockspace.
+ * This wouldn't be unusual; we just skip the vg.
+ * But, if the vg has active lvs, then it should have had locks
+ * and a lockspace. Should we attempt to join the lockspace and
+ * acquire (not adopt) locks for these LVs?
+ */
+
+ list_for_each_entry_safe(ls, lsafe, &vg_lockd, list) {
+ if (!list_empty(&ls->resources)) {
+ /* We should have found a lockspace. */
+ /* add this ls and acquire locks for ls->resources? */
+ log_error("No lockspace %s %s found for VG %s with active LVs",
+ ls->name, lm_str(ls->lm_type), ls->vg_name);
+ } else {
+ /* The VG wasn't started in the previous lvmlockd. */
+ log_debug("No ls found for vg %s", ls->vg_name);
+ }
+
+ list_del(&ls->list);
+ free(ls);
+ }
+
+ /*
+ * Create and queue start actions to add lockspaces.
+ */
+
+ if (gl_use_dlm) {
+ if (!(act = alloc_action()))
+ goto fail;
+ log_debug("adopt add dlm global lockspace");
+ act->op = LD_OP_START;
+ act->flags = (LD_AF_ADOPT | LD_AF_WAIT);
+ act->rt = LD_RT_GL;
+ act->lm_type = LD_LM_DLM;
+ act->client_id = ADOPT_CLIENT_ID;
+ add_dlm_global_lockspace(act);
+ count_start++;
+ }
+
+ list_for_each_entry_safe(ls, lsafe, &ls_found, list) {
+ if (!(act = alloc_action()))
+ goto fail;
+ act->op = LD_OP_START;
+ act->flags = (LD_AF_ADOPT | LD_AF_WAIT);
+ act->rt = LD_RT_VG;
+ act->lm_type = ls->lm_type;
+ act->client_id = ADOPT_CLIENT_ID;
+ strncpy(act->vg_name, ls->vg_name, MAX_NAME);
+ memcpy(act->vg_uuid, ls->vg_uuid, 64);
+ memcpy(act->vg_args, ls->vg_args, MAX_ARGS);
+ act->host_id = ls->host_id;
+
+ /* set act->version from lvmetad data? */
+
+ log_debug("adopt add %s vg lockspace %s", lm_str(act->lm_type), act->vg_name);
+
+ rv = add_lockspace_thread(ls->name, act->vg_name, act->vg_uuid,
+ act->lm_type, act->vg_args, act);
+ if (rv < 0) {
+ log_error("Failed to create lockspace thread for VG %s", ls->vg_name);
+ list_del(&ls->list);
+ free(ls);
+ free_action(act);
+ count_start_fail++;
+ continue;
+ }
+
+ /*
+ * When the lockspace_thread is done with the start act,
+ * it will see the act ADOPT flag and move the act onto
+ * the adopt_results list for us to collect below.
+ */
+ count_start++;
+ }
+
+ log_debug("adopt starting %d lockspaces", count_start);
+
+ /*
+ * Wait for all start/rejoin actions to complete. Each start action
+ * queued above will appear on the adopt_results list when finished.
+ */
+
+ while (count_start_done < count_start) {
+ sleep(1);
+ act = NULL;
+
+ pthread_mutex_lock(&client_mutex);
+ if (!list_empty(&adopt_results)) {
+ act = list_first_entry(&adopt_results, struct action, list);
+ list_del(&act->list);
+ }
+ pthread_mutex_unlock(&client_mutex);
+
+ if (!act)
+ continue;
+
+ if (act->result < 0) {
+ log_error("adopt add lockspace failed vg %s %d", act->vg_name, act->result);
+ count_start_fail++;
+ }
+
+ free_action(act);
+ count_start_done++;
+ }
+
+ log_debug("adopt started %d lockspaces done %d fail %d",
+ count_start, count_start_done, count_start_fail);
+
+ /*
+ * Create lock-adopt actions for active LVs (ls->resources),
+ * and GL/VG locks (we don't know if these locks were held
+ * and orphaned by the last lvmlockd, so try to adopt them
+ * to see.)
+ *
+ * A proper struct lockspace now exists on the lockspaces list
+ * for each ls in ls_found. Lock ops for one of those
+ * lockspaces can be done as OP_LOCK actions queued using
+ * add_lock_action();
+ *
+ * Start by attempting to adopt the lock in the most likely
+ * mode it was left in (ex for lvs, sh for vg/gl). If
+ * the mode is wrong, the lm will return an error and we
+ * try again with the other mode.
+ */
+
+ list_for_each_entry(ls, &ls_found, list) {
+
+ /*
+ * Adopt orphan LV locks.
+ */
+
+ list_for_each_entry(r, &ls->resources, list) {
+ if (!(act = alloc_action()))
+ goto fail;
+ act->op = LD_OP_LOCK;
+ act->rt = LD_RT_LV;
+ act->mode = LD_LK_EX;
+ act->flags = (LD_AF_ADOPT | LD_AF_PERSISTENT);
+ act->client_id = ADOPT_CLIENT_ID;
+ act->lm_type = ls->lm_type;
+ strncpy(act->vg_name, ls->vg_name, MAX_NAME);
+ strncpy(act->lv_uuid, r->name, MAX_NAME);
+ strncpy(act->lv_args, r->lv_args, MAX_ARGS);
+
+ log_debug("adopt lock for lv %s %s", act->vg_name, act->lv_uuid);
+
+ rv = add_lock_action(act);
+ if (rv < 0) {
+ log_error("adopt add_lock_action lv %s %s error %d", act->vg_name, act->lv_uuid, rv);
+ count_adopt_fail++;
+ free_action(act);
+ } else {
+ count_adopt++;
+ }
+ }
+
+ /*
+ * Adopt orphan VG lock.
+ */
+
+ if (!(act = alloc_action()))
+ goto fail;
+ act->op = LD_OP_LOCK;
+ act->rt = LD_RT_VG;
+ act->mode = LD_LK_SH;
+ act->flags = LD_AF_ADOPT;
+ act->client_id = ADOPT_CLIENT_ID;
+ act->lm_type = ls->lm_type;
+ strncpy(act->vg_name, ls->vg_name, MAX_NAME);
+
+ log_debug("adopt lock for vg %s", act->vg_name);
+
+ rv = add_lock_action(act);
+ if (rv < 0) {
+ log_error("adopt add_lock_action vg %s error %d", act->vg_name, rv);
+ count_adopt_fail++;
+ free_action(act);
+ } else {
+ count_adopt++;
+ }
+ }
+
+ /*
+ * Adopt orphan GL lock.
+ */
+
+ if (!(act = alloc_action()))
+ goto fail;
+ act->op = LD_OP_LOCK;
+ act->rt = LD_RT_GL;
+ act->mode = LD_LK_SH;
+ act->flags = LD_AF_ADOPT;
+ act->client_id = ADOPT_CLIENT_ID;
+ act->lm_type = (gl_use_sanlock ? LD_LM_SANLOCK : LD_LM_DLM);
+
+ log_debug("adopt lock for gl");
+
+ rv = add_lock_action(act);
+ if (rv < 0) {
+ log_error("adopt add_lock_action gl %s error %d", act->vg_name, rv);
+ count_adopt_fail++;
+ free_action(act);
+ } else {
+ count_adopt++;
+ }
+
+ /*
+ * Wait for lock-adopt actions to complete. The completed
+ * actions are passed back here via the adopt_results list.
+ */
+
+ while (count_adopt_done < count_adopt) {
+ sleep(1);
+ act = NULL;
+
+ pthread_mutex_lock(&client_mutex);
+ if (!list_empty(&adopt_results)) {
+ act = list_first_entry(&adopt_results, struct action, list);
+ list_del(&act->list);
+ }
+ pthread_mutex_unlock(&client_mutex);
+
+ if (!act)
+ continue;
+
+ /*
+ * lock adopt results
+ */
+
+ if (act->result == -EUCLEAN) {
+ /*
+ * Adopt failed because the orphan has a different mode
+ * than initially requested. Repeat the lock-adopt operation
+ * with the other mode. N.B. this logic depends on first
+ * trying sh then ex for GL/VG locks, and ex then sh for
+ * LV locks.
+ */
+
+ if ((act->rt != LD_RT_LV) && (act->mode == LD_LK_SH)) {
+ /* GL/VG locks: attempt to adopt ex after sh failed. */
+ act->mode = LD_LK_EX;
+ rv = add_lock_action(act);
+
+ } else if ((act->rt == LD_RT_LV) && (act->mode == LD_LK_EX)) {
+ /* LV locks: attempt to adopt sh after ex failed. */
+ act->mode = LD_LK_SH;
+ rv = add_lock_action(act);
+
+ } else {
+ log_error("Failed to adopt %s lock in vg %s error %d",
+ rt_str(act->rt), act->vg_name, act->result);
+ count_adopt_fail++;
+ count_adopt_done++;
+ free_action(act);
+ rv = 0;
+ }
+
+ if (rv < 0) {
+ log_error("adopt add_lock_action again %s", act->vg_name);
+ count_adopt_fail++;
+ count_adopt_done++;
+ free_action(act);
+ }
+
+ } else if (act->result == -ENOENT) {
+ /*
+ * No orphan lock exists. This is common for GL/VG locks
+ * because they may not have been held when lvmlockd exited.
+ * It's also expected for LV types that do not use a lock.
+ */
+
+ if (act->rt == LD_RT_LV) {
+ /* Unexpected, we should have found an orphan. */
+ log_error("Failed to adopt LV lock for %s %s error %d",
+ act->vg_name, act->lv_uuid, act->result);
+ count_adopt_fail++;
+ } else {
+ /* Normal, no GL/VG lock was orphaned. */
+ log_debug("Did not adopt %s lock in vg %s error %d",
+ rt_str(act->rt), act->vg_name, act->result);
+ }
+
+ count_adopt_done++;
+ free_action(act);
+
+ } else if (act->result < 0) {
+ /*
+ * Some unexpected error.
+ */
+
+ log_error("adopt lock rt %s vg %s lv %s error %d",
+ rt_str(act->rt), act->vg_name, act->lv_uuid, act->result);
+ count_adopt_fail++;
+ count_adopt_done++;
+ free_action(act);
+
+ } else {
+ /*
+ * Adopt success.
+ */
+
+ if (act->rt == LD_RT_LV) {
+ log_debug("adopt success lv %s %s %s", act->vg_name, act->lv_uuid, mode_str(act->mode));
+ free_action(act);
+ } else if (act->rt == LD_RT_VG) {
+ log_debug("adopt success vg %s %s", act->vg_name, mode_str(act->mode));
+ list_add_tail(&act->list, &to_unlock);
+ } else if (act->rt == LD_RT_GL) {
+ log_debug("adopt success gl %s %s", act->vg_name, mode_str(act->mode));
+ list_add_tail(&act->list, &to_unlock);
+ }
+ count_adopt_done++;
+ }
+ }
+
+ /*
+ * Release adopted GL/VG locks.
+ * The to_unlock actions were the ones used to lock-adopt the GL/VG locks;
+ * now use them to do the unlocks. These actions will again be placed
+ * on adopt_results for us to collect because they have the ADOPT flag set.
+ */
+
+ count_adopt = 0;
+ count_adopt_done = 0;
+
+ list_for_each_entry_safe(act, asafe, &to_unlock, list) {
+ list_del(&act->list);
+
+ if (act->mode == LD_LK_EX) {
+ /*
+ * FIXME: we probably want to check somehow that
+ * there's no lvm command still running that's
+ * using this ex lock and changing things.
+ */
+ log_warn("adopt releasing ex %s lock %s",
+ rt_str(act->rt), act->vg_name);
+ }
+
+ act->mode = LD_LK_UN;
+
+ log_debug("adopt unlock for %s %s", rt_str(act->rt), act->vg_name);
+
+ rv = add_lock_action(act);
+ if (rv < 0) {
+ log_error("adopt unlock add_lock_action error %d", rv);
+ free_action(act);
+ } else {
+ count_adopt++;
+ }
+ }
+
+ /* Wait for the unlocks to complete. */
+
+ while (count_adopt_done < count_adopt) {
+ sleep(1);
+ act = NULL;
+
+ pthread_mutex_lock(&client_mutex);
+ if (!list_empty(&adopt_results)) {
+ act = list_first_entry(&adopt_results, struct action, list);
+ list_del(&act->list);
+ }
+ pthread_mutex_unlock(&client_mutex);
+
+ if (!act)
+ continue;
+
+ if (act->result < 0)
+ log_error("adopt unlock error %d", act->result);
+
+ count_adopt_done++;
+ free_action(act);
+ }
+
+
+ /* FIXME: purge any remaining orphan locks in each rejoined ls? */
+
+ if (count_start_fail || count_adopt_fail)
+ goto fail;
+
+ log_debug("adopt_locks done");
+ return;
+
+fail:
+ log_error("adopt_locks failed, reset host");
+}
+
+static int get_peer_pid(int fd)
+{
+ struct ucred cred;
+ unsigned int len = sizeof(cred);
+
+ if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &cred, &len) != 0)
+ return -1;
+
+ return cred.pid;
+}
+
+/*
+ * Accept a new client connection on the listening socket, register it
+ * with the poll loop, assign it a unique non-zero id, and add it to
+ * client_list for the client thread to service.
+ */
+static void process_listener(int poll_fd)
+{
+	struct client *cl;
+	int fd, pi;
+
+	/* assert poll_fd == listen_fd */
+
+	fd = accept(listen_fd, NULL, NULL);
+	if (fd < 0)
+		return;
+
+	if (!(cl = alloc_client())) {
+		/* close() returns 0 on success; log only a non-zero (failed) close. */
+		if (close(fd))
+			log_error("failed to close lockd poll fd");
+		return;
+	}
+
+	pi = add_pollfd(fd);
+	if (pi < 0) {
+		log_error("process_listener add_pollfd error %d", pi);
+		free_client(cl);
+		/* fd was not registered anywhere; close it to avoid a leak. */
+		if (close(fd))
+			log_error("failed to close lockd poll fd");
+		return;
+	}
+
+	cl->pi = pi;
+	cl->fd = fd;
+	cl->pid = get_peer_pid(fd);
+
+	pthread_mutex_init(&cl->mutex, NULL);
+
+	pthread_mutex_lock(&client_mutex);
+	client_ids++;
+
+	/* Skip the id reserved for internal adopt actions, and never use id 0. */
+	if (client_ids == ADOPT_CLIENT_ID)
+		client_ids++;
+	if (!client_ids)
+		client_ids++;
+
+	cl->id = client_ids;
+	list_add_tail(&cl->list, &client_list);
+	pthread_mutex_unlock(&client_mutex);
+
+	log_debug("client add id %d pi %d fd %d", cl->id, cl->pi, cl->fd);
+}
+
+/*
+ * main loop polls on pipe[0] so that a thread can
+ * restart the poll by writing to pipe[1].
+ */
+/* Create the self-pipe used to wake poll() and register its read end. */
+static int setup_restart(void)
+{
+	if (pipe(restart_fds)) {
+		log_error("setup_restart pipe error %d", errno);
+		return -1;
+	}
+
+	restart_pi = add_pollfd(restart_fds[0]);
+	return (restart_pi < 0) ? restart_pi : 0;
+}
+
+/*
+ * thread wrote 'w' to restart_fds[1] to restart poll()
+ * after adding an fd back into pollfd.
+ */
+/*
+ * Drain the single wakeup byte ('w') that a thread wrote to
+ * restart_fds[1] to force poll() to re-read the pollfd array.
+ */
+static void process_restart(int fd)
+{
+	char wake[1];
+	ssize_t n;
+
+	/* assert fd == restart_fds[0] */
+
+	n = read(restart_fds[0], wake, sizeof(wake));
+	if (n <= 0)
+		log_debug("process_restart error %d", errno);
+}
+
+/* SIGTERM: request an orderly shutdown; main_loop checks daemon_quit. */
+static void sigterm_handler(int sig __attribute__((unused)))
+{
+	/* NOTE(review): daemon_quit is not declared sig_atomic_t here — confirm */
+	daemon_quit = 1;
+}
+
+/*
+ * Daemon main thread (installed as daemon_main, run by daemon_start).
+ * Initializes global state, connects to lvmetad, optionally adopts
+ * lockspaces/locks from a previous lvmlockd instance, then polls the
+ * listening socket, the restart pipe, and all client fds, handing
+ * ready clients to the client thread.  Returns 0 after shutdown, or
+ * a negative value if initial allocation fails.
+ */
+static int main_loop(daemon_state *ds_arg)
+{
+	struct client *cl;
+	int i, rv, is_recv, is_dead;
+
+	signal(SIGTERM, &sigterm_handler);
+
+	rv = setup_structs();
+	if (rv < 0) {
+		log_error("Can't allocate memory");
+		return rv;
+	}
+
+	strcpy(gl_lsname_dlm, S_NAME_GL_DLM);
+
+	INIT_LIST_HEAD(&lockspaces);
+	INIT_LIST_HEAD(&lockspaces_inactive);
+	pthread_mutex_init(&lockspaces_mutex, NULL);
+	pthread_mutex_init(&pollfd_mutex, NULL);
+	pthread_mutex_init(&log_mutex, NULL);
+
+	openlog("lvmlockd", LOG_CONS | LOG_PID, LOG_DAEMON);
+	log_warn("lvmlockd started");
+
+	listen_fd = ds_arg->socket_fd;
+	/* NOTE(review): add_pollfd and setup_restart results are unchecked — confirm failure here is impossible/acceptable */
+	listen_pi = add_pollfd(listen_fd);
+
+	setup_client_thread();
+	setup_worker_thread();
+	setup_restart();
+
+	pthread_mutex_init(&lvmetad_mutex, NULL);
+	lvmetad_handle = lvmetad_open(NULL);
+	if (lvmetad_handle.error || lvmetad_handle.socket_fd < 0)
+		log_error("lvmetad_open error %d", lvmetad_handle.error);
+	else
+		lvmetad_connected = 1;
+
+	/*
+	 * Attempt to rejoin lockspaces and adopt locks from a previous
+	 * instance of lvmlockd that left behind lockspaces/locks.
+	 */
+	if (adopt_opt)
+		adopt_locks();
+
+	while (1) {
+		rv = poll(pollfd, pollfd_maxi + 1, -1);
+		if ((rv == -1 && errno == EINTR) || daemon_quit) {
+			if (daemon_quit) {
+				int count;
+				/* first sigterm would trigger stops, and
+				   second sigterm may finish the joins. */
+				count = for_each_lockspace(DO_STOP, DO_FREE, NO_FORCE);
+				if (!count)
+					break;
+				log_debug("ignore shutdown for %d lockspaces", count);
+				daemon_quit = 0;
+			}
+			continue;
+		}
+		if (rv < 0) {
+			log_error("poll errno %d", errno);
+			break;
+		}
+
+		for (i = 0; i <= pollfd_maxi; i++) {
+			if (pollfd[i].fd < 0)
+				continue;
+
+			is_recv = 0;
+			is_dead = 0;
+
+			if (pollfd[i].revents & POLLIN)
+				is_recv = 1;
+			if (pollfd[i].revents & (POLLERR | POLLHUP | POLLNVAL))
+				is_dead = 1;
+
+			if (!is_recv && !is_dead)
+				continue;
+
+			if (i == listen_pi) {
+				process_listener(pollfd[i].fd);
+				continue;
+			}
+
+			if (i == restart_pi) {
+				process_restart(pollfd[i].fd);
+				continue;
+			}
+
+			/*
+			log_debug("poll pi %d fd %d revents %x",
+				  i, pollfd[i].fd, pollfd[i].revents);
+			*/
+
+			pthread_mutex_lock(&client_mutex);
+			cl = find_client_pi(i);
+			if (cl) {
+				pthread_mutex_lock(&cl->mutex);
+
+				if (cl->recv) {
+					/* should not happen */
+					log_error("main client %d already recv", cl->id);
+
+				} else if (cl->dead) {
+					/* should not happen */
+					log_error("main client %d already dead", cl->id);
+
+				} else if (is_dead) {
+					/* Peer hung up or errored: mark dead for the
+					   client thread and drop the fd from pollfd. */
+					log_debug("close %s[%d.%u] fd %d",
+						  cl->name[0] ? cl->name : "client",
+						  cl->pid, cl->id, cl->fd);
+					cl->dead = 1;
+					cl->pi = -1;
+					cl->fd = -1;
+					cl->poll_ignore = 0;
+					if (close(pollfd[i].fd))
+						log_error("close fd %d failed", pollfd[i].fd);
+					pollfd[i].fd = POLL_FD_UNUSED;
+					pollfd[i].events = 0;
+					pollfd[i].revents = 0;
+
+				} else if (is_recv) {
+					/* Data ready: park the fd (POLL_FD_IGNORE) until
+					   the client thread has consumed the request. */
+					cl->recv = 1;
+					cl->poll_ignore = 1;
+					pollfd[i].fd = POLL_FD_IGNORE;
+					pollfd[i].events = 0;
+					pollfd[i].revents = 0;
+				}
+
+				pthread_mutex_unlock(&cl->mutex);
+
+				client_work = 1;
+				pthread_cond_signal(&client_cond);
+
+				/* client_thread will pick up and work on any
+				   client with cl->recv or cl->dead set */
+
+			} else {
+				/* don't think this can happen */
+				log_error("no client for index %d fd %d",
+					  i, pollfd[i].fd);
+				if (close(pollfd[i].fd))
+					log_error("close fd %d failed", pollfd[i].fd);
+				pollfd[i].fd = POLL_FD_UNUSED;
+				pollfd[i].events = 0;
+				pollfd[i].revents = 0;
+			}
+			pthread_mutex_unlock(&client_mutex);
+
+			/* After set_dead, should we scan pollfd for
+			   last unused slot and reduce pollfd_maxi? */
+		}
+	}
+
+	for_each_lockspace_retry(DO_STOP, DO_FREE, DO_FORCE);
+	free_lockspaces_inactive();
+	close_worker_thread();
+	close_client_thread();
+	closelog();
+	daemon_close(lvmetad_handle);
+	return 0;
+}
+
+/* Print command-line help for lvmlockd to the given stream. */
+static void usage(char *prog, FILE *file)
+{
+	fprintf(file, "Usage:\n");
+	fprintf(file, "%s [options]\n\n", prog);
+	fprintf(file, " --help | -h\n"
+		      " Show this help information.\n");
+	fprintf(file, " --version | -V\n"
+		      " Show version of lvmlockd.\n");
+	fprintf(file, " --test | -T\n"
+		      " Test mode, do not call lock manager.\n");
+	fprintf(file, " --foreground | -f\n"
+		      " Don't fork.\n");
+	fprintf(file, " --daemon-debug | -D\n"
+		      " Don't fork and print debugging to stdout.\n");
+	fprintf(file, " --pid-file | -p <path>\n");
+	fprintf(file, " Set path to the pid file. [%s]\n", LVMLOCKD_PIDFILE);
+	fprintf(file, " --socket-path | -s <path>\n");
+	fprintf(file, " Set path to the socket to listen on. [%s]\n", LVMLOCKD_SOCKET);
+	fprintf(file, " --syslog-priority | -S err|warning|debug\n");
+	fprintf(file, " Write log messages from this level up to syslog. [%s]\n", _syslog_num_to_name(LOG_SYSLOG_PRIO));
+	fprintf(file, " --gl-type | -g <str>\n"
+		      " Set global lock type to be dlm|sanlock.\n");
+	fprintf(file, " --host-id | -i <num>\n"
+		      " Set the local sanlock host id.\n");
+	fprintf(file, " --host-id-file | -F <path>\n"
+		      " A file containing the local sanlock host_id.\n");
+	fprintf(file, " --sanlock-timeout | -o <seconds>\n"
+		      " Set the sanlock lockspace I/O timeout.\n");
+	fprintf(file, " --adopt | -A 0|1\n"
+		      " Adopt locks from a previous instance of lvmlockd.\n");
+}
+
+int main(int argc, char *argv[])
+{
+	daemon_state ds = {
+		.daemon_main = main_loop,
+		.daemon_init = NULL,
+		.daemon_fini = NULL,
+		.pidfile = getenv("LVM_LVMLOCKD_PIDFILE"),
+		.socket_path = getenv("LVM_LVMLOCKD_SOCKET"),
+		.protocol = lvmlockd_protocol,
+		.protocol_version = lvmlockd_protocol_version,
+		.name = "lvmlockd",
+	};
+
+	static struct option long_options[] = {
+		{"help",            no_argument,       0, 'h' },
+		{"version",         no_argument,       0, 'V' },
+		{"test",            no_argument,       0, 'T' },
+		{"foreground",      no_argument,       0, 'f' },
+		{"daemon-debug",    no_argument,       0, 'D' },
+		{"pid-file",        required_argument, 0, 'p' },
+		{"socket-path",     required_argument, 0, 's' },
+		{"gl-type",         required_argument, 0, 'g' },
+		{"host-id",         required_argument, 0, 'i' },
+		{"host-id-file",    required_argument, 0, 'F' },
+		{"adopt",           required_argument, 0, 'A' },
+		{"syslog-priority", required_argument, 0, 'S' },
+		{"sanlock-timeout", required_argument, 0, 'o' },
+		{0, 0, 0, 0 }
+	};
+
+	while (1) {
+		int c;
+		int lm;
+		int option_index = 0;
+
+		/*
+		 * "i:" and "F:" were missing from the optstring even though
+		 * usage() documents -i <num> and -F <path>, so the short
+		 * forms of --host-id and --host-id-file were rejected.
+		 */
+		c = getopt_long(argc, argv, "hVTfDp:s:l:g:S:i:F:I:A:o:",
+				long_options, &option_index);
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 'h':
+			usage(argv[0], stdout);
+			exit(EXIT_SUCCESS);
+		case 'V':
+			printf("lvmlockd version: " LVM_VERSION "\n");
+			exit(EXIT_SUCCESS);
+		case 'T':
+			daemon_test = 1;
+			break;
+		case 'f':
+			ds.foreground = 1;
+			break;
+		case 'D':
+			ds.foreground = 1;
+			daemon_debug = 1;
+			break;
+		case 'p':
+			ds.pidfile = strdup(optarg);
+			break;
+		case 's':
+			ds.socket_path = strdup(optarg);
+			break;
+		case 'g':
+			lm = str_to_lm(optarg);
+			if (lm == LD_LM_DLM && lm_support_dlm())
+				gl_use_dlm = 1;
+			else if (lm == LD_LM_SANLOCK && lm_support_sanlock())
+				gl_use_sanlock = 1;
+			else {
+				fprintf(stderr, "invalid gl-type option");
+				exit(EXIT_FAILURE);
+			}
+			break;
+		case 'i':
+			daemon_host_id = atoi(optarg);
+			break;
+		case 'F':
+			daemon_host_id_file = strdup(optarg);
+			break;
+		case 'o':
+			sanlock_io_timeout = atoi(optarg);
+			break;
+		case 'A':
+			adopt_opt = atoi(optarg);
+			break;
+		case 'S':
+			syslog_priority = _syslog_name_to_num(optarg);
+			break;
+		case '?':
+		default:
+			usage(argv[0], stdout);
+			exit(EXIT_FAILURE);
+		}
+	}
+
+	if (!ds.pidfile)
+		ds.pidfile = LVMLOCKD_PIDFILE;
+
+	if (!ds.socket_path)
+		ds.socket_path = LVMLOCKD_SOCKET;
+
+	/* runs daemon_main/main_loop */
+	daemon_start(ds);
+
+	return 0;
+}
diff --git a/daemons/lvmlockd/lvmlockd-dlm.c b/daemons/lvmlockd/lvmlockd-dlm.c
new file mode 100644
index 000000000..c96866b25
--- /dev/null
+++ b/daemons/lvmlockd/lvmlockd-dlm.c
@@ -0,0 +1,662 @@
+/*
+ * Copyright (C) 2014-2015 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ */
+
+#define _XOPEN_SOURCE 500 /* pthread */
+#define _ISOC99_SOURCE
+
+#include "tool.h"
+
+#include "daemon-server.h"
+#include "daemon-log.h"
+#include "xlate.h"
+
+#include "lvmlockd-internal.h"
+#include "lvmlockd-client.h"
+
+/*
+ * Using synchronous _wait dlm apis so do not define _REENTRANT and
+ * link with non-threaded version of library, libdlm_lt.
+ */
+#include "libdlm.h"
+
+#include <pthread.h>
+#include <stddef.h>
+#include <poll.h>
+#include <errno.h>
+#include <endian.h>
+#include <fcntl.h>
+#include <byteswap.h>
+#include <syslog.h>
+#include <dirent.h>
+#include <sys/socket.h>
+
+/* Per-lockspace dlm state, stored in lockspace->lm_data. */
+struct lm_dlm {
+	dlm_lshandle_t *dh;	/* handle from dlm_new_lockspace/dlm_open_lockspace */
+};
+
+/* Per-resource dlm state, stored in resource->lm_data. */
+struct rd_dlm {
+	struct dlm_lksb lksb;	/* dlm lock status block for this resource */
+	struct val_blk *vb;	/* lvb copy (little-endian), GL/VG resources only */
+};
+
+/* Bytes of per-resource lock-manager state the core must allocate for dlm. */
+int lm_data_size_dlm(void)
+{
+	return (int) sizeof(struct rd_dlm);
+}
+
+/*
+ * lock_args format
+ *
+ * vg_lock_args format for dlm is
+ * vg_version_string:undefined:cluster_name
+ *
+ * lv_lock_args are not used for dlm
+ *
+ * version_string is MAJOR.MINOR.PATCH
+ * undefined may contain ":"
+ */
+
+#define VG_LOCK_ARGS_MAJOR 1
+#define VG_LOCK_ARGS_MINOR 0
+#define VG_LOCK_ARGS_PATCH 0
+
+/* Extract the cluster name, the last ":"-separated field of vg_args. */
+static int cluster_name_from_args(char *vg_args, char *clustername)
+{
+	return last_string_from_args(vg_args, clustername);
+}
+
+/*
+ * Verify the version prefix of vg_args is one we understand.
+ * Returns 0 if the major version is supported, negative otherwise.
+ */
+static int check_args_version(char *vg_args)
+{
+	unsigned int major = 0;
+	int rv = version_from_args(vg_args, &major, NULL, NULL);
+
+	if (rv < 0) {
+		log_error("check_args_version %s error %d", vg_args, rv);
+		return rv;
+	}
+
+	if (major > VG_LOCK_ARGS_MAJOR) {
+		log_error("check_args_version %s major %d %d", vg_args, major, VG_LOCK_ARGS_MAJOR);
+		return -1;
+	}
+
+	return 0;
+}
+
+/* This will be set after dlm_controld is started. */
+#define DLM_CLUSTER_NAME_PATH "/sys/kernel/config/dlm/cluster/cluster_name"
+
+/*
+ * Read the dlm cluster name from configfs into clustername and strip the
+ * trailing newline.  Returns 0 on success, a negative value on failure
+ * (the path only exists once dlm_controld is running).
+ * Callers pass a zeroed MAX_ARGS+1 buffer, so the result stays
+ * NUL-terminated even when read() fills MAX_ARGS bytes.
+ */
+static int read_cluster_name(char *clustername)
+{
+	static const char close_error_msg[] = "read_cluster_name: close_error %d";
+	char *n;
+	int fd;
+	int rv;
+
+	if (daemon_test) {
+		sprintf(clustername, "%s", "test");
+		return 0;
+	}
+
+	fd = open(DLM_CLUSTER_NAME_PATH, O_RDONLY);
+	if (fd < 0) {
+		log_debug("read_cluster_name: open error %d, check dlm_controld", fd);
+		return fd;
+	}
+
+	rv = read(fd, clustername, MAX_ARGS);
+	if (rv < 0) {
+		/* Log the read() result, not the (valid) fd. */
+		log_error("read_cluster_name: cluster name read error %d, check dlm_controld", rv);
+		if (close(fd))
+			log_error(close_error_msg, fd);
+		return rv;
+	}
+
+	n = strstr(clustername, "\n");
+	if (n)
+		*n = '\0';
+	if (close(fd))
+		log_error(close_error_msg, fd);
+	return 0;
+}
+
+/*
+ * Build the dlm vg_lock_args string "MAJOR.MINOR.PATCH:clustername"
+ * for a new lockd VG.  Returns 0 on success, -EMANAGER when the cluster
+ * name cannot be read, -EARGS when the result would not fit.
+ * (vg_name and flags are currently unused for dlm.)
+ */
+int lm_init_vg_dlm(char *ls_name, char *vg_name, uint32_t flags, char *vg_args)
+{
+	char clustername[MAX_ARGS+1] = { 0 };
+	char version[MAX_ARGS+1] = { 0 };
+
+	snprintf(version, MAX_ARGS, "%u.%u.%u",
+		 VG_LOCK_ARGS_MAJOR, VG_LOCK_ARGS_MINOR, VG_LOCK_ARGS_PATCH);
+
+	if (read_cluster_name(clustername) < 0)
+		return -EMANAGER;
+
+	if (strlen(clustername) + strlen(version) + 2 > MAX_ARGS) {
+		log_error("init_vg_dlm args too long");
+		return -EARGS;
+	}
+
+	snprintf(vg_args, MAX_ARGS, "%s:%s", version, clustername);
+
+	log_debug("init_vg_dlm done %s vg_args %s", ls_name, vg_args);
+	return 0;
+}
+
+/*
+ * Validate a lockspace's vg_args against the running cluster and allocate
+ * the per-lockspace dlm state.  Returns 0 on success, -EMANAGER if the
+ * cluster name cannot be read, -EARGS on bad/mismatching args, -ENOMEM.
+ */
+int lm_prepare_lockspace_dlm(struct lockspace *ls)
+{
+	char sys_clustername[MAX_ARGS+1] = { 0 };
+	char arg_clustername[MAX_ARGS+1] = { 0 };
+	struct lm_dlm *lmd;
+
+	if (read_cluster_name(sys_clustername) < 0)
+		return -EMANAGER;
+
+	/* The global lockspace has no vg args to validate. */
+	if (ls->vg_args[0]) {
+		if (check_args_version(ls->vg_args) < 0)
+			return -EARGS;
+
+		if (cluster_name_from_args(ls->vg_args, arg_clustername) < 0) {
+			log_error("prepare_lockspace_dlm %s no cluster name from args %s", ls->name, ls->vg_args);
+			return -EARGS;
+		}
+
+		if (strcmp(sys_clustername, arg_clustername)) {
+			log_error("prepare_lockspace_dlm %s mismatching cluster names sys %s arg %s",
+				  ls->name, sys_clustername, arg_clustername);
+			return -EARGS;
+		}
+	}
+
+	if (!(lmd = malloc(sizeof *lmd)))
+		return -ENOMEM;
+
+	ls->lm_data = lmd;
+	return 0;
+}
+
+/*
+ * Join the dlm lockspace: open an existing one when adopting, otherwise
+ * create a new one.  On failure the lm_data allocated by prepare is freed.
+ */
+int lm_add_lockspace_dlm(struct lockspace *ls, int adopt)
+{
+	struct lm_dlm *lmd = (struct lm_dlm *)ls->lm_data;
+
+	if (daemon_test)
+		return 0;
+
+	lmd->dh = adopt ? dlm_open_lockspace(ls->name)
+			: dlm_new_lockspace(ls->name, 0600, DLM_LSFL_NEWEXCL);
+
+	if (!lmd->dh) {
+		log_error("add_lockspace_dlm %s adopt %d error", ls->name, adopt);
+		free(lmd);
+		ls->lm_data = NULL;
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Leave the dlm lockspace and free the per-lockspace state.
+ * Clears gl_running_dlm when this was the global lockspace.
+ * Returns 0 on success or the negative dlm_release_lockspace error
+ * (in which case lm_data is left allocated).
+ */
+int lm_rem_lockspace_dlm(struct lockspace *ls, int free_vg)
+{
+	struct lm_dlm *lmd = (struct lm_dlm *)ls->lm_data;
+	int rv;
+
+	if (daemon_test)
+		goto out;
+
+	/*
+	 * If free_vg is set, it means we are doing vgremove, and we may want
+	 * to tell any other nodes to leave the lockspace.  This is not really
+	 * necessary since there should be no harm in having an unused
+	 * lockspace sitting around.  A new "notification lock" would need to
+	 * be added with a callback to signal this.
+	 */
+
+	rv = dlm_release_lockspace(ls->name, lmd->dh, 1);
+	if (rv < 0) {
+		log_error("rem_lockspace_dlm error %d", rv);
+		return rv;
+	}
+ out:
+	free(lmd);
+	ls->lm_data = NULL;
+
+	if (!strcmp(ls->name, gl_lsname_dlm))
+		gl_running_dlm = 0;
+
+	return 0;
+}
+
+/*
+ * Initialize dlm state for a resource: allocate the lvb buffer for GL/VG
+ * resources, and (when with_lock_nl is set) take the initial NL lock that
+ * later requests convert from.  Returns 0 on success or a negative error.
+ */
+static int lm_add_resource_dlm(struct lockspace *ls, struct resource *r, int with_lock_nl)
+{
+	struct lm_dlm *lmd = (struct lm_dlm *)ls->lm_data;
+	struct rd_dlm *rdd = (struct rd_dlm *)r->lm_data;
+	uint32_t flags = 0;
+	char *buf;
+	int rv;
+
+	if (r->type == LD_RT_GL || r->type == LD_RT_VG) {
+		buf = malloc(sizeof(struct val_blk) + DLM_LVB_LEN);
+		if (!buf)
+			return -ENOMEM;
+		memset(buf, 0, sizeof(struct val_blk) + DLM_LVB_LEN);
+
+		/* One allocation: host-endian vb copy followed by the raw lvb. */
+		rdd->vb = (struct val_blk *)buf;
+		rdd->lksb.sb_lvbptr = buf + sizeof(struct val_blk);
+
+		flags |= LKF_VALBLK;
+	}
+
+	if (!with_lock_nl)
+		goto out;
+
+	/* because this is a new NL lock request */
+	flags |= LKF_EXPEDITE;
+
+	if (daemon_test)
+		goto out;
+
+	rv = dlm_ls_lock_wait(lmd->dh, LKM_NLMODE, &rdd->lksb, flags,
+			      r->name, strlen(r->name),
+			      0, NULL, NULL, NULL);
+	if (rv < 0) {
+		log_error("S %s R %s add_resource_dlm lock error %d", ls->name, r->name, rv);
+		/*
+		 * Callers leave r->lm_init clear on failure and call this
+		 * function again on retry, which would malloc a new lvb
+		 * buffer; free this one now so the retry does not leak it.
+		 */
+		if (rdd->vb) {
+			free(rdd->vb);
+			rdd->vb = NULL;
+			rdd->lksb.sb_lvbptr = NULL;
+		}
+		return rv;
+	}
+ out:
+	return 0;
+}
+
+/*
+ * Drop the dlm lock held on a resource (if any) and free/reset the
+ * per-resource dlm state so the resource can be re-initialized later.
+ * Returns 0, or the negative unlock error (state is reset regardless).
+ */
+int lm_rem_resource_dlm(struct lockspace *ls, struct resource *r)
+{
+	struct lm_dlm *lmd = (struct lm_dlm *)ls->lm_data;
+	struct rd_dlm *rdd = (struct rd_dlm *)r->lm_data;
+	struct dlm_lksb *lksb;
+	int rv = 0;
+
+	if (daemon_test)
+		goto out;
+
+	lksb = &rdd->lksb;
+
+	/* No lock id means no lock was ever granted; nothing to unlock. */
+	if (!lksb->sb_lkid)
+		goto out;
+
+	rv = dlm_ls_unlock_wait(lmd->dh, lksb->sb_lkid, 0, lksb);
+	if (rv < 0) {
+		log_error("S %s R %s rem_resource_dlm unlock error %d", ls->name, r->name, rv);
+	}
+ out:
+	if (rdd->vb)
+		free(rdd->vb);
+
+	/* Zeroing rdd also clears lksb.sb_lvbptr, which pointed into vb's buffer. */
+	memset(rdd, 0, sizeof(struct rd_dlm));
+	r->lm_init = 0;
+	return rv;
+}
+
+/* Map an lvmlockd lock mode to the dlm mode, or -1 if unsupported. */
+static int to_dlm_mode(int ld_mode)
+{
+	if (ld_mode == LD_LK_EX)
+		return LKM_EXMODE;
+	if (ld_mode == LD_LK_SH)
+		return LKM_PRMODE;
+	return -1;
+}
+
+/*
+ * Try to adopt an orphaned dlm lock left by a previous lvmlockd.
+ * Returns 0 on success, -EUCLEAN when an orphan exists in a different
+ * mode (caller retries with the other mode), -ENOENT when no orphan
+ * exists, or another negative error; on failure the resource's dlm
+ * state is torn down.  r_version is always set to 0 (the lvb is not
+ * read during adoption — see FIXME below).
+ */
+static int lm_adopt_dlm(struct lockspace *ls, struct resource *r, int ld_mode,
+			uint32_t *r_version)
+{
+	struct lm_dlm *lmd = (struct lm_dlm *)ls->lm_data;
+	struct rd_dlm *rdd = (struct rd_dlm *)r->lm_data;
+	struct dlm_lksb *lksb;
+	uint32_t flags = 0;
+	int mode;
+	int rv;
+
+	*r_version = 0;
+
+	if (!r->lm_init) {
+		/* No NL base lock here: we go straight for the orphan. */
+		rv = lm_add_resource_dlm(ls, r, 0);
+		if (rv < 0)
+			return rv;
+		r->lm_init = 1;
+	}
+
+	lksb = &rdd->lksb;
+
+	flags |= LKF_PERSISTENT;
+	flags |= LKF_ORPHAN;
+
+	if (rdd->vb)
+		flags |= LKF_VALBLK;
+
+	mode = to_dlm_mode(ld_mode);
+	if (mode < 0) {
+		log_error("adopt_dlm invalid mode %d", ld_mode);
+		rv = -EINVAL;
+		goto fail;
+	}
+
+	log_debug("S %s R %s adopt_dlm", ls->name, r->name);
+
+	if (daemon_test)
+		return 0;
+
+	/*
+	 * dlm returns 0 for success, -EAGAIN if an orphan is
+	 * found with another mode, and -ENOENT if no orphan.
+	 *
+	 * cast/bast/param are (void *)1 because the kernel
+	 * returns errors if some are null.
+	 */
+
+	rv = dlm_ls_lockx(lmd->dh, mode, lksb, flags,
+			  r->name, strlen(r->name), 0,
+			  (void *)1, (void *)1, (void *)1,
+			  NULL, NULL);
+
+	if (rv == -EAGAIN) {
+		/* Orphan exists with a different mode; translate to the
+		   -EUCLEAN the adopt_locks retry logic keys on. */
+		log_debug("S %s R %s adopt_dlm adopt mode %d try other mode",
+			  ls->name, r->name, ld_mode);
+		rv = -EUCLEAN;
+		goto fail;
+	}
+	if (rv < 0) {
+		log_debug("S %s R %s adopt_dlm mode %d flags %x error %d errno %d",
+			  ls->name, r->name, mode, flags, rv, errno);
+		goto fail;
+	}
+
+	/*
+	 * FIXME: For GL/VG locks we probably want to read the lvb,
+	 * especially if adopting an ex lock, because when we
+	 * release this adopted ex lock we may want to write new
+	 * lvb values based on the current lvb values (at lease
+	 * in the GL case where we increment the current values.)
+	 *
+	 * It should be possible to read the lvb by requesting
+	 * this lock in the same mode it's already in.
+	 */
+
+	return rv;
+
+ fail:
+	lm_rem_resource_dlm(ls, r);
+	return rv;
+}
+
+/*
+ * Use PERSISTENT so that if lvmlockd exits while holding locks,
+ * the locks will remain orphaned in the dlm, still protecting what
+ * they were acquired to protect.
+ */
+
+/*
+ * Acquire (convert to) ld_mode on a resource.  Returns 0 with *r_version
+ * set from the lvb (0 if the lvb is invalid or unused), -EAGAIN when the
+ * lock is busy (NOQUEUE), or another negative error.  When adopt is set,
+ * delegates to lm_adopt_dlm instead of the NL-then-convert path.
+ */
+int lm_lock_dlm(struct lockspace *ls, struct resource *r, int ld_mode,
+		uint32_t *r_version, int adopt)
+{
+	struct lm_dlm *lmd = (struct lm_dlm *)ls->lm_data;
+	struct rd_dlm *rdd = (struct rd_dlm *)r->lm_data;
+	struct dlm_lksb *lksb;
+	struct val_blk vb;
+	uint32_t flags = 0;
+	uint16_t vb_version;
+	int mode;
+	int rv;
+
+	if (adopt) {
+		/* When adopting, we don't follow the normal method
+		   of acquiring a NL lock then converting it to the
+		   desired mode. */
+		return lm_adopt_dlm(ls, r, ld_mode, r_version);
+	}
+
+	if (!r->lm_init) {
+		/* First use of this resource: allocate lvb state and take
+		   the NL base lock that the conversion below starts from. */
+		rv = lm_add_resource_dlm(ls, r, 1);
+		if (rv < 0)
+			return rv;
+		r->lm_init = 1;
+	}
+
+	lksb = &rdd->lksb;
+
+	flags |= LKF_CONVERT;
+	flags |= LKF_NOQUEUE;
+	flags |= LKF_PERSISTENT;
+
+	if (rdd->vb)
+		flags |= LKF_VALBLK;
+
+	mode = to_dlm_mode(ld_mode);
+	if (mode < 0) {
+		log_error("lock_dlm invalid mode %d", ld_mode);
+		return -EINVAL;
+	}
+
+	log_debug("S %s R %s lock_dlm", ls->name, r->name);
+
+	if (daemon_test) {
+		*r_version = 0;
+		return 0;
+	}
+
+	rv = dlm_ls_lock_wait(lmd->dh, mode, lksb, flags,
+			      r->name, strlen(r->name),
+			      0, NULL, NULL, NULL);
+	if (rv == -EAGAIN) {
+		log_error("S %s R %s lock_dlm mode %d rv EAGAIN", ls->name, r->name, mode);
+		return -EAGAIN;
+	}
+	if (rv < 0) {
+		log_error("S %s R %s lock_dlm error %d", ls->name, r->name, rv);
+		return rv;
+	}
+
+	if (rdd->vb) {
+		if (lksb->sb_flags & DLM_SBF_VALNOTVALID) {
+			/* The lvb content is undefined (e.g. all prior holders
+			   died); treat the version as unknown/zero. */
+			log_debug("S %s R %s lock_dlm VALNOTVALID", ls->name, r->name);
+			memset(rdd->vb, 0, sizeof(struct val_blk));
+			*r_version = 0;
+			goto out;
+		}
+
+		memcpy(&vb, lksb->sb_lvbptr, sizeof(struct val_blk));
+		vb_version = le16_to_cpu(vb.version);
+
+		/* A newer incompatible vb layout: stop using the lvb entirely. */
+		if (vb_version && ((vb_version & 0xFF00) > (VAL_BLK_VERSION & 0xFF00))) {
+			log_error("S %s R %s lock_dlm ignore vb_version %x",
+				  ls->name, r->name, vb_version);
+			*r_version = 0;
+			free(rdd->vb);
+			rdd->vb = NULL;
+			lksb->sb_lvbptr = NULL;
+			goto out;
+		}
+
+		*r_version = le32_to_cpu(vb.r_version);
+		memcpy(rdd->vb, &vb, sizeof(vb)); /* rdd->vb saved as le */
+
+		log_debug("S %s R %s lock_dlm get r_version %u",
+			  ls->name, r->name, *r_version);
+	}
+out:
+	return 0;
+}
+
+/*
+ * Convert an existing lock to ld_mode, writing r_version into the lvb
+ * when demoting from EX.  Returns 0 on success, -EAGAIN when busy,
+ * -EINVAL for an unsupported mode, or another negative dlm error.
+ */
+int lm_convert_dlm(struct lockspace *ls, struct resource *r,
+		   int ld_mode, uint32_t r_version)
+{
+	struct lm_dlm *lmd = (struct lm_dlm *)ls->lm_data;
+	struct rd_dlm *rdd = (struct rd_dlm *)r->lm_data;
+	struct dlm_lksb *lksb = &rdd->lksb;
+	uint32_t flags = 0;
+	int mode;
+	int rv;
+
+	log_debug("S %s R %s convert_dlm", ls->name, r->name);
+
+	flags |= LKF_CONVERT;
+	flags |= LKF_NOQUEUE;
+	flags |= LKF_PERSISTENT;
+
+	if (rdd->vb && r_version && (r->mode == LD_LK_EX)) {
+		if (!rdd->vb->version) {
+			/* first time vb has been written */
+			rdd->vb->version = cpu_to_le16(VAL_BLK_VERSION);
+		}
+		rdd->vb->r_version = cpu_to_le32(r_version);
+		memcpy(lksb->sb_lvbptr, rdd->vb, sizeof(struct val_blk));
+
+		log_debug("S %s R %s convert_dlm set r_version %u",
+			  ls->name, r->name, r_version);
+
+		flags |= LKF_VALBLK;
+	}
+
+	/*
+	 * Check the mode like lm_lock_dlm does; previously -1 from
+	 * to_dlm_mode was stored in a uint32_t and passed to the dlm
+	 * unchecked as a huge mode value.
+	 */
+	mode = to_dlm_mode(ld_mode);
+	if (mode < 0) {
+		log_error("convert_dlm invalid mode %d", ld_mode);
+		return -EINVAL;
+	}
+
+	if (daemon_test)
+		return 0;
+
+	rv = dlm_ls_lock_wait(lmd->dh, mode, lksb, flags,
+			      r->name, strlen(r->name),
+			      0, NULL, NULL, NULL);
+	if (rv == -EAGAIN) {
+		/* FIXME: When does this happen?  Should something different be done? */
+		log_error("S %s R %s convert_dlm mode %d rv EAGAIN", ls->name, r->name, mode);
+		return -EAGAIN;
+	}
+	if (rv < 0) {
+		log_error("S %s R %s convert_dlm error %d", ls->name, r->name, rv);
+	}
+	return rv;
+}
+
+/*
+ * Release a lock by converting it back to NL, writing r_version into the
+ * lvb when releasing an EX lock.  PERSISTENT is deliberately not set:
+ * an orphaned NL lock would protect nothing.
+ */
+int lm_unlock_dlm(struct lockspace *ls, struct resource *r,
+		  uint32_t r_version, uint32_t lmuf_flags)
+{
+	struct lm_dlm *lmd = (struct lm_dlm *)ls->lm_data;
+	struct rd_dlm *rdd = (struct rd_dlm *)r->lm_data;
+	struct dlm_lksb *lksb = &rdd->lksb;
+	uint32_t flags = LKF_CONVERT;
+	int rv;
+
+	log_debug("S %s R %s unlock_dlm r_version %u flags %x",
+		  ls->name, r->name, r_version, lmuf_flags);
+
+	if (rdd->vb && r_version && (r->mode == LD_LK_EX)) {
+		if (!rdd->vb->version) {
+			/* first time vb has been written */
+			rdd->vb->version = cpu_to_le16(VAL_BLK_VERSION);
+		}
+		rdd->vb->r_version = cpu_to_le32(r_version);
+		memcpy(lksb->sb_lvbptr, rdd->vb, sizeof(struct val_blk));
+
+		log_debug("S %s R %s unlock_dlm set r_version %u",
+			  ls->name, r->name, r_version);
+
+		flags |= LKF_VALBLK;
+	}
+
+	if (daemon_test)
+		return 0;
+
+	rv = dlm_ls_lock_wait(lmd->dh, LKM_NLMODE, lksb, flags,
+			      r->name, strlen(r->name),
+			      0, NULL, NULL, NULL);
+	if (rv < 0)
+		log_error("S %s R %s unlock_dlm error %d", ls->name, r->name, rv);
+
+	return rv;
+}
+
+/*
+ * This list could be read from dlm_controld via libdlmcontrol,
+ * but it's simpler to get it from sysfs.
+ */
+
+#define DLM_LOCKSPACES_PATH "/sys/kernel/config/dlm/cluster/spaces"
+
+/*
+ * Scan the dlm configfs directory for existing lvm_* lockspaces and add a
+ * struct lockspace for each to ls_rejoin (used when adopting from a
+ * previous lvmlockd).  Returns 0, -ECONNREFUSED when the directory cannot
+ * be opened (dlm_controld not running), or -ENOMEM.
+ */
+int lm_get_lockspaces_dlm(struct list_head *ls_rejoin)
+{
+	static const char closedir_err_msg[] = "lm_get_lockspace_dlm: closedir failed";
+	struct lockspace *ls;
+	struct dirent *de;
+	DIR *ls_dir;
+
+	if (!(ls_dir = opendir(DLM_LOCKSPACES_PATH)))
+		return -ECONNREFUSED;
+
+	while ((de = readdir(ls_dir))) {
+		/* skip "." and ".." (and other hidden entries) */
+		if (de->d_name[0] == '.')
+			continue;
+
+		/* only lockspaces created by lvmlockd are of interest */
+		if (strncmp(de->d_name, LVM_LS_PREFIX, strlen(LVM_LS_PREFIX)))
+			continue;
+
+		if (!(ls = alloc_lockspace())) {
+			if (closedir(ls_dir))
+				log_error(closedir_err_msg);
+			return -ENOMEM;
+		}
+
+		ls->lm_type = LD_LM_DLM;
+		/* NOTE(review): strncpy with MAX_NAME; assumes alloc_lockspace zeroes the struct so names stay terminated — confirm */
+		strncpy(ls->name, de->d_name, MAX_NAME);
+		/* vg name is the lockspace name with the "lvm_" prefix removed */
+		strncpy(ls->vg_name, ls->name + strlen(LVM_LS_PREFIX), MAX_NAME);
+		list_add_tail(&ls->list, ls_rejoin);
+	}
+
+	if (closedir(ls_dir))
+		log_error(closedir_err_msg);
+	return 0;
+}
+
+/* Return 1 if dlm_controld appears to be running (cluster name readable). */
+int lm_is_running_dlm(void)
+{
+	char sys_clustername[MAX_ARGS+1] = { 0 };
+
+	return (read_cluster_name(sys_clustername) < 0) ? 0 : 1;
+}
diff --git a/daemons/lvmlockd/lvmlockd-internal.h b/daemons/lvmlockd/lvmlockd-internal.h
new file mode 100644
index 000000000..8e0582b7c
--- /dev/null
+++ b/daemons/lvmlockd/lvmlockd-internal.h
@@ -0,0 +1,578 @@
+/*
+ * Copyright (C) 2014-2015 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ */
+
+#ifndef _LVM_LVMLOCKD_INTERNAL_H
+#define _LVM_LVMLOCKD_INTERNAL_H
+
+/* Maximum lengths of vg/lv/lockspace names and of lock_args strings;
+ * holding buffers throughout are declared [MAX_NAME+1]/[MAX_ARGS+1]
+ * so a terminating NUL always fits. */
+#define MAX_NAME 64
+#define MAX_ARGS 64
+
+/* Fixed resource (lease) names used within a lockspace. */
+#define R_NAME_GL_DISABLED "_GLLK_disabled"
+#define R_NAME_GL "GLLK"
+#define R_NAME_VG "VGLK"
+#define S_NAME_GL_DLM "lvm_global"
+#define LVM_LS_PREFIX "lvm_" /* ls name is prefix + vg_name */
+/* global lockspace name for sanlock is a vg name */
+
+/* lock manager types */
+enum {
+	LD_LM_NONE = 0,
+	LD_LM_UNUSED = 1, /* place holder so values match lib/locking/lvmlockd.h */
+	LD_LM_DLM = 2,
+	LD_LM_SANLOCK = 3,
+};
+
+/* operation types */
+enum {
+	LD_OP_HELLO = 1,
+	LD_OP_QUIT,
+	LD_OP_INIT,
+	LD_OP_FREE,
+	LD_OP_START,
+	LD_OP_STOP,
+	LD_OP_LOCK,
+	LD_OP_UPDATE,
+	LD_OP_CLOSE,
+	LD_OP_ENABLE,
+	LD_OP_DISABLE,
+	LD_OP_START_WAIT,
+	LD_OP_STOP_ALL,
+	LD_OP_DUMP_INFO,
+	LD_OP_DUMP_LOG,
+	LD_OP_RENAME_BEFORE,
+	LD_OP_RENAME_FINAL,
+	LD_OP_RUNNING_LM,
+	LD_OP_FIND_FREE_LOCK,
+	LD_OP_FORGET_VG_NAME,
+	LD_OP_KILL_VG,
+	LD_OP_DROP_VG,
+};
+
+/* resource types */
+enum {
+	LD_RT_GL = 1,
+	LD_RT_VG,
+	LD_RT_LV,
+};
+
+/* lock modes, more restrictive must be larger value */
+enum {
+	LD_LK_IV = -1,
+	LD_LK_UN = 0,
+	LD_LK_NL = 1, /* null lock; dlm backend maps this to LKM_NLMODE */
+	LD_LK_SH = 2, /* shared; counted per-resource in resource.sh_count */
+	LD_LK_EX = 3,
+};
+
+/* Minimal intrusive doubly-linked list node (kernel style); see the
+ * inline list helpers further down in this header. */
+struct list_head {
+	struct list_head *next, *prev;
+};
+
+/* State for one connected client of the daemon. */
+struct client {
+	struct list_head list;
+	pthread_mutex_t mutex;
+	int pid;
+	int fd;
+	int pi;
+	uint32_t id;
+	unsigned int recv : 1;
+	unsigned int dead : 1;
+	unsigned int poll_ignore : 1;
+	unsigned int lock_ops : 1;
+	char name[MAX_NAME+1];
+};
+
+/* Action flags, stored in action.flags (LD_AF_). */
+#define LD_AF_PERSISTENT           0x00000001
+#define LD_AF_UNUSED               0x00000002 /* use me */
+#define LD_AF_UNLOCK_CANCEL        0x00000004
+#define LD_AF_NEXT_VERSION         0x00000008
+#define LD_AF_WAIT                 0x00000010
+#define LD_AF_FORCE                0x00000020
+#define LD_AF_EX_DISABLE           0x00000040
+#define LD_AF_ENABLE               0x00000080
+#define LD_AF_DISABLE              0x00000100
+#define LD_AF_SEARCH_LS            0x00000200
+#define LD_AF_WAIT_STARTING        0x00001000
+#define LD_AF_DUP_GL_LS            0x00002000
+#define LD_AF_INACTIVE_LS          0x00004000
+#define LD_AF_ADD_LS_ERROR         0x00008000
+#define LD_AF_ADOPT                0x00010000
+#define LD_AF_WARN_GL_REMOVED      0x00020000
+
+/*
+ * Number of times to repeat a lock request after
+ * a lock conflict (-EAGAIN) if unspecified in the
+ * request.
+ */
+#define DEFAULT_MAX_RETRIES 4
+
+/* One queued operation (op on a resource of type rt in mode mode),
+ * originating from a client or from the daemon itself. */
+struct action {
+	struct list_head list;
+	uint32_t client_id;
+	uint32_t flags;			/* LD_AF_ */
+	uint32_t version;
+	uint64_t host_id;
+	int8_t op;			/* operation type LD_OP_ */
+	int8_t rt;			/* resource type LD_RT_ */
+	int8_t mode;			/* lock mode LD_LK_ */
+	int8_t lm_type;			/* lock manager: LM_DLM, LM_SANLOCK */
+	int retries;
+	int max_retries;
+	int result;
+	int lm_rv;			/* return value from lm_ function */
+	char vg_uuid[64];
+	char vg_name[MAX_NAME+1];
+	char lv_name[MAX_NAME+1];
+	char lv_uuid[MAX_NAME+1];
+	char vg_args[MAX_ARGS+1];
+	char lv_args[MAX_ARGS+1];
+	char vg_sysid[MAX_NAME+1];
+};
+
+/* One lockable entity (the gl, a vg lock, or an lv lock) and the
+ * locks/actions currently associated with it. */
+struct resource {
+	struct list_head list;		/* lockspace.resources */
+	char name[MAX_NAME+1];		/* vg name or lv name */
+	int8_t type;			/* resource type LD_RT_ */
+	int8_t mode;
+	unsigned int sh_count;		/* number of sh locks on locks list */
+	uint32_t version;
+	unsigned int lm_init : 1;	/* lm_data is initialized */
+	unsigned int adopt : 1;		/* temp flag in remove_inactive_lvs */
+	unsigned int version_zero_valid : 1;
+	struct list_head locks;
+	struct list_head actions;
+	struct val_blk *vb;
+	char lv_args[MAX_ARGS+1];
+	/* NOTE(review): [0]-length trailing array (pre-C99 idiom for a
+	 * flexible array member); the allocation presumably reserves
+	 * lm_data_size_*() extra bytes — confirm at the alloc site. */
+	char lm_data[0];		/* lock manager specific data */
+};
+
+#define LD_LF_PERSISTENT 0x00000001
+
+/* One granted lock on a resource. */
+struct lock {
+	struct list_head list;		/* resource.locks */
+	int8_t mode;			/* lock mode LD_LK_ */
+	uint32_t version;
+	uint32_t flags;			/* LD_LF_ */
+	uint32_t client_id;		/* may be 0 for persistent or internal locks */
+};
+
+/* One started vg (or the global lockspace); owns a worker thread that
+ * makes the synchronous lock-manager calls for its resources. */
+struct lockspace {
+	struct list_head list;		/* lockspaces */
+	char name[MAX_NAME+1];
+	char vg_name[MAX_NAME+1];
+	char vg_uuid[64];
+	char vg_args[MAX_ARGS+1];	/* lock manager specific args */
+	char vg_sysid[MAX_NAME+1];
+	int8_t lm_type;			/* lock manager: LM_DLM, LM_SANLOCK */
+	void *lm_data;
+	uint64_t host_id;
+	uint64_t free_lock_offset;	/* start search for free lock here */
+
+	uint32_t start_client_id;	/* client_id that started the lockspace */
+	pthread_t thread;		/* makes synchronous lock requests */
+	pthread_cond_t cond;
+	pthread_mutex_t mutex;
+	unsigned int create_fail : 1;
+	unsigned int create_done : 1;
+	unsigned int thread_work : 1;
+	unsigned int thread_stop : 1;
+	unsigned int thread_done : 1;
+	unsigned int sanlock_gl_enabled: 1;
+	unsigned int sanlock_gl_dup: 1;
+	unsigned int free_vg: 1;
+	unsigned int kill_vg: 1;
+	unsigned int drop_vg: 1;
+
+	struct list_head actions;	/* new client actions */
+	struct list_head resources;	/* resource/lock state for gl/vg/lv */
+};
+
+#define VAL_BLK_VERSION 0x0101
+
+/* Version/metadata exchanged through the lock manager's value block
+ * (e.g. the dlm LVB); fields are stored little-endian — see the
+ * cpu_to_le16/cpu_to_le32 conversions at the lm_ write sites. */
+struct val_blk {
+	uint16_t version;
+	uint16_t flags;
+	uint32_t r_version;
+};
+
+/* lm_unlock flags */
+#define LMUF_FREE_VG 0x00000001
+
+/* Recover the address of the containing struct from a pointer to one
+ * of its members (Linux-kernel idiom; uses GCC typeof/statement expr). */
+#define container_of(ptr, type, member) ({                      \
+	const typeof( ((type *)0)->member ) *__mptr = (ptr);    \
+	(type *)( (char *)__mptr - offsetof(type,member) );})
+
+/*
+ * Minimal copy of the Linux kernel's intrusive circular list.
+ * An empty list is a head whose next/prev point at itself.
+ */
+
+static inline void INIT_LIST_HEAD(struct list_head *list)
+{
+	list->next = list;
+	list->prev = list;
+}
+
+/* Splice 'new' between two known-adjacent nodes. */
+static inline void __list_add(struct list_head *new,
+                              struct list_head *prev,
+                              struct list_head *next)
+{
+	next->prev = new;
+	new->next = next;
+	new->prev = prev;
+	prev->next = new;
+}
+
+/* Unlink whatever sits between prev and next. */
+static inline void __list_del(struct list_head *prev, struct list_head *next)
+{
+	next->prev = prev;
+	prev->next = next;
+}
+
+/* Insert at the front (after head). */
+static inline void list_add(struct list_head *new, struct list_head *head)
+{
+	__list_add(new, head, head->next);
+}
+
+/* Insert at the back (before head). */
+static inline void list_add_tail(struct list_head *new, struct list_head *head)
+{
+	__list_add(new, head->prev, head);
+}
+
+/* Remove entry from its list; entry's own pointers are left stale. */
+static inline void list_del(struct list_head *entry)
+{
+	__list_del(entry->prev, entry->next);
+}
+
+static inline int list_empty(const struct list_head *head)
+{
+	return head->next == head;
+}
+
+#define list_entry(ptr, type, member) \
+	container_of(ptr, type, member)
+
+#define list_first_entry(ptr, type, member) \
+	list_entry((ptr)->next, type, member)
+
+#define list_for_each_entry(pos, head, member) \
+	for (pos = list_entry((head)->next, typeof(*pos), member); \
+	     &pos->member != (head); \
+	     pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/* Safe variant: 'n' holds the next entry so 'pos' may be deleted. */
+#define list_for_each_entry_safe(pos, n, head, member) \
+	for (pos = list_entry((head)->next, typeof(*pos), member), \
+	     n = list_entry(pos->member.next, typeof(*pos), member); \
+	     &pos->member != (head); \
+	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+
+/* to improve readability */
+#define WAIT     1
+#define NO_WAIT  0
+#define FORCE    1
+#define NO_FORCE 0
+
+/*
+ * global variables
+ */
+
+/*
+ * EXTERN/INIT let this header both declare and define the globals
+ * below: every includer gets 'extern' declarations, except the one
+ * translation unit that defines EXTERN before including this header,
+ * which gets the actual definitions (with INIT() initializers).
+ */
+#ifndef EXTERN
+#define EXTERN extern
+#define INIT(X)
+#else
+#undef EXTERN
+#define EXTERN
+#define INIT(X) =X
+#endif
+
+/*
+ * gl_type_static and gl_use_ are set by command line or config file
+ * to specify whether the global lock comes from dlm or sanlock.
+ * Without a static setting, lvmlockd will figure out where the
+ * global lock should be (but it could get mixed up in cases where
+ * both sanlock and dlm vgs exist.)
+ *
+ * gl_use_dlm means that the gl should come from lockspace gl_lsname_dlm
+ * gl_use_sanlock means that the gl should come from lockspace gl_lsname_sanlock
+ *
+ * gl_use_dlm has precedence over gl_use_sanlock, so if a node sees both
+ * dlm and sanlock vgs, it will use the dlm gl.
+ *
+ * gl_use_ is set when the first evidence of that lm_type is seen
+ * in any command.
+ *
+ * gl_lsname_sanlock is set when the first vg is seen in which an
+ * enabled gl exists, or when init_vg creates a vg with gl enabled,
+ * or when enable_gl is used.
+ *
+ * gl_lsname_sanlock is cleared when free_vg deletes a vg with gl enabled
+ * or when disable_gl matches.
+ */
+
+/* See the comment above for how the gl_ settings interact. */
+EXTERN int gl_running_dlm;
+EXTERN int gl_type_static;
+EXTERN int gl_use_dlm;
+EXTERN int gl_use_sanlock;
+EXTERN pthread_mutex_t gl_type_mutex;
+
+/* Name of the lockspace currently holding the global lock for each
+ * lock manager type; empty string when unknown/unset. */
+EXTERN char gl_lsname_dlm[MAX_NAME+1];
+EXTERN char gl_lsname_sanlock[MAX_NAME+1];
+
+EXTERN int daemon_test; /* run as much as possible without a live lock manager */
+EXTERN int daemon_debug;
+EXTERN int daemon_host_id;
+EXTERN const char *daemon_host_id_file;
+EXTERN int sanlock_io_timeout;
+
+/*
+ * This flag is set to 1 if we see multiple vgs with the global
+ * lock enabled.  While this is set, we return a special flag
+ * with the vg lock result indicating to the lvm command that
+ * there is a duplicate gl in the vg which should be resolved.
+ * While this is set, find_lockspace_name has the side job of
+ * counting the number of lockspaces with enabled gl's so that
+ * this can be set back to zero when the duplicates are disabled.
+ */
+EXTERN int sanlock_gl_dup;
+
+/* Shared logging helpers implemented by the daemon. */
+void log_level(int level, const char *fmt, ...)  __attribute__((format(printf, 2, 3)));
+#define log_debug(fmt, args...) log_level(LOG_DEBUG, fmt, ##args)
+#define log_error(fmt, args...) log_level(LOG_ERR, fmt, ##args)
+#define log_warn(fmt, args...) log_level(LOG_WARNING, fmt, ##args)
+
+struct lockspace *alloc_lockspace(void);
+int lockspaces_empty(void);
+int last_string_from_args(char *args_in, char *last);
+int version_from_args(char *args, unsigned int *major, unsigned int *minor, unsigned int *patch);
+
+
+#ifdef LOCKDDLM_SUPPORT
+
+/* dlm backend entry points (implemented in the dlm lock-manager file). */
+int lm_init_vg_dlm(char *ls_name, char *vg_name, uint32_t flags, char *vg_args);
+int lm_prepare_lockspace_dlm(struct lockspace *ls);
+int lm_add_lockspace_dlm(struct lockspace *ls, int adopt);
+int lm_rem_lockspace_dlm(struct lockspace *ls, int free_vg);
+int lm_lock_dlm(struct lockspace *ls, struct resource *r, int ld_mode,
+		uint32_t *r_version, int adopt);
+int lm_convert_dlm(struct lockspace *ls, struct resource *r,
+		   int ld_mode, uint32_t r_version);
+int lm_unlock_dlm(struct lockspace *ls, struct resource *r,
+		  uint32_t r_version, uint32_t lmu_flags);
+int lm_rem_resource_dlm(struct lockspace *ls, struct resource *r);
+int lm_get_lockspaces_dlm(struct list_head *ls_rejoin);
+int lm_data_size_dlm(void);
+int lm_is_running_dlm(void);
+
+static inline int lm_support_dlm(void)
+{
+	return 1;
+}
+
+#else
+
+/* Stubs used when built without dlm support: operations fail with -1,
+ * and the dlm lock manager reports as not running / not supported. */
+
+static inline int lm_init_vg_dlm(char *ls_name, char *vg_name, uint32_t flags, char *vg_args)
+{
+	return -1;
+}
+
+static inline int lm_prepare_lockspace_dlm(struct lockspace *ls)
+{
+	return -1;
+}
+
+static inline int lm_add_lockspace_dlm(struct lockspace *ls, int adopt)
+{
+	return -1;
+}
+
+static inline int lm_rem_lockspace_dlm(struct lockspace *ls, int free_vg)
+{
+	return -1;
+}
+
+static inline int lm_lock_dlm(struct lockspace *ls, struct resource *r, int ld_mode,
+			      uint32_t *r_version, int adopt)
+{
+	return -1;
+}
+
+static inline int lm_convert_dlm(struct lockspace *ls, struct resource *r,
+				 int ld_mode, uint32_t r_version)
+{
+	return -1;
+}
+
+static inline int lm_unlock_dlm(struct lockspace *ls, struct resource *r,
+				uint32_t r_version, uint32_t lmu_flags)
+{
+	return -1;
+}
+
+static inline int lm_rem_resource_dlm(struct lockspace *ls, struct resource *r)
+{
+	return -1;
+}
+
+static inline int lm_get_lockspaces_dlm(struct list_head *ls_rejoin)
+{
+	return -1;
+}
+
+static inline int lm_data_size_dlm(void)
+{
+	return -1;
+}
+
+static inline int lm_is_running_dlm(void)
+{
+	return 0;
+}
+
+static inline int lm_support_dlm(void)
+{
+	return 0;
+}
+
+#endif /* dlm support */
+
+#ifdef LOCKDSANLOCK_SUPPORT
+
+/* sanlock backend entry points (implemented in the sanlock lock-manager file). */
+int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_args);
+int lm_init_lv_sanlock(char *ls_name, char *vg_name, char *lv_name, char *vg_args, char *lv_args, uint64_t free_offset);
+int lm_free_lv_sanlock(struct lockspace *ls, struct resource *r);
+int lm_rename_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_args);
+int lm_prepare_lockspace_sanlock(struct lockspace *ls);
+int lm_add_lockspace_sanlock(struct lockspace *ls, int adopt);
+int lm_rem_lockspace_sanlock(struct lockspace *ls, int free_vg);
+int lm_lock_sanlock(struct lockspace *ls, struct resource *r, int ld_mode,
+		    uint32_t *r_version, int *retry, int adopt);
+int lm_convert_sanlock(struct lockspace *ls, struct resource *r,
+		       int ld_mode, uint32_t r_version);
+int lm_unlock_sanlock(struct lockspace *ls, struct resource *r,
+		      uint32_t r_version, uint32_t lmu_flags);
+int lm_able_gl_sanlock(struct lockspace *ls, int enable);
+int lm_ex_disable_gl_sanlock(struct lockspace *ls);
+int lm_hosts_sanlock(struct lockspace *ls, int notify);
+int lm_rem_resource_sanlock(struct lockspace *ls, struct resource *r);
+int lm_gl_is_enabled(struct lockspace *ls);
+int lm_get_lockspaces_sanlock(struct list_head *ls_rejoin);
+int lm_data_size_sanlock(void);
+int lm_is_running_sanlock(void);
+int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t *free_offset);
+
+static inline int lm_support_sanlock(void)
+{
+	return 1;
+}
+
+#else
+
+/* Stubs used when built without sanlock support: operations fail with
+ * -1, and the sanlock lock manager reports as not running / not
+ * supported. */
+
+static inline int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_args)
+{
+	return -1;
+}
+
+static inline int lm_init_lv_sanlock(char *ls_name, char *vg_name, char *lv_name, char *vg_args, char *lv_args, uint64_t free_offset)
+{
+	return -1;
+}
+
+static inline int lm_free_lv_sanlock(struct lockspace *ls, struct resource *r)
+{
+	return -1;
+}
+
+static inline int lm_rename_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_args)
+{
+	return -1;
+}
+
+static inline int lm_prepare_lockspace_sanlock(struct lockspace *ls)
+{
+	return -1;
+}
+
+static inline int lm_add_lockspace_sanlock(struct lockspace *ls, int adopt)
+{
+	return -1;
+}
+
+static inline int lm_rem_lockspace_sanlock(struct lockspace *ls, int free_vg)
+{
+	return -1;
+}
+
+static inline int lm_lock_sanlock(struct lockspace *ls, struct resource *r, int ld_mode,
+				  uint32_t *r_version, int *retry, int adopt)
+{
+	return -1;
+}
+
+static inline int lm_convert_sanlock(struct lockspace *ls, struct resource *r,
+				     int ld_mode, uint32_t r_version)
+{
+	return -1;
+}
+
+static inline int lm_unlock_sanlock(struct lockspace *ls, struct resource *r,
+				    uint32_t r_version, uint32_t lmu_flags)
+{
+	return -1;
+}
+
+static inline int lm_able_gl_sanlock(struct lockspace *ls, int enable)
+{
+	return -1;
+}
+
+static inline int lm_ex_disable_gl_sanlock(struct lockspace *ls)
+{
+	return -1;
+}
+
+static inline int lm_hosts_sanlock(struct lockspace *ls, int notify)
+{
+	return -1;
+}
+
+static inline int lm_rem_resource_sanlock(struct lockspace *ls, struct resource *r)
+{
+	return -1;
+}
+
+static inline int lm_gl_is_enabled(struct lockspace *ls)
+{
+	return -1;
+}
+
+static inline int lm_get_lockspaces_sanlock(struct list_head *ls_rejoin)
+{
+	return -1;
+}
+
+static inline int lm_data_size_sanlock(void)
+{
+	return -1;
+}
+
+static inline int lm_is_running_sanlock(void)
+{
+	return 0;
+}
+
+static inline int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t *free_offset)
+{
+	return -1;
+}
+
+static inline int lm_support_sanlock(void)
+{
+	return 0;
+}
+
+#endif /* sanlock support */
+
+#endif /* _LVM_LVMLOCKD_INTERNAL_H */
diff --git a/daemons/lvmlockd/lvmlockd-sanlock.c b/daemons/lvmlockd/lvmlockd-sanlock.c
new file mode 100644
index 000000000..4317aad40
--- /dev/null
+++ b/daemons/lvmlockd/lvmlockd-sanlock.c
@@ -0,0 +1,1800 @@
+/*
+ * Copyright (C) 2014-2015 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ */
+
+#define _XOPEN_SOURCE 500 /* pthread */
+#define _ISOC99_SOURCE
+
+#include "tool.h"
+
+#include "daemon-server.h"
+#include "daemon-log.h"
+#include "xlate.h"
+
+#include "lvmlockd-internal.h"
+#include "lvmlockd-client.h"
+
+#include "sanlock.h"
+#include "sanlock_rv.h"
+#include "sanlock_admin.h"
+#include "sanlock_resource.h"
+
+#include <pthread.h>
+#include <stddef.h>
+#include <poll.h>
+#include <errno.h>
+#include <syslog.h>
+#include <sys/socket.h>
+
+/*
+-------------------------------------------------------------------------------
+For each VG, lvmlockd creates a sanlock lockspace that holds the leases for
+that VG. There's a lease for the VG lock, and there's a lease for each active
+LV. sanlock maintains (reads/writes) these leases, which exist on storage.
+That storage is a hidden LV within the VG: /dev/vg/lvmlock. lvmlockd gives the
+path of this internal LV to sanlock, which then reads/writes the leases on it.
+
+# lvs -a cc -o+uuid
+ LV VG Attr LSize LV UUID
+ lv1 cc -wi-a----- 2.00g 7xoDtu-yvNM-iwQx-C94t-BbYs-UzBl-o8hAIa
+ lv2 cc -wi-a----- 100.00g exxNPX-wZdO-uCNy-yiGa-aJGT-JKVl-arfcYT
+ [lvmlock] cc -wi-ao---- 256.00m iLpDel-hR0T-hJ3u-rnVo-PcDh-mcjt-sF9egM
+
+# sanlock status
+s lvm_cc:1:/dev/mapper/cc-lvmlock:0
+r lvm_cc:exxNPX-wZdO-uCNy-yiGa-aJGT-JKVl-arfcYT:/dev/mapper/cc-lvmlock:71303168:13 p 26099
+r lvm_cc:7xoDtu-yvNM-iwQx-C94t-BbYs-UzBl-o8hAIa:/dev/mapper/cc-lvmlock:70254592:3 p 26099
+
+This shows that sanlock is maintaining leases on /dev/mapper/cc-lvmlock.
+
+sanlock acquires a lockspace lease when the lockspace is joined, i.e. when the
+VG is started by 'vgchange --lock-start cc'. This lockspace lease exists at
+/dev/mapper/cc-lvmlock offset 0, and sanlock regularly writes to it to maintain
+ownership of it. Joining the lockspace (by acquiring the lockspace lease in
+it) then allows standard resource leases to be acquired in the lockspace for
+whatever the application wants. lvmlockd uses resource leases for the VG lock
+and LV locks.
+
+sanlock acquires a resource lease for each actual lock that lvm commands use.
+Above, there are two LV locks that are held because the two LVs are active.
+These are on /dev/mapper/cc-lvmlock at offsets 71303168 and 70254592. sanlock
+does not write to these resource leases except when acquiring and releasing
+them (e.g. lvchange -ay/-an). The renewal of the lockspace lease maintains
+ownership of all the resource leases in the lockspace.
+
+If the host loses access to the disk that the sanlock lv lives on, then sanlock
+can no longer renew its lockspace lease. The lockspace lease will eventually
+expire, at which point the host will lose ownership of it, and of all resource
+leases it holds in the lockspace. Eventually, other hosts will be able to
+acquire those leases. sanlock ensures that another host will not be able to
+acquire one of the expired leases until the current host has quit using it.
+
+It is important that the host "quit using" the leases it is holding if the
+sanlock storage is lost and they begin expiring. If the host cannot quit using
+the leases and release them within a limited time, then sanlock will use the
+local watchdog to forcibly reset the host before any other host can acquire
+them. This is severe, but preferable to possibly corrupting the data protected
+by the lease. It ensures that two nodes will not be using the same lease at
+once. For LV leases, that means that another host will not be able to activate
+the LV while another host still has it active.
+
+sanlock notifies the application that it cannot renew the lockspace lease. The
+application needs to quit using all leases in the lockspace and release them as
+quickly as possible. In the initial version, lvmlockd ignored this
+notification, so sanlock would eventually reach the point where it would use
+the local watchdog to reset the host. However, it's better to attempt a
+response. If that response succeeds, the host can avoid being reset. If the
+response fails, then sanlock will eventually reset the host as the last resort.
+sanlock gives the application about 40 seconds to complete its response and
+release its leases before resetting the host.
+
+An application can specify the path and args of a program that sanlock should
+run to notify it if the lockspace lease cannot be renewed. This program should
+carry out the application's response to the expiring leases: attempt to quit
+using the leases and then release them. lvmlockd gives this command to sanlock
+for each VG when that VG is started: 'lvmlockctl --kill vg_name'
+
+If sanlock loses access to lease storage in that VG, it runs lvmlockctl --kill,
+which:
+
+1. Uses syslog to explain what is happening.
+
+2. Notifies lvmlockd that the VG is being killed, so lvmlockd can
+   immediately return an error for this condition if any new lock
+ requests are made. (This step would not be strictly necessary.)
+
+3. Attempts to quit using the VG. This is not yet implemented, but
+ will eventually use blkdeactivate on the VG (or a more forceful
+ equivalent.)
+
+4. If step 3 was successful at terminating all use of the VG, then
+ lvmlockd is told to release all the leases for the VG. If this
+   is all done within about 40 seconds, the host can avoid being
+ reset.
+
+Until steps 3 and 4 are fully implemented, manual steps can be substituted.
+This is primarily for testing since the problem needs to be noticed and
+responded to in a very short time. The manual alternative to step 3 is to kill
+any processes using file systems on LV's in the VG, unmount all file systems on
+the LVs, and deactivate all the LVs. Once this is done, the manual alternative
+to step 4 is to run 'lvmlockctl --drop vg_name', which tells lvmlockd to
+release all the leases for the VG.
+-------------------------------------------------------------------------------
+*/
+
+
+/*
+ * Each lockspace thread has its own sanlock daemon connection.
+ * If they shared one, sanlock acquire/release calls would be
+ * serialized. Some aspects of sanlock expect a single connection
+ * from each pid: signals due to a sanlock_request, and
+ * acquire/release/convert/inquire.  The latter can probably be
+ * addressed with a flag to indicate that the pid field should be
+ * interpreted as 'ci' (which the caller would need to figure
+ * out somehow.)
+ */
+
+/* Per-lockspace sanlock state, kept in lockspace.lm_data. */
+struct lm_sanlock {
+	struct sanlk_lockspace ss;
+	int align_size;
+	int sock; /* sanlock daemon connection */
+};
+
+/* Per-resource sanlock state, kept in resource.lm_data; buf reserves
+ * space for the one sanlk_disk that follows the sanlk_resource. */
+struct rd_sanlock {
+	union {
+		struct sanlk_resource rs;
+		char buf[sizeof(struct sanlk_resource) + sizeof(struct sanlk_disk)];
+	};
+	struct val_blk *vb;
+};
+
+/* A sanlk_resource with room for one disk entry, for stack use. */
+struct sanlk_resourced {
+	union {
+		struct sanlk_resource rs;
+		char buf[sizeof(struct sanlk_resource) + sizeof(struct sanlk_disk)];
+	};
+};
+
+/* Bytes the core daemon must reserve in resource.lm_data for sanlock. */
+int lm_data_size_sanlock(void)
+{
+	return sizeof(struct rd_sanlock);
+}
+
+/*
+ * lock_args format
+ *
+ * vg_lock_args format for sanlock is
+ * vg_version_string:undefined:lock_lv_name
+ *
+ * lv_lock_args format for sanlock is
+ * lv_version_string:undefined:offset
+ *
+ * version_string is MAJOR.MINOR.PATCH
+ * undefined may contain ":"
+ *
+ * If a new version of the lock_args string cannot be
+ * handled by an old version of lvmlockd, then the
+ * new lock_args string should contain a larger major number.
+ */
+
+#define VG_LOCK_ARGS_MAJOR 1
+#define VG_LOCK_ARGS_MINOR 0
+#define VG_LOCK_ARGS_PATCH 0
+
+#define LV_LOCK_ARGS_MAJOR 1
+#define LV_LOCK_ARGS_MINOR 0
+#define LV_LOCK_ARGS_PATCH 0
+
+/*
+ * offset 0 is lockspace
+ * offset align_size * 1 is unused
+ * offset align_size * 2 is unused
+ * ...
+ * offset align_size * 64 is unused
+ * offset align_size * 65 is gl lock
+ * offset align_size * 66 is vg lock
+ * offset align_size * 67 is first lv lock
+ * offset align_size * 68 is second lv lock
+ * ...
+ */
+
+#define LS_BEGIN 0
+#define GL_LOCK_BEGIN 65
+#define VG_LOCK_BEGIN 66
+#define LV_LOCK_BEGIN 67
+
+/* Extract the internal lock lv name — the last ":"-separated field of
+ * vg_args (see the lock_args format comment above). */
+static int lock_lv_name_from_args(char *vg_args, char *lock_lv_name)
+{
+	return last_string_from_args(vg_args, lock_lv_name);
+}
+
+/*
+ * Extract the lease offset — the last ":"-separated field of lv_args
+ * (see the lock_args format comment above) — into *lock_lv_offset.
+ * Returns 0 on success or a negative errno-style value on a missing
+ * or non-numeric offset field.
+ */
+static int lock_lv_offset_from_args(char *lv_args, uint64_t *lock_lv_offset)
+{
+	char offset_str[MAX_ARGS+1];
+	char *end = NULL;
+	int rv;
+
+	memset(offset_str, 0, sizeof(offset_str));
+
+	rv = last_string_from_args(lv_args, offset_str);
+	if (rv < 0)
+		return rv;
+
+	/* Reject a field with no digits or an out-of-range value instead
+	 * of silently returning offset 0 (the lockspace area). */
+	errno = 0;
+	*lock_lv_offset = strtoull(offset_str, &end, 10);
+	if (errno || end == offset_str) {
+		log_error("lock_lv_offset_from_args invalid offset in %s", lv_args);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/*
+ * Verify that the major version encoded in a lock_args string is one
+ * this daemon can handle.  Returns 0 when compatible, a negative
+ * value when the string cannot be parsed, -1 when the args were
+ * written by a newer incompatible version.
+ */
+static int check_args_version(char *args, unsigned int our_major)
+{
+	unsigned int args_major = 0;
+	int err;
+
+	if ((err = version_from_args(args, &args_major, NULL, NULL)) < 0) {
+		log_error("check_args_version %s error %d", args, err);
+		return err;
+	}
+
+	if (args_major <= our_major)
+		return 0;
+
+	log_error("check_args_version %s major %u %u", args, args_major, our_major);
+	return -1;
+}
+
+#define MAX_LINE 64
+
+/*
+ * Read "host_id = N" from the configured daemon_host_id_file.
+ * Lines starting with '#' and blank lines are ignored; the first
+ * "host_id" key wins.  Returns the host_id, or 0 if the file cannot
+ * be opened or contains no host_id entry.
+ */
+static int read_host_id_file(void)
+{
+	FILE *file;
+	char line[MAX_LINE];
+	char key_str[MAX_LINE];
+	char val_str[MAX_LINE];
+	char *key, *val, *sep;
+	int host_id = 0;
+
+	file = fopen(daemon_host_id_file, "r");
+	if (!file)
+		goto out;
+
+	while (fgets(line, MAX_LINE, file)) {
+		if (line[0] == '#' || line[0] == '\n')
+			continue;
+
+		/* Check for '=' before computing val: the old code did
+		 * "val = sep + 1" on a possibly-NULL sep, which is
+		 * undefined behavior and crashed on lines without '='. */
+		sep = strstr(line, "=");
+		if (!sep)
+			continue;
+
+		*sep = '\0';
+		key = line;
+		val = sep + 1;
+
+		memset(key_str, 0, sizeof(key_str));
+		memset(val_str, 0, sizeof(val_str));
+
+		/* Skip lines where either side is empty/whitespace. */
+		if (sscanf(key, "%s", key_str) != 1)
+			continue;
+		if (sscanf(val, "%s", val_str) != 1)
+			continue;
+
+		if (!strcmp(key_str, "host_id")) {
+			host_id = atoi(val_str);
+			break;
+		}
+	}
+	if (fclose(file))
+		log_error("failed to close host id file %s", daemon_host_id_file);
+out:
+	log_debug("host_id %d from %s", host_id, daemon_host_id_file);
+	return host_id;
+}
+
+/*
+ * vgcreate
+ *
+ * For init_vg, vgcreate passes the internal lv name as vg_args.
+ * This constructs the full/proper vg_args format, containing the
+ * version and lv name, and returns the real lock_args in vg_args.
+ */
+
+/*
+ * Initialize sanlock lease storage for a new vg on its internal lock
+ * lv: write the lockspace area, the global-lock and vg-lock resource
+ * areas, clear all lv lease slots, and return the finished lock_args
+ * string ("version:lock_lv_name") back through vg_args.
+ */
+int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_args)
+{
+	struct sanlk_lockspace ss;
+	struct sanlk_resourced rd;
+	struct sanlk_disk disk;
+	char lock_lv_name[MAX_ARGS+1];
+	char lock_args_version[MAX_ARGS+1];
+	const char *gl_name = NULL;
+	uint32_t daemon_version;
+	uint32_t daemon_proto;
+	uint64_t offset;
+	int align_size;
+	int i, rv;
+
+	memset(&ss, 0, sizeof(ss));
+	memset(&rd, 0, sizeof(rd));
+	memset(&disk, 0, sizeof(disk));
+	memset(lock_lv_name, 0, sizeof(lock_lv_name));
+	memset(lock_args_version, 0, sizeof(lock_args_version));
+
+	if (!vg_args || !vg_args[0] || !strcmp(vg_args, "none")) {
+		log_error("S %s init_vg_san vg_args missing", ls_name);
+		return -EARGS;
+	}
+
+	snprintf(lock_args_version, MAX_ARGS, "%u.%u.%u",
+		 VG_LOCK_ARGS_MAJOR, VG_LOCK_ARGS_MINOR, VG_LOCK_ARGS_PATCH);
+
+	/* see comment above about input vg_args being only lock_lv_name */
+	snprintf(lock_lv_name, MAX_ARGS, "%s", vg_args);
+
+	/* the final "version:lv_name" string must still fit in vg_args */
+	if (strlen(lock_lv_name) + strlen(lock_args_version) + 2 > MAX_ARGS)
+		return -EARGS;
+
+	snprintf(disk.path, SANLK_PATH_LEN-1, "/dev/mapper/%s-%s", vg_name, lock_lv_name);
+
+	log_debug("S %s init_vg_san path %s", ls_name, disk.path);
+
+	if (daemon_test) {
+		/* test mode: skip all disk/daemon access */
+		if (!gl_lsname_sanlock[0])
+			strncpy(gl_lsname_sanlock, ls_name, MAX_NAME);
+		return 0;
+	}
+
+	rv = sanlock_version(0, &daemon_version, &daemon_proto);
+	if (rv < 0) {
+		log_error("S %s init_vg_san failed to connect to sanlock daemon", ls_name);
+		return -EMANAGER;
+	}
+
+	log_debug("sanlock daemon version %08x proto %08x",
+		  daemon_version, daemon_proto);
+
+	align_size = sanlock_align(&disk);
+	if (align_size <= 0) {
+		log_error("S %s init_vg_san bad disk align size %d %s",
+			  ls_name, align_size, disk.path);
+		return -EARGS;
+	}
+
+	/* the lockspace (host id) lease area lives at the start of the lv */
+	strncpy(ss.name, ls_name, SANLK_NAME_LEN);
+	memcpy(ss.host_id_disk.path, disk.path, SANLK_PATH_LEN);
+	ss.host_id_disk.offset = LS_BEGIN * align_size;
+
+	rv = sanlock_write_lockspace(&ss, 0, 0, sanlock_io_timeout);
+	if (rv < 0) {
+		log_error("S %s init_vg_san write_lockspace error %d %s",
+			  ls_name, rv, ss.host_id_disk.path);
+		return rv;
+	}
+
+	/*
+	 * We want to create the global lock in the first sanlock vg.
+	 * If other sanlock vgs exist, then one of them must contain
+	 * the gl.  If gl_lsname_sanlock is not set, then perhaps
+	 * the sanlock vg with the gl has been removed or has not yet
+	 * been seen. (Would vgcreate get this far in that case?)
+	 * If dlm vgs exist, then we choose to use the dlm gl and
+	 * not a sanlock gl.
+	 */
+
+	if (flags & LD_AF_ENABLE)
+		gl_name = R_NAME_GL;
+	else if (flags & LD_AF_DISABLE)
+		gl_name = R_NAME_GL_DISABLED;
+	else if (!gl_use_sanlock || gl_lsname_sanlock[0] || !lockspaces_empty())
+		gl_name = R_NAME_GL_DISABLED;
+	else
+		gl_name = R_NAME_GL;
+
+	/* write the global-lock resource area (possibly disabled) */
+	memcpy(rd.rs.lockspace_name, ss.name, SANLK_NAME_LEN);
+	strncpy(rd.rs.name, gl_name, SANLK_NAME_LEN);
+	memcpy(rd.rs.disks[0].path, disk.path, SANLK_PATH_LEN);
+	rd.rs.disks[0].offset = align_size * GL_LOCK_BEGIN;
+	rd.rs.num_disks = 1;
+
+	rv = sanlock_write_resource(&rd.rs, 0, 0, 0);
+	if (rv < 0) {
+		log_error("S %s init_vg_san write_resource gl error %d %s",
+			  ls_name, rv, rd.rs.disks[0].path);
+		return rv;
+	}
+
+	/* write the vg-lock resource area */
+	memcpy(rd.rs.lockspace_name, ss.name, SANLK_NAME_LEN);
+	strncpy(rd.rs.name, R_NAME_VG, SANLK_NAME_LEN);
+	memcpy(rd.rs.disks[0].path, disk.path, SANLK_PATH_LEN);
+	rd.rs.disks[0].offset = align_size * VG_LOCK_BEGIN;
+	rd.rs.num_disks = 1;
+
+	rv = sanlock_write_resource(&rd.rs, 0, 0, 0);
+	if (rv < 0) {
+		log_error("S %s init_vg_san write_resource vg error %d %s",
+			  ls_name, rv, rd.rs.disks[0].path);
+		return rv;
+	}
+
+	if (!strcmp(gl_name, R_NAME_GL))
+		strncpy(gl_lsname_sanlock, ls_name, MAX_NAME);
+
+	/* return the finished lock_args through vg_args */
+	snprintf(vg_args, MAX_ARGS, "%s:%s", lock_args_version, lock_lv_name);
+
+	log_debug("S %s init_vg_san done vg_args %s", ls_name, vg_args);
+
+	/*
+	 * Go through all lv resource slots and initialize them with the
+	 * correct lockspace name but a special resource name that indicates
+	 * it is unused.
+	 */
+
+	memset(&rd, 0, sizeof(rd));
+	rd.rs.num_disks = 1;
+	memcpy(rd.rs.disks[0].path, disk.path, SANLK_PATH_LEN);
+	strncpy(rd.rs.lockspace_name, ls_name, SANLK_NAME_LEN);
+	strcpy(rd.rs.name, "#unused");
+
+	offset = align_size * LV_LOCK_BEGIN;
+
+	log_debug("S %s init_vg_san clearing lv lease areas", ls_name);
+
+	for (i = 0; ; i++) {
+		rd.rs.disks[0].offset = offset;
+
+		rv = sanlock_write_resource(&rd.rs, 0, 0, 0);
+		if (rv == -EMSGSIZE || rv == -ENOSPC) {
+			/* This indicates the end of the device is reached. */
+			rv = -EMSGSIZE;
+			break;
+		}
+
+		if (rv) {
+			log_error("clear lv resource area %llu error %d",
+				  (unsigned long long)offset, rv);
+			break;
+		}
+		offset += align_size;
+	}
+
+	return 0;
+}
+
+/*
+ * lvcreate
+ *
+ * The offset at which the lv lease is written is passed
+ * all the way back to the lvcreate command so that it
+ * can be saved in the lv's lock_args in the vg metadata.
+ */
+
+/*
+ * Write the lease for a new lv: scan the lv lease slots on the lock
+ * lv (starting at free_offset if given) for the first "#unused" or
+ * uninitialized slot, write the lv's resource there, and return the
+ * chosen offset through lv_args as "version:offset".
+ *
+ * Returns 0 on success, -EEXIST if a lease with this lv name already
+ * exists, -EMSGSIZE when the end of the device is reached, or another
+ * negative error from the sanlock calls.
+ */
+int lm_init_lv_sanlock(char *ls_name, char *vg_name, char *lv_name,
+		       char *vg_args, char *lv_args, uint64_t free_offset)
+{
+	struct sanlk_resourced rd;
+	char lock_lv_name[MAX_ARGS+1];
+	char lock_args_version[MAX_ARGS+1];
+	uint64_t offset;
+	int align_size;
+	int rv;
+
+	memset(&rd, 0, sizeof(rd));
+	memset(lock_lv_name, 0, sizeof(lock_lv_name));
+	memset(lock_args_version, 0, sizeof(lock_args_version));
+
+	rv = lock_lv_name_from_args(vg_args, lock_lv_name);
+	if (rv < 0) {
+		log_error("S %s init_lv_san lock_lv_name_from_args error %d %s",
+			  ls_name, rv, vg_args);
+		return rv;
+	}
+
+	snprintf(lock_args_version, MAX_ARGS, "%u.%u.%u",
+		 LV_LOCK_ARGS_MAJOR, LV_LOCK_ARGS_MINOR, LV_LOCK_ARGS_PATCH);
+
+	strncpy(rd.rs.lockspace_name, ls_name, SANLK_NAME_LEN);
+	rd.rs.num_disks = 1;
+	snprintf(rd.rs.disks[0].path, SANLK_PATH_LEN-1, "/dev/mapper/%s-%s", vg_name, lock_lv_name);
+
+	align_size = sanlock_align(&rd.rs.disks[0]);
+	if (align_size <= 0) {
+		log_error("S %s init_lv_san align error %d", ls_name, align_size);
+		return -EINVAL;
+	}
+
+	if (free_offset)
+		offset = free_offset;
+	else
+		offset = (uint64_t)align_size * LV_LOCK_BEGIN; /* widen before multiply */
+	rd.rs.disks[0].offset = offset;
+
+	if (daemon_test) {
+		/* test mode: report a dummy offset without touching disk */
+		snprintf(lv_args, MAX_ARGS, "%s:%llu",
+			 lock_args_version, (unsigned long long)1111);
+		return 0;
+	}
+
+	while (1) {
+		rd.rs.disks[0].offset = offset;
+
+		memset(rd.rs.name, 0, SANLK_NAME_LEN);
+
+		rv = sanlock_read_resource(&rd.rs, 0);
+		if (rv == -EMSGSIZE || rv == -ENOSPC) {
+			/* This indicates the end of the device is reached. */
+			log_debug("S %s init_lv_san read limit offset %llu",
+				  ls_name, (unsigned long long)offset);
+			rv = -EMSGSIZE;
+			return rv;
+		}
+
+		if (rv && rv != SANLK_LEADER_MAGIC) {
+			log_error("S %s init_lv_san read error %d offset %llu",
+				  ls_name, rv, (unsigned long long)offset);
+			break;
+		}
+
+		if (!strncmp(rd.rs.name, lv_name, SANLK_NAME_LEN)) {
+			log_error("S %s init_lv_san resource name %s already exists at %llu",
+				  ls_name, lv_name, (unsigned long long)offset);
+			return -EEXIST;
+		}
+
+		/*
+		 * If we read newly extended space, it will not be initialized
+		 * with an "#unused" resource, but will return SANLK_LEADER_MAGIC
+		 * indicating an uninitialized paxos structure on disk.
+		 */
+		if ((rv == SANLK_LEADER_MAGIC) || !strcmp(rd.rs.name, "#unused")) {
+			log_debug("S %s init_lv_san %s found unused area at %llu",
+				  ls_name, lv_name, (unsigned long long)offset);
+
+			strncpy(rd.rs.name, lv_name, SANLK_NAME_LEN);
+
+			rv = sanlock_write_resource(&rd.rs, 0, 0, 0);
+			if (!rv) {
+				snprintf(lv_args, MAX_ARGS, "%s:%llu",
+					 lock_args_version, (unsigned long long)offset);
+			} else {
+				/* was logging rv where the offset belongs */
+				log_error("S %s init_lv_san write error %d offset %llu",
+					  ls_name, rv, (unsigned long long)offset);
+			}
+			break;
+		}
+
+		offset += align_size;
+	}
+
+	return rv;
+}
+
+/*
+ * Read the lockspace and each resource, replace the lockspace name,
+ * and write it back.
+ */
+
+int lm_rename_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_args)
+{
+ struct sanlk_lockspace ss;
+ struct sanlk_resourced rd;
+ struct sanlk_disk disk;
+ char lock_lv_name[MAX_ARGS+1];
+ uint64_t offset;
+ uint32_t io_timeout;
+ int align_size;
+ int i, rv;
+
+ memset(&disk, 0, sizeof(disk));
+ memset(lock_lv_name, 0, sizeof(lock_lv_name));
+
+ if (!vg_args || !vg_args[0] || !strcmp(vg_args, "none")) {
+ log_error("S %s rename_vg_san vg_args missing", ls_name);
+ return -EINVAL;
+ }
+
+ rv = lock_lv_name_from_args(vg_args, lock_lv_name);
+ if (rv < 0) {
+ /* FIX: message previously said "init_lv_san" (copy-paste). */
+ log_error("S %s rename_vg_san lock_lv_name_from_args error %d %s",
+ ls_name, rv, vg_args);
+ return rv;
+ }
+
+ snprintf(disk.path, SANLK_PATH_LEN-1, "/dev/mapper/%s-%s", vg_name, lock_lv_name);
+
+ log_debug("S %s rename_vg_san path %s", ls_name, disk.path);
+
+ if (daemon_test)
+ return 0;
+
+ /* FIXME: device is not always ready for us here */
+ sleep(1);
+
+ align_size = sanlock_align(&disk);
+ if (align_size <= 0) {
+ log_error("S %s rename_vg_san bad align size %d %s",
+ ls_name, align_size, disk.path);
+ return -EINVAL;
+ }
+
+ /*
+ * Lockspace
+ */
+
+ memset(&ss, 0, sizeof(ss));
+ memcpy(ss.host_id_disk.path, disk.path, SANLK_PATH_LEN);
+ ss.host_id_disk.offset = LS_BEGIN * align_size;
+
+ rv = sanlock_read_lockspace(&ss, 0, &io_timeout);
+ if (rv < 0) {
+ log_error("S %s rename_vg_san read_lockspace error %d %s",
+ ls_name, rv, ss.host_id_disk.path);
+ return rv;
+ }
+
+ strncpy(ss.name, ls_name, SANLK_NAME_LEN);
+
+ rv = sanlock_write_lockspace(&ss, 0, 0, sanlock_io_timeout);
+ if (rv < 0) {
+ log_error("S %s rename_vg_san write_lockspace error %d %s",
+ ls_name, rv, ss.host_id_disk.path);
+ return rv;
+ }
+
+ /*
+ * GL resource
+ */
+
+ memset(&rd, 0, sizeof(rd));
+ memcpy(rd.rs.disks[0].path, disk.path, SANLK_PATH_LEN);
+ rd.rs.disks[0].offset = align_size * GL_LOCK_BEGIN;
+ rd.rs.num_disks = 1;
+
+ rv = sanlock_read_resource(&rd.rs, 0);
+ if (rv < 0) {
+ log_error("S %s rename_vg_san read_resource gl error %d %s",
+ ls_name, rv, rd.rs.disks[0].path);
+ return rv;
+ }
+
+ strncpy(rd.rs.lockspace_name, ss.name, SANLK_NAME_LEN);
+
+ rv = sanlock_write_resource(&rd.rs, 0, 0, 0);
+ if (rv < 0) {
+ log_error("S %s rename_vg_san write_resource gl error %d %s",
+ ls_name, rv, rd.rs.disks[0].path);
+ return rv;
+ }
+
+ /*
+ * VG resource
+ */
+
+ memset(&rd, 0, sizeof(rd));
+ memcpy(rd.rs.disks[0].path, disk.path, SANLK_PATH_LEN);
+ rd.rs.disks[0].offset = align_size * VG_LOCK_BEGIN;
+ rd.rs.num_disks = 1;
+
+ rv = sanlock_read_resource(&rd.rs, 0);
+ if (rv < 0) {
+ /* FIX: this is the read failure; message previously said
+ * "write_resource vg error" (copy-paste from below). */
+ log_error("S %s rename_vg_san read_resource vg error %d %s",
+ ls_name, rv, rd.rs.disks[0].path);
+ return rv;
+ }
+
+ strncpy(rd.rs.lockspace_name, ss.name, SANLK_NAME_LEN);
+
+ rv = sanlock_write_resource(&rd.rs, 0, 0, 0);
+ if (rv < 0) {
+ log_error("S %s rename_vg_san write_resource vg error %d %s",
+ ls_name, rv, rd.rs.disks[0].path);
+ return rv;
+ }
+
+ /*
+ * LV resources
+ */
+
+ offset = align_size * LV_LOCK_BEGIN;
+
+ for (i = 0; ; i++) {
+ memset(&rd, 0, sizeof(rd));
+ memcpy(rd.rs.disks[0].path, disk.path, SANLK_PATH_LEN);
+ rd.rs.disks[0].offset = offset;
+ rd.rs.num_disks = 1;
+
+ rv = sanlock_read_resource(&rd.rs, 0);
+ if (rv == -EMSGSIZE || rv == -ENOSPC) {
+ /* This indicates the end of the device is reached. */
+ rv = -EMSGSIZE;
+ break;
+ }
+
+ if (rv < 0) {
+ log_error("S %s rename_vg_san read_resource resource area %llu error %d",
+ ls_name, (unsigned long long)offset, rv);
+ break;
+ }
+
+ strncpy(rd.rs.lockspace_name, ss.name, SANLK_NAME_LEN);
+
+ rv = sanlock_write_resource(&rd.rs, 0, 0, 0);
+ if (rv) {
+ log_error("S %s rename_vg_san write_resource resource area %llu error %d",
+ ls_name, (unsigned long long)offset, rv);
+ break;
+ }
+ offset += align_size;
+ }
+
+ /* NOTE(review): LV-loop errors are logged but not propagated;
+ * the LV lease rename is treated as best-effort — confirm intended. */
+ return 0;
+}
+
+/* lvremove */
+int lm_free_lv_sanlock(struct lockspace *ls, struct resource *r)
+{
+ struct rd_sanlock *rds = (struct rd_sanlock *)r->lm_data;
+ struct sanlk_resource *rs = &rds->rs;
+ int rv;
+
+ log_debug("S %s R %s free_lv_san", ls->name, r->name);
+
+ if (daemon_test)
+ return 0;
+
+ /*
+ * Mark the on-disk lease area reusable. lm_init_lv_sanlock and
+ * lm_find_free_lock_sanlock look for the "#unused" name when
+ * allocating a lease for a new LV.
+ */
+ strcpy(rs->name, "#unused");
+
+ rv = sanlock_write_resource(rs, 0, 0, 0);
+ if (rv < 0) {
+ log_error("S %s R %s free_lv_san write error %d",
+ ls->name, r->name, rv);
+ }
+
+ return rv;
+}
+
+/*
+ * Acquire the global lock ex, then release it with SANLK_REL_RENAME so
+ * the on-disk resource name becomes R_NAME_GL_DISABLED in one step
+ * (see the enable/disable comment below for why).
+ */
+int lm_ex_disable_gl_sanlock(struct lockspace *ls)
+{
+ struct lm_sanlock *lms = (struct lm_sanlock *)ls->lm_data;
+ struct sanlk_resourced rd1;
+ struct sanlk_resourced rd2;
+ struct sanlk_resource *rs1;
+ struct sanlk_resource *rs2;
+ struct sanlk_resource **rs_args;
+ int rv;
+
+ rs_args = malloc(2 * sizeof(struct sanlk_resource *));
+ if (!rs_args)
+ return -ENOMEM;
+
+ rs1 = &rd1.rs;
+ rs2 = &rd2.rs;
+
+ memset(&rd1, 0, sizeof(rd1));
+ memset(&rd2, 0, sizeof(rd2));
+
+ /* rd1: the current gl resource; rd2: the replacement (disabled) name. */
+ strncpy(rd1.rs.lockspace_name, ls->name, SANLK_NAME_LEN);
+ strncpy(rd1.rs.name, R_NAME_GL, SANLK_NAME_LEN);
+
+ strncpy(rd2.rs.lockspace_name, ls->name, SANLK_NAME_LEN);
+ strncpy(rd2.rs.name, R_NAME_GL_DISABLED, SANLK_NAME_LEN);
+
+ rd1.rs.num_disks = 1;
+ strncpy(rd1.rs.disks[0].path, lms->ss.host_id_disk.path, SANLK_PATH_LEN-1);
+ rd1.rs.disks[0].offset = lms->align_size * GL_LOCK_BEGIN;
+
+ rv = sanlock_acquire(lms->sock, -1, 0, 1, &rs1, NULL);
+ if (rv < 0) {
+ log_error("S %s ex_disable_gl_san acquire error %d",
+ ls->name, rv);
+ goto out;
+ }
+
+ rs_args[0] = rs1;
+ rs_args[1] = rs2;
+
+ rv = sanlock_release(lms->sock, -1, SANLK_REL_RENAME, 2, rs_args);
+ if (rv < 0) {
+ log_error("S %s ex_disable_gl_san release_rename error %d",
+ ls->name, rv);
+ }
+
+out:
+ free(rs_args);
+ return rv;
+}
+
+/*
+ * enable/disable exist because each vg contains a global lock,
+ * but we only want to use the gl from one of them. The first
+ * sanlock vg created, has its gl enabled, and subsequent
+ * sanlock vgs have their gl disabled. If the vg containing the
+ * gl is removed, the gl from another sanlock vg needs to be
+ * enabled. Or, if gl in multiple vgs are somehow enabled, we
+ * want to be able to disable one of them.
+ *
+ * Disable works by naming/renaming the gl resource to have a
+ * name that is different from the predefined name.
+ * When a host attempts to acquire the gl with its standard
+ * predefined name, it will fail because the resource's name
+ * on disk doesn't match.
+ */
+
+/*
+ * Enable or disable the global lock in this lockspace by rewriting the
+ * gl resource on disk under the enabled (R_NAME_GL) or disabled
+ * (R_NAME_GL_DISABLED) name, then update the daemon's cached state.
+ */
+int lm_able_gl_sanlock(struct lockspace *ls, int enable)
+{
+ struct lm_sanlock *lms = (struct lm_sanlock *)ls->lm_data;
+ struct sanlk_resourced rd;
+ const char *gl_name;
+ int rv;
+
+ if (enable)
+ gl_name = R_NAME_GL;
+ else
+ gl_name = R_NAME_GL_DISABLED;
+
+ memset(&rd, 0, sizeof(rd));
+
+ strncpy(rd.rs.lockspace_name, ls->name, SANLK_NAME_LEN);
+ strncpy(rd.rs.name, gl_name, SANLK_NAME_LEN);
+
+ rd.rs.num_disks = 1;
+ strncpy(rd.rs.disks[0].path, lms->ss.host_id_disk.path, SANLK_PATH_LEN-1);
+ rd.rs.disks[0].offset = lms->align_size * GL_LOCK_BEGIN;
+
+ rv = sanlock_write_resource(&rd.rs, 0, 0, 0);
+ if (rv < 0) {
+ log_error("S %s able_gl %d write_resource gl error %d %s",
+ ls->name, enable, rv, rd.rs.disks[0].path);
+ return rv;
+ }
+
+ log_debug("S %s able_gl %s", ls->name, gl_name);
+
+ ls->sanlock_gl_enabled = enable;
+
+ /* Track which lockspace currently hosts the global lock. */
+ if (enable)
+ strncpy(gl_lsname_sanlock, ls->name, MAX_NAME);
+
+ if (!enable && !strcmp(gl_lsname_sanlock, ls->name))
+ memset(gl_lsname_sanlock, 0, sizeof(gl_lsname_sanlock));
+
+ return 0;
+}
+
+/*
+ * Read the gl resource area from disk and report its state:
+ * 1 = enabled (name is R_NAME_GL), 0 = disabled (R_NAME_GL_DISABLED),
+ * -1 = unrecognized name, negative sanlock error on read failure.
+ */
+static int gl_is_enabled(struct lockspace *ls, struct lm_sanlock *lms)
+{
+ char strname[SANLK_NAME_LEN + 1];
+ struct sanlk_resourced rd;
+ uint64_t offset;
+ int rv;
+
+ memset(&rd, 0, sizeof(rd));
+
+ strncpy(rd.rs.lockspace_name, ls->name, SANLK_NAME_LEN);
+
+ /* leave rs.name empty, it is what we're checking */
+
+ rd.rs.num_disks = 1;
+ strncpy(rd.rs.disks[0].path, lms->ss.host_id_disk.path, SANLK_PATH_LEN-1);
+
+ offset = lms->align_size * GL_LOCK_BEGIN;
+ rd.rs.disks[0].offset = offset;
+
+ rv = sanlock_read_resource(&rd.rs, 0);
+ if (rv < 0) {
+ log_error("gl_is_enabled read_resource error %d", rv);
+ return rv;
+ }
+
+ /* rd.rs.name may not be NUL-terminated; copy into a terminated buffer. */
+ memset(strname, 0, sizeof(strname));
+ memcpy(strname, rd.rs.name, SANLK_NAME_LEN);
+
+ if (!strcmp(strname, R_NAME_GL_DISABLED)) {
+ return 0;
+ }
+
+ if (!strcmp(strname, R_NAME_GL)) {
+ return 1;
+ }
+
+ log_error("gl_is_enabled invalid gl name %s", strname);
+ return -1;
+}
+
+/*
+ * Refresh and return the cached gl-enabled state for this lockspace.
+ * Note: a negative error from gl_is_enabled() is stored in
+ * sanlock_gl_enabled as-is, matching the original behavior.
+ */
+int lm_gl_is_enabled(struct lockspace *ls)
+{
+ int enabled = gl_is_enabled(ls, ls->lm_data);
+
+ ls->sanlock_gl_enabled = enabled;
+ return enabled;
+}
+
+/*
+ * This is called at the beginning of lvcreate to
+ * ensure there is free space for a new LV lock.
+ * If not, lvcreate will extend the lvmlock lv
+ * before continuing with creating the new LV.
+ * This way, lm_init_lv_san() should find a free
+ * lock (unless the autoextend of lvmlock lv has
+ * been disabled.)
+ */
+
+int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t *free_offset)
+{
+ struct lm_sanlock *lms = (struct lm_sanlock *)ls->lm_data;
+ struct sanlk_resourced rd;
+ uint64_t offset;
+ int rv;
+
+ /* NOTE(review): in daemon_test mode *free_offset is never set;
+ * callers presumably initialize it — confirm. */
+ if (daemon_test)
+ return 0;
+
+ memset(&rd, 0, sizeof(rd));
+
+ strncpy(rd.rs.lockspace_name, ls->name, SANLK_NAME_LEN);
+ rd.rs.num_disks = 1;
+ strncpy(rd.rs.disks[0].path, lms->ss.host_id_disk.path, SANLK_PATH_LEN-1);
+
+ offset = lms->align_size * LV_LOCK_BEGIN;
+
+ /* Scan lease areas in order; stop at the first free one. */
+ while (1) {
+ rd.rs.disks[0].offset = offset;
+
+ memset(rd.rs.name, 0, SANLK_NAME_LEN);
+
+ rv = sanlock_read_resource(&rd.rs, 0);
+ if (rv == -EMSGSIZE || rv == -ENOSPC) {
+ /* This indicates the end of the device is reached. */
+ log_debug("S %s find_free_lock_san read limit offset %llu",
+ ls->name, (unsigned long long)offset);
+ return -EMSGSIZE;
+ }
+
+ /*
+ * If we read newly extended space, it will not be initialized
+ * with an "#unused" resource, but will return an error about
+ * an invalid paxos structure on disk.
+ */
+ if (rv == SANLK_LEADER_MAGIC) {
+ log_debug("S %s find_free_lock_san found empty area at %llu",
+ ls->name, (unsigned long long)offset);
+ *free_offset = offset;
+ return 0;
+ }
+
+ if (rv) {
+ log_error("S %s find_free_lock_san read error %d offset %llu",
+ ls->name, rv, (unsigned long long)offset);
+ break;
+ }
+
+ /* Lease previously freed by lm_free_lv_sanlock. */
+ if (!strcmp(rd.rs.name, "#unused")) {
+ log_debug("S %s find_free_lock_san found unused area at %llu",
+ ls->name, (unsigned long long)offset);
+ *free_offset = offset;
+ return 0;
+ }
+
+ offset += lms->align_size;
+ }
+
+ return rv;
+}
+
+/*
+ * host A: start_vg/add_lockspace
+ * host B: vgremove
+ *
+ * The global lock cannot always be held around start_vg
+ * on host A because the gl is in a vg that may not be
+ * started yet, or may be in the vg we are starting.
+ *
+ * If B removes the vg, destroying the delta leases,
+ * while A is a lockspace member, it will cause A's
+ * sanlock delta lease renewal to fail, and lockspace
+ * recovery.
+ *
+ * I expect this overlap would usually cause a failure
+ * in the add_lockspace() on host A when it sees that
+ * the lockspace structures have been clobbered by B.
+ * Having add_lockspace() fail should be a fine result.
+ *
+ * If add_lockspace was somehow able to finish, the
+ * subsequent renewal would probably fail instead.
+ * This should also not create any major problems.
+ */
+
+/*
+ * Set up everything needed before joining the sanlock lockspace:
+ * resolve the internal lock LV path from vg_args, register with the
+ * sanlock daemon, configure the kill path/args, and detect whether
+ * this lockspace hosts the global lock. On success ls->lm_data is
+ * a newly allocated struct lm_sanlock; on failure it is untouched.
+ */
+int lm_prepare_lockspace_sanlock(struct lockspace *ls)
+{
+ struct stat st;
+ struct lm_sanlock *lms = NULL;
+ char lock_lv_name[MAX_ARGS+1];
+ char lsname[SANLK_NAME_LEN + 1];
+ char disk_path[SANLK_PATH_LEN];
+ char killpath[SANLK_PATH_LEN];
+ char killargs[SANLK_PATH_LEN];
+ int gl_found;
+ int ret, rv;
+
+ memset(disk_path, 0, sizeof(disk_path));
+ memset(lock_lv_name, 0, sizeof(lock_lv_name));
+
+ /*
+ * Construct the path to lvmlockctl by using the path to the lvm binary
+ * and appending "lockctl" to get /path/to/lvmlockctl.
+ */
+ memset(killpath, 0, sizeof(killpath));
+ snprintf(killpath, SANLK_PATH_LEN - 1, "%slockctl", LVM_PATH);
+
+ memset(killargs, 0, sizeof(killargs));
+ snprintf(killargs, SANLK_PATH_LEN - 1, "--kill %s", ls->vg_name);
+
+ rv = check_args_version(ls->vg_args, VG_LOCK_ARGS_MAJOR);
+ if (rv < 0) {
+ ret = -EARGS;
+ goto fail;
+ }
+
+ rv = lock_lv_name_from_args(ls->vg_args, lock_lv_name);
+ if (rv < 0) {
+ log_error("S %s prepare_lockspace_san lock_lv_name_from_args error %d %s",
+ ls->name, rv, ls->vg_args);
+ ret = -EARGS;
+ goto fail;
+ }
+
+ snprintf(disk_path, SANLK_PATH_LEN-1, "/dev/mapper/%s-%s",
+ ls->vg_name, lock_lv_name);
+
+ /*
+ * When a vg is started, the internal sanlock lv should be
+ * activated before lvmlockd is asked to add the lockspace.
+ * (sanlock needs to use the lv.)
+ *
+ * In the future we might be able to ask something on the system
+ * to activate the sanlock lv from here, and with that we might be
+ * able to start sanlock VGs without requiring a
+ * vgchange --lock-start command.
+ */
+
+ /* FIXME: device is not always ready for us here */
+ sleep(1);
+
+ rv = stat(disk_path, &st);
+ if (rv < 0) {
+ log_error("S %s prepare_lockspace_san stat error %d disk_path %s",
+ ls->name, errno, disk_path);
+ ret = -EARGS;
+ goto fail;
+ }
+
+ /* host_id may come from the request, global config, or a file. */
+ if (!ls->host_id) {
+ if (daemon_host_id)
+ ls->host_id = daemon_host_id;
+ else if (daemon_host_id_file)
+ ls->host_id = read_host_id_file();
+ }
+
+ /* Valid host_ids here are 1..2000 (one delta-lease slot per host). */
+ if (!ls->host_id || ls->host_id > 2000) {
+ log_error("S %s prepare_lockspace_san invalid host_id %llu",
+ ls->name, (unsigned long long)ls->host_id);
+ ret = -EHOSTID;
+ goto fail;
+ }
+
+ lms = malloc(sizeof(struct lm_sanlock));
+ if (!lms) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ memset(lsname, 0, sizeof(lsname));
+ strncpy(lsname, ls->name, SANLK_NAME_LEN);
+
+ memset(lms, 0, sizeof(struct lm_sanlock));
+ memcpy(lms->ss.name, lsname, SANLK_NAME_LEN);
+ lms->ss.host_id_disk.offset = 0;
+ lms->ss.host_id = ls->host_id;
+ strncpy(lms->ss.host_id_disk.path, disk_path, SANLK_PATH_LEN-1);
+
+ /* In test mode skip all sanlock daemon interaction (lms->sock stays 0). */
+ if (daemon_test) {
+ if (!gl_lsname_sanlock[0]) {
+ strncpy(gl_lsname_sanlock, lsname, MAX_NAME);
+ log_debug("S %s prepare_lockspace_san use global lock", lsname);
+ }
+ goto out;
+ }
+
+ lms->sock = sanlock_register();
+ if (lms->sock < 0) {
+ log_error("S %s prepare_lockspace_san register error %d", lsname, lms->sock);
+ lms->sock = 0;
+ ret = -EMANAGER;
+ goto fail;
+ }
+
+ log_debug("set killpath to %s %s", killpath, killargs);
+
+ /* Ask sanlock to run lvmlockctl --kill <vg> if leases are lost. */
+ rv = sanlock_killpath(lms->sock, 0, killpath, killargs);
+ if (rv < 0) {
+ log_error("S %s killpath error %d", lsname, rv);
+ ret = -EMANAGER;
+ goto fail;
+ }
+
+ rv = sanlock_restrict(lms->sock, SANLK_RESTRICT_SIGKILL);
+ if (rv < 0) {
+ log_error("S %s restrict error %d", lsname, rv);
+ ret = -EMANAGER;
+ goto fail;
+ }
+
+ lms->align_size = sanlock_align(&lms->ss.host_id_disk);
+ if (lms->align_size <= 0) {
+ log_error("S %s prepare_lockspace_san align error %d", lsname, lms->align_size);
+ ret = -EMANAGER;
+ goto fail;
+ }
+
+ gl_found = gl_is_enabled(ls, lms);
+ if (gl_found < 0) {
+ log_error("S %s prepare_lockspace_san gl_enabled error %d", lsname, gl_found);
+ ret = -EARGS;
+ goto fail;
+ }
+
+ ls->sanlock_gl_enabled = gl_found;
+
+ /* Only one sanlock lockspace may act as the global lock at a time. */
+ if (gl_found) {
+ if (gl_use_dlm) {
+ log_error("S %s prepare_lockspace_san gl_use_dlm is set", lsname);
+ } else if (gl_lsname_sanlock[0] && strcmp(gl_lsname_sanlock, lsname)) {
+ log_error("S %s prepare_lockspace_san multiple sanlock global locks current %s",
+ lsname, gl_lsname_sanlock);
+ } else {
+ strncpy(gl_lsname_sanlock, lsname, MAX_NAME);
+ log_debug("S %s prepare_lockspace_san use global lock %s",
+ lsname, gl_lsname_sanlock);
+ }
+ }
+
+out:
+ ls->lm_data = lms;
+ log_debug("S %s prepare_lockspace_san done", lsname);
+ return 0;
+
+fail:
+ if (lms && lms->sock)
+ close(lms->sock);
+ if (lms)
+ free(lms);
+ return ret;
+}
+
+/*
+ * Join the sanlock lockspace prepared by lm_prepare_lockspace_sanlock().
+ * On failure the sanlock socket is closed and ls->lm_data freed, undoing
+ * the prepare step.
+ */
+int lm_add_lockspace_sanlock(struct lockspace *ls, int adopt)
+{
+ struct lm_sanlock *lms = (struct lm_sanlock *)ls->lm_data;
+ int rv;
+
+ rv = sanlock_add_lockspace_timeout(&lms->ss, 0, sanlock_io_timeout);
+ if (rv == -EEXIST && adopt) {
+ /* We could alternatively just skip the sanlock call for adopt. */
+ log_debug("S %s add_lockspace_san adopt found ls", ls->name);
+ goto out;
+ }
+ if (rv < 0) {
+ /* retry for some errors? */
+ log_error("S %s add_lockspace_san add_lockspace error %d", ls->name, rv);
+ goto fail;
+ }
+
+ /*
+ * Don't let the lockspace be cleanly released if orphan locks
+ * exist, because the orphan locks are still protecting resources
+ * that are being used on the host, e.g. active lvs. If the
+ * lockspace is cleanly released, another host could acquire the
+ * orphan leases.
+ */
+
+ rv = sanlock_set_config(ls->name, 0, SANLK_CONFIG_USED_BY_ORPHANS, NULL);
+ if (rv < 0) {
+ log_error("S %s add_lockspace_san set_config error %d", ls->name, rv);
+ sanlock_rem_lockspace(&lms->ss, 0);
+ goto fail;
+ }
+
+out:
+ log_debug("S %s add_lockspace_san done", ls->name);
+ return 0;
+
+fail:
+ if (close(lms->sock))
+ log_error("failed to close sanlock daemon socket connection");
+ free(lms);
+ ls->lm_data = NULL;
+ return rv;
+}
+
+/*
+ * Leave the sanlock lockspace and release the per-lockspace state.
+ * With free_vg (vgremove), also clobber the on-disk lockspace name so
+ * other hosts cannot keep using the deleted delta leases.
+ */
+int lm_rem_lockspace_sanlock(struct lockspace *ls, int free_vg)
+{
+ struct lm_sanlock *lms = (struct lm_sanlock *)ls->lm_data;
+ int rv;
+
+ if (daemon_test)
+ goto out;
+
+ rv = sanlock_rem_lockspace(&lms->ss, 0);
+ if (rv < 0) {
+ log_error("S %s rem_lockspace_san error %d", ls->name, rv);
+ return rv;
+ }
+
+ if (free_vg) {
+ /*
+ * Destroy sanlock lockspace (delta leases). Forces failure for any
+ * other host that is still using or attempts to use this lockspace.
+ * This shouldn't be generally necessary, but there may be some races
+ * between nodes starting and removing a vg which this could help.
+ */
+ strncpy(lms->ss.name, "#unused", SANLK_NAME_LEN);
+
+ rv = sanlock_write_lockspace(&lms->ss, 0, 0, sanlock_io_timeout);
+ if (rv < 0) {
+ log_error("S %s rem_lockspace free_vg write_lockspace error %d %s",
+ ls->name, rv, lms->ss.host_id_disk.path);
+ }
+ }
+out:
+ /*
+ * FIX: in daemon_test mode sanlock_register() was never called in
+ * prepare, so lms->sock is still 0; closing it unconditionally would
+ * close stdin (fd 0).
+ */
+ if (!daemon_test && close(lms->sock))
+ log_error("failed to close sanlock daemon socket connection");
+
+ free(lms);
+ ls->lm_data = NULL;
+
+ /* FIXME: should we only clear gl_lsname when doing free_vg? */
+
+ if (!strcmp(ls->name, gl_lsname_sanlock))
+ memset(gl_lsname_sanlock, 0, sizeof(gl_lsname_sanlock));
+
+ return 0;
+}
+
+/*
+ * Fill in the sanlock resource description for r the first time it is
+ * locked: lockspace/resource names, disk path, and the fixed lease
+ * offset for GL/VG locks. GL and VG resources also get a value block.
+ */
+static int lm_add_resource_sanlock(struct lockspace *ls, struct resource *r)
+{
+ struct lm_sanlock *lms = (struct lm_sanlock *)ls->lm_data;
+ struct rd_sanlock *rds = (struct rd_sanlock *)r->lm_data;
+
+ strncpy(rds->rs.lockspace_name, ls->name, SANLK_NAME_LEN);
+ strncpy(rds->rs.name, r->name, SANLK_NAME_LEN);
+ rds->rs.num_disks = 1;
+ memcpy(rds->rs.disks[0].path, lms->ss.host_id_disk.path, SANLK_PATH_LEN);
+
+ if (r->type == LD_RT_GL)
+ rds->rs.disks[0].offset = GL_LOCK_BEGIN * lms->align_size;
+ else if (r->type == LD_RT_VG)
+ rds->rs.disks[0].offset = VG_LOCK_BEGIN * lms->align_size;
+
+ /* LD_RT_LV offset is set in each lm_lock call from lv_args. */
+
+ if (r->type == LD_RT_GL || r->type == LD_RT_VG) {
+ rds->vb = malloc(sizeof(struct val_blk));
+ if (!rds->vb)
+ return -ENOMEM;
+ memset(rds->vb, 0, sizeof(struct val_blk));
+ }
+
+ return 0;
+}
+
+/*
+ * Free the per-resource sanlock state set up by lm_add_resource_sanlock
+ * and mark the resource uninitialized so the next lm_lock re-adds it.
+ */
+int lm_rem_resource_sanlock(struct lockspace *ls, struct resource *r)
+{
+ struct rd_sanlock *rds = (struct rd_sanlock *)r->lm_data;
+
+ /* FIXME: assert r->mode == UN or unlock if it's not? */
+
+ if (rds->vb)
+ free(rds->vb);
+
+ /* memset also clears rds->vb, avoiding a dangling pointer. */
+ memset(rds, 0, sizeof(struct rd_sanlock));
+ r->lm_init = 0;
+ return 0;
+}
+
+/*
+ * Acquire a sanlock lease for resource r in mode ld_mode (sh or ex).
+ * On success, *r_version is filled from the lease's value block when
+ * one exists. *retry tells the caller whether re-trying the request
+ * is worthwhile. Sanlock-specific errors are mapped to lvmlockd error
+ * codes (-EAGAIN, -EMSGSIZE, -EUCLEAN, -ENOENT, -EUNATCH, -ELOCKIO).
+ */
+int lm_lock_sanlock(struct lockspace *ls, struct resource *r, int ld_mode,
+ uint32_t *r_version, int *retry, int adopt)
+{
+ struct lm_sanlock *lms = (struct lm_sanlock *)ls->lm_data;
+ struct rd_sanlock *rds = (struct rd_sanlock *)r->lm_data;
+ struct sanlk_resource *rs;
+ uint64_t lock_lv_offset;
+ uint32_t flags = 0;
+ struct val_blk vb;
+ uint16_t vb_version;
+ int added = 0;
+ int rv;
+
+ /* Lazily set up the sanlock resource description on first lock. */
+ if (!r->lm_init) {
+ rv = lm_add_resource_sanlock(ls, r);
+ if (rv < 0)
+ return rv;
+ r->lm_init = 1;
+ added = 1;
+ }
+
+ rs = &rds->rs;
+
+ /*
+ * While there are duplicate global locks, keep checking
+ * to see if any have been disabled.
+ */
+ if (sanlock_gl_dup && ls->sanlock_gl_enabled &&
+ (r->type == LD_RT_GL || r->type == LD_RT_VG))
+ ls->sanlock_gl_enabled = gl_is_enabled(ls, ls->lm_data);
+
+ if (r->type == LD_RT_LV) {
+ /*
+ * The lv may have been removed and recreated with a new lease
+ * offset, so we need to get the offset from lv_args each time
+ * instead of reusing the value that we last set in rds->rs.
+ * act->lv_args is copied to r->lv_args before every lm_lock().
+ */
+
+ rv = check_args_version(r->lv_args, LV_LOCK_ARGS_MAJOR);
+ if (rv < 0) {
+ log_error("S %s R %s lock_san wrong lv_args version %s",
+ ls->name, r->name, r->lv_args);
+ return rv;
+ }
+
+ rv = lock_lv_offset_from_args(r->lv_args, &lock_lv_offset);
+ if (rv < 0) {
+ log_error("S %s R %s lock_san lv_offset_from_args error %d %s",
+ ls->name, r->name, rv, r->lv_args);
+ return rv;
+ }
+
+ if (!added && (rds->rs.disks[0].offset != lock_lv_offset)) {
+ log_debug("S %s R %s lock_san offset old %llu new %llu",
+ ls->name, r->name,
+ (unsigned long long)rds->rs.disks[0].offset,
+ (unsigned long long)lock_lv_offset);
+ }
+
+ rds->rs.disks[0].offset = lock_lv_offset;
+ }
+
+ if (ld_mode == LD_LK_SH) {
+ rs->flags |= SANLK_RES_SHARED;
+ } else if (ld_mode == LD_LK_EX) {
+ rs->flags &= ~SANLK_RES_SHARED;
+ } else {
+ log_error("lock_san invalid mode %d", ld_mode);
+ return -EINVAL;
+ }
+
+ /*
+ * Use PERSISTENT because if lvmlockd exits while holding
+ * a lock, it's not safe to simply clear/drop the lock while
+ * a command or lv is using it.
+ */
+
+ rs->flags |= SANLK_RES_PERSISTENT;
+
+ log_debug("S %s R %s lock_san acquire %s:%llu",
+ ls->name, r->name, rs->disks[0].path,
+ (unsigned long long)rs->disks[0].offset);
+
+ if (daemon_test) {
+ *r_version = 0;
+ return 0;
+ }
+
+ if (rds->vb)
+ flags |= SANLK_ACQUIRE_LVB;
+ if (adopt)
+ flags |= SANLK_ACQUIRE_ORPHAN_ONLY;
+
+ rv = sanlock_acquire(lms->sock, -1, flags, 1, &rs, NULL);
+
+ if (rv == -EAGAIN) {
+ /*
+ * It appears that sanlock_acquire returns EAGAIN when we request
+ * a shared lock but the lock is held ex by another host.
+ * There's no point in retrying this case, just return an error.
+ */
+ log_debug("S %s R %s lock_san acquire mode %d rv EAGAIN", ls->name, r->name, ld_mode);
+ *retry = 0;
+ return -EAGAIN;
+ }
+
+ if ((rv == -EMSGSIZE) && (r->type == LD_RT_LV)) {
+ /*
+ * sanlock tried to read beyond the end of the device,
+ * so the offset of the lv lease is beyond the end of the
+ * device, which means that the lease lv was extended, and
+ * the lease for this lv was allocated in the new space.
+ * The lvm command will see this error, refresh the lvmlock
+ * lv, and try again.
+ */
+ log_debug("S %s R %s lock_san acquire offset %llu rv EMSGSIZE",
+ ls->name, r->name, (unsigned long long)rs->disks[0].offset);
+ *retry = 0;
+ return -EMSGSIZE;
+ }
+
+ if (adopt && (rv == -EUCLEAN)) {
+ /*
+ * The orphan lock exists but in a different mode than we asked
+ * for, so the caller should try again with the other mode.
+ */
+ log_debug("S %s R %s lock_san adopt mode %d try other mode",
+ ls->name, r->name, ld_mode);
+ *retry = 0;
+ return -EUCLEAN;
+ }
+
+ if (adopt && (rv == -ENOENT)) {
+ /*
+ * No orphan lock exists.
+ */
+ log_debug("S %s R %s lock_san adopt mode %d no orphan found",
+ ls->name, r->name, ld_mode);
+ *retry = 0;
+ return -ENOENT;
+ }
+
+ if (rv == SANLK_ACQUIRE_IDLIVE || rv == SANLK_ACQUIRE_OWNED || rv == SANLK_ACQUIRE_OTHER) {
+ /*
+ * The lock is held by another host. These failures can
+ * happen while multiple hosts are concurrently acquiring
+ * shared locks. We want to retry a couple times in this
+ * case because we'll probably get the sh lock.
+ *
+ * I believe these are also the errors when requesting an
+ * ex lock that another host holds ex. We want to report
+ * something like: "lock is held by another host" in this case.
+ * Retry is pointless here.
+ *
+ * We can't distinguish between the two cases above,
+ * so if requesting a sh lock, retry a couple times,
+ * otherwise don't.
+ */
+ log_debug("S %s R %s lock_san acquire mode %d rv %d", ls->name, r->name, ld_mode, rv);
+ *retry = (ld_mode == LD_LK_SH) ? 1 : 0;
+ return -EAGAIN;
+ }
+
+ if (rv < 0) {
+ log_error("S %s R %s lock_san acquire error %d",
+ ls->name, r->name, rv);
+
+ /* if the gl has been disabled, remove and free the gl resource */
+ if ((rv == SANLK_LEADER_RESOURCE) && (r->type == LD_RT_GL)) {
+ if (!lm_gl_is_enabled(ls)) {
+ log_error("S %s R %s lock_san gl has been disabled",
+ ls->name, r->name);
+ if (!strcmp(gl_lsname_sanlock, ls->name))
+ memset(gl_lsname_sanlock, 0, sizeof(gl_lsname_sanlock));
+ return -EUNATCH;
+ }
+ }
+
+ if (added)
+ lm_rem_resource_sanlock(ls, r);
+
+ /* sanlock gets i/o errors trying to read/write the leases. */
+ if (rv == -EIO)
+ rv = -ELOCKIO;
+
+ /*
+ * The sanlock lockspace can disappear if the lease storage fails,
+ * the delta lease renewals fail, the lockspace enters recovery,
+ * lvmlockd holds no leases in the lockspace, so sanlock can
+ * stop and free the lockspace.
+ */
+ if (rv == -ENOSPC)
+ rv = -ELOCKIO;
+
+ return rv;
+ }
+
+ /* Acquired; read the value block to report the resource version. */
+ if (rds->vb) {
+ rv = sanlock_get_lvb(0, rs, (char *)&vb, sizeof(vb));
+ if (rv < 0) {
+ log_error("S %s R %s lock_san get_lvb error %d", ls->name, r->name, rv);
+ *r_version = 0;
+ goto out;
+ }
+
+ vb_version = le16_to_cpu(vb.version);
+
+ /* An unknown major vb version means we cannot interpret it. */
+ if (vb_version && ((vb_version & 0xFF00) > (VAL_BLK_VERSION & 0xFF00))) {
+ log_error("S %s R %s lock_san ignore vb_version %x",
+ ls->name, r->name, vb_version);
+ *r_version = 0;
+ free(rds->vb);
+ rds->vb = NULL;
+ goto out;
+ }
+
+ *r_version = le32_to_cpu(vb.r_version);
+ memcpy(rds->vb, &vb, sizeof(vb)); /* rds->vb saved as le */
+
+ log_debug("S %s R %s lock_san get r_version %u",
+ ls->name, r->name, *r_version);
+ }
+out:
+ return rv;
+}
+
+/*
+ * Convert a held lease between sh and ex. When demoting from ex, the
+ * value block is first updated with r_version so other hosts see the
+ * new version when they next acquire the lock.
+ */
+int lm_convert_sanlock(struct lockspace *ls, struct resource *r,
+ int ld_mode, uint32_t r_version)
+{
+ struct lm_sanlock *lms = (struct lm_sanlock *)ls->lm_data;
+ struct rd_sanlock *rds = (struct rd_sanlock *)r->lm_data;
+ struct sanlk_resource *rs = &rds->rs;
+ struct val_blk vb;
+ uint32_t flags = 0;
+ int rv;
+
+ log_debug("S %s R %s convert_san", ls->name, r->name);
+
+ /* In test mode only the in-memory flags are updated below. */
+ if (daemon_test)
+ goto rs_flag;
+
+ if (rds->vb && r_version && (r->mode == LD_LK_EX)) {
+ if (!rds->vb->version) {
+ /* first time vb has been written */
+ rds->vb->version = cpu_to_le16(VAL_BLK_VERSION);
+ }
+ if (r_version)
+ rds->vb->r_version = cpu_to_le32(r_version);
+ memcpy(&vb, rds->vb, sizeof(vb));
+
+ log_debug("S %s R %s convert_san set r_version %u",
+ ls->name, r->name, r_version);
+
+ rv = sanlock_set_lvb(0, rs, (char *)&vb, sizeof(vb));
+ if (rv < 0) {
+ log_error("S %s R %s convert_san set_lvb error %d",
+ ls->name, r->name, rv);
+ }
+ }
+
+ rs_flag:
+ if (ld_mode == LD_LK_SH)
+ rs->flags |= SANLK_RES_SHARED;
+ else
+ rs->flags &= ~SANLK_RES_SHARED;
+
+ if (daemon_test)
+ return 0;
+
+ rv = sanlock_convert(lms->sock, -1, flags, rs);
+ if (rv == -EAGAIN) {
+ /* FIXME: When could this happen? Should something different be done? */
+ log_error("S %s R %s convert_san EAGAIN", ls->name, r->name);
+ return -EAGAIN;
+ }
+ if (rv < 0) {
+ log_error("S %s R %s convert_san convert error %d", ls->name, r->name, rv);
+ }
+
+ return rv;
+}
+
+/*
+ * Release the lease while renaming the on-disk resource to
+ * "invalid_removed" in one step, so the gl/vg lock of a removed vg
+ * cannot be reacquired under its old name. Used from
+ * lm_unlock_sanlock for the LMUF_FREE_VG (vgremove) case.
+ */
+static int release_rename(struct lockspace *ls, struct resource *r)
+{
+ struct rd_sanlock rd1;
+ struct rd_sanlock rd2;
+ struct sanlk_resource *res1;
+ struct sanlk_resource *res2;
+ struct sanlk_resource **res_args;
+ struct lm_sanlock *lms = (struct lm_sanlock *)ls->lm_data;
+ struct rd_sanlock *rds = (struct rd_sanlock *)r->lm_data;
+ int rv;
+
+ log_debug("S %s R %s release rename", ls->name, r->name);
+
+ res_args = malloc(2 * sizeof(struct sanlk_resource *));
+ if (!res_args)
+ return -ENOMEM;
+
+ /* rd1 = current resource, rd2 = same resource under the new name. */
+ memcpy(&rd1, rds, sizeof(struct rd_sanlock));
+ memcpy(&rd2, rds, sizeof(struct rd_sanlock));
+
+ res1 = (struct sanlk_resource *)&rd1;
+ res2 = (struct sanlk_resource *)&rd2;
+
+ strcpy(res2->name, "invalid_removed");
+
+ res_args[0] = res1;
+ res_args[1] = res2;
+
+ rv = sanlock_release(lms->sock, -1, SANLK_REL_RENAME, 2, res_args);
+ if (rv < 0) {
+ log_error("S %s R %s unlock_san release rename error %d", ls->name, r->name, rv);
+ }
+
+ free(res_args);
+
+ return rv;
+}
+
+/*
+ * rds->vb is stored in le
+ *
+ * r_version is r->version
+ *
+ * for GL locks lvmlockd just increments this value
+ * each time the global lock is released from ex.
+ *
+ * for VG locks it is the seqno from the vg metadata.
+ */
+
+int lm_unlock_sanlock(struct lockspace *ls, struct resource *r,
+ uint32_t r_version, uint32_t lmu_flags)
+{
+ struct lm_sanlock *lms = (struct lm_sanlock *)ls->lm_data;
+ struct rd_sanlock *rds = (struct rd_sanlock *)r->lm_data;
+ struct sanlk_resource *rs = &rds->rs;
+ struct val_blk vb;
+ int rv;
+
+ log_debug("S %s R %s unlock_san r_version %u flags %x",
+ ls->name, r->name, r_version, lmu_flags);
+
+ if (daemon_test)
+ return 0;
+
+ /* Publish the new resource version in the value block before release. */
+ if (rds->vb && r_version && (r->mode == LD_LK_EX)) {
+ if (!rds->vb->version) {
+ /* first time vb has been written */
+ rds->vb->version = cpu_to_le16(VAL_BLK_VERSION);
+ }
+ if (r_version)
+ rds->vb->r_version = cpu_to_le32(r_version);
+ memcpy(&vb, rds->vb, sizeof(vb));
+
+ log_debug("S %s R %s unlock_san set r_version %u",
+ ls->name, r->name, r_version);
+
+ rv = sanlock_set_lvb(0, rs, (char *)&vb, sizeof(vb));
+ if (rv < 0) {
+ log_error("S %s R %s unlock_san set_lvb error %d",
+ ls->name, r->name, rv);
+ }
+ }
+
+ /*
+ * For vgremove (FREE_VG) we unlock-rename the vg and gl locks
+ * so they cannot be reacquired.
+ */
+ if ((lmu_flags & LMUF_FREE_VG) &&
+ (r->type == LD_RT_GL || r->type == LD_RT_VG)) {
+ return release_rename(ls, r);
+ }
+
+ rv = sanlock_release(lms->sock, -1, 0, 1, &rs);
+ if (rv < 0)
+ log_error("S %s R %s unlock_san release error %d", ls->name, r->name, rv);
+
+ /* Map lease-storage i/o failure to lvmlockd's error code. */
+ if (rv == -EIO)
+ rv = -ELOCKIO;
+
+ return rv;
+}
+
+/*
+ * Count other hosts that are live in this lockspace (used e.g. by
+ * vgremove to refuse removal while others are using the vg).
+ * Returns the number of other live hosts, or 0 on any error or when
+ * this host itself is not found.
+ */
+int lm_hosts_sanlock(struct lockspace *ls, int notify)
+{
+ struct sanlk_host *hss = NULL;
+ struct sanlk_host *hs;
+ uint32_t state;
+ int hss_count = 0;
+ int found_self = 0;
+ int found_others = 0;
+ int i, rv;
+
+ rv = sanlock_get_hosts(ls->name, 0, &hss, &hss_count, 0);
+ if (rv < 0) {
+ log_error("S %s hosts_san get_hosts error %d", ls->name, rv);
+ return 0;
+ }
+
+ if (!hss || !hss_count) {
+ log_error("S %s hosts_san zero hosts", ls->name);
+ /* FIX: hss may be a non-NULL, zero-length allocation here;
+ * free it to avoid a leak (free(NULL) is a no-op). */
+ free(hss);
+ return 0;
+ }
+
+ hs = hss;
+
+ for (i = 0; i < hss_count; i++) {
+ log_debug("S %s hosts_san host_id %llu gen %llu flags %x",
+ ls->name,
+ (unsigned long long)hs->host_id,
+ (unsigned long long)hs->generation,
+ hs->flags);
+
+ if (hs->host_id == ls->host_id) {
+ found_self = 1;
+ hs++;
+ continue;
+ }
+
+ state = hs->flags & SANLK_HOST_MASK;
+ if (state == SANLK_HOST_LIVE)
+ found_others++;
+ hs++;
+ }
+ free(hss);
+
+ if (found_others && notify) {
+ /*
+ * We could use the sanlock event mechanism to notify lvmlockd
+ * on other hosts to stop this VG. lvmlockd would need to
+ * register for and listen for sanlock events in the main loop.
+ * The events are slow to propagate. We'd need to retry for a
+ * while before all the hosts see the event and stop the VG.
+ * sanlock_set_event(ls->name, &he, SANLK_SETEV_ALL_HOSTS);
+ *
+ * Wait to try this until there appears to be real value/interest
+ * in doing it.
+ */
+ }
+
+ if (!found_self) {
+ log_error("S %s hosts_san self not found others %d", ls->name, found_others);
+ return 0;
+ }
+
+ return found_others;
+}
+
+/*
+ * Query the sanlock daemon for all joined lockspaces and append a new
+ * struct lockspace to ls_rejoin for each one with the LVM_LS_PREFIX
+ * name prefix (used to adopt lockspaces after a daemon restart).
+ */
+int lm_get_lockspaces_sanlock(struct list_head *ls_rejoin)
+{
+ struct sanlk_lockspace *ss_all = NULL;
+ struct sanlk_lockspace *ss;
+ struct lockspace *ls;
+ int ss_count = 0;
+ int i, rv;
+
+ rv = sanlock_get_lockspaces(&ss_all, &ss_count, 0);
+ if (rv < 0)
+ return rv;
+
+ if (!ss_all || !ss_count)
+ return 0;
+
+ for (i = 0; i < ss_count; i++) {
+ /*
+ * FIX: index directly instead of a trailing ss++; previously
+ * the continue below skipped the increment, so a non-LVM
+ * lockspace stalled the pointer and later entries were
+ * re-read from the same record.
+ */
+ ss = &ss_all[i];
+
+ if (strncmp(ss->name, LVM_LS_PREFIX, strlen(LVM_LS_PREFIX)))
+ continue;
+
+ if (!(ls = alloc_lockspace())) {
+ /* FIX: don't leak the array from sanlock on OOM. */
+ free(ss_all);
+ return -ENOMEM;
+ }
+
+ ls->lm_type = LD_LM_SANLOCK;
+ ls->host_id = ss->host_id;
+ strncpy(ls->name, ss->name, MAX_NAME);
+ strncpy(ls->vg_name, ss->name + strlen(LVM_LS_PREFIX), MAX_NAME);
+ list_add_tail(&ls->list, ls_rejoin);
+ }
+
+ free(ss_all);
+ return 0;
+}
+
+/* Returns 1 if the sanlock daemon answers a version query, else 0. */
+int lm_is_running_sanlock(void)
+{
+ uint32_t daemon_version;
+ uint32_t daemon_proto;
+
+ return (sanlock_version(0, &daemon_version, &daemon_proto) < 0) ? 0 : 1;
+}
diff --git a/daemons/lvmpolld/.gitignore b/daemons/lvmpolld/.gitignore
new file mode 100644
index 000000000..4ffad0a68
--- /dev/null
+++ b/daemons/lvmpolld/.gitignore
@@ -0,0 +1 @@
+lvmpolld
diff --git a/daemons/lvmpolld/Makefile.in b/daemons/lvmpolld/Makefile.in
new file mode 100644
index 000000000..8ebbb906c
--- /dev/null
+++ b/daemons/lvmpolld/Makefile.in
@@ -0,0 +1,48 @@
+#
+# Copyright (C) 2014-2015 Red Hat, Inc.
+#
+# This file is part of LVM2.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU Lesser General Public License v.2.1.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+top_builddir = @top_builddir@
+
+SOURCES = lvmpolld-core.c lvmpolld-data-utils.c lvmpolld-cmd-utils.c
+
+TARGETS = lvmpolld
+
+.PHONY: install_lvmpolld
+
+CFLOW_LIST = $(SOURCES)
+CFLOW_LIST_TARGET = $(LIB_NAME).cflow
+CFLOW_TARGET = lvmpolld
+
+include $(top_builddir)/make.tmpl
+
+INCLUDES += -I$(top_srcdir)/libdaemon/server
+LVMLIBS = -ldaemonserver $(LVMINTERNAL_LIBS) -ldevmapper
+
+LIBS += $(PTHREAD_LIBS)
+
+LDFLAGS += -L$(top_builddir)/libdaemon/server $(DAEMON_LDFLAGS)
+CLDFLAGS += -L$(top_builddir)/libdaemon/server
+CFLAGS += $(DAEMON_CFLAGS)
+
+lvmpolld: $(OBJECTS) $(top_builddir)/libdaemon/client/libdaemonclient.a \
+ $(top_builddir)/libdaemon/server/libdaemonserver.a
+ $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJECTS) $(LVMLIBS) $(LIBS)
+
+install_lvmpolld: lvmpolld
+ $(INSTALL_PROGRAM) -D $< $(sbindir)/$(<F)
+
+install_lvm2: install_lvmpolld
+
+install: install_lvm2
diff --git a/daemons/lvmpolld/lvmpolld-cmd-utils.c b/daemons/lvmpolld/lvmpolld-cmd-utils.c
new file mode 100644
index 000000000..024822159
--- /dev/null
+++ b/daemons/lvmpolld/lvmpolld-cmd-utils.c
@@ -0,0 +1,144 @@
+/*
+ * Copyright (C) 2015 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "lvmpolld-common.h"
+
+/* extract this info from autoconf/automake files */
+#define LVPOLL_CMD "lvpoll"
+
+#define MIN_ARGV_SIZE 8
+
+/* Wire-protocol request name for each poll_type, indexed by enum value.
+ * (The duplicated 'const' qualifier was dropped: it was redundant in
+ * C99+ and a constraint violation in C89.) */
+static const char *const polling_ops[] = { [PVMOVE] = LVMPD_REQ_PVMOVE,
+					   [CONVERT] = LVMPD_REQ_CONVERT,
+					   [MERGE] = LVMPD_REQ_MERGE,
+					   [MERGE_THIN] = LVMPD_REQ_MERGE_THIN };
+
+/* Map a poll_type to its wire-protocol request name. */
+const char *polling_op(enum poll_type type)
+{
+	if (type < POLL_TYPE_MAX)
+		return polling_ops[type];
+
+	return "<undefined>";
+}
+
+/*
+ * Append 'str' to the growing argv/envp-style array at *cmdargv and
+ * advance *ind past it.
+ *
+ * The array grows in MIN_ARGV_SIZE chunks: whenever *ind has reached a
+ * multiple of MIN_ARGV_SIZE the array is reallocated one chunk larger.
+ * On realloc failure *cmdargv is left untouched and 0 is returned;
+ * returns 1 on success.
+ */
+static int add_to_cmd_arr(const char ***cmdargv, const char *str, unsigned *ind)
+{
+	const char **newargv = *cmdargv;
+
+	if (*ind && !(*ind % MIN_ARGV_SIZE)) {
+		newargv = dm_realloc(*cmdargv, (*ind / MIN_ARGV_SIZE + 1) * MIN_ARGV_SIZE * sizeof(char *));
+		if (!newargv)
+			return 0;
+		*cmdargv = newargv;
+	}
+
+	*(*cmdargv + (*ind)++) = str;
+
+	return 1;
+}
+
+/*
+ * Build the NULL-terminated argv for the "lvm lvpoll" command spawned
+ * for 'pdlv'. Strings are NOT duplicated: entries point at literals
+ * and at 'pdlv' members, so the array must not outlive 'pdlv'.
+ * Returns the dm_malloc()ed array (caller releases with dm_free()) or
+ * NULL on allocation failure.
+ */
+const char **cmdargv_ctr(const struct lvmpolld_lv *pdlv, const char *lvm_binary, unsigned abort_polling, unsigned handle_missing_pvs)
+{
+	unsigned i = 0;
+	const char **cmd_argv = dm_malloc(MIN_ARGV_SIZE * sizeof(char *));
+
+	if (!cmd_argv)
+		return NULL;
+
+	/* path to lvm2 binary */
+	if (!add_to_cmd_arr(&cmd_argv, lvm_binary, &i))
+		goto err;
+
+	/* cmd to execute */
+	if (!add_to_cmd_arr(&cmd_argv, LVPOLL_CMD, &i))
+		goto err;
+
+	/* transfer internal polling interval */
+	if (pdlv->sinterval &&
+	    (!add_to_cmd_arr(&cmd_argv, "--interval", &i) ||
+	     !add_to_cmd_arr(&cmd_argv, pdlv->sinterval, &i)))
+		goto err;
+
+	/* pass abort param */
+	if (abort_polling &&
+	    !add_to_cmd_arr(&cmd_argv, "--abort", &i))
+		goto err;
+
+	/* pass handle-missing-pvs. used by mirror polling operation */
+	if (handle_missing_pvs &&
+	    !add_to_cmd_arr(&cmd_argv, "--handlemissingpvs", &i))
+		goto err;
+
+	/* one of: "convert", "pvmove", "merge", "merge_thin" */
+	if (!add_to_cmd_arr(&cmd_argv, "--polloperation", &i) ||
+	    !add_to_cmd_arr(&cmd_argv, polling_ops[pdlv->type], &i))
+		goto err;
+
+	/* vg/lv name */
+	if (!add_to_cmd_arr(&cmd_argv, pdlv->lvname, &i))
+		goto err;
+
+	/* disable metadata backup */
+	if (!add_to_cmd_arr(&cmd_argv, "-An", &i))
+		goto err;
+
+	/* terminating NULL */
+	if (!add_to_cmd_arr(&cmd_argv, NULL, &i))
+		goto err;
+
+	return cmd_argv;
+err:
+	dm_free(cmd_argv);
+	return NULL;
+}
+
+/* FIXME: in fact exclude should be va list */
+/*
+ * Copy pointers to lvmpolld's own environment into *cmd_envp, skipping
+ * any variable whose "NAME=" form starts with 'exclude' (prefix match).
+ * Entries are not duplicated. Returns 1 on success, 0 when environ is
+ * unavailable or the array cannot be grown.
+ */
+static int copy_env(const char ***cmd_envp, unsigned *i, const char *exclude)
+{
+	const char * const* tmp = (const char * const*) environ;
+
+	if (!tmp)
+		return 0;
+
+	while (*tmp) {
+		if (strncmp(*tmp, exclude, strlen(exclude)) && !add_to_cmd_arr(cmd_envp, *tmp, i))
+			return 0;
+		tmp++;
+	}
+
+	return 1;
+}
+
+/*
+ * Build the NULL-terminated envp for the lvpoll command: lvmpolld's own
+ * environment minus LVM_SYSTEM_DIR, plus the client's LVM_SYSTEM_DIR
+ * when one was sent with the request. Entries are not duplicated.
+ * Returns the dm_malloc()ed array or NULL on allocation failure.
+ */
+const char **cmdenvp_ctr(const struct lvmpolld_lv *pdlv)
+{
+	unsigned i = 0;
+	const char **cmd_envp = dm_malloc(MIN_ARGV_SIZE * sizeof(char *));
+
+	if (!cmd_envp)
+		return NULL;
+
+	/* copy whole environment from lvmpolld, exclude LVM_SYSTEM_DIR if set */
+	if (!copy_env(&cmd_envp, &i, "LVM_SYSTEM_DIR="))
+		goto err;
+
+	/* Add per client LVM_SYSTEM_DIR variable if set */
+	if (*pdlv->lvm_system_dir_env && !add_to_cmd_arr(&cmd_envp, pdlv->lvm_system_dir_env, &i))
+		goto err;
+
+	/* terminating NULL */
+	if (!add_to_cmd_arr(&cmd_envp, NULL, &i))
+		goto err;
+
+	return cmd_envp;
+err:
+	dm_free(cmd_envp);
+	return NULL;
+}
diff --git a/daemons/lvmpolld/lvmpolld-cmd-utils.h b/daemons/lvmpolld/lvmpolld-cmd-utils.h
new file mode 100644
index 000000000..509c2e999
--- /dev/null
+++ b/daemons/lvmpolld/lvmpolld-cmd-utils.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2015 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _LVM_LVMPOLLD_CMD_UTILS_H
+#define _LVM_LVMPOLLD_CMD_UTILS_H
+
+#include "lvmpolld-data-utils.h"
+
+const char **cmdargv_ctr(const struct lvmpolld_lv *pdlv, const char *lvm_binary, unsigned abort, unsigned handle_missing_pvs);
+const char **cmdenvp_ctr(const struct lvmpolld_lv *pdlv);
+
+const char *polling_op(enum poll_type);
+
+#endif /* _LVM_LVMPOLLD_CMD_UTILS_H */
diff --git a/lib/misc/timestamp.h b/daemons/lvmpolld/lvmpolld-common.h
index 50e2a853b..16f764240 100644
--- a/lib/misc/timestamp.h
+++ b/daemons/lvmpolld/lvmpolld-common.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2006 Rackable Systems All rights reserved.
+ * Copyright (C) 2010-2015 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
@@ -12,22 +12,20 @@
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
-#ifndef _LVM_TIMESTAMP_H
-#define _LVM_TIMESTAMP_H
-
-struct timestamp;
+/*
+ * This file must be included first by every lvmpolld source file.
+ */
+#ifndef _LVM_LVMPOLLD_COMMON_H
+#define _LVM_LVMPOLLD_COMMON_H
-struct timestamp *get_timestamp(void);
+#define _REENTRANT
-/* cmp_timestamp: Compare two timestamps
- *
- * Return: -1 if t1 is less than t2
- * 0 if t1 is equal to t2
- * 1 if t1 is greater than t2
- */
-int cmp_timestamp(struct timestamp *t1, struct timestamp *t2);
+#include "tool.h"
-void destroy_timestamp(struct timestamp *t);
+#include "lvmpolld-cmd-utils.h"
+#include "lvmpolld-protocol.h"
-#endif /* _LVM_TIMESTAMP_H */
+#include <assert.h>
+#include <errno.h>
+#endif /* _LVM_LVMPOLLD_COMMON_H */
diff --git a/daemons/lvmpolld/lvmpolld-core.c b/daemons/lvmpolld/lvmpolld-core.c
new file mode 100644
index 000000000..7d2a98b23
--- /dev/null
+++ b/daemons/lvmpolld/lvmpolld-core.c
@@ -0,0 +1,989 @@
+/*
+ * Copyright (C) 2014-2015 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "lvmpolld-common.h"
+
+#include "lvm-version.h"
+#include "daemon-server.h"
+#include "daemon-log.h"
+
+#include <getopt.h>
+#include <poll.h>
+#include <wait.h>
+
+#define LVMPOLLD_SOCKET DEFAULT_RUN_DIR "/lvmpolld.socket"
+
+#define PD_LOG_PREFIX "LVMPOLLD"
+#define LVM2_LOG_PREFIX "\tLVPOLL"
+
+/* predefined reason for response = "failed" case */
+#define REASON_REQ_NOT_IMPLEMENTED "request not implemented"
+#define REASON_MISSING_LVID "request requires lvid set"
+#define REASON_MISSING_LVNAME "request requires lvname set"
+#define REASON_MISSING_VGNAME "request requires vgname set"
+#define REASON_POLLING_FAILED "polling of lvm command failed"
+#define REASON_ILLEGAL_ABORT_REQUEST "abort only supported with PVMOVE polling operation"
+#define REASON_DIFFERENT_OPERATION_IN_PROGRESS "Different operation on LV already in progress"
+#define REASON_INVALID_INTERVAL "request requires interval set"
+#define REASON_ENOMEM "not enough memory"
+
+struct lvmpolld_state {
+ daemon_idle *idle;
+ log_state *log;
+ const char *log_config;
+ const char *lvm_binary;
+
+ struct lvmpolld_store *id_to_pdlv_abort;
+ struct lvmpolld_store *id_to_pdlv_poll;
+};
+
+static pthread_key_t key;
+
+/*
+ * Thread-safe strerror() using the per-thread buffer in 'data'.
+ * Selects between the GNU strerror_r (returns char *) and the XSI
+ * variant (returns int) at compile time; when neither can be detected
+ * it returns "" so callers always get a printable string. Never
+ * returns NULL.
+ */
+static const char *_strerror_r(int errnum, struct lvmpolld_thread_data *data)
+{
+#ifdef _GNU_SOURCE
+	return strerror_r(errnum, data->buf, sizeof(data->buf)); /* never returns NULL */
+#elif (_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600)
+	return strerror_r(errnum, data->buf, sizeof(data->buf)) ? "" : data->buf;
+#else
+#	warning "Can't decide proper strerror_r implementation. lvmpolld will not issue specific system error messages"
+	return "";
+#endif
+}
+
+/* Print command-line usage to 'file' (stdout for -h, stderr on errors). */
+static void _usage(const char *prog, FILE *file)
+{
+	fprintf(file, "Usage:\n"
+		"%s [-V] [-h] [-f] [-l {all|wire|debug}] [-s path] [-B path] [-p path] [-t secs]\n"
+		"%s --dump [-s path]\n"
+		"   -V|--version     Show version info\n"
+		"   -h|--help        Show this help information\n"
+		"   -f|--foreground  Don't fork, run in the foreground\n"
+		"   --dump           Dump full lvmpolld state\n"
+		"   -l|--log         Logging message level (-l {all|wire|debug})\n"
+		"   -p|--pidfile     Set path to the pidfile\n"
+		"   -s|--socket      Set path to the communication socket\n"
+		"   -B|--binary      Path to lvm2 binary\n"
+		"   -t|--timeout     Time to wait in seconds before shutdown on idle (missing or 0 = infinite)\n\n", prog, prog);
+}
+
+/*
+ * Daemon init callback: parse the log configuration, create the pthread
+ * key used for per-thread data cleanup, allocate the two operation
+ * stores (regular polling and abort) and validate the lvm binary path.
+ * Returns 1 on success, 0 on fatal error.
+ */
+static int _init(struct daemon_state *s)
+{
+	struct lvmpolld_state *ls = s->private;
+	ls->log = s->log;
+
+	if (!daemon_log_parse(ls->log, DAEMON_LOG_OUTLET_STDERR, ls->log_config, 1))
+		return 0;
+
+	if (pthread_key_create(&key, lvmpolld_thread_data_destroy)) {
+		FATAL(ls, "%s: %s", PD_LOG_PREFIX, "Failed to create pthread key");
+		return 0;
+	}
+
+	ls->id_to_pdlv_poll = pdst_init("polling");
+	ls->id_to_pdlv_abort = pdst_init("abort");
+
+	if (!ls->id_to_pdlv_poll || !ls->id_to_pdlv_abort) {
+		FATAL(ls, "%s: %s", PD_LOG_PREFIX, "Failed to allocate internal data structures");
+		return 0;
+	}
+
+	/* fall back to the configured LVM_PATH when no --binary was given */
+	ls->lvm_binary = ls->lvm_binary ?: LVM_PATH;
+
+	if (access(ls->lvm_binary, X_OK)) {
+		FATAL(ls, "%s: %s %s", PD_LOG_PREFIX, "Execute access rights denied on", ls->lvm_binary);
+		return 0;
+	}
+
+	/* no operations registered yet -> the daemon starts out idle */
+	if (ls->idle)
+		ls->idle->is_idle = 1;
+
+	return 1;
+}
+
+/* Lock both stores; poll store first — the fixed order (reversed in
+ * _lvmpolld_stores_unlock) avoids lock-order inversion. */
+static void _lvmpolld_stores_lock(struct lvmpolld_state *ls)
+{
+	pdst_lock(ls->id_to_pdlv_poll);
+	pdst_lock(ls->id_to_pdlv_abort);
+}
+
+/* Unlock both stores in reverse order of _lvmpolld_stores_lock(). */
+static void _lvmpolld_stores_unlock(struct lvmpolld_state *ls)
+{
+	pdst_unlock(ls->id_to_pdlv_abort);
+	pdst_unlock(ls->id_to_pdlv_poll);
+}
+
+/* Take both store locks, then every individual pdlv lock in both
+ * stores — used when a consistent snapshot of all state is needed. */
+static void _lvmpolld_global_lock(struct lvmpolld_state *ls)
+{
+	_lvmpolld_stores_lock(ls);
+
+	pdst_locked_lock_all_pdlvs(ls->id_to_pdlv_poll);
+	pdst_locked_lock_all_pdlvs(ls->id_to_pdlv_abort);
+}
+
+/* Release everything taken by _lvmpolld_global_lock(), reverse order. */
+static void _lvmpolld_global_unlock(struct lvmpolld_state *ls)
+{
+	pdst_locked_unlock_all_pdlvs(ls->id_to_pdlv_abort);
+	pdst_locked_unlock_all_pdlvs(ls->id_to_pdlv_poll);
+
+	_lvmpolld_stores_unlock(ls);
+}
+
+/*
+ * Daemon shutdown callback: ask every in-flight poll/abort operation to
+ * cancel, busy-wait (in 250ms steps) until no background thread is
+ * active, then destroy all remaining pdlvs, both stores and the pthread
+ * key. Always returns 1.
+ */
+static int _fini(struct daemon_state *s)
+{
+	int done;
+	const struct timespec t = { .tv_nsec = 250000000 }; /* .25 sec */
+	struct lvmpolld_state *ls = s->private;
+
+	DEBUGLOG(s, "fini");
+
+	DEBUGLOG(s, "sending cancel requests");
+
+	_lvmpolld_global_lock(ls);
+	pdst_locked_send_cancel(ls->id_to_pdlv_poll);
+	pdst_locked_send_cancel(ls->id_to_pdlv_abort);
+	_lvmpolld_global_unlock(ls);
+
+	DEBUGLOG(s, "waiting for background threads to finish");
+
+	while(1) {
+		_lvmpolld_stores_lock(ls);
+		done = !pdst_locked_get_active_count(ls->id_to_pdlv_poll) &&
+		       !pdst_locked_get_active_count(ls->id_to_pdlv_abort);
+		_lvmpolld_stores_unlock(ls);
+		if (done)
+			break;
+		nanosleep(&t, NULL);
+	}
+
+	DEBUGLOG(s, "destroying internal data structures");
+
+	_lvmpolld_stores_lock(ls);
+	pdst_locked_destroy_all_pdlvs(ls->id_to_pdlv_poll);
+	pdst_locked_destroy_all_pdlvs(ls->id_to_pdlv_abort);
+	_lvmpolld_stores_unlock(ls);
+
+	pdst_destroy(ls->id_to_pdlv_poll);
+	pdst_destroy(ls->id_to_pdlv_abort);
+
+	pthread_key_delete(key);
+
+	return 1;
+}
+
+/* Shorthand for a response carrying only a result string and a reason. */
+static response reply(const char *res, const char *reason)
+{
+	return daemon_reply_simple(res, "reason = %s", reason, NULL);
+}
+
+/*
+ * Read one line from the captured command stderr (err != 0) or stdout
+ * (err == 0) into data->line, stripping a trailing newline in place.
+ * Returns 1 when a line was read, 0 on EOF or error.
+ */
+static int read_single_line(struct lvmpolld_thread_data *data, int err)
+{
+	ssize_t r = getline(&data->line, &data->line_size, err ? data->ferr : data->fout);
+
+	if (r > 0 && *(data->line + r - 1) == '\n')
+		*(data->line + r - 1) = '\0';
+
+	return (r > 0);
+}
+
+/*
+ * Recompute the daemon's idle flag: idle iff neither store has an
+ * active operation. No-op when idle timeout support is disabled
+ * (ls->idle == NULL).
+ */
+static void update_idle_state(struct lvmpolld_state *ls)
+{
+	if (!ls->idle)
+		return;
+
+	_lvmpolld_stores_lock(ls);
+
+	ls->idle->is_idle = !pdst_locked_get_active_count(ls->id_to_pdlv_poll) &&
+			    !pdst_locked_get_active_count(ls->id_to_pdlv_abort);
+
+	_lvmpolld_stores_unlock(ls);
+
+	DEBUGLOG(ls, "%s: %s %s%s", PD_LOG_PREFIX, "daemon is", ls->idle->is_idle ? "" : "not ", "idle");
+}
+
+/* make this configurable */
+#define MAX_TIMEOUT 2
+
+/*
+ * Watch stdout and stderr of the forked lvpoll command (pdlv->cmd_pid),
+ * forward every captured line to the daemon log and wait for the child
+ * to exit. Gives up when the command produces no output for more than
+ * MAX_TIMEOUT consecutive polling intervals.
+ *
+ * On success the child's exit status/signal is recorded in 'pdlv' via
+ * pdlv_set_cmd_state(). Returns 0 on success, 1 on error (the child
+ * may then still be running; the caller reaps it).
+ */
+static int poll_for_output(struct lvmpolld_lv *pdlv, struct lvmpolld_thread_data *data)
+{
+	/* ch_stat initialised: silences may-be-uninitialised warnings */
+	int ch_stat = 0, r, err = 1, fds_count = 2, timeout = 0;
+	pid_t pid;
+	struct lvmpolld_cmd_stat cmd_state = { .retcode = -1, .signal = 0 };
+	struct pollfd fds[] = { { .fd = data->outpipe[0], .events = POLLIN },
+				{ .fd = data->errpipe[0], .events = POLLIN } };
+
+	if (!(data->fout = fdopen(data->outpipe[0], "r")) || !(data->ferr = fdopen(data->errpipe[0], "r"))) {
+		ERROR(pdlv->ls, "%s: %s: (%d) %s", PD_LOG_PREFIX, "failed to open file stream",
+		      errno, _strerror_r(errno, data));
+		goto out;
+	}
+
+	while (1) {
+		do {
+			r = poll(fds, 2, pdlv_get_timeout(pdlv) * 1000);
+		} while (r < 0 && errno == EINTR);
+
+		DEBUGLOG(pdlv->ls, "%s: %s %d", PD_LOG_PREFIX, "poll() returned", r);
+		if (r < 0) {
+			ERROR(pdlv->ls, "%s: %s (PID %d) failed: (%d) %s",
+			      PD_LOG_PREFIX, "poll() for LVM2 cmd", pdlv->cmd_pid,
+			      errno, _strerror_r(errno, data));
+			goto out;
+		} else if (!r) {
+			timeout++;
+
+			WARN(pdlv->ls, "%s: %s (PID %d) %s", PD_LOG_PREFIX,
+			     "polling for output of the lvm cmd", pdlv->cmd_pid,
+			     "has timed out");
+
+			if (timeout > MAX_TIMEOUT) {
+				ERROR(pdlv->ls, "%s: %s (PID %d) (no output for %d seconds)",
+				      PD_LOG_PREFIX,
+				      "LVM2 cmd is unresponsive too long",
+				      pdlv->cmd_pid,
+				      timeout * pdlv_get_timeout(pdlv));
+				goto out;
+			}
+
+			continue; /* while(1) */
+		}
+
+		timeout = 0;
+
+		/* handle the command's STDOUT */
+		if (fds[0].revents & POLLIN) {
+			DEBUGLOG(pdlv->ls, "%s: %s", PD_LOG_PREFIX, "caught input data in STDOUT");
+
+			/* keep the read outside assert(): with NDEBUG the
+			 * original assert(read_single_line(...)) would be
+			 * compiled away and no data consumed at all */
+			r = read_single_line(data, 0); /* may block indef. anyway */
+			assert(r);
+			INFO(pdlv->ls, "%s: PID %d: %s: '%s'", LVM2_LOG_PREFIX,
+			     pdlv->cmd_pid, "STDOUT", data->line);
+		} else if (fds[0].revents) {
+			if (fds[0].revents & POLLHUP)
+				DEBUGLOG(pdlv->ls, "%s: %s", PD_LOG_PREFIX, "caught POLLHUP");
+			else
+				WARN(pdlv->ls, "%s: %s", PD_LOG_PREFIX, "poll for command's STDOUT failed");
+
+			fds[0].fd = -1;
+			fds_count--;
+		}
+
+		/* handle the command's STDERR */
+		if (fds[1].revents & POLLIN) {
+			DEBUGLOG(pdlv->ls, "%s: %s", PD_LOG_PREFIX,
+				 "caught input data in STDERR");
+
+			/* see the STDOUT note above re. assert side effects */
+			r = read_single_line(data, 1); /* may block indef. anyway */
+			assert(r);
+			INFO(pdlv->ls, "%s: PID %d: %s: '%s'", LVM2_LOG_PREFIX,
+			     pdlv->cmd_pid, "STDERR", data->line);
+		} else if (fds[1].revents) {
+			if (fds[1].revents & POLLHUP)
+				DEBUGLOG(pdlv->ls, "%s: %s", PD_LOG_PREFIX, "caught err POLLHUP");
+			else
+				/* was mislabelled "STDOUT" in this STDERR branch */
+				WARN(pdlv->ls, "%s: %s", PD_LOG_PREFIX, "poll for command's STDERR failed");
+
+			fds[1].fd = -1;
+			fds_count--;
+		}
+
+		do {
+			/*
+			 * fds_count == 0 means polling reached EOF
+			 * or received error on both descriptors.
+			 * In such case, just wait for command to finish
+			 */
+			pid = waitpid(pdlv->cmd_pid, &ch_stat, fds_count ? WNOHANG : 0);
+		} while (pid < 0 && errno == EINTR);
+
+		if (pid) {
+			if (pid < 0) {
+				ERROR(pdlv->ls, "%s: %s (PID %d) failed: (%d) %s",
+				      PD_LOG_PREFIX, "waitpid() for lvm2 cmd",
+				      pdlv->cmd_pid, errno,
+				      _strerror_r(errno, data));
+				goto out;
+			}
+			DEBUGLOG(pdlv->ls, "%s: %s", PD_LOG_PREFIX, "child exited");
+			break;
+		}
+	} /* while(1) */
+
+	/* drain whatever the child left buffered; the stale assert(r > 0)
+	 * (it tested poll()'s old return value) was dropped here */
+	DEBUGLOG(pdlv->ls, "%s: %s", PD_LOG_PREFIX, "about to collect remaining lines");
+	if (fds[0].fd >= 0)
+		while (read_single_line(data, 0))
+			INFO(pdlv->ls, "%s: PID %d: %s: %s", LVM2_LOG_PREFIX, pdlv->cmd_pid, "STDOUT", data->line);
+	if (fds[1].fd >= 0)
+		while (read_single_line(data, 1))
+			INFO(pdlv->ls, "%s: PID %d: %s: %s", LVM2_LOG_PREFIX, pdlv->cmd_pid, "STDERR", data->line);
+
+	if (WIFEXITED(ch_stat)) {
+		INFO(pdlv->ls, "%s: %s (PID %d) %s (%d)", PD_LOG_PREFIX,
+		     "lvm2 cmd", pdlv->cmd_pid, "exited with", WEXITSTATUS(ch_stat));
+		cmd_state.retcode = WEXITSTATUS(ch_stat);
+	} else if (WIFSIGNALED(ch_stat)) {
+		WARN(pdlv->ls, "%s: %s (PID %d) %s (%d)", PD_LOG_PREFIX,
+		     "lvm2 cmd", pdlv->cmd_pid, "got terminated by signal",
+		     WTERMSIG(ch_stat));
+		cmd_state.signal = WTERMSIG(ch_stat);
+	}
+
+	err = 0;
+out:
+	if (!err)
+		pdlv_set_cmd_state(pdlv, &cmd_state);
+
+	return err;
+}
+
+/* Emit each entry of a NULL-terminated string array to the debug log. */
+static void debug_print(struct lvmpolld_state *ls, const char * const* ptr)
+{
+	const char * const *item;
+
+	if (!ptr)
+		return;
+
+	for (item = ptr; *item; item++)
+		DEBUGLOG(ls, "%s: %s", PD_LOG_PREFIX, *item);
+}
+
+/*
+ * Detached-thread entry point: fork and exec the lvpoll command for
+ * 'pdlv', then consume its output via poll_for_output().
+ *
+ * Per-thread 'data' is registered under the pthread key so
+ * lvmpolld_thread_data_destroy() runs even on cancellation; the
+ * register/unregister sections run with cancellation disabled.
+ * Completion protocol: under the store lock the thread marks polling
+ * finished (and on error remembers the child PID so the zombie can be
+ * reaped after the lock is dropped).
+ */
+static void *fork_and_poll(void *args)
+{
+	int outfd, errfd, state;
+	struct lvmpolld_thread_data *data;
+	pid_t r;
+
+	int error = 1;
+	struct lvmpolld_lv *pdlv = (struct lvmpolld_lv *) args;
+	struct lvmpolld_state *ls = pdlv->ls;
+
+	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &state);
+	data = lvmpolld_thread_data_constructor(pdlv);
+	pthread_setspecific(key, data);
+	pthread_setcancelstate(state, &state);
+
+	if (!data) {
+		ERROR(ls, "%s: %s", PD_LOG_PREFIX, "Failed to initialize per-thread data");
+		goto err;
+	}
+
+	DEBUGLOG(ls, "%s: %s", PD_LOG_PREFIX, "cmd line arguments:");
+	debug_print(ls, pdlv->cmdargv);
+	DEBUGLOG(ls, "%s: %s", PD_LOG_PREFIX, "---end---");
+
+	DEBUGLOG(ls, "%s: %s", PD_LOG_PREFIX, "cmd environment variables:");
+	debug_print(ls, pdlv->cmdenvp);
+	DEBUGLOG(ls, "%s: %s", PD_LOG_PREFIX, "---end---");
+
+	outfd = data->outpipe[1];
+	errfd = data->errpipe[1];
+
+	r = fork();
+	if (!r) {
+		/* child */
+		/* !!! Do not touch any posix thread primitives !!! */
+
+		if ((dup2(outfd, STDOUT_FILENO ) != STDOUT_FILENO) ||
+		    (dup2(errfd, STDERR_FILENO ) != STDERR_FILENO))
+			_exit(LVMPD_RET_DUP_FAILED);
+
+		execve(*(pdlv->cmdargv), (char *const *)pdlv->cmdargv, (char *const *)pdlv->cmdenvp);
+
+		_exit(LVMPD_RET_EXC_FAILED);
+	} else {
+		/* parent */
+		if (r == -1) {
+			ERROR(ls, "%s: %s: (%d) %s", PD_LOG_PREFIX, "fork failed",
+			      errno, _strerror_r(errno, data));
+			goto err;
+		}
+
+		INFO(ls, "%s: LVM2 cmd \"%s\" (PID: %d)", PD_LOG_PREFIX, *(pdlv->cmdargv), r);
+
+		pdlv->cmd_pid = r;
+
+		/* failure to close write end of any pipe will result in broken polling */
+		if (close(data->outpipe[1])) {
+			ERROR(ls, "%s: %s: (%d) %s", PD_LOG_PREFIX, "failed to close write end of pipe",
+			      errno, _strerror_r(errno, data));
+			goto err;
+		}
+		data->outpipe[1] = -1;
+
+		if (close(data->errpipe[1])) {
+			ERROR(ls, "%s: %s: (%d) %s", PD_LOG_PREFIX, "failed to close write end of err pipe",
+			      errno, _strerror_r(errno, data));
+			goto err;
+		}
+		data->errpipe[1] = -1;
+
+		error = poll_for_output(pdlv, data);
+		DEBUGLOG(ls, "%s: %s", PD_LOG_PREFIX, "polling for lvpoll output has finished");
+	}
+
+err:
+	/* 'r' is reused: non-zero below means a child PID left to reap */
+	r = 0;
+
+	pdst_lock(pdlv->pdst);
+
+	if (error) {
+		/* last reader is responsible for pdlv cleanup */
+		r = pdlv->cmd_pid;
+		pdlv_set_error(pdlv, 1);
+	}
+
+	pdlv_set_polling_finished(pdlv, 1);
+	if (data)
+		data->pdlv = NULL;
+
+	pdst_locked_dec(pdlv->pdst);
+
+	pdst_unlock(pdlv->pdst);
+
+	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &state);
+	lvmpolld_thread_data_destroy(data);
+	pthread_setspecific(key, NULL);
+	pthread_setcancelstate(state, &state);
+
+	update_idle_state(ls);
+
+	/*
+	 * This is unfortunate case where we
+	 * know nothing about state of lvm cmd and
+	 * (eventually) ongoing progress.
+	 *
+	 * harvest zombies
+	 */
+	if (r)
+		while(waitpid(r, NULL, 0) < 0 && errno == EINTR);
+
+	return NULL;
+}
+
+/*
+ * LVMPD_REQ_PROGRESS handler: report the state of the poll operation
+ * identified by lvid (plus optional client sysdir). A finished or
+ * failed operation is removed from its store here; after pdst_unlock()
+ * the reply is built solely from the copied 'st' and 'pdlv' serves only
+ * as a was-found flag.
+ */
+static response progress_info(client_handle h, struct lvmpolld_state *ls, request req)
+{
+	char *id;
+	struct lvmpolld_lv *pdlv;
+	struct lvmpolld_store *pdst;
+	struct lvmpolld_lv_state st;
+	response r;
+	const char *lvid = daemon_request_str(req, LVMPD_PARM_LVID, NULL);
+	const char *sysdir = daemon_request_str(req, LVMPD_PARM_SYSDIR, NULL);
+	unsigned abort_polling = daemon_request_int(req, LVMPD_PARM_ABORT, 0);
+
+	if (!lvid)
+		return reply(LVMPD_RESP_FAILED, REASON_MISSING_LVID);
+
+	id = construct_id(sysdir, lvid);
+	if (!id) {
+		ERROR(ls, "%s: %s", PD_LOG_PREFIX, "progress_info request failed to construct ID.");
+		return reply(LVMPD_RESP_FAILED, REASON_ENOMEM);
+	}
+
+	DEBUGLOG(ls, "%s: %s: %s", PD_LOG_PREFIX, "ID", id);
+
+	pdst = abort_polling ? ls->id_to_pdlv_abort : ls->id_to_pdlv_poll;
+
+	pdst_lock(pdst);
+
+	pdlv = pdst_locked_lookup(pdst, id);
+	if (pdlv) {
+		/*
+		 * with store lock held, I'm the only reader accessing the pdlv
+		 */
+		st = pdlv_get_status(pdlv);
+
+		if (st.error || st.polling_finished) {
+			INFO(ls, "%s: %s %s", PD_LOG_PREFIX,
+			     "Polling finished. Removing related data structure for LV",
+			     lvid);
+			pdst_locked_remove(pdst, id);
+			pdlv_destroy(pdlv);
+		}
+	}
+	/* pdlv must not be dereferenced from now on */
+
+	pdst_unlock(pdst);
+
+	dm_free(id);
+
+	if (pdlv) {
+		if (st.error)
+			return reply(LVMPD_RESP_FAILED, REASON_POLLING_FAILED);
+
+		if (st.polling_finished)
+			r = daemon_reply_simple(LVMPD_RESP_FINISHED,
+						"reason = %s", st.cmd_state.signal ? LVMPD_REAS_SIGNAL : LVMPD_REAS_RETCODE,
+						LVMPD_PARM_VALUE " = %d", (int64_t)(st.cmd_state.signal ?: st.cmd_state.retcode),
+						NULL);
+		else
+			r = daemon_reply_simple(LVMPD_RESP_IN_PROGRESS, NULL);
+	}
+	else
+		r = daemon_reply_simple(LVMPD_RESP_NOT_FOUND, NULL);
+
+	return r;
+}
+
+/*
+ * Allocate a new lvmpolld_lv for the request and attach the argv and
+ * envp arrays for the lvpoll command it will run. On any failure the
+ * partially built pdlv is destroyed and NULL is returned.
+ */
+static struct lvmpolld_lv *construct_pdlv(request req, struct lvmpolld_state *ls,
+					  struct lvmpolld_store *pdst,
+					  const char *interval, const char *id,
+					  const char *vgname, const char *lvname,
+					  const char *sysdir, enum poll_type type,
+					  unsigned abort_polling, unsigned uinterval)
+{
+	const char **cmdargv, **cmdenvp;
+	struct lvmpolld_lv *pdlv;
+	unsigned handle_missing_pvs = daemon_request_int(req, LVMPD_PARM_HANDLE_MISSING_PVS, 0);
+
+	pdlv = pdlv_create(ls, id, vgname, lvname, sysdir, type,
+			   interval, uinterval, pdst);
+
+	if (!pdlv) {
+		ERROR(ls, "%s: %s", PD_LOG_PREFIX, "failed to create internal LV data structure.");
+		return NULL;
+	}
+
+	cmdargv = cmdargv_ctr(pdlv, pdlv->ls->lvm_binary, abort_polling, handle_missing_pvs);
+	if (!cmdargv) {
+		pdlv_destroy(pdlv);
+		ERROR(ls, "%s: %s", PD_LOG_PREFIX, "failed to construct cmd arguments for lvpoll command");
+		return NULL;
+	}
+
+	pdlv->cmdargv = cmdargv;
+
+	cmdenvp = cmdenvp_ctr(pdlv);
+	if (!cmdenvp) {
+		pdlv_destroy(pdlv);
+		ERROR(ls, "%s: %s", PD_LOG_PREFIX, "failed to construct cmd environment for lvpoll command");
+		return NULL;
+	}
+
+	pdlv->cmdenvp = cmdenvp;
+
+	return pdlv;
+}
+
+/*
+ * Start the detached fork_and_poll() monitoring thread for 'pdlv'.
+ * Returns 1 on success, 0 on failure (no thread running).
+ *
+ * Fixes vs. the previous version: the pthread_attr_t is destroyed on
+ * every path (it leaked when setdetachstate failed), and a failing
+ * pthread_attr_destroy no longer turns a successful pthread_create
+ * into a reported failure — the caller would then destroy 'pdlv'
+ * while the freshly spawned thread is still using it.
+ */
+static int spawn_detached_thread(struct lvmpolld_lv *pdlv)
+{
+	int r;
+	pthread_attr_t attr;
+
+	if (pthread_attr_init(&attr) != 0)
+		return 0;
+
+	if (pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 0) {
+		(void) pthread_attr_destroy(&attr);
+		return 0;
+	}
+
+	r = pthread_create(&pdlv->tid, &attr, fork_and_poll, (void *)pdlv);
+
+	/* destroy result intentionally ignored: the thread (if any) owns
+	 * pdlv already, so we must report success iff create succeeded */
+	(void) pthread_attr_destroy(&attr);
+
+	return !r;
+}
+
+/*
+ * Handler for the four poll-starting requests (pvmove/convert/merge/
+ * merge_thin). Validates the request parameters, then either attaches
+ * to an operation already in progress (same ID, same type) or creates
+ * a new pdlv, inserts it into the store and spawns the detached
+ * monitoring thread. All store manipulation happens under pdst_lock.
+ */
+static response poll_init(client_handle h, struct lvmpolld_state *ls, request req, enum poll_type type)
+{
+	char *id;
+	struct lvmpolld_lv *pdlv;
+	struct lvmpolld_store *pdst;
+	unsigned uinterval;
+
+	const char *interval = daemon_request_str(req, LVMPD_PARM_INTERVAL, NULL);
+	const char *lvid = daemon_request_str(req, LVMPD_PARM_LVID, NULL);
+	const char *lvname = daemon_request_str(req, LVMPD_PARM_LVNAME, NULL);
+	const char *vgname = daemon_request_str(req, LVMPD_PARM_VGNAME, NULL);
+	const char *sysdir = daemon_request_str(req, LVMPD_PARM_SYSDIR, NULL);
+	unsigned abort_polling = daemon_request_int(req, LVMPD_PARM_ABORT, 0);
+
+	assert(type < POLL_TYPE_MAX);
+
+	if (abort_polling && type != PVMOVE)
+		return reply(LVMPD_RESP_EINVAL, REASON_ILLEGAL_ABORT_REQUEST);
+
+	/* strpbrk rejects negative numbers which sscanf %u would accept */
+	if (!interval || strpbrk(interval, "-") || sscanf(interval, "%u", &uinterval) != 1)
+		return reply(LVMPD_RESP_EINVAL, REASON_INVALID_INTERVAL);
+
+	if (!lvname)
+		return reply(LVMPD_RESP_FAILED, REASON_MISSING_LVNAME);
+
+	if (!lvid)
+		return reply(LVMPD_RESP_FAILED, REASON_MISSING_LVID);
+
+	if (!vgname)
+		return reply(LVMPD_RESP_FAILED, REASON_MISSING_VGNAME);
+
+	id = construct_id(sysdir, lvid);
+	if (!id) {
+		ERROR(ls, "%s: %s", PD_LOG_PREFIX, "poll_init request failed to construct ID.");
+		return reply(LVMPD_RESP_FAILED, REASON_ENOMEM);
+	}
+
+	DEBUGLOG(ls, "%s: %s=%s", PD_LOG_PREFIX, "ID", id);
+
+	pdst = abort_polling ? ls->id_to_pdlv_abort : ls->id_to_pdlv_poll;
+
+	pdst_lock(pdst);
+
+	pdlv = pdst_locked_lookup(pdst, id);
+	if (pdlv && pdlv_get_polling_finished(pdlv)) {
+		WARN(ls, "%s: %s %s", PD_LOG_PREFIX, "Force removal of uncollected info for LV",
+		     lvid);
+		/*
+		 * lvmpolld has to remove uncollected results in this case.
+		 * otherwise it would have to refuse request for new polling
+		 * lv with same id.
+		 */
+		pdst_locked_remove(pdst, id);
+		pdlv_destroy(pdlv);
+		pdlv = NULL;
+	}
+
+	if (pdlv) {
+		if (!pdlv_is_type(pdlv, type)) {
+			pdst_unlock(pdst);
+			ERROR(ls, "%s: %s '%s': expected: %s, requested: %s",
+			      PD_LOG_PREFIX, "poll operation type mismatch on LV identified by",
+			      id,
+			      polling_op(pdlv_get_type(pdlv)), polling_op(type));
+			dm_free(id);
+			return reply(LVMPD_RESP_EINVAL,
+				     REASON_DIFFERENT_OPERATION_IN_PROGRESS);
+		}
+		pdlv->init_rq_count++; /* safe. protected by store lock */
+	} else {
+		pdlv = construct_pdlv(req, ls, pdst, interval, id, vgname,
+				      lvname, sysdir, type, abort_polling, 2 * uinterval);
+		if (!pdlv) {
+			pdst_unlock(pdst);
+			dm_free(id);
+			return reply(LVMPD_RESP_FAILED, REASON_ENOMEM);
+		}
+		if (!pdst_locked_insert(pdst, id, pdlv)) {
+			pdlv_destroy(pdlv);
+			pdst_unlock(pdst);
+			ERROR(ls, "%s: %s", PD_LOG_PREFIX, "couldn't store internal LV data structure");
+			dm_free(id);
+			return reply(LVMPD_RESP_FAILED, REASON_ENOMEM);
+		}
+		if (!spawn_detached_thread(pdlv)) {
+			ERROR(ls, "%s: %s", PD_LOG_PREFIX, "failed to spawn detached monitoring thread");
+			pdst_locked_remove(pdst, id);
+			pdlv_destroy(pdlv);
+			pdst_unlock(pdst);
+			dm_free(id);
+			return reply(LVMPD_RESP_FAILED, REASON_ENOMEM);
+		}
+
+		pdst_locked_inc(pdst);
+		if (ls->idle)
+			ls->idle->is_idle = 0;
+	}
+
+	pdst_unlock(pdst);
+
+	dm_free(id);
+
+	return daemon_reply_simple(LVMPD_RESP_OK, NULL);
+}
+
+/*
+ * LVMPD_REQ_DUMP handler: serialise both stores into the response
+ * buffer while the global lock (both stores plus every pdlv lock) is
+ * held, so the dump is a consistent snapshot.
+ */
+static response dump_state(client_handle h, struct lvmpolld_state *ls, request r)
+{
+	response res = { 0 };
+	struct buffer *b = &res.buffer;
+
+	buffer_init(b);
+
+	_lvmpolld_global_lock(ls);
+
+	buffer_append(b, "# Registered polling operations\n\n");
+	buffer_append(b, "poll {\n");
+	pdst_locked_dump(ls->id_to_pdlv_poll, b);
+	buffer_append(b, "}\n\n");
+
+	buffer_append(b, "# Registered abort operations\n\n");
+	buffer_append(b, "abort {\n");
+	pdst_locked_dump(ls->id_to_pdlv_abort, b);
+	buffer_append(b, "}");
+
+	_lvmpolld_global_unlock(ls);
+
+	return res;
+}
+
+/* Dispatch a client request to its handler based on the request name. */
+static response _handler(struct daemon_state s, client_handle h, request r)
+{
+	static const struct {
+		const char *name;
+		enum poll_type type;
+	} poll_rqs[] = {
+		{ LVMPD_REQ_PVMOVE,	PVMOVE },
+		{ LVMPD_REQ_CONVERT,	CONVERT },
+		{ LVMPD_REQ_MERGE,	MERGE },
+		{ LVMPD_REQ_MERGE_THIN,	MERGE_THIN },
+	};
+	struct lvmpolld_state *ls = s.private;
+	const char *rq = daemon_request_str(r, "request", "NONE");
+	unsigned i;
+
+	for (i = 0; i < sizeof(poll_rqs) / sizeof(poll_rqs[0]); i++)
+		if (!strcmp(rq, poll_rqs[i].name))
+			return poll_init(h, ls, r, poll_rqs[i].type);
+
+	if (!strcmp(rq, LVMPD_REQ_PROGRESS))
+		return progress_info(h, ls, r);
+
+	if (!strcmp(rq, LVMPD_REQ_DUMP))
+		return dump_state(h, ls, r);
+
+	return reply(LVMPD_RESP_EINVAL, REASON_REQ_NOT_IMPLEMENTED);
+}
+
+/*
+ * Parse a decimal --timeout argument into *max_timeouts.
+ * Rejects empty strings, non-numeric input, trailing garbage and
+ * values that do not fit in unsigned (UINT_MAX itself is reserved to
+ * keep the strtoul saturation case unambiguous). Returns 1 on success,
+ * 0 on invalid input (*max_timeouts untouched).
+ */
+static int process_timeout_arg(const char *str, unsigned *max_timeouts)
+{
+	char *endptr;
+	unsigned long l;
+
+	errno = 0;
+	l = strtoul(str, &endptr, 10);
+	/* endptr == str catches the empty/non-numeric string, which the
+	 * previous check silently accepted as the value 0 */
+	if (errno || endptr == str || *endptr || l >= UINT_MAX)
+		return 0;
+
+	*max_timeouts = (unsigned) l;
+
+	return 1;
+}
+
+/* Client functionality */
+typedef int (*action_fn_t) (void *args);
+
+struct log_line_baton {
+ const char *prefix;
+};
+
+daemon_handle _lvmpolld = { .error = 0 };
+
+/* Connect to a running lvmpolld instance; falls back to the default
+ * socket path when 'socket' is NULL. */
+static daemon_handle _lvmpolld_open(const char *socket)
+{
+	daemon_info lvmpolld_info = {
+		.path = "lvmpolld",
+		.socket = socket ?: DEFAULT_RUN_DIR "/lvmpolld.socket",
+		.protocol = LVMPOLLD_PROTOCOL,
+		.protocol_version = LVMPOLLD_PROTOCOL_VERSION
+	};
+
+	return daemon_open(lvmpolld_info);
+}
+
+/* Print one log line to stdout with the baton's prefix prepended. */
+static void _log_line(const char *line, void *baton)
+{
+	const struct log_line_baton *b = baton;
+
+	printf("%s%s\n", b->prefix, line);
+}
+
+/*
+ * Print 'msg' line by line, prefixing each line with 'prefix'.
+ * Returns 0 only when the temporary working copy of 'msg' cannot be
+ * allocated, 1 otherwise.
+ */
+static int printout_raw_response(const char *prefix, const char *msg)
+{
+	struct log_line_baton b = { .prefix = prefix };
+	char *copy, *cur, *nl;
+
+	if (!(copy = dm_strdup(msg)))
+		return 0;
+
+	for (cur = copy; cur; cur = nl) {
+		if ((nl = strchr(cur, '\n'))) {
+			*nl = 0;
+			nl++;
+		}
+		_log_line(cur, &b);
+	}
+
+	dm_free(copy);
+
+	return 1;
+}
+
+/* place all action implementations below */
+
+/*
+ * Client-side --dump action: request the daemon's full state and print
+ * the raw response to stdout. Returns 1 on success, 0 on failure.
+ */
+static int action_dump(void *args __attribute__((unused)))
+{
+	daemon_request req;
+	daemon_reply repl;
+	int r = 0;
+
+	req = daemon_request_make(LVMPD_REQ_DUMP);
+	if (!req.cft) {
+		fprintf(stderr, "Failed to create lvmpolld " LVMPD_REQ_DUMP " request.\n");
+		goto out_req;
+	}
+
+	repl = daemon_send(_lvmpolld, req);
+	if (repl.error) {
+		fprintf(stderr, "Failed to send a request or receive response.\n");
+		goto out_rep;
+	}
+
+	/*
+	 * This is dumb copy & paste from libdaemon log routines.
+	 */
+	if (!printout_raw_response("  ", repl.buffer.mem)) {
+		fprintf(stderr, "Failed to print out the response.\n");
+		goto out_rep;
+	}
+
+	r = 1;
+
+out_rep:
+	daemon_reply_destroy(repl);
+out_req:
+	/* NOTE(review): reached even when req.cft is NULL — presumably
+	 * daemon_request_destroy() tolerates that; verify in libdaemon */
+	daemon_request_destroy(req);
+
+	return r;
+}
+
+enum action_index {
+ ACTION_DUMP = 0,
+ ACTION_MAX /* keep at the end */
+};
+
+static const action_fn_t actions[ACTION_MAX] = { [ACTION_DUMP] = action_dump };
+
+/* Run the client action at 'idx'; unknown indices fail with 0. */
+static int _make_action(enum action_index idx, void *args)
+{
+	if (idx >= ACTION_MAX)
+		return 0;
+
+	return actions[idx](args);
+}
+
+/*
+ * Connect to the daemon, run the requested client action and return a
+ * process exit status (EXIT_SUCCESS/EXIT_FAILURE) — main() passes the
+ * result straight to return.
+ */
+static int _lvmpolld_client(const char *socket, unsigned action)
+{
+	int r;
+
+	_lvmpolld = _lvmpolld_open(socket);
+
+	if (_lvmpolld.error || _lvmpolld.socket_fd < 0) {
+		fprintf(stderr, "Failed to establish connection with lvmpolld.\n");
+		/* was "return 0", which the caller reported as a
+		 * successful exit status despite the failure */
+		return EXIT_FAILURE;
+	}
+
+	r = _make_action(action, NULL);
+
+	daemon_close(_lvmpolld);
+
+	return r ? EXIT_SUCCESS : EXIT_FAILURE;
+}
+
+static int action_idx = ACTION_MAX;
+static struct option long_options[] = {
+ /* Have actions always at the beginning of the array. */
+ {"dump", no_argument, &action_idx, ACTION_DUMP }, /* or an option_index ? */
+
+ /* other options */
+ {"binary", required_argument, 0, 'B' },
+ {"foreground", no_argument, 0, 'f' },
+ {"help", no_argument, 0, 'h' },
+ {"log", required_argument, 0, 'l' },
+ {"pidfile", required_argument, 0, 'p' },
+ {"socket", required_argument, 0, 's' },
+ {"timeout", required_argument, 0, 't' },
+ {"version", no_argument, 0, 'V' },
+ {0, 0, 0, 0 }
+};
+
+/*
+ * lvmpolld entry point. Parses options, refuses mixed client/server
+ * option sets, then either runs a one-shot client action (--dump) or
+ * starts the daemon loop via daemon_start().
+ */
+int main(int argc, char *argv[])
+{
+	int opt;
+	int option_index = 0;
+	int client = 0, server = 0;
+	unsigned action = ACTION_MAX;
+	struct timeval timeout;
+	daemon_idle di = { .ptimeout = &timeout };
+	struct lvmpolld_state ls = { .log_config = "" };
+	daemon_state s = {
+		.daemon_fini = _fini,
+		.daemon_init = _init,
+		.handler = _handler,
+		.name = "lvmpolld",
+		.pidfile = getenv("LVM_LVMPOLLD_PIDFILE") ?: LVMPOLLD_PIDFILE,
+		.private = &ls,
+		.protocol = LVMPOLLD_PROTOCOL,
+		.protocol_version = LVMPOLLD_PROTOCOL_VERSION,
+		.socket_path = getenv("LVM_LVMPOLLD_SOCKET") ?: LVMPOLLD_SOCKET,
+	};
+
+	while ((opt = getopt_long(argc, argv, "fhVl:p:s:B:t:", long_options, &option_index)) != -1) {
+		switch (opt) {
+		case 0 :
+			/* a long-only action option set action_idx via
+			 * the long_options flag pointer */
+			if (action < ACTION_MAX) {
+				fprintf(stderr, "Can't perform more actions. Action already requested: %s\n",
+					long_options[action].name);
+				_usage(argv[0], stderr);
+				exit(EXIT_FAILURE);
+			}
+			action = action_idx;
+			client = 1;
+			break;
+		case '?':
+			_usage(argv[0], stderr);
+			exit(EXIT_FAILURE);
+		case 'B': /* --binary */
+			ls.lvm_binary = optarg;
+			server = 1;
+			break;
+		case 'V': /* --version */
+			printf("lvmpolld version: " LVM_VERSION "\n");
+			exit(EXIT_SUCCESS);
+		case 'f': /* --foreground */
+			s.foreground = 1;
+			server = 1;
+			break;
+		case 'h': /* --help */
+			_usage(argv[0], stdout);
+			exit(EXIT_SUCCESS);
+		case 'l': /* --log */
+			ls.log_config = optarg;
+			server = 1;
+			break;
+		case 'p': /* --pidfile */
+			s.pidfile = optarg;
+			server = 1;
+			break;
+		case 's': /* --socket */
+			/* does not set 'server': --socket also applies to
+			 * client actions (see _lvmpolld_client() below) */
+			s.socket_path = optarg;
+			break;
+		case 't': /* --timeout in seconds */
+			if (!process_timeout_arg(optarg, &di.max_timeouts)) {
+				fprintf(stderr, "Invalid value of timeout parameter.\n");
+				exit(EXIT_FAILURE);
+			}
+			/* 0 equals to wait indefinitely */
+			if (di.max_timeouts)
+				s.idle = ls.idle = &di;
+			server = 1;
+			break;
+		}
+	}
+
+	if (client && server) {
+		fprintf(stderr, "Invalid combination of client and server parameters.\n\n");
+		_usage(argv[0], stdout);
+		exit(EXIT_FAILURE);
+	}
+
+	if (client)
+		return _lvmpolld_client(s.socket_path, action);
+
+	/* Server */
+	daemon_start(s);
+
+	return EXIT_SUCCESS;
+}
diff --git a/daemons/lvmpolld/lvmpolld-data-utils.c b/daemons/lvmpolld/lvmpolld-data-utils.c
new file mode 100644
index 000000000..efe31cfd9
--- /dev/null
+++ b/daemons/lvmpolld/lvmpolld-data-utils.c
@@ -0,0 +1,391 @@
+/*
+ * Copyright (C) 2014-2015 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "lvmpolld-common.h"
+
+#include "config-util.h"
+
+#include <fcntl.h>
+#include <signal.h>
+
+/*
+ * Build the full "vg/lv" name from separate vg and lv names.
+ *
+ * Returns a dm_malloc'd string (caller frees with dm_free) or NULL
+ * on allocation or formatting failure.
+ */
+static char *_construct_full_lvname(const char *vgname, const char *lvname)
+{
+	char *name;
+	size_t l;
+
+	l = strlen(vgname) + strlen(lvname) + 2; /* vg/lv and \0 */
+	name = (char *) dm_malloc(l * sizeof(char));
+	if (!name)
+		return NULL;
+
+	if (dm_snprintf(name, l, "%s/%s", vgname, lvname) < 0) {
+		dm_free(name);
+		name = NULL;
+	}
+
+	return name;
+}
+
+/*
+ * Build the "LVM_SYSTEM_DIR=<sysdir>" environment string handed to
+ * spawned lvm commands.  With a NULL sysdir an empty string (single
+ * '\0') is returned instead.
+ *
+ * Returns a dm_malloc'd string (caller frees with dm_free) or NULL
+ * on allocation or formatting failure.
+ */
+static char *_construct_lvm_system_dir_env(const char *sysdir)
+{
+	/*
+	 * Store either "LVM_SYSTEM_DIR=/path/to..."
+	 *	- or -
+	 * just single char to store NULL byte
+	 */
+	size_t l = sysdir ? strlen(sysdir) + 16 : 1; /* 16 = strlen("LVM_SYSTEM_DIR=") + '\0' */
+	char *env = (char *) dm_malloc(l * sizeof(char));
+
+	if (!env)
+		return NULL;
+
+	*env = '\0';
+
+	if (sysdir && dm_snprintf(env, l, "LVM_SYSTEM_DIR=%s", sysdir) < 0) {
+		dm_free(env);
+		env = NULL;
+	}
+
+	return env;
+}
+
+/*
+ * Given the lvmpolld_id ("<sysdir><uuid>", see construct_id), return a
+ * pointer past the optional sysdir prefix, i.e. at the bare LV uuid.
+ * Returns NULL when lvmpolld_id itself is NULL.
+ */
+static const char *_get_lvid(const char *lvmpolld_id, const char *sysdir)
+{
+	return lvmpolld_id ? (lvmpolld_id + (sysdir ? strlen(sysdir) : 0)) : NULL;
+}
+
+/*
+ * Construct the store key identifying a polled LV: the LV uuid
+ * prefixed by sysdir when one is set ("<sysdir><uuid>").
+ *
+ * Returns a dm_malloc'd string (caller frees with dm_free) or NULL
+ * on allocation or formatting failure.
+ */
+char *construct_id(const char *sysdir, const char *uuid)
+{
+	char *id;
+	int r;
+	size_t l;
+
+	l = strlen(uuid) + (sysdir ? strlen(sysdir) : 0) + 1;
+	id = (char *) dm_malloc(l * sizeof(char));
+	if (!id)
+		return NULL;
+
+	r = sysdir ? dm_snprintf(id, l, "%s%s", sysdir, uuid) :
+		     dm_snprintf(id, l, "%s", uuid);
+
+	if (r < 0) {
+		dm_free(id);
+		id = NULL;
+	}
+
+	return id;
+}
+
+/*
+ * Allocate and initialise a struct lvmpolld_lv describing one polled LV.
+ * All string arguments are copied; the returned pdlv owns the copies
+ * (released by pdlv_destroy).  On any failure every partial allocation
+ * is released and NULL is returned.
+ *
+ * Note: tmp.lvid points into lvmpolld_id (see _get_lvid), so only
+ * lvmpolld_id is freed on the error path.
+ */
+struct lvmpolld_lv *pdlv_create(struct lvmpolld_state *ls, const char *id,
+			   const char *vgname, const char *lvname,
+			   const char *sysdir, enum poll_type type,
+			   const char *sinterval, unsigned pdtimeout,
+			   struct lvmpolld_store *pdst)
+{
+	char *lvmpolld_id = dm_strdup(id), /* copy */
+	     *full_lvname = _construct_full_lvname(vgname, lvname), /* copy */
+	     *lvm_system_dir_env = _construct_lvm_system_dir_env(sysdir); /* copy */
+
+	struct lvmpolld_lv tmp = {
+		.ls = ls,
+		.type = type,
+		.lvmpolld_id = lvmpolld_id,
+		.lvid = _get_lvid(lvmpolld_id, sysdir),
+		.lvname = full_lvname,
+		.lvm_system_dir_env = lvm_system_dir_env,
+		.sinterval = dm_strdup(sinterval), /* copy */
+		/* enforce a sane lower bound on the polling timeout */
+		.pdtimeout = pdtimeout < MIN_POLLING_TIMEOUT ? MIN_POLLING_TIMEOUT : pdtimeout,
+		.cmd_state = { .retcode = -1, .signal = 0 },
+		.pdst = pdst,
+		.init_rq_count = 1
+	}, *pdlv = (struct lvmpolld_lv *) dm_malloc(sizeof(struct lvmpolld_lv));
+
+	if (!pdlv || !tmp.lvid || !tmp.lvname || !tmp.lvm_system_dir_env || !tmp.sinterval)
+		goto err;
+
+	memcpy(pdlv, &tmp, sizeof(*pdlv));
+
+	if (pthread_mutex_init(&pdlv->lock, NULL))
+		goto err;
+
+	return pdlv;
+
+err:
+	dm_free((void *)full_lvname);
+	dm_free((void *)lvmpolld_id);
+	dm_free((void *)lvm_system_dir_env);
+	dm_free((void *)tmp.sinterval);
+	dm_free((void *)pdlv);
+
+	return NULL;
+}
+
+/*
+ * Release all memory owned by pdlv (counterpart of pdlv_create) and
+ * destroy its mutex.  pdlv->lvid is not freed separately because it
+ * aliases pdlv->lvmpolld_id.
+ */
+void pdlv_destroy(struct lvmpolld_lv *pdlv)
+{
+	dm_free((void *)pdlv->lvmpolld_id);
+	dm_free((void *)pdlv->lvname);
+	dm_free((void *)pdlv->sinterval);
+	dm_free((void *)pdlv->lvm_system_dir_env);
+	dm_free((void *)pdlv->cmdargv);
+	dm_free((void *)pdlv->cmdenvp);
+
+	pthread_mutex_destroy(&pdlv->lock);
+
+	dm_free((void *)pdlv);
+}
+
+/* Thread-safe read of pdlv->polling_finished. */
+unsigned pdlv_get_polling_finished(struct lvmpolld_lv *pdlv)
+{
+	unsigned ret;
+
+	pdlv_lock(pdlv);
+	ret = pdlv->polling_finished;
+	pdlv_unlock(pdlv);
+
+	return ret;
+}
+
+/*
+ * Take a consistent snapshot of the lock-protected state (error flag,
+ * polling_finished flag and lvm command state) in a single critical
+ * section and return it by value.
+ */
+struct lvmpolld_lv_state pdlv_get_status(struct lvmpolld_lv *pdlv)
+{
+	struct lvmpolld_lv_state r;
+
+	pdlv_lock(pdlv);
+	r.error = pdlv_locked_error(pdlv);
+	r.polling_finished = pdlv_locked_polling_finished(pdlv);
+	r.cmd_state = pdlv_locked_cmd_state(pdlv);
+	pdlv_unlock(pdlv);
+
+	return r;
+}
+
+/* Thread-safe update of the lvm command exit/signal state. */
+void pdlv_set_cmd_state(struct lvmpolld_lv *pdlv, const struct lvmpolld_cmd_stat *cmd_state)
+{
+	pdlv_lock(pdlv);
+	pdlv->cmd_state = *cmd_state;
+	pdlv_unlock(pdlv);
+}
+
+/* Thread-safe update of the lvmpolld-internal error flag. */
+void pdlv_set_error(struct lvmpolld_lv *pdlv, unsigned error)
+{
+	pdlv_lock(pdlv);
+	pdlv->error = error;
+	pdlv_unlock(pdlv);
+}
+
+/* Thread-safe update of the polling_finished flag. */
+void pdlv_set_polling_finished(struct lvmpolld_lv *pdlv, unsigned finished)
+{
+	pdlv_lock(pdlv);
+	pdlv->polling_finished = finished;
+	pdlv_unlock(pdlv);
+}
+
+/*
+ * Allocate a polling store: a hash table of pdlvs plus the mutex
+ * guarding it.  'name' is kept by reference (not copied).  Returns
+ * NULL with all partial allocations released on failure.
+ */
+struct lvmpolld_store *pdst_init(const char *name)
+{
+	struct lvmpolld_store *pdst = (struct lvmpolld_store *) dm_malloc(sizeof(struct lvmpolld_store));
+	if (!pdst)
+		return NULL;
+
+	pdst->store = dm_hash_create(32);
+	if (!pdst->store)
+		goto err_hash;
+	if (pthread_mutex_init(&pdst->lock, NULL))
+		goto err_mutex;
+
+	pdst->name = name;
+	pdst->active_polling_count = 0;
+
+	return pdst;
+
+err_mutex:
+	dm_hash_destroy(pdst->store);
+err_hash:
+	dm_free(pdst);
+	return NULL;
+}
+
+/* Counterpart of pdst_init; safe to call with NULL. */
+void pdst_destroy(struct lvmpolld_store *pdst)
+{
+	if (!pdst)
+		return;
+
+	dm_hash_destroy(pdst->store);
+	pthread_mutex_destroy(&pdst->lock);
+	dm_free(pdst);
+}
+
+/* Lock every pdlv in the store.  Caller must hold the store lock. */
+void pdst_locked_lock_all_pdlvs(const struct lvmpolld_store *pdst)
+{
+	struct dm_hash_node *n;
+
+	dm_hash_iterate(n, pdst->store)
+		pdlv_lock(dm_hash_get_data(pdst->store, n));
+}
+
+/* Unlock every pdlv in the store.  Caller must hold the store lock. */
+void pdst_locked_unlock_all_pdlvs(const struct lvmpolld_store *pdst)
+{
+	struct dm_hash_node *n;
+
+	dm_hash_iterate(n, pdst->store)
+		pdlv_unlock(dm_hash_get_data(pdst->store, n));
+}
+
+/*
+ * Serialise one pdlv into the dump buffer as a config-style section.
+ * Caller must hold the pdlv lock (see pdst_locked_dump).
+ */
+static void _pdlv_locked_dump(struct buffer *buff, const struct lvmpolld_lv *pdlv)
+{
+	char tmp[1024];
+	const struct lvmpolld_cmd_stat *cmd_state = &pdlv->cmd_state;
+
+	/* pdlv-section { */
+	if (dm_snprintf(tmp, sizeof(tmp), "\t%s {\n", pdlv->lvmpolld_id) > 0)
+		buffer_append(buff, tmp);
+
+	if (dm_snprintf(tmp, sizeof(tmp), "\t\tlvid=\"%s\"\n", pdlv->lvid) > 0)
+		buffer_append(buff, tmp);
+	if (dm_snprintf(tmp, sizeof(tmp), "\t\ttype=\"%s\"\n", polling_op(pdlv->type)) > 0)
+		buffer_append(buff, tmp);
+	if (dm_snprintf(tmp, sizeof(tmp), "\t\tlvname=\"%s\"\n", pdlv->lvname) > 0)
+		buffer_append(buff, tmp);
+	if (dm_snprintf(tmp, sizeof(tmp), "\t\tlvmpolld_internal_timeout=%d\n", pdlv->pdtimeout) > 0)
+		buffer_append(buff, tmp);
+	if (dm_snprintf(tmp, sizeof(tmp), "\t\tlvm_command_interval=\"%s\"\n", pdlv->sinterval ?: "<undefined>") > 0)
+		buffer_append(buff, tmp);
+	if (dm_snprintf(tmp, sizeof(tmp), "\t\tLVM_SYSTEM_DIR=\"%s\"\n",
+			(*pdlv->lvm_system_dir_env ? (pdlv->lvm_system_dir_env + strlen("LVM_SYSTEM_DIR=")) : "<undefined>")) > 0)
+		buffer_append(buff, tmp);
+	if (dm_snprintf(tmp, sizeof(tmp), "\t\tlvm_command_pid=%d\n", pdlv->cmd_pid) > 0)
+		buffer_append(buff, tmp);
+	if (dm_snprintf(tmp, sizeof(tmp), "\t\tpolling_finished=%d\n", pdlv->polling_finished) > 0)
+		buffer_append(buff, tmp);
+	if (dm_snprintf(tmp, sizeof(tmp), "\t\terror_occured=%d\n", pdlv->error) > 0)
+		buffer_append(buff, tmp);
+	if (dm_snprintf(tmp, sizeof(tmp), "\t\tinit_requests_count=%d\n", pdlv->init_rq_count) > 0)
+		buffer_append(buff, tmp);
+
+	/* lvm_command-section { */
+	buffer_append(buff, "\t\tlvm_command {\n");
+	if (cmd_state->retcode == -1 && !cmd_state->signal)
+		buffer_append(buff, "\t\t\tstate=\"" LVMPD_RESP_IN_PROGRESS "\"\n");
+	else {
+		buffer_append(buff, "\t\t\tstate=\"" LVMPD_RESP_FINISHED "\"\n");
+		if (dm_snprintf(tmp, sizeof(tmp), "\t\t\treason=\"%s\"\n\t\t\tvalue=%d\n",
+				(cmd_state->signal ? LVMPD_REAS_SIGNAL : LVMPD_REAS_RETCODE),
+				(cmd_state->signal ?: cmd_state->retcode)) > 0)
+			buffer_append(buff, tmp);
+	}
+	buffer_append(buff, "\t\t}\n");
+	/* } lvm_command-section */
+
+	buffer_append(buff, "\t}\n");
+	/* } pdlv-section */
+}
+
+/* Dump every pdlv in the store.  Caller must hold the store lock. */
+void pdst_locked_dump(const struct lvmpolld_store *pdst, struct buffer *buff)
+{
+	struct dm_hash_node *n;
+
+	dm_hash_iterate(n, pdst->store)
+		_pdlv_locked_dump(buff, dm_hash_get_data(pdst->store, n));
+}
+
+/*
+ * Cancel the polling thread of every pdlv whose polling has not yet
+ * finished.  Caller must hold the store lock.
+ */
+void pdst_locked_send_cancel(const struct lvmpolld_store *pdst)
+{
+	struct lvmpolld_lv *pdlv;
+	struct dm_hash_node *n;
+
+	dm_hash_iterate(n, pdst->store) {
+		pdlv = dm_hash_get_data(pdst->store, n);
+		if (!pdlv_locked_polling_finished(pdlv))
+			pthread_cancel(pdlv->tid);
+	}
+}
+
+/* Destroy every pdlv in the store.  Caller must hold the store lock. */
+void pdst_locked_destroy_all_pdlvs(const struct lvmpolld_store *pdst)
+{
+	struct dm_hash_node *n;
+
+	dm_hash_iterate(n, pdst->store)
+		pdlv_destroy(dm_hash_get_data(pdst->store, n));
+}
+
+/*
+ * Allocate per-polling-thread data: stdout/stderr pipes (marked
+ * close-on-exec) used to capture the lvm command's output.  data->pdlv
+ * is assigned only after full initialisation so that the destructor
+ * can distinguish a partially constructed object.  Returns NULL with
+ * everything released on failure.
+ */
+struct lvmpolld_thread_data *lvmpolld_thread_data_constructor(struct lvmpolld_lv *pdlv)
+{
+	struct lvmpolld_thread_data *data = (struct lvmpolld_thread_data *) dm_malloc(sizeof(struct lvmpolld_thread_data));
+	if (!data)
+		return NULL;
+
+	data->pdlv = NULL;
+	data->line = NULL;
+	data->line_size = 0;
+	data->fout = data->ferr = NULL;
+	data->outpipe[0] = data->outpipe[1] = data->errpipe[0] = data->errpipe[1] = -1;
+
+	if (pipe(data->outpipe) || pipe(data->errpipe)) {
+		lvmpolld_thread_data_destroy(data);
+		return NULL;
+	}
+
+	if (fcntl(data->outpipe[0], F_SETFD, FD_CLOEXEC) ||
+	    fcntl(data->outpipe[1], F_SETFD, FD_CLOEXEC) ||
+	    fcntl(data->errpipe[0], F_SETFD, FD_CLOEXEC) ||
+	    fcntl(data->errpipe[1], F_SETFD, FD_CLOEXEC)) {
+		lvmpolld_thread_data_destroy(data);
+		return NULL;
+	}
+
+	data->pdlv = pdlv;
+
+	return data;
+}
+
+/*
+ * Release thread data; also used as a pthread cleanup handler (hence
+ * the void * parameter).  If the thread is torn down while the lvm
+ * command is still running, the command gets SIGTERM and the pdlv is
+ * marked finished under the store lock.  Pipe read ends wrapped by
+ * stdio streams are closed via fclose; any fd still open afterwards
+ * is closed directly.
+ */
+void lvmpolld_thread_data_destroy(void *thread_private)
+{
+	struct lvmpolld_thread_data *data = (struct lvmpolld_thread_data *) thread_private;
+	if (!data)
+		return;
+
+	if (data->pdlv) {
+		pdst_lock(data->pdlv->pdst);
+		/*
+		 * FIXME: skip this step if lvmpolld is activated
+		 * by systemd.
+		 */
+		if (!pdlv_get_polling_finished(data->pdlv))
+			kill(data->pdlv->cmd_pid, SIGTERM);
+		pdlv_set_polling_finished(data->pdlv, 1);
+		pdst_locked_dec(data->pdlv->pdst);
+		pdst_unlock(data->pdlv->pdst);
+	}
+
+	/* may get reallocated in getline(). dm_free must not be used */
+	free(data->line);
+
+	if (data->fout && !fclose(data->fout))
+		data->outpipe[0] = -1;
+
+	if (data->ferr && !fclose(data->ferr))
+		data->errpipe[0] = -1;
+
+	if (data->outpipe[0] >= 0)
+		(void) close(data->outpipe[0]);
+
+	if (data->outpipe[1] >= 0)
+		(void) close(data->outpipe[1]);
+
+	if (data->errpipe[0] >= 0)
+		(void) close(data->errpipe[0]);
+
+	if (data->errpipe[1] >= 0)
+		(void) close(data->errpipe[1]);
+
+	dm_free(data);
+}
diff --git a/daemons/lvmpolld/lvmpolld-data-utils.h b/daemons/lvmpolld/lvmpolld-data-utils.h
new file mode 100644
index 000000000..e3bede19c
--- /dev/null
+++ b/daemons/lvmpolld/lvmpolld-data-utils.h
@@ -0,0 +1,215 @@
+/*
+ * Copyright (C) 2014-2015 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _LVM_LVMPOLLD_DATA_UTILS_H
+#define _LVM_LVMPOLLD_DATA_UTILS_H
+
+#include <pthread.h>
+
+struct buffer;
+struct lvmpolld_state;
+
+enum poll_type {
+ PVMOVE = 0,
+ CONVERT,
+ MERGE,
+ MERGE_THIN,
+ POLL_TYPE_MAX
+};
+
+struct lvmpolld_cmd_stat {
+ int retcode;
+ int signal;
+};
+
+struct lvmpolld_store {
+ pthread_mutex_t lock;
+ void *store;
+ const char *name;
+ unsigned active_polling_count;
+};
+
+struct lvmpolld_lv {
+	/*
+	 * accessing following vars doesn't
+	 * require struct lvmpolld_lv lock
+	 */
+	struct lvmpolld_state *const ls;
+	const enum poll_type type;
+	const char *const lvid;
+	const char *const lvmpolld_id;
+	const char *const lvname; /* full vg/lv name */
+	const unsigned pdtimeout; /* in seconds */
+	const char *const sinterval;
+	const char *const lvm_system_dir_env;
+	struct lvmpolld_store *const pdst;
+	const char *const *cmdargv;
+	const char *const *cmdenvp;
+
+	/* only used by write */
+	pid_t cmd_pid;
+	pthread_t tid;
+
+	pthread_mutex_t lock;
+
+	/* block of shared variables protected by lock */
+	struct lvmpolld_cmd_stat cmd_state;
+	unsigned init_rq_count; /* for debugging purposes only */
+	unsigned polling_finished:1; /* no more updates */
+	unsigned error:1; /* unrecoverable error occurred in lvmpolld */
+};
+
+typedef void (*lvmpolld_parse_output_fn_t) (struct lvmpolld_lv *pdlv, const char *line);
+
+/* TODO: replace with configuration option */
+#define MIN_POLLING_TIMEOUT 60
+
+struct lvmpolld_lv_state {
+ unsigned error:1;
+ unsigned polling_finished:1;
+ struct lvmpolld_cmd_stat cmd_state;
+};
+
+struct lvmpolld_thread_data {
+ char *line;
+ size_t line_size;
+ int outpipe[2];
+ int errpipe[2];
+ FILE *fout;
+ FILE *ferr;
+ char buf[1024];
+ struct lvmpolld_lv *pdlv;
+};
+
+char *construct_id(const char *sysdir, const char *lvid);
+
+/* LVMPOLLD_LV_T section */
+
+/* only call with appropriate struct lvmpolld_store lock held */
+struct lvmpolld_lv *pdlv_create(struct lvmpolld_state *ls, const char *id,
+ const char *vgname, const char *lvname,
+ const char *sysdir, enum poll_type type,
+ const char *sinterval, unsigned pdtimeout,
+ struct lvmpolld_store *pdst);
+
+/* only call with appropriate struct lvmpolld_store lock held */
+void pdlv_destroy(struct lvmpolld_lv *pdlv);
+
+static inline void pdlv_lock(struct lvmpolld_lv *pdlv)
+{
+ pthread_mutex_lock(&pdlv->lock);
+}
+
+static inline void pdlv_unlock(struct lvmpolld_lv *pdlv)
+{
+ pthread_mutex_unlock(&pdlv->lock);
+}
+
+/*
+ * no struct lvmpolld_lv lock required section
+ */
+static inline int pdlv_is_type(const struct lvmpolld_lv *pdlv, enum poll_type type)
+{
+ return pdlv->type == type;
+}
+
+static inline unsigned pdlv_get_timeout(const struct lvmpolld_lv *pdlv)
+{
+ return pdlv->pdtimeout;
+}
+
+static inline enum poll_type pdlv_get_type(const struct lvmpolld_lv *pdlv)
+{
+ return pdlv->type;
+}
+
+unsigned pdlv_get_polling_finished(struct lvmpolld_lv *pdlv);
+struct lvmpolld_lv_state pdlv_get_status(struct lvmpolld_lv *pdlv);
+void pdlv_set_cmd_state(struct lvmpolld_lv *pdlv, const struct lvmpolld_cmd_stat *cmd_state);
+void pdlv_set_error(struct lvmpolld_lv *pdlv, unsigned error);
+void pdlv_set_polling_finished(struct lvmpolld_lv *pdlv, unsigned finished);
+
+/*
+ * struct lvmpolld_lv lock required section
+ */
+static inline struct lvmpolld_cmd_stat pdlv_locked_cmd_state(const struct lvmpolld_lv *pdlv)
+{
+ return pdlv->cmd_state;
+}
+
+static inline int pdlv_locked_polling_finished(const struct lvmpolld_lv *pdlv)
+{
+ return pdlv->polling_finished;
+}
+
+static inline unsigned pdlv_locked_error(const struct lvmpolld_lv *pdlv)
+{
+ return pdlv->error;
+}
+
+/* struct lvmpolld_store manipulation routines */
+
+struct lvmpolld_store *pdst_init(const char *name);
+void pdst_destroy(struct lvmpolld_store *pdst);
+
+void pdst_locked_dump(const struct lvmpolld_store *pdst, struct buffer *buff);
+void pdst_locked_lock_all_pdlvs(const struct lvmpolld_store *pdst);
+void pdst_locked_unlock_all_pdlvs(const struct lvmpolld_store *pdst);
+void pdst_locked_destroy_all_pdlvs(const struct lvmpolld_store *pdst);
+void pdst_locked_send_cancel(const struct lvmpolld_store *pdst);
+
+static inline void pdst_lock(struct lvmpolld_store *pdst)
+{
+ pthread_mutex_lock(&pdst->lock);
+}
+
+static inline void pdst_unlock(struct lvmpolld_store *pdst)
+{
+ pthread_mutex_unlock(&pdst->lock);
+}
+
+static inline void pdst_locked_inc(struct lvmpolld_store *pdst)
+{
+ pdst->active_polling_count++;
+}
+
+static inline void pdst_locked_dec(struct lvmpolld_store *pdst)
+{
+ pdst->active_polling_count--;
+}
+
+static inline unsigned pdst_locked_get_active_count(const struct lvmpolld_store *pdst)
+{
+ return pdst->active_polling_count;
+}
+
+static inline int pdst_locked_insert(struct lvmpolld_store *pdst, const char *key, struct lvmpolld_lv *pdlv)
+{
+ return dm_hash_insert(pdst->store, key, pdlv);
+}
+
+static inline struct lvmpolld_lv *pdst_locked_lookup(struct lvmpolld_store *pdst, const char *key)
+{
+ return dm_hash_lookup(pdst->store, key);
+}
+
+static inline void pdst_locked_remove(struct lvmpolld_store *pdst, const char *key)
+{
+ dm_hash_remove(pdst->store, key);
+}
+
+struct lvmpolld_thread_data *lvmpolld_thread_data_constructor(struct lvmpolld_lv *pdlv);
+void lvmpolld_thread_data_destroy(void *thread_private);
+
+#endif /* _LVM_LVMPOLLD_DATA_UTILS_H */
diff --git a/daemons/lvmpolld/lvmpolld-protocol.h b/daemons/lvmpolld/lvmpolld-protocol.h
new file mode 100644
index 000000000..1f0d6a6ba
--- /dev/null
+++ b/daemons/lvmpolld/lvmpolld-protocol.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2015 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _LVM_LVMPOLLD_PROTOCOL_H
+#define _LVM_LVMPOLLD_PROTOCOL_H
+
+#include "polling_ops.h"
+
+#define LVMPOLLD_PROTOCOL "lvmpolld"
+#define LVMPOLLD_PROTOCOL_VERSION 1
+
+#define LVMPD_REQ_CONVERT CONVERT_POLL
+#define LVMPD_REQ_DUMP "dump"
+#define LVMPD_REQ_MERGE MERGE_POLL
+#define LVMPD_REQ_MERGE_THIN MERGE_THIN_POLL
+#define LVMPD_REQ_PROGRESS "progress_info"
+#define LVMPD_REQ_PVMOVE PVMOVE_POLL
+
+#define LVMPD_PARM_ABORT "abort"
+#define LVMPD_PARM_HANDLE_MISSING_PVS "handle_missing_pvs"
+#define LVMPD_PARM_INTERVAL "interval"
+#define LVMPD_PARM_LVID "lvid"
+#define LVMPD_PARM_LVNAME "lvname"
+#define LVMPD_PARM_SYSDIR "sysdir"
+#define LVMPD_PARM_VALUE "value" /* either retcode or signal value */
+#define LVMPD_PARM_VGNAME "vgname"
+
+#define LVMPD_RESP_FAILED "failed"
+#define LVMPD_RESP_FINISHED "finished"
+#define LVMPD_RESP_IN_PROGRESS "in_progress"
+#define LVMPD_RESP_EINVAL "invalid"
+#define LVMPD_RESP_NOT_FOUND "not_found"
+#define LVMPD_RESP_OK "OK"
+
+#define LVMPD_REAS_RETCODE "retcode" /* lvm cmd ret code */
+#define LVMPD_REAS_SIGNAL "signal" /* lvm cmd terminating signal */
+
+#define LVMPD_RET_DUP_FAILED 100
+#define LVMPD_RET_EXC_FAILED 101
+
+#endif /* _LVM_LVMPOLLD_PROTOCOL_H */
diff --git a/daemons/lvmpolld/polling_ops.h b/daemons/lvmpolld/polling_ops.h
new file mode 100644
index 000000000..9f4dd58a5
--- /dev/null
+++ b/daemons/lvmpolld/polling_ops.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2014-2015 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _LVM_TOOL_POLLING_OPS_H
+#define _LVM_TOOL_POLLING_OPS_H
+
+/* this file is also part of lvmpolld protocol */
+
+#define PVMOVE_POLL "pvmove"
+#define CONVERT_POLL "convert"
+#define MERGE_POLL "merge"
+#define MERGE_THIN_POLL "merge_thin"
+
+#endif /* _LVM_TOOL_POLLING_OPS_H */
diff --git a/doc/caching_foreign_vgs.txt b/doc/caching_foreign_vgs.txt
new file mode 100644
index 000000000..12ffecab4
--- /dev/null
+++ b/doc/caching_foreign_vgs.txt
@@ -0,0 +1,86 @@
+Q: Why should lvmetad cache foreign VGs?
+A: It's the most useful behavior in the "steady state".
+
+How to arrive at that conclusion.
+Four code configurations to consider, each in two different circumstances.
+
+configurations:
+
+1. lvm not using lvmetad
+2. lvm using lvmetad and lvmlockd
+3. lvm using lvmetad, and lvmetad does not cache foreign VGs
+ (Not currently implemented.)
+4. lvm using lvmetad, and lvmetad caches foreign VGs
+
+circumstances:
+
+A. steady state: PVs are not added or removed to/from foreign VGs
+B. transient state: PVs are added or removed to/from foreign VGs
+
+combinations:
+
+1.A. A PV is correctly shown in the foreign VG.
+1.B. A PV is correctly shown in the foreign VG.
+
+The most accurate representation, at the cost of always scanning disks.
+
+
+2.A. A PV is correctly shown in the foreign VG.
+2.B. A PV is correctly shown in the foreign VG.
+
+The most accurate representation, at the cost of using lvmlockd.
+
+
+3.A. A PV in a foreign VG is shown as unused.
+3.B. A PV in a foreign VG is shown as unused.
+
+If lvmetad ignores foreign VGs and does not cache them, the PVs in the
+foreign VGs appear to be unused. This largely defeats the purpose of
+system_id, which is meant to treat VGs/PVs as foreign instead of free
+(albeit imperfectly, see below.)
+
+
+4.A. A PV is correctly shown in the foreign VG.
+4.B. A PV is not correctly shown in the foreign VG.
+
+This avoids the cost of always scanning disks, and avoids the cost of
+using lvmlockd. The steady state 4.A. is an improvement over the steady
+state 3.A. When the steady state is the common case, this is a big
+advantage. When the steady state is *not* the common case, the foreign VG
+concept is not as useful (if shared devices are this dynamic, lvmlockd
+should be considered.)
+
+The limitations related to the transient state 4.B. are explained in
+lvmsystemid(7), along with how to handle it. The specific inaccuracies
+possible in 4.B. are:
+
+. PV is shown as belonging to a foreign VG, but is actually unused.
+. PV is shown as unused, but actually belongs to a foreign VG.
+
+To resolve the inaccuracies in the transient state (4.B.), and return the
+system to an accurate steady state (4.A.), the disks need to be scanned,
+which updates lvmetad. The scanning/updating is a manual step, i.e.
+running 'pvscan --cache', which by definition scans disks and updates
+lvmetad.
+
+--
+
+The --foreign command line option for report/display commands
+(vgs/lvs/pvs/vgdisplay/lvdisplay/pvdisplay) is not directly related to
+whether or not lvmetad caches foreign VGs.
+
+By default, foreign VGs are silently ignored and not printed by these
+commands. However, when the --foreign option is used, these commands do
+produce output about foreign VGs.
+
+(When --foreign is not used, and the command specifically requests a
+foreign VG by name, an error is produced about not accessing foreign VGs,
+and the foreign VG is not displayed.)
+
+The decision to report/display foreign VGs or not is independent of
+whether lvmetad is caching those VGs. When lvmetad is caching the foreign
+VG, a report/display command run with --foreign will scan disks to read
+the foreign VG and give the most up to date version of it (the copy of the
+foreign VG in lvmetad may be out of date due to changes to the VG by the
+foreign host.)
+
diff --git a/doc/kernel/cache-policies.txt b/doc/kernel/cache-policies.txt
index d7c440b44..0d124a971 100644
--- a/doc/kernel/cache-policies.txt
+++ b/doc/kernel/cache-policies.txt
@@ -30,28 +30,48 @@ multiqueue
This policy is the default.
-The multiqueue policy has two sets of 16 queues: one set for entries
-waiting for the cache and another one for those in the cache.
+The multiqueue policy has three sets of 16 queues: one set for entries
+waiting for the cache and another two for those in the cache (a set for
+clean entries and a set for dirty entries).
+
Cache entries in the queues are aged based on logical time. Entry into
the cache is based on variable thresholds and queue selection is based
on hit count on entry. The policy aims to take different cache miss
costs into account and to adjust to varying load patterns automatically.
Message and constructor argument pairs are:
- 'sequential_threshold <#nr_sequential_ios>' and
- 'random_threshold <#nr_random_ios>'.
+ 'sequential_threshold <#nr_sequential_ios>'
+ 'random_threshold <#nr_random_ios>'
+ 'read_promote_adjustment <value>'
+ 'write_promote_adjustment <value>'
+ 'discard_promote_adjustment <value>'
The sequential threshold indicates the number of contiguous I/Os
-required before a stream is treated as sequential. The random threshold
+required before a stream is treated as sequential. Once a stream is
+considered sequential it will bypass the cache. The random threshold
is the number of intervening non-contiguous I/Os that must be seen
before the stream is treated as random again.
The sequential and random thresholds default to 512 and 4 respectively.
-Large, sequential ios are probably better left on the origin device
-since spindles tend to have good bandwidth. The io_tracker counts
-contiguous I/Os to try to spot when the io is in one of these sequential
-modes.
+Large, sequential I/Os are probably better left on the origin device
+since spindles tend to have good sequential I/O bandwidth. The
+io_tracker counts contiguous I/Os to try to spot when the I/O is in one
+of these sequential modes. But there are use-cases for wanting to
+promote sequential blocks to the cache (e.g. fast application startup).
+If sequential threshold is set to 0 the sequential I/O detection is
+disabled and sequential I/O will no longer implicitly bypass the cache.
+Setting the random threshold to 0 does _not_ disable the random I/O
+stream detection.
+
+Internally the mq policy determines a promotion threshold. If the hit
+count of a block not in the cache goes above this threshold it gets
+promoted to the cache. The read, write and discard promote adjustment
+tunables allow you to tweak the promotion threshold by adding a small
+value based on the io type. They default to 4, 8 and 1 respectively.
+If you're trying to quickly warm a new cache device you may wish to
+reduce these to encourage promotion. Remember to switch them back to
+their defaults after the cache fills though.
cleaner
-------
diff --git a/doc/kernel/cache.txt b/doc/kernel/cache.txt
index f50470abe..68c0f517c 100644
--- a/doc/kernel/cache.txt
+++ b/doc/kernel/cache.txt
@@ -50,14 +50,16 @@ other parameters detailed later):
which are dirty, and extra hints for use by the policy object.
This information could be put on the cache device, but having it
separate allows the volume manager to configure it differently,
- e.g. as a mirror for extra robustness.
+ e.g. as a mirror for extra robustness. This metadata device may only
+ be used by a single cache device.
Fixed block size
----------------
The origin is divided up into blocks of a fixed size. This block size
is configurable when you first create the cache. Typically we've been
-using block sizes of 256k - 1024k.
+using block sizes of 256KB - 1024KB. The block size must be between 64
+(32KB) and 2097152 (1GB) and a multiple of 64 (32KB).
Having a fixed block size simplifies the target a lot. But it is
something of a compromise. For instance, a small part of a block may be
@@ -66,10 +68,11 @@ So large block sizes are bad because they waste cache space. And small
block sizes are bad because they increase the amount of metadata (both
in core and on disk).
-Writeback/writethrough
-----------------------
+Cache operating modes
+---------------------
-The cache has two modes, writeback and writethrough.
+The cache has three operating modes: writeback, writethrough and
+passthrough.
If writeback, the default, is selected then a write to a block that is
cached will go only to the cache and the block will be marked dirty in
@@ -79,15 +82,38 @@ If writethrough is selected then a write to a cached block will not
complete until it has hit both the origin and cache devices. Clean
blocks should remain clean.
+If passthrough is selected, useful when the cache contents are not known
+to be coherent with the origin device, then all reads are served from
+the origin device (all reads miss the cache) and all writes are
+forwarded to the origin device; additionally, write hits cause cache
+block invalidates. To enable passthrough mode the cache must be clean.
+Passthrough mode allows a cache device to be activated without having to
+worry about coherency. Coherency that exists is maintained, although
+the cache will gradually cool as writes take place. If the coherency of
+the cache can later be verified, or established through use of the
+"invalidate_cblocks" message, the cache device can be transitioned to
+writethrough or writeback mode while still warm. Otherwise, the cache
+contents can be discarded prior to transitioning to the desired
+operating mode.
+
A simple cleaner policy is provided, which will clean (write back) all
-dirty blocks in a cache. Useful for decommissioning a cache.
+dirty blocks in a cache. Useful for decommissioning a cache or when
+shrinking a cache. Shrinking the cache's fast device requires all cache
+blocks, in the area of the cache being removed, to be clean. If the
+area being removed from the cache still contains dirty blocks the resize
+will fail. Care must be taken to never reduce the volume used for the
+cache's fast device until the cache is clean. This is of particular
+importance if writeback mode is used. Writethrough and passthrough
+modes already maintain a clean cache. Future support to partially clean
+the cache, above a specified threshold, will allow for keeping the cache
+warm and in writeback mode during resize.
Migration throttling
--------------------
Migrating data between the origin and cache device uses bandwidth.
The user can set a throttle to prevent more than a certain amount of
-migration occuring at any one time. Currently we're not taking any
+migration occurring at any one time. Currently we're not taking any
account of normal io traffic going to the devices. More work needs
doing here to avoid migrating during those peak io moments.
@@ -98,12 +124,11 @@ the default being 204800 sectors (or 100MB).
Updating on-disk metadata
-------------------------
-On-disk metadata is committed every time a REQ_SYNC or REQ_FUA bio is
-written. If no such requests are made then commits will occur every
-second. This means the cache behaves like a physical disk that has a
-write cache (the same is true of the thin-provisioning target). If
-power is lost you may lose some recent writes. The metadata should
-always be consistent in spite of any crash.
+On-disk metadata is committed every time a FLUSH or FUA bio is written.
+If no such requests are made then commits will occur every second. This
+means the cache behaves like a physical disk that has a volatile write
+cache. If power is lost you may lose some recent writes. The metadata
+should always be consistent in spite of any crash.
The 'dirty' state for a cache block changes far too frequently for us
to keep updating it on the fly. So we treat it as a hint. In normal
@@ -159,7 +184,7 @@ Constructor
block size : cache unit size in sectors
#feature args : number of feature arguments passed
- feature args : writethrough. (The default is writeback.)
+ feature args : writethrough or passthrough (The default is writeback.)
policy : the replacement policy to use
#policy args : an even number of arguments corresponding to
@@ -175,6 +200,13 @@ Optional feature arguments are:
back cache block contents later for performance reasons,
so they may differ from the corresponding origin blocks.
+ passthrough : a degraded mode useful for various cache coherency
+ situations (e.g., rolling back snapshots of
+ underlying storage). Reads and writes always go to
+ the origin. If a write goes to a cached origin
+ block, then the cache block is invalidated.
+ To enable passthrough mode the cache must be clean.
+
A policy called 'default' is always registered. This is an alias for
the policy we currently think is giving best all round performance.
@@ -184,36 +216,43 @@ the characteristics of a specific policy, always request it by name.
Status
------
-<#used metadata blocks>/<#total metadata blocks> <#read hits> <#read misses>
-<#write hits> <#write misses> <#demotions> <#promotions> <#blocks in cache>
-<#dirty> <#features> <features>* <#core args> <core args>* <#policy args>
-<policy args>*
-
-#used metadata blocks : Number of metadata blocks used
-#total metadata blocks : Total number of metadata blocks
-#read hits : Number of times a READ bio has been mapped
+<metadata block size> <#used metadata blocks>/<#total metadata blocks>
+<cache block size> <#used cache blocks>/<#total cache blocks>
+<#read hits> <#read misses> <#write hits> <#write misses>
+<#demotions> <#promotions> <#dirty> <#features> <features>*
+<#core args> <core args>* <policy name> <#policy args> <policy args>*
+
+metadata block size : Fixed block size for each metadata block in
+ sectors
+#used metadata blocks : Number of metadata blocks used
+#total metadata blocks : Total number of metadata blocks
+cache block size : Configurable block size for the cache device
+ in sectors
+#used cache blocks : Number of blocks resident in the cache
+#total cache blocks : Total number of cache blocks
+#read hits : Number of times a READ bio has been mapped
to the cache
-#read misses : Number of times a READ bio has been mapped
+#read misses : Number of times a READ bio has been mapped
to the origin
-#write hits : Number of times a WRITE bio has been mapped
+#write hits : Number of times a WRITE bio has been mapped
to the cache
-#write misses : Number of times a WRITE bio has been
+#write misses : Number of times a WRITE bio has been
mapped to the origin
-#demotions : Number of times a block has been removed
+#demotions : Number of times a block has been removed
from the cache
-#promotions : Number of times a block has been moved to
+#promotions : Number of times a block has been moved to
the cache
-#blocks in cache : Number of blocks resident in the cache
-#dirty : Number of blocks in the cache that differ
+#dirty : Number of blocks in the cache that differ
from the origin
-#feature args : Number of feature args to follow
-feature args : 'writethrough' (optional)
-#core args : Number of core arguments (must be even)
-core args : Key/value pairs for tuning the core
+#feature args : Number of feature args to follow
+feature args : 'writethrough' (optional)
+#core args : Number of core arguments (must be even)
+core args : Key/value pairs for tuning the core
e.g. migration_threshold
-#policy args : Number of policy arguments to follow (must be even)
-policy args : Key/value pairs
- e.g. 'sequential_threshold 1024
+policy name : Name of the policy
+#policy args : Number of policy arguments to follow (must be even)
+policy args : Key/value pairs
+ e.g. sequential_threshold
Messages
--------
@@ -229,12 +268,28 @@ The message format is:
E.g.
dmsetup message my_cache 0 sequential_threshold 1024
+
+Invalidation is removing an entry from the cache without writing it
+back. Cache blocks can be invalidated via the invalidate_cblocks
+message, which takes an arbitrary number of cblock ranges. Each cblock
+range's end value is "one past the end", meaning 5-10 expresses a range
+of values from 5 to 9. Each cblock must be expressed as a decimal
+value, in the future a variant message that takes cblock ranges
+expressed in hexadecimal may be needed to better support efficient
+invalidation of larger caches. The cache must be in passthrough mode
+when invalidate_cblocks is used.
+
+ invalidate_cblocks [<cblock>|<cblock begin>-<cblock end>]*
+
+E.g.
+ dmsetup message my_cache 0 invalidate_cblocks 2345 3456-4567 5678-6789
+
Examples
========
The test suite can be found here:
-https://github.com/jthornber/thinp-test-suite
+https://github.com/jthornber/device-mapper-test-suite
dmsetup create my_cache --table '0 41943040 cache /dev/mapper/metadata \
/dev/mapper/ssd /dev/mapper/origin 512 1 writeback default 0'
diff --git a/doc/kernel/crypt.txt b/doc/kernel/crypt.txt
index 2c656ae43..692171fe9 100644
--- a/doc/kernel/crypt.txt
+++ b/doc/kernel/crypt.txt
@@ -4,12 +4,15 @@ dm-crypt
Device-Mapper's "crypt" target provides transparent encryption of block devices
using the kernel crypto API.
+For a more detailed description of supported parameters see:
+https://gitlab.com/cryptsetup/cryptsetup/wikis/DMCrypt
+
Parameters: <cipher> <key> <iv_offset> <device path> \
<offset> [<#opt_params> <opt_params>]
<cipher>
Encryption cipher and an optional IV generation mode.
- (In format cipher[:keycount]-chainmode-ivopts:ivmode).
+ (In format cipher[:keycount]-chainmode-ivmode[:ivopts]).
Examples:
des
aes-cbc-essiv:sha256
@@ -19,7 +22,11 @@ Parameters: <cipher> <key> <iv_offset> <device path> \
<key>
Key used for encryption. It is encoded as a hexadecimal number.
- You can only use key sizes that are valid for the selected cipher.
+ You can only use key sizes that are valid for the selected cipher
+ in combination with the selected iv mode.
+ Note that for some iv modes the key string can contain additional
+ keys (for example IV seed) so the key contains more parts concatenated
+ into a single string.
<keycount>
Multi-key compatibility mode. You can define <keycount> keys and
@@ -44,7 +51,7 @@ Parameters: <cipher> <key> <iv_offset> <device path> \
Otherwise #opt_params is the number of following arguments.
Example of optional parameters section:
- 1 allow_discards
+ 3 allow_discards same_cpu_crypt submit_from_crypt_cpus
allow_discards
Block discard requests (a.k.a. TRIM) are passed through the crypt device.
@@ -56,11 +63,24 @@ allow_discards
used space etc.) if the discarded blocks can be located easily on the
device later.
+same_cpu_crypt
+ Perform encryption using the same cpu that IO was submitted on.
+ The default is to use an unbound workqueue so that encryption work
+ is automatically balanced between available CPUs.
+
+submit_from_crypt_cpus
+ Disable offloading writes to a separate thread after encryption.
+ There are some situations where offloading write bios from the
+ encryption threads to a single thread degrades performance
+ significantly. The default is to offload write bios to the same
+ thread because it benefits CFQ to have writes submitted using the
+ same context.
+
Example scripts
===============
LUKS (Linux Unified Key Setup) is now the preferred way to set up disk
encryption with dm-crypt using the 'cryptsetup' utility, see
-http://code.google.com/p/cryptsetup/
+https://gitlab.com/cryptsetup/cryptsetup
[[
#!/bin/sh
diff --git a/doc/kernel/era.txt b/doc/kernel/era.txt
new file mode 100644
index 000000000..3c6d01be3
--- /dev/null
+++ b/doc/kernel/era.txt
@@ -0,0 +1,108 @@
+Introduction
+============
+
+dm-era is a target that behaves similar to the linear target. In
+addition it keeps track of which blocks were written within a user
+defined period of time called an 'era'. Each era target instance
+maintains the current era as a monotonically increasing 32-bit
+counter.
+
+Use cases include tracking changed blocks for backup software, and
+partially invalidating the contents of a cache to restore cache
+coherency after rolling back a vendor snapshot.
+
+Constructor
+===========
+
+ era <metadata dev> <origin dev> <block size>
+
+ metadata dev : fast device holding the persistent metadata
+ origin dev : device holding data blocks that may change
+ block size : block size of origin data device, granularity that is
+ tracked by the target
+
+Messages
+========
+
+None of the dm messages take any arguments.
+
+checkpoint
+----------
+
+Possibly move to a new era. You shouldn't assume the era has
+incremented. After sending this message, you should check the
+current era via the status line.
+
+take_metadata_snap
+------------------
+
+Create a clone of the metadata, to allow a userland process to read it.
+
+drop_metadata_snap
+------------------
+
+Drop the metadata snapshot.
+
+Status
+======
+
+<metadata block size> <#used metadata blocks>/<#total metadata blocks>
+<current era> <held metadata root | '-'>
+
+metadata block size : Fixed block size for each metadata block in
+ sectors
+#used metadata blocks : Number of metadata blocks used
+#total metadata blocks : Total number of metadata blocks
+current era : The current era
+held metadata root : The location, in blocks, of the metadata root
+ that has been 'held' for userspace read
+ access. '-' indicates there is no held root
+
+Detailed use case
+=================
+
+The scenario of invalidating a cache when rolling back a vendor
+snapshot was the primary use case when developing this target:
+
+Taking a vendor snapshot
+------------------------
+
+- Send a checkpoint message to the era target
+- Make a note of the current era in its status line
+- Take vendor snapshot (the era and snapshot should be forever
+ associated now).
+
+Rolling back to a vendor snapshot
+---------------------------------
+
+- Cache enters passthrough mode (see: dm-cache's docs in cache.txt)
+- Rollback vendor storage
+- Take metadata snapshot
+- Ascertain which blocks have been written since the snapshot was taken
+ by checking each block's era
+- Invalidate those blocks in the caching software
+- Cache returns to writeback/writethrough mode
+
+Memory usage
+============
+
+The target uses a bitset to record writes in the current era. It also
+has a spare bitset ready for switching over to a new era. Other than
+that it uses a few 4k blocks for updating metadata.
+
+ (4 * nr_blocks) bytes + buffers
+
+Resilience
+==========
+
+Metadata is updated on disk before a write to a previously unwritten
+block is performed. As such dm-era should not be affected by a hard
+crash such as power failure.
+
+Userland tools
+==============
+
+Userland tools are found in the increasingly poorly named
+thin-provisioning-tools project:
+
+ https://github.com/jthornber/thin-provisioning-tools
diff --git a/doc/kernel/log-writes.txt b/doc/kernel/log-writes.txt
new file mode 100644
index 000000000..c10f30c9b
--- /dev/null
+++ b/doc/kernel/log-writes.txt
@@ -0,0 +1,140 @@
+dm-log-writes
+=============
+
+This target takes 2 devices, one to pass all IO to normally, and one to log all
+of the write operations to. This is intended for file system developers wishing
+to verify the integrity of metadata or data as the file system is written to.
+There is a log_write_entry written for every WRITE request and the target is
+able to take arbitrary data from userspace to insert into the log. The data
+that is in the WRITE requests is copied into the log to make the replay happen
+exactly as it happened originally.
+
+Log Ordering
+============
+
+We log things in order of completion once we are sure the write is no longer in
+cache. This means that normal WRITE requests are not actually logged until the
+next REQ_FLUSH request. This is to make it easier for userspace to replay the
+log in a way that correlates to what is on disk and not what is in cache, to
+make it easier to detect improper waiting/flushing.
+
+This works by attaching all WRITE requests to a list once the write completes.
+Once we see a REQ_FLUSH request we splice this list onto the request and once
+the FLUSH request completes we log all of the WRITEs and then the FLUSH. Only
+completed WRITEs, at the time the REQ_FLUSH is issued, are added in order to
+simulate the worst case scenario with regard to power failures. Consider the
+following example (W means write, C means complete):
+
+W1,W2,W3,C3,C2,Wflush,C1,Cflush
+
+The log would show the following
+
+W3,W2,flush,W1....
+
+Again this is to simulate what is actually on disk, this allows us to detect
+cases where a power failure at a particular point in time would create an
+inconsistent file system.
+
+Any REQ_FUA requests bypass this flushing mechanism and are logged as soon as
+they complete as those requests will obviously bypass the device cache.
+
+Any REQ_DISCARD requests are treated like WRITE requests. Otherwise we would
+have all the DISCARD requests, and then the WRITE requests and then the FLUSH
+request. Consider the following example:
+
+WRITE block 1, DISCARD block 1, FLUSH
+
+If we logged DISCARD when it completed, the replay would look like this
+
+DISCARD 1, WRITE 1, FLUSH
+
+which isn't quite what happened and wouldn't be caught during the log replay.
+
+Target interface
+================
+
+i) Constructor
+
+ log-writes <dev_path> <log_dev_path>
+
+ dev_path : Device that all of the IO will go to normally.
+ log_dev_path : Device where the log entries are written to.
+
+ii) Status
+
+ <#logged entries> <highest allocated sector>
+
+ #logged entries : Number of logged entries
+ highest allocated sector : Highest allocated sector
+
+iii) Messages
+
+ mark <description>
+
+ You can use a dmsetup message to set an arbitrary mark in a log.
+ For example say you want to fsck a file system after every
+ write, but first you need to replay up to the mkfs to make sure
+ we're fsck'ing something reasonable, you would do something like
+ this:
+
+ mkfs.btrfs -f /dev/mapper/log
+ dmsetup message log 0 mark mkfs
+ <run test>
+
+ This would allow you to replay the log up to the mkfs mark and
+ then replay from that point on doing the fsck check in the
+ interval that you want.
+
+ Every log has a mark at the end labeled "dm-log-writes-end".
+
+Userspace component
+===================
+
+There is a userspace tool that will replay the log for you in various ways.
+It can be found here: https://github.com/josefbacik/log-writes
+
+Example usage
+=============
+
+Say you want to test fsync on your file system. You would do something like
+this:
+
+TABLE="0 $(blockdev --getsz /dev/sdb) log-writes /dev/sdb /dev/sdc"
+dmsetup create log --table "$TABLE"
+mkfs.btrfs -f /dev/mapper/log
+dmsetup message log 0 mark mkfs
+
+mount /dev/mapper/log /mnt/btrfs-test
+<some test that does fsync at the end>
+dmsetup message log 0 mark fsync
+md5sum /mnt/btrfs-test/foo
+umount /mnt/btrfs-test
+
+dmsetup remove log
+replay-log --log /dev/sdc --replay /dev/sdb --end-mark fsync
+mount /dev/sdb /mnt/btrfs-test
+md5sum /mnt/btrfs-test/foo
+<verify md5sum's are correct>
+
+Another option is to do a complicated file system operation and verify the file
+system is consistent during the entire operation. You could do this with:
+
+TABLE="0 $(blockdev --getsz /dev/sdb) log-writes /dev/sdb /dev/sdc"
+dmsetup create log --table "$TABLE"
+mkfs.btrfs -f /dev/mapper/log
+dmsetup message log 0 mark mkfs
+
+mount /dev/mapper/log /mnt/btrfs-test
+<fsstress to dirty the fs>
+btrfs filesystem balance /mnt/btrfs-test
+umount /mnt/btrfs-test
+dmsetup remove log
+
+replay-log --log /dev/sdc --replay /dev/sdb --end-mark mkfs
+btrfsck /dev/sdb
+replay-log --log /dev/sdc --replay /dev/sdb --start-mark mkfs \
+ --fsck "btrfsck /dev/sdb" --check fua
+
+And that will replay the log until it sees a FUA request, run the fsck command
+and if the fsck passes it will replay to the next FUA, until it is completed or
+the fsck command exits abnormally.
diff --git a/doc/kernel/raid.txt b/doc/kernel/raid.txt
index e9192283e..ef8ba9fa5 100644
--- a/doc/kernel/raid.txt
+++ b/doc/kernel/raid.txt
@@ -222,3 +222,5 @@ Version History
1.4.2 Add RAID10 "far" and "offset" algorithm support.
1.5.0 Add message interface to allow manipulation of the sync_action.
New status (STATUSTYPE_INFO) fields: sync_action and mismatch_cnt.
+1.5.1 Add ability to restore transiently failed devices on resume.
+1.5.2 'mismatch_cnt' is zero unless [last_]sync_action is "check".
diff --git a/doc/kernel/statistics.txt b/doc/kernel/statistics.txt
new file mode 100644
index 000000000..2a1673adc
--- /dev/null
+++ b/doc/kernel/statistics.txt
@@ -0,0 +1,186 @@
+DM statistics
+=============
+
+Device Mapper supports the collection of I/O statistics on user-defined
+regions of a DM device. If no regions are defined no statistics are
+collected so there isn't any performance impact. Only bio-based DM
+devices are currently supported.
+
+Each user-defined region specifies a starting sector, length and step.
+Individual statistics will be collected for each step-sized area within
+the range specified.
+
+The I/O statistics counters for each step-sized area of a region are
+in the same format as /sys/block/*/stat or /proc/diskstats (see:
+Documentation/iostats.txt). But two extra counters (12 and 13) are
+provided: total time spent reading and writing in milliseconds. All
+these counters may be accessed by sending the @stats_print message to
+the appropriate DM device via dmsetup.
+
+Each region has a corresponding unique identifier, which we call a
+region_id, that is assigned when the region is created. The region_id
+must be supplied when querying statistics about the region, deleting the
+region, etc. Unique region_ids enable multiple userspace programs to
+request and process statistics for the same DM device without stepping
+on each other's data.
+
+The creation of DM statistics will allocate memory via kmalloc or
+fallback to using vmalloc space. At most, 1/4 of the overall system
+memory may be allocated by DM statistics. The admin can see how much
+memory is used by reading
+/sys/module/dm_mod/parameters/stats_current_allocated_bytes
+
+Messages
+========
+
+ @stats_create <range> <step> [<program_id> [<aux_data>]]
+
+ Create a new region and return the region_id.
+
+ <range>
+ "-" - whole device
+ "<start_sector>+<length>" - a range of <length> 512-byte sectors
+ starting with <start_sector>.
+
+ <step>
+ "<area_size>" - the range is subdivided into areas each containing
+ <area_size> sectors.
+ "/<number_of_areas>" - the range is subdivided into the specified
+ number of areas.
+
+ <program_id>
+ An optional parameter. A name that uniquely identifies
+ the userspace owner of the range. This groups ranges together
+ so that userspace programs can identify the ranges they
+ created and ignore those created by others.
+ The kernel returns this string back in the output of
+ @stats_list message, but it doesn't use it for anything else.
+
+ <aux_data>
+ An optional parameter. A word that provides auxiliary data
+ that is useful to the client program that created the range.
+ The kernel returns this string back in the output of
+ @stats_list message, but it doesn't use this value for anything.
+
+ @stats_delete <region_id>
+
+ Delete the region with the specified id.
+
+ <region_id>
+ region_id returned from @stats_create
+
+ @stats_clear <region_id>
+
+ Clear all the counters except the in-flight i/o counters.
+
+ <region_id>
+ region_id returned from @stats_create
+
+ @stats_list [<program_id>]
+
+ List all regions registered with @stats_create.
+
+ <program_id>
+ An optional parameter.
+ If this parameter is specified, only matching regions
+ are returned.
+ If it is not specified, all regions are returned.
+
+ Output format:
+ <region_id>: <start_sector>+<length> <step> <program_id> <aux_data>
+
+ @stats_print <region_id> [<starting_line> <number_of_lines>]
+
+ Print counters for each step-sized area of a region.
+
+ <region_id>
+ region_id returned from @stats_create
+
+ <starting_line>
+ The index of the starting line in the output.
+ If omitted, all lines are returned.
+
+ <number_of_lines>
+ The number of lines to include in the output.
+ If omitted, all lines are returned.
+
+ Output format for each step-sized area of a region:
+
+ <start_sector>+<length> counters
+
+ The first 11 counters have the same meaning as
+ /sys/block/*/stat or /proc/diskstats.
+
+ Please refer to Documentation/iostats.txt for details.
+
+ 1. the number of reads completed
+ 2. the number of reads merged
+ 3. the number of sectors read
+ 4. the number of milliseconds spent reading
+ 5. the number of writes completed
+ 6. the number of writes merged
+ 7. the number of sectors written
+ 8. the number of milliseconds spent writing
+ 9. the number of I/Os currently in progress
+ 10. the number of milliseconds spent doing I/Os
+ 11. the weighted number of milliseconds spent doing I/Os
+
+ Additional counters:
+ 12. the total time spent reading in milliseconds
+ 13. the total time spent writing in milliseconds
+
+ @stats_print_clear <region_id> [<starting_line> <number_of_lines>]
+
+ Atomically print and then clear all the counters except the
+ in-flight i/o counters. Useful when the client consuming the
+ statistics does not want to lose any statistics (those updated
+ between printing and clearing).
+
+ <region_id>
+ region_id returned from @stats_create
+
+ <starting_line>
+ The index of the starting line in the output.
+ If omitted, all lines are printed and then cleared.
+
+ <number_of_lines>
+ The number of lines to process.
+ If omitted, all lines are printed and then cleared.
+
+ @stats_set_aux <region_id> <aux_data>
+
+ Store auxiliary data aux_data for the specified region.
+
+ <region_id>
+ region_id returned from @stats_create
+
+ <aux_data>
+ The string that identifies data which is useful to the client
+ program that created the range. The kernel returns this
+ string back in the output of @stats_list message, but it
+ doesn't use this value for anything.
+
+Examples
+========
+
+Subdivide the DM device 'vol' into 100 pieces and start collecting
+statistics on them:
+
+ dmsetup message vol 0 @stats_create - /100
+
+Set the auxiliary data string to "foo bar baz" (the escape for each
+space must also be escaped, otherwise the shell will consume them):
+
+ dmsetup message vol 0 @stats_set_aux 0 foo\\ bar\\ baz
+
+List the statistics:
+
+ dmsetup message vol 0 @stats_list
+
+Print the statistics:
+
+ dmsetup message vol 0 @stats_print 0
+
+Delete the statistics:
+
+ dmsetup message vol 0 @stats_delete 0
diff --git a/doc/kernel/switch.txt b/doc/kernel/switch.txt
new file mode 100644
index 000000000..424835e57
--- /dev/null
+++ b/doc/kernel/switch.txt
@@ -0,0 +1,138 @@
+dm-switch
+=========
+
+The device-mapper switch target creates a device that supports an
+arbitrary mapping of fixed-size regions of I/O across a fixed set of
+paths. The path used for any specific region can be switched
+dynamically by sending the target a message.
+
+It maps I/O to underlying block devices efficiently when there is a large
+number of fixed-sized address regions but there is no simple pattern
+that would allow for a compact representation of the mapping such as
+dm-stripe.
+
+Background
+----------
+
+Dell EqualLogic and some other iSCSI storage arrays use a distributed
+frameless architecture. In this architecture, the storage group
+consists of a number of distinct storage arrays ("members") each having
+independent controllers, disk storage and network adapters. When a LUN
+is created it is spread across multiple members. The details of the
+spreading are hidden from initiators connected to this storage system.
+The storage group exposes a single target discovery portal, no matter
+how many members are being used. When iSCSI sessions are created, each
+session is connected to an eth port on a single member. Data to a LUN
+can be sent on any iSCSI session, and if the blocks being accessed are
+stored on another member the I/O will be forwarded as required. This
+forwarding is invisible to the initiator. The storage layout is also
+dynamic, and the blocks stored on disk may be moved from member to
+member as needed to balance the load.
+
+This architecture simplifies the management and configuration of both
+the storage group and initiators. In a multipathing configuration, it
+is possible to set up multiple iSCSI sessions to use multiple network
+interfaces on both the host and target to take advantage of the
+increased network bandwidth. An initiator could use a simple round
+robin algorithm to send I/O across all paths and let the storage array
+members forward it as necessary, but there is a performance advantage to
+sending data directly to the correct member.
+
+A device-mapper table already lets you map different regions of a
+device onto different targets. However in this architecture the LUN is
+spread with an address region size on the order of 10s of MBs, which
+means the resulting table could have more than a million entries and
+consume far too much memory.
+
+Using this device-mapper switch target we can now build a two-layer
+device hierarchy:
+
+ Upper Tier - Determine which array member the I/O should be sent to.
+ Lower Tier - Load balance amongst paths to a particular member.
+
+The lower tier consists of a single dm multipath device for each member.
+Each of these multipath devices contains the set of paths directly to
+the array member in one priority group, and leverages existing path
+selectors to load balance amongst these paths. We also build a
+non-preferred priority group containing paths to other array members for
+failover reasons.
+
+The upper tier consists of a single dm-switch device. This device uses
+a bitmap to look up the location of the I/O and choose the appropriate
+lower tier device to route the I/O. By using a bitmap we are able to
+use 4 bits for each address range in a 16 member group (which is very
+large for us). This is a much denser representation than the dm table
+b-tree can achieve.
+
+Construction Parameters
+=======================
+
+ <num_paths> <region_size> <num_optional_args> [<optional_args>...]
+ [<dev_path> <offset>]+
+
+<num_paths>
+ The number of paths across which to distribute the I/O.
+
+<region_size>
+ The number of 512-byte sectors in a region. Each region can be redirected
+ to any of the available paths.
+
+<num_optional_args>
+ The number of optional arguments. Currently, no optional arguments
+ are supported and so this must be zero.
+
+<dev_path>
+ The block device that represents a specific path to the device.
+
+<offset>
+ The offset of the start of data on the specific <dev_path> (in units
+ of 512-byte sectors). This number is added to the sector number when
+ forwarding the request to the specific path. Typically it is zero.
+
+Messages
+========
+
+set_region_mappings <index>:<path_nr> [<index>]:<path_nr> [<index>]:<path_nr>...
+
+Modify the region table by specifying which regions are redirected to
+which paths.
+
+<index>
+ The region number (region size was specified in constructor parameters).
+ If index is omitted, the next region (previous index + 1) is used.
+ Expressed in hexadecimal (WITHOUT any prefix like 0x).
+
+<path_nr>
+ The path number in the range 0 ... (<num_paths> - 1).
+ Expressed in hexadecimal (WITHOUT any prefix like 0x).
+
+R<n>,<m>
+ This parameter allows repetitive patterns to be loaded quickly. <n> and <m>
+ are hexadecimal numbers. The last <n> mappings are repeated in the next <m>
+ slots.
+
+Status
+======
+
+No status line is reported.
+
+Example
+=======
+
+Assume that you have volumes vg1/switch0 vg1/switch1 vg1/switch2 with
+the same size.
+
+Create a switch device with 64kB region size:
+ dmsetup create switch --table "0 `blockdev --getsize /dev/vg1/switch0`
+ switch 3 128 0 /dev/vg1/switch0 0 /dev/vg1/switch1 0 /dev/vg1/switch2 0"
+
+Set mappings for the first 7 entries to point to devices switch0, switch1,
+switch2, switch0, switch1, switch2, switch1:
+ dmsetup message switch 0 set_region_mappings 0:0 :1 :2 :0 :1 :2 :1
+
+Set repetitive mapping. This command:
+ dmsetup message switch 0 set_region_mappings 1000:1 :2 R2,10
+is equivalent to:
+ dmsetup message switch 0 set_region_mappings 1000:1 :2 :1 :2 :1 :2 :1 :2 \
+ :1 :2 :1 :2 :1 :2 :1 :2 :1 :2
+
diff --git a/doc/kernel/thin-provisioning.txt b/doc/kernel/thin-provisioning.txt
index 30b8b83bd..4f67578b2 100644
--- a/doc/kernel/thin-provisioning.txt
+++ b/doc/kernel/thin-provisioning.txt
@@ -99,13 +99,14 @@ Using an existing pool device
$data_block_size $low_water_mark"
$data_block_size gives the smallest unit of disk space that can be
-allocated at a time expressed in units of 512-byte sectors. People
-primarily interested in thin provisioning may want to use a value such
-as 1024 (512KB). People doing lots of snapshotting may want a smaller value
-such as 128 (64KB). If you are not zeroing newly-allocated data,
-a larger $data_block_size in the region of 256000 (128MB) is suggested.
-$data_block_size must be the same for the lifetime of the
-metadata device.
+allocated at a time expressed in units of 512-byte sectors.
+$data_block_size must be between 128 (64KB) and 2097152 (1GB) and a
+multiple of 128 (64KB). $data_block_size cannot be changed after the
+thin-pool is created. People primarily interested in thin provisioning
+may want to use a value such as 1024 (512KB). People doing lots of
+snapshotting may want a smaller value such as 128 (64KB). If you are
+not zeroing newly-allocated data, a larger $data_block_size in the
+region of 256000 (128MB) is suggested.
$low_water_mark is expressed in blocks of size $data_block_size. If
free space on the data device drops below this level then a dm event
@@ -115,6 +116,35 @@ Resuming a device with a new table itself triggers an event so the
userspace daemon can use this to detect a situation where a new table
already exceeds the threshold.
+A low water mark for the metadata device is maintained in the kernel and
+will trigger a dm event if free space on the metadata device drops below
+it.
+
+Updating on-disk metadata
+-------------------------
+
+On-disk metadata is committed every time a FLUSH or FUA bio is written.
+If no such requests are made then commits will occur every second. This
+means the thin-provisioning target behaves like a physical disk that has
+a volatile write cache. If power is lost you may lose some recent
+writes. The metadata should always be consistent in spite of any crash.
+
+If data space is exhausted the pool will either error or queue IO
+according to the configuration (see: error_if_no_space). If metadata
+space is exhausted or a metadata operation fails: the pool will error IO
+until the pool is taken offline and repair is performed to 1) fix any
+potential inconsistencies and 2) clear the flag that imposes repair.
+Once the pool's metadata device is repaired it may be resized, which
+will allow the pool to return to normal operation. Note that if a pool
+is flagged as needing repair, the pool's data and metadata devices
+cannot be resized until repair is performed. It should also be noted
+that when the pool's metadata space is exhausted the current metadata
+transaction is aborted. Given that the pool will cache IO whose
+completion may have already been acknowledged to upper IO layers
+(e.g. filesystem) it is strongly suggested that consistency checks
+(e.g. fsck) be performed on those layers when repair of the pool is
+required.
+
Thin provisioning
-----------------
@@ -234,6 +264,8 @@ i) Constructor
read_only: Don't allow any changes to be made to the pool
metadata.
+ error_if_no_space: Error IOs, instead of queueing, if no space.
+
Data block size must be between 64KB (128 sectors) and 1GB
(2097152 sectors) inclusive.
@@ -255,10 +287,9 @@ ii) Status
should register for the event and then check the target's status.
held metadata root:
- The location, in sectors, of the metadata root that has been
+ The location, in blocks, of the metadata root that has been
'held' for userspace read access. '-' indicates there is no
- held root. This feature is not yet implemented so '-' is
- always returned.
+ held root.
discard_passdown|no_discard_passdown
Whether or not discards are actually being passed down to the
@@ -275,6 +306,14 @@ ii) Status
contain the string 'Fail'. The userspace recovery tools
should then be used.
+ error_if_no_space|queue_if_no_space
+ If the pool runs out of data or metadata space, the pool will
+ either queue or error the IO destined to the data device. The
+ default is to queue the IO until more space is added or the
+ 'no_space_timeout' expires. The 'no_space_timeout' dm-thin-pool
+ module parameter can be used to change this timeout -- it
+ defaults to 60 seconds but may be disabled using a value of 0.
+
iii) Messages
create_thin <dev id>
@@ -341,9 +380,6 @@ then you'll have no access to blocks mapped beyond the end. If you
load a target that is bigger than before, then extra blocks will be
provisioned as and when needed.
-If you wish to reduce the size of your thin device and potentially
-regain some space then send the 'trim' message to the pool.
-
ii) Status
<nr mapped sectors> <highest mapped sector>
diff --git a/doc/kernel/verity.txt b/doc/kernel/verity.txt
index 988468153..e15bc1a0f 100644
--- a/doc/kernel/verity.txt
+++ b/doc/kernel/verity.txt
@@ -11,6 +11,7 @@ Construction Parameters
<data_block_size> <hash_block_size>
<num_data_blocks> <hash_start_block>
<algorithm> <digest> <salt>
+ [<#opt_params> <opt_params>]
<version>
This is the type of the on-disk hash format.
@@ -62,6 +63,22 @@ Construction Parameters
<salt>
The hexadecimal encoding of the salt value.
+<#opt_params>
+ Number of optional parameters. If there are no optional parameters,
+ the optional parameters section can be skipped or #opt_params can be zero.
+ Otherwise #opt_params is the number of following arguments.
+
+ Example of optional parameters section:
+ 1 ignore_corruption
+
+ignore_corruption
+ Log corrupted blocks, but allow read operations to proceed normally.
+
+restart_on_corruption
+ Restart the system when a corrupted block is discovered. This option is
+ not compatible with ignore_corruption and requires user space support to
+ avoid restart loops.
+
Theory of operation
===================
@@ -125,7 +142,7 @@ block boundary) are the hash blocks which are stored a depth at a time
The full specification of kernel parameters and on-disk metadata format
is available at the cryptsetup project's wiki page
- http://code.google.com/p/cryptsetup/wiki/DMVerity
+ https://gitlab.com/cryptsetup/cryptsetup/wikis/DMVerity
Status
======
@@ -142,7 +159,7 @@ Set up a device:
A command line tool veritysetup is available to compute or verify
the hash tree or activate the kernel device. This is available from
-the cryptsetup upstream repository http://code.google.com/p/cryptsetup/
+the cryptsetup upstream repository https://gitlab.com/cryptsetup/cryptsetup/
(as a libcryptsetup extension).
Create hash on the device:
diff --git a/doc/lvmetad_design.txt b/doc/lvmetad_design.txt
index 3b336eced..1961cfbd8 100644
--- a/doc/lvmetad_design.txt
+++ b/doc/lvmetad_design.txt
@@ -137,6 +137,17 @@ hosts. Overall, this is not hard, but the devil is in the details. I would
possibly disable lvmetad for clustered volume groups in the first phase and
only proceed when the local mode is robust and well tested.
+With lvmlockd, lvmetad state is kept up to date by flagging either an
+individual VG as "invalid", or the global state as "invalid". When either
+the VG or the global state are read, this invalid flag is returned along
+with the data. The client command can check for this invalid state and
+decide to read the information from disk rather than use the stale cached
+data. After the latest data is read from disk, the command may choose to
+send it to lvmetad to update the cache. lvmlockd uses version numbers
+embedded in its VG and global locks to detect when cached data becomes
+invalid, and it then tells lvmetad to set the related invalid flag.
+dct, 2015-06-23
+
Protocol & co.
--------------
diff --git a/doc/lvmpolld_overview.txt b/doc/lvmpolld_overview.txt
new file mode 100644
index 000000000..8c66e5e1a
--- /dev/null
+++ b/doc/lvmpolld_overview.txt
@@ -0,0 +1,81 @@
+LVM poll daemon overview
+========================
+
+(last updated: 2015-05-09)
+
+LVM poll daemon (lvmpolld) is the alternative to lvm2 classical polling
+mechanisms. The motivation behind the new lvmpolld was to create a persistent
+system service that would be more durable and transparent. It's suited
+particularly for any systemd-enabled distribution.
+
+Before lvmpolld, any background polling process originating in an lvm2 command
+initiated inside the cgroup of a systemd service could get killed if the main
+process (service) in that cgroup exited. That could lead to premature
+termination of such an lvm2 polling process.
+
+Also, without lvmpolld there was no way to detect that a polling process
+monitoring a specific operation was already in progress, and therefore that
+starting another one with exactly the same task was undesirable. lvmpolld is
+able to detect such duplicate requests and avoid spawning a redundant process.
+
+lvmpolld is primarily targeted at systems with systemd as the init process. For
+systems without systemd there's no need to install lvmpolld because the issue
+described in the second paragraph does not arise. You can still benefit from
+avoiding duplicate polling processes being spawned, but without systemd lvmpolld
+can't easily be run on-demand (activated by a socket maintained by systemd).
+
+lvmpolld implements shutdown on idle and can shut down automatically when idle
+for a requested time. 60 seconds is the recommended default here. This
+behaviour can be turned off if found unnecessary.
+
+Data structures
+---------------
+
+a) Logical Volume (struct lvmpolld_lv)
+
+Each operation is identified by LV. Internal identifier within lvmpolld
+is full LV uuid (vg_uuid+lv_uuid) prefixed with LVM_SYSTEM_DIR if set by client.
+
+Such a full identifier may look like:
+
+ "/etc/lvm/lvm.confWFd2dU67S8Av29IcJCnYzqQirdfElnxzhCdzEh7EJrfCn9R1TIQjIj58weUZDre4"
+
+or without LVM_SYSTEM_DIR being set explicitly:
+
+ "WFd2dU67S8Av29IcJCnYzqQirdfElnxzhCdzEh7EJrfCn9R1TIQjIj58weUZDre4"
+
+
+LV carries various metadata about polling operation. The most significant are:
+
+VG name
+LV name
+polling interval (usually --interval passed to lvm2 command or default from lvm2
+ configuration)
+operation type (one of: pvmove, convert, merge, thin_merge)
+LVM_SYSTEM_DIR (if set, this is also passed among environment variables of lvpoll
+ command spawned by lvmpolld)
+
+b) LV stores (struct lvmpolld_store)
+
+lvmpolld uses two stores for Logical Volumes (struct lvmpolld_lv). One store is
+for polling operations in progress. These operations are currently: PV move,
+mirror up-conversion, classical snapshot merge and thin snapshot merge.
+
+The second store is suited only for pvmove --abort operations in progress. Both
+stores are independent and identical LVs (pvmove /dev/sda3 and pvmove --abort /dev/sda3)
+can be run concurrently from lvmpolld's point of view (on the lvm2 side the consistency is
+guaranteed by the lvm2 locking mechanism).
+
+Locking order
+-------------
+
+There are two types of locks in lvmpolld. Each store has its own store lock and
+each LV has its own lv lock.
+
+Locking order is:
+1) store lock
+2) LV lock
+
+Each LV has to be inside a store. When the daemon needs to take both locks it
+has to take the store lock first and the LV lock afterwards (the lock of the
+appropriate store where the LV is kept).
diff --git a/include/.symlinks.in b/include/.symlinks.in
index 48c4d9d9c..e78072a1d 100644
--- a/include/.symlinks.in
+++ b/include/.symlinks.in
@@ -1,11 +1,15 @@
@top_srcdir@/daemons/clvmd/clvm.h
@top_srcdir@/daemons/dmeventd/libdevmapper-event.h
@top_srcdir@/daemons/lvmetad/lvmetad-client.h
+@top_srcdir@/daemons/lvmpolld/lvmpolld-protocol.h
+@top_srcdir@/daemons/lvmpolld/polling_ops.h
+@top_srcdir@/daemons/lvmlockd/lvmlockd-client.h
@top_srcdir@/liblvm/lvm2app.h
@top_srcdir@/lib/activate/activate.h
@top_srcdir@/lib/activate/targets.h
@top_srcdir@/lib/cache/lvmcache.h
@top_srcdir@/lib/cache/lvmetad.h
+@top_srcdir@/lib/locking/lvmlockd.h
@top_srcdir@/lib/commands/toolcontext.h
@top_srcdir@/lib/config/config.h
@top_srcdir@/lib/config/config_settings.h
@@ -13,6 +17,7 @@
@top_srcdir@/lib/datastruct/btree.h
@top_srcdir@/lib/datastruct/str_list.h
@top_srcdir@/lib/device/dev-cache.h
+@top_srcdir@/lib/device/dev-ext-udev-constants.h
@top_srcdir@/lib/device/dev-type.h
@top_srcdir@/lib/device/device.h
@top_srcdir@/lib/device/device-types.h
@@ -28,6 +33,8 @@
@top_srcdir@/lib/locking/locking.h
@top_srcdir@/lib/log/log.h
@top_srcdir@/lib/log/lvm-logging.h
+@top_srcdir@/lib/lvmpolld/lvmpolld-client.h
+@top_srcdir@/lib/lvmpolld/polldaemon.h
@top_srcdir@/lib/metadata/lv.h
@top_srcdir@/lib/metadata/lv_alloc.h
@top_srcdir@/lib/metadata/metadata.h
@@ -69,3 +76,4 @@
@top_srcdir@/libdm/misc/kdev_t.h
@top_srcdir@/po/pogen.h
@top_srcdir@/tools/lvm2cmd.h
+@top_srcdir@/tools/tool.h
diff --git a/include/Makefile.in b/include/Makefile.in
index 3daaab105..2049b671e 100644
--- a/include/Makefile.in
+++ b/include/Makefile.in
@@ -20,8 +20,12 @@ include $(top_builddir)/make.tmpl
all: .symlinks_created
-.symlinks_created: .symlinks
- find . -maxdepth 1 -type l -exec $(RM) \{\} \;
+LINKS := $(shell find . -maxdepth 1 -type l)
+
+.symlinks_created: .symlinks
+ifneq (,$(firstword $(LINKS)))
+ $(RM) $(LINKS)
+endif
for i in `cat $<`; do $(LN_S) $$i ; done
touch $@
@@ -31,5 +35,5 @@ device-mapper: all
cflow: all
-DISTCLEAN_TARGETS += $(shell find . -maxdepth 1 -type l)
-DISTCLEAN_TARGETS += .include_symlinks .symlinks_created .symlinks
+DISTCLEAN_TARGETS += .symlinks
+CLEAN_TARGETS += $(LINKS) .include_symlinks .symlinks_created
diff --git a/lib/Makefile.in b/lib/Makefile.in
index bad5d8cd3..e29ff2985 100644
--- a/lib/Makefile.in
+++ b/lib/Makefile.in
@@ -56,6 +56,7 @@ SOURCES =\
datastruct/btree.c \
datastruct/str_list.c \
device/dev-cache.c \
+ device/dev-ext.c \
device/dev-io.c \
device/dev-md.c \
device/dev-swap.c \
@@ -69,6 +70,7 @@ SOURCES =\
filters/filter-regex.c \
filters/filter-sysfs.c \
filters/filter-md.c \
+ filters/filter-fwraid.c \
filters/filter-mpath.c \
filters/filter-partitioned.c \
filters/filter-type.c \
@@ -80,7 +82,6 @@ SOURCES =\
format_text/format-text.c \
format_text/import.c \
format_text/import_vsn1.c \
- format_text/tags.c \
format_text/text_label.c \
freeseg/freeseg.c \
label/label.c \
@@ -121,11 +122,6 @@ SOURCES =\
uuid/uuid.c \
zero/zero.c
-ifeq ("@HAVE_REALTIME@", "yes")
- SOURCES +=\
- misc/timestamp.c
-endif
-
ifeq ("@LVM1@", "internal")
SOURCES +=\
format1/disk-rep.c \
@@ -194,6 +190,16 @@ ifeq ("@BUILD_LVMETAD@", "yes")
cache/lvmetad.c
endif
+ifeq ("@BUILD_LVMPOLLD@", "yes")
+ SOURCES +=\
+ lvmpolld/lvmpolld-client.c
+endif
+
+ifeq ("@BUILD_LVMLOCKD@", "yes")
+ SOURCES +=\
+ locking/lvmlockd.c
+endif
+
ifeq ("@DMEVENTD@", "yes")
CLDFLAGS += -L$(top_builddir)/daemons/dmeventd
LIBS += -ldevmapper-event
@@ -220,7 +226,7 @@ CFLOW_LIST_TARGET = $(LIB_NAME).cflow
include $(top_builddir)/make.tmpl
-CFLAGS += $(BLKID_CFLAGS) $(UDEV_CFLAGS)
+CFLAGS += $(BLKID_CFLAGS) $(UDEV_CFLAGS) $(VALGRIND_CFLAGS)
$(SUBDIRS): $(LIB_STATIC)
diff --git a/lib/activate/activate.c b/lib/activate/activate.c
index 908a53ed2..b39406d76 100644
--- a/lib/activate/activate.c
+++ b/lib/activate/activate.c
@@ -180,7 +180,7 @@ int lv_passes_auto_activation_filter(struct cmd_context *cmd, struct logical_vol
{
const struct dm_config_node *cn;
- if (!(cn = find_config_tree_node(cmd, activation_auto_activation_volume_list_CFG, NULL))) {
+ if (!(cn = find_config_tree_array(cmd, activation_auto_activation_volume_list_CFG, NULL))) {
log_verbose("activation/auto_activation_volume_list configuration setting "
"not defined: All logical volumes will be auto-activated.");
return 1;
@@ -238,6 +238,22 @@ int lv_info_by_lvid(struct cmd_context *cmd, const char *lvid_s, int use_layer,
{
return 0;
}
+int lv_info_with_seg_status(struct cmd_context *cmd, const struct logical_volume *lv,
+ const struct lv_segment *lv_seg, int use_layer,
+ struct lv_with_info_and_seg_status *status,
+ int with_open_count, int with_read_ahead)
+{
+ return 0;
+}
+int lv_status(struct cmd_context *cmd, const struct lv_segment *lv_seg,
+ int use_layer, struct lv_seg_status *lv_seg_status)
+{
+ return 0;
+}
+int lv_cache_status(const struct logical_volume *cache_lv,
+ struct lv_status_cache **status)
+{
+}
int lv_check_not_in_use(const struct logical_volume *lv)
{
return 0;
@@ -457,7 +473,7 @@ static int _passes_activation_filter(struct cmd_context *cmd,
{
const struct dm_config_node *cn;
- if (!(cn = find_config_tree_node(cmd, activation_volume_list_CFG, NULL))) {
+ if (!(cn = find_config_tree_array(cmd, activation_volume_list_CFG, NULL))) {
log_verbose("activation/volume_list configuration setting "
"not defined: Checking only host tags for %s/%s",
lv->vg->name, lv->name);
@@ -486,7 +502,7 @@ static int _passes_readonly_filter(struct cmd_context *cmd,
{
const struct dm_config_node *cn;
- if (!(cn = find_config_tree_node(cmd, activation_read_only_volume_list_CFG, NULL)))
+ if (!(cn = find_config_tree_array(cmd, activation_read_only_volume_list_CFG, NULL)))
return 0;
return _lv_passes_volumes_filter(cmd, lv, cn, activation_read_only_volume_list_CFG);
@@ -625,7 +641,8 @@ int target_present(struct cmd_context *cmd, const char *target_name,
static int _lv_info(struct cmd_context *cmd, const struct logical_volume *lv,
int use_layer, struct lvinfo *info,
- struct lv_segment *seg, struct lv_seg_status *seg_status,
+ const struct lv_segment *seg,
+ struct lv_seg_status *seg_status,
int with_open_count, int with_read_ahead)
{
struct dm_info dminfo;
@@ -637,15 +654,20 @@ static int _lv_info(struct cmd_context *cmd, const struct logical_volume *lv,
* in progress - as only those could lead to opened files
*/
if (with_open_count) {
- if (locking_is_clustered())
- sync_local_dev_names(cmd); /* Wait to have udev in sync */
+ if (locking_is_clustered() && !sync_local_dev_names(cmd)) /* Wait to have udev in sync */
+ return_0;
else if (fs_has_non_delete_ops())
fs_unlock(); /* For non clustered - wait if there are non-delete ops */
}
/* New thin-pool has no layer, but -tpool suffix needs to be queried */
- if (!use_layer && lv_is_new_thin_pool(lv))
- use_layer = 1;
+ if (!use_layer && lv_is_new_thin_pool(lv)) {
+ /* Check if there isn't existing old thin pool mapping in the table */
+ if (!dev_manager_info(cmd->mem, lv, NULL, 0, 0, &dminfo, NULL, NULL))
+ return_0;
+ if (!dminfo.exists)
+ use_layer = 1;
+ }
if (seg_status)
seg_status->seg = seg;
@@ -704,13 +726,13 @@ int lv_info_by_lvid(struct cmd_context *cmd, const char *lvid_s, int use_layer,
* Returns 1 if lv_seg_status structure populated,
* else 0 on failure or if device not active locally.
*/
-int lv_status(struct cmd_context *cmd, struct lv_segment *lv_seg,
- struct lv_seg_status *lv_seg_status)
+int lv_status(struct cmd_context *cmd, const struct lv_segment *lv_seg,
+ int use_layer, struct lv_seg_status *lv_seg_status)
{
if (!activation())
return 0;
- return _lv_info(cmd, lv_seg->lv, 0, NULL, lv_seg, lv_seg_status, 0, 0);
+ return _lv_info(cmd, lv_seg->lv, use_layer, NULL, lv_seg, lv_seg_status, 0, 0);
}
/*
@@ -718,18 +740,18 @@ int lv_status(struct cmd_context *cmd, struct lv_segment *lv_seg,
* else 0 on failure or if device not active locally.
*
* This is the same as calling lv_info and lv_status,
- * but* it's done in one go with one ioctl if possible!
+ * but* it's done in one go with one ioctl if possible! ]
*/
int lv_info_with_seg_status(struct cmd_context *cmd, const struct logical_volume *lv,
- struct lv_segment *lv_seg, int use_layer,
- struct lvinfo *lvinfo, struct lv_seg_status *lv_seg_status,
- int with_open_count, int with_read_ahead)
+ const struct lv_segment *lv_seg, int use_layer,
+ struct lv_with_info_and_seg_status *status,
+ int with_open_count, int with_read_ahead)
{
if (!activation())
return 0;
if (lv == lv_seg->lv)
- return _lv_info(cmd, lv, use_layer, lvinfo, lv_seg, lv_seg_status,
+ return _lv_info(cmd, lv, use_layer, &status->info, lv_seg, &status->seg_status,
with_open_count, with_read_ahead);
/*
@@ -737,8 +759,8 @@ int lv_info_with_seg_status(struct cmd_context *cmd, const struct logical_volume
* status for segment that belong to another LV,
* we need to acquire info and status separately!
*/
- return _lv_info(cmd, lv, use_layer, lvinfo, NULL, NULL, with_open_count, with_read_ahead) &&
- _lv_info(cmd, lv_seg->lv, use_layer, NULL, lv_seg, lv_seg_status, 0, 0);
+ return _lv_info(cmd, lv, use_layer, &status->info, NULL, NULL, with_open_count, with_read_ahead) &&
+ _lv_info(cmd, lv_seg->lv, use_layer, NULL, lv_seg, &status->seg_status, 0, 0);
}
#define OPEN_COUNT_CHECK_RETRIES 25
@@ -918,7 +940,8 @@ int lv_raid_offset_and_sectors(const struct logical_volume *lv,
stack;
*data_offset = status->data_offset;
- *dev_sectors = status->total_dev_sectors;
+ if (dev_sectors)
+ *dev_sectors = status->total_dev_sectors;
dev_manager_destroy(dm);
@@ -1074,7 +1097,8 @@ int lv_raid_message(const struct logical_volume *lv, const char *msg)
log_error("\"%s\" is not a supported sync operation.", msg);
goto out;
}
- if (strcmp(status->sync_action, "idle")) {
+ if (strcmp(status->sync_action, "idle") &&
+ strcmp(status->sync_action, "frozen")) {
log_error("%s/%s state is currently \"%s\". Unable to switch to \"%s\".",
lv->vg->name, lv->name, status->sync_action, msg);
goto out;
@@ -1804,6 +1828,19 @@ static int _preload_detached_lv(struct logical_volume *lv, void *data)
struct detached_lv_data *detached = data;
struct lv_list *lvl_pre;
+ /* Check and preload removed raid image leg or metadata */
+ if (lv_is_raid_image(lv)) {
+ if ((lvl_pre = find_lv_in_vg_by_lvid(detached->lv_pre->vg, &lv->lvid)) &&
+ !lv_is_raid_image(lvl_pre->lv) && lv_is_active(lv) &&
+ !_lv_preload(lvl_pre->lv, detached->laopts, detached->flush_required))
+ return_0;
+ } else if (lv_is_raid_metadata(lv)) {
+ if ((lvl_pre = find_lv_in_vg_by_lvid(detached->lv_pre->vg, &lv->lvid)) &&
+ !lv_is_raid_metadata(lvl_pre->lv) && lv_is_active(lv) &&
+ !_lv_preload(lvl_pre->lv, detached->laopts, detached->flush_required))
+ return_0;
+ }
+
if ((lvl_pre = find_lv_in_vg(detached->lv_pre->vg, lv->name))) {
if (lv_is_visible(lvl_pre->lv) && lv_is_active(lv) &&
(!lv_is_cow(lv) || !lv_is_cow(lvl_pre->lv)) &&
@@ -1839,7 +1876,9 @@ static int _lv_suspend(struct cmd_context *cmd, const char *lvid_s,
goto_out;
/* Ignore origin_only unless LV is origin in both old and new metadata */
- if (!lv_is_thin_volume(ondisk_lv) && !(lv_is_origin(ondisk_lv) && lv_is_origin(incore_lv)))
+ /* or LV is thin or thin pool volume */
+ if (!lv_is_thin_volume(ondisk_lv) && !lv_is_thin_pool(ondisk_lv) &&
+ !(lv_is_origin(ondisk_lv) && lv_is_origin(incore_lv)))
laopts->origin_only = 0;
if (test_mode()) {
@@ -2013,7 +2052,6 @@ static int _lv_resume(struct cmd_context *cmd, const char *lvid_s,
const struct logical_volume *lv_to_free = NULL;
struct lvinfo info;
int r = 0;
- int messages_only = 0;
if (!activation())
return 1;
@@ -2021,10 +2059,7 @@ static int _lv_resume(struct cmd_context *cmd, const char *lvid_s,
if (!lv && !(lv_to_free = lv = lv_from_lvid(cmd, lvid_s, 0)))
goto_out;
- if (lv_is_thin_pool(lv) && laopts->origin_only)
- messages_only = 1;
-
- if (!lv_is_origin(lv) && !lv_is_thin_volume(lv))
+ if (!lv_is_origin(lv) && !lv_is_thin_volume(lv) && !lv_is_thin_pool(lv))
laopts->origin_only = 0;
if (test_mode()) {
@@ -2036,13 +2071,15 @@ static int _lv_resume(struct cmd_context *cmd, const char *lvid_s,
log_debug_activation("Resuming LV %s/%s%s%s%s.", lv->vg->name, lv->name,
error_if_not_active ? "" : " if active",
- laopts->origin_only ? " without snapshots" : "",
+ laopts->origin_only ?
+ (lv_is_thin_pool(lv) ? " pool only" :
+ lv_is_thin_volume(lv) ? " thin only" : " without snapshots") : "",
laopts->revert ? " (reverting)" : "");
if (!lv_info(cmd, lv, laopts->origin_only, &info, 0, 0))
goto_out;
- if (!info.exists || !(info.suspended || messages_only)) {
+ if (!info.exists || !info.suspended) {
if (error_if_not_active)
goto_out;
r = 1;
@@ -2264,6 +2301,16 @@ static int _lv_activate(struct cmd_context *cmd, const char *lvid_s,
goto out;
}
+ /*
+ * Check if cmirrord is running for clustered mirrors.
+ */
+ if (!laopts->exclusive && vg_is_clustered(lv->vg) &&
+ lv_is_mirror(lv) && !lv_is_raid(lv) &&
+ !cluster_mirror_is_available(lv->vg->cmd)) {
+ log_error("Shared cluster mirrors are not available.");
+ goto out;
+ }
+
if (test_mode()) {
_skip("Activating '%s'.", lv->name);
r = 1;
@@ -2288,6 +2335,7 @@ static int _lv_activate(struct cmd_context *cmd, const char *lvid_s,
if (info.exists && !info.suspended && info.live_table &&
(info.read_only == read_only_lv(lv, laopts))) {
r = 1;
+ log_debug_activation("Volume is already active.");
goto out;
}
diff --git a/lib/activate/activate.h b/lib/activate/activate.h
index 8d096a369..c2c958e26 100644
--- a/lib/activate/activate.h
+++ b/lib/activate/activate.h
@@ -36,21 +36,29 @@ typedef enum {
SEG_STATUS_RAID,
SEG_STATUS_SNAPSHOT,
SEG_STATUS_THIN,
- SEG_STATUS_THIN_POOL
+ SEG_STATUS_THIN_POOL,
+ SEG_STATUS_UNKNOWN
} lv_seg_status_type_t;
struct lv_seg_status {
struct dm_pool *mem; /* input */
- struct lv_segment *seg; /* input */
+ const struct lv_segment *seg; /* input */
lv_seg_status_type_t type; /* output */
- void *status; /* struct dm_status_* */ /* output */
+ union {
+ struct dm_status_cache *cache;
+ struct dm_status_raid *raid;
+ struct dm_status_snapshot *snapshot;
+ struct dm_status_thin *thin;
+ struct dm_status_thin_pool *thin_pool;
+ };
};
struct lv_with_info_and_seg_status {
- struct logical_volume *lv; /* input */
- struct lvinfo *info; /* output */
+ const struct logical_volume *lv; /* input */
+ int info_ok;
+ struct lvinfo info; /* output */
int seg_part_of_lv; /* output */
- struct lv_seg_status *seg_status; /* input/output, see lv_seg_status */
+ struct lv_seg_status seg_status; /* input/output, see lv_seg_status */
};
struct lv_activate_opts {
@@ -123,19 +131,19 @@ int lv_info_by_lvid(struct cmd_context *cmd, const char *lvid_s, int use_layer,
* Returns 1 if lv_seg_status structure has been populated,
* else 0 on failure or if device not active locally.
*/
-int lv_status(struct cmd_context *cmd, struct lv_segment *lv_seg,
- struct lv_seg_status *lv_seg_status);
+int lv_status(struct cmd_context *cmd, const struct lv_segment *lv_seg,
+ int use_layer, struct lv_seg_status *lv_seg_status);
/*
* Returns 1 if lv_info_and_seg_status structure has been populated,
* else 0 on failure or if device not active locally.
*
- * lv_info_with_seg_status is the same as calling lv_info and then lv_seg_status,
+ * lv_info_with_seg_status is the same as calling lv_info and then lv_status,
* but this fn tries to do that with one ioctl if possible.
*/
int lv_info_with_seg_status(struct cmd_context *cmd, const struct logical_volume *lv,
- struct lv_segment *lv_seg, int use_layer,
- struct lvinfo *lvinfo, struct lv_seg_status *lv_seg_status,
+ const struct lv_segment *lv_seg, int use_layer,
+ struct lv_with_info_and_seg_status *status,
int with_open_count, int with_read_ahead);
int lv_check_not_in_use(const struct logical_volume *lv);
diff --git a/lib/activate/dev_manager.c b/lib/activate/dev_manager.c
index aea406498..7153db362 100644
--- a/lib/activate/dev_manager.c
+++ b/lib/activate/dev_manager.c
@@ -30,14 +30,6 @@
#include <limits.h>
#include <dirent.h>
-#ifdef USE_PFL
-#define PFL() printf("%s %u\n", __func__, __LINE__);
-#define PFLA(format, arg...) printf("%s %u " format "\n", __func__, __LINE__, arg);
-#else
-#define PFL()
-#define PFLA(format, arg...)
-#endif
-
#define MAX_TARGET_PARAMSIZE 50000
#define LVM_UDEV_NOSCAN_FLAG DM_SUBSYSTEM_UDEV_FLAG0
@@ -68,6 +60,7 @@ struct dev_manager {
uint32_t pvmove_mirror_count;
int flush_required;
int activation; /* building activation tree */
+ int suspend; /* building suspend tree */
int skip_external_lv;
struct dm_list pending_delete; /* str_list of dlid(s) with pending delete */
unsigned track_pending_delete;
@@ -129,6 +122,7 @@ static int _get_segment_status_from_target_params(const char *target_name,
{
struct segment_type *segtype;
+ seg_status->type = SEG_STATUS_UNKNOWN;
/*
* TODO: Add support for other segment types too!
* The segment to report status for must be properly
@@ -136,7 +130,7 @@ static int _get_segment_status_from_target_params(const char *target_name,
* linear/striped, old snapshots and raids have proper
* segment selected for status!
*/
- if (strcmp(target_name, "cache"))
+ if (strcmp(target_name, "cache") && strcmp(target_name, "thin-pool"))
return 1;
if (!(segtype = get_segtype_from_string(seg_status->seg->lv->vg->cmd, target_name)))
@@ -150,30 +144,28 @@ static int _get_segment_status_from_target_params(const char *target_name,
}
if (!strcmp(segtype->name, "cache")) {
- if (!dm_get_status_cache(seg_status->mem, params,
- (struct dm_status_cache **) &seg_status->status))
- return_0;
- seg_status->type = SEG_STATUS_CACHE;
+ if (!dm_get_status_cache(seg_status->mem, params, &(seg_status->cache)))
+ return_0;
+ seg_status->type = SEG_STATUS_CACHE;
} else if (!strcmp(segtype->name, "raid")) {
- if (!dm_get_status_raid(seg_status->mem, params,
- (struct dm_status_raid **) &seg_status->status))
- return_0;
- seg_status->type = SEG_STATUS_RAID;
+ if (!dm_get_status_raid(seg_status->mem, params, &seg_status->raid))
+ return_0;
+ seg_status->type = SEG_STATUS_RAID;
} else if (!strcmp(segtype->name, "thin")) {
- if (!dm_get_status_thin(seg_status->mem, params,
- (struct dm_status_thin **) &seg_status->status))
- return_0;
- seg_status->type = SEG_STATUS_THIN;
+ if (!dm_get_status_thin(seg_status->mem, params, &seg_status->thin))
+ return_0;
+ seg_status->type = SEG_STATUS_THIN;
} else if (!strcmp(segtype->name, "thin-pool")) {
- if (!dm_get_status_thin_pool(seg_status->mem, params,
- (struct dm_status_thin_pool **) &seg_status->status))
- return_0;
- seg_status->type = SEG_STATUS_THIN_POOL;
+ if (!dm_get_status_thin_pool(seg_status->mem, params, &seg_status->thin_pool))
+ return_0;
+ seg_status->type = SEG_STATUS_THIN_POOL;
} else if (!strcmp(segtype->name, "snapshot")) {
- if (!dm_get_status_snapshot(seg_status->mem, params,
- (struct dm_status_snapshot **) &seg_status->status))
- return_0;
+ if (!dm_get_status_snapshot(seg_status->mem, params, &seg_status->snapshot))
+ return_0;
seg_status->type = SEG_STATUS_SNAPSHOT;
+ } else {
+ log_error(INTERNAL_ERROR "Unsupported segment type %s.", segtype->name);
+ return 0;
}
return 1;
@@ -185,6 +177,16 @@ typedef enum {
MKNODES
} info_type_t;
+/* Return length of segment depending on type and reshape_len */
+static uint32_t _seg_len(const struct lv_segment *seg)
+{
+ if (seg_is_raid(seg))
+ return seg->len - (((seg->area_count - seg->segtype->parity_devs) * seg->reshape_len) /
+ (seg_is_any_raid10(seg) ? seg->data_copies : 1));
+
+ return seg->len;
+}
+
static int _info_run(info_type_t type, const char *name, const char *dlid,
struct dm_info *dminfo, uint32_t *read_ahead,
struct lv_seg_status *seg_status,
@@ -236,7 +238,7 @@ static int _info_run(info_type_t type, const char *name, const char *dlid,
target = dm_get_next_target(dmt, target, &target_start,
&target_length, &target_name, &target_params);
if (((uint64_t) seg_status->seg->le * extent_size == target_start) &&
- ((uint64_t) (seg_status->seg->len - seg_status->seg->reshape_len) * extent_size == target_length)) {
+ ((uint64_t) _seg_len(seg_status->seg) * extent_size == target_length)) {
params_to_process = target_params;
break;
}
@@ -430,7 +432,7 @@ static int _ignore_blocked_mirror_devices(struct device *dev,
next = dm_get_next_target(dmt, next, &s, &l,
&target_type, &params);
if ((s == start) && (l == length)) {
- if (strcmp(target_type, "mirror"))
+ if (strcmp(target_type, SEG_TYPE_NAME_MIRROR))
goto_out;
if (((p = strstr(params, " block_on_error")) &&
@@ -454,6 +456,78 @@ out:
return r;
}
+static int _device_is_suspended(int major, int minor)
+{
+ struct dm_task *dmt;
+ struct dm_info info;
+ int r = 0;
+
+ if (!(dmt = dm_task_create(DM_DEVICE_INFO)))
+ return 0;
+
+ if (!dm_task_set_major_minor(dmt, major, minor, 1))
+ goto_out;
+
+ if (activation_checks() && !dm_task_enable_checks(dmt))
+ goto_out;
+
+ if (!dm_task_run(dmt) ||
+ !dm_task_get_info(dmt, &info)) {
+ log_error("Failed to get info for device %d:%d", major, minor);
+ goto out;
+ }
+
+ r = info.exists && info.suspended;
+out:
+ dm_task_destroy(dmt);
+ return r;
+}
+
+static int _ignore_suspended_snapshot_component(struct device *dev)
+{
+ struct dm_task *dmt;
+ void *next = NULL;
+ char *params, *target_type = NULL;
+ uint64_t start, length;
+ int major1, minor1, major2, minor2;
+ int r = 0;
+
+ if (!(dmt = dm_task_create(DM_DEVICE_TABLE)))
+ return_0;
+
+ if (!dm_task_set_major_minor(dmt, MAJOR(dev->dev), MINOR(dev->dev), 1))
+ goto_out;
+
+ if (activation_checks() && !dm_task_enable_checks(dmt))
+ goto_out;
+
+ if (!dm_task_run(dmt)) {
+ log_error("Failed to get state of snapshot or snapshot origin device");
+ goto out;
+ }
+
+ do {
+ next = dm_get_next_target(dmt, next, &start, &length, &target_type, &params);
+ if (!strcmp(target_type, "snapshot")) {
+ if (sscanf(params, "%d:%d %d:%d", &major1, &minor1, &major2, &minor2) != 4) {
+ log_error("Incorrect snapshot table found");
+ goto_out;
+ }
+ r = r || _device_is_suspended(major1, minor1) || _device_is_suspended(major2, minor2);
+ } else if (!strcmp(target_type, "snapshot-origin")) {
+ if (sscanf(params, "%d:%d", &major1, &minor1) != 2) {
+ log_error("Incorrect snapshot-origin table found");
+ goto_out;
+ }
+ r = r || _device_is_suspended(major1, minor1);
+ }
+ } while (next);
+
+out:
+ dm_task_destroy(dmt);
+ return r;
+}
+
/*
* device_is_usable
* @dev
@@ -541,7 +615,7 @@ int device_is_usable(struct device *dev, struct dev_usable_check_params check)
next = dm_get_next_target(dmt, next, &start, &length,
&target_type, &params);
- if (check.check_blocked && target_type && !strcmp(target_type, "mirror")) {
+ if (check.check_blocked && target_type && !strcmp(target_type, SEG_TYPE_NAME_MIRROR)) {
if (ignore_lvm_mirrors()) {
log_debug_activation("%s: Scanning mirror devices is disabled.", dev_name(dev));
goto out;
@@ -555,16 +629,30 @@ int device_is_usable(struct device *dev, struct dev_usable_check_params check)
}
/*
- * Snapshot origin could be sitting on top of a mirror which
- * could be blocking I/O. Skip snapshot origins entirely for
- * now.
+ * FIXME: Snapshot origin could be sitting on top of a mirror
+ * which could be blocking I/O. We should add a check for the
+ * stack here and see if there's blocked mirror underneath.
+ * Currently, mirrors used as origin or snapshot is not
+ * supported anymore and in general using mirrors in a stack
+ * is disabled by default (with a warning that if enabled,
+ * it could cause various deadlocks).
+ * Similar situation can happen with RAID devices where
+ * a RAID device can be snapshotted.
+ * If one of the RAID legs are down and we're doing
+ * lvconvert --repair, there's a time period in which
+ * snapshot components are (besides other devs) suspended.
+ * See also https://bugzilla.redhat.com/show_bug.cgi?id=1219222
+ * for an example where this causes problems.
*
- * FIXME: rather than skipping origin, check if mirror is
- * underneath and if the mirror is blocking I/O.
+ * This is a quick check for now, but replace it with more
+ * robust and better check that would check the stack
+ * correctly, not just snapshots but any combination possible
+ * in a stack - use proper dm tree to check this instead.
*/
- if (check.check_suspended && target_type && !strcmp(target_type, "snapshot-origin")) {
- log_debug_activation("%s: Snapshot-origin device %s not usable.",
- dev_name(dev), name);
+ if (check.check_suspended && target_type &&
+ (!strcmp(target_type, "snapshot") || !strcmp(target_type, "snapshot-origin")) &&
+ _ignore_suspended_snapshot_component(dev)) {
+ log_debug_activation("%s: %s device %s not usable.", dev_name(dev), target_type, name);
goto out;
}
@@ -1840,7 +1928,6 @@ struct pool_cb_data {
int skip_zero; /* to skip zeroed device header (check first 64B) */
int exec; /* which binary to call */
int opts;
- const char *defaults;
const char *global;
};
@@ -1848,7 +1935,6 @@ static int _pool_callback(struct dm_tree_node *node,
dm_node_callback_t type, void *cb_data)
{
int ret, status, fd;
- char *split;
const struct dm_config_node *cn;
const struct dm_config_value *cv;
const struct pool_cb_data *data = cb_data;
@@ -1863,23 +1949,19 @@ static int _pool_callback(struct dm_tree_node *node,
if (!*argv[0])
return 1; /* Checking disabled */
- if ((cn = find_config_tree_node(mlv->vg->cmd, data->opts, NULL))) {
- for (cv = cn->v; cv && args < 16; cv = cv->next) {
- if (cv->type != DM_CFG_STRING) {
- log_error("Invalid string in config file: "
- "global/%s_check_options",
- data->global);
- return 0;
- }
- argv[++args] = cv->v.str;
- }
- } else {
- /* Use default options (no support for options with spaces) */
- if (!(split = dm_pool_strdup(data->dm->mem, data->defaults))) {
- log_error("Failed to duplicate defaults.");
+ if (!(cn = find_config_tree_array(mlv->vg->cmd, data->opts, NULL))) {
+ log_error(INTERNAL_ERROR "Unable to find configuration for pool check options.");
+ return 0;
+ }
+
+ for (cv = cn->v; cv && args < 16; cv = cv->next) {
+ if (cv->type != DM_CFG_STRING) {
+ log_error("Invalid string in config file: "
+ "global/%s_check_options",
+ data->global);
return 0;
}
- args = dm_split_words(split, 16, 0, (char**) argv + 1);
+ argv[++args] = cv->v.str;
}
if (args == 16) {
@@ -1970,14 +2052,12 @@ static int _pool_register_callback(struct dev_manager *dm,
data->skip_zero = 1;
data->exec = global_thin_check_executable_CFG;
data->opts = global_thin_check_options_CFG;
- data->defaults = DEFAULT_THIN_CHECK_OPTIONS;
data->global = "thin";
} else if (lv_is_cache(lv)) { /* cache pool */
data->pool_lv = first_seg(lv)->pool_lv;
data->skip_zero = dm->activation;
data->exec = global_cache_check_executable_CFG;
data->opts = global_cache_check_options_CFG;
- data->defaults = DEFAULT_CACHE_CHECK_OPTIONS;
data->global = "cache";
} else {
log_error(INTERNAL_ERROR "Registering unsupported pool callback.");
@@ -1989,6 +2069,11 @@ static int _pool_register_callback(struct dev_manager *dm,
return 1;
}
+/* Declaration to resolve suspend tree and message passing for thin-pool */
+static int _add_target_to_dtree(struct dev_manager *dm,
+ struct dm_tree_node *dnode,
+ struct lv_segment *seg,
+ struct lv_activate_opts *laopts);
/*
* Add LV and any known dependencies
*/
@@ -2057,15 +2142,43 @@ static int _add_lv_to_dtree(struct dev_manager *dm, struct dm_tree *dtree,
*/
if (!_add_dev_to_dtree(dm, dtree, lv, lv_layer(lv)))
return_0;
+
+ /*
+ * TODO: change API and move this code
+ * Could be easier to handle this in _add_dev_to_dtree()
+ * and base this according to info.exists ?
+ */
if (!dm->activation) {
- /* Setup callback for non-activation partial tree */
- /* Activation gets own callback when needed */
- /* TODO: extend _cached_dm_info() to return dnode */
if (!(uuid = build_dm_uuid(dm->mem, lv, lv_layer(lv))))
return_0;
- if ((node = dm_tree_find_node_by_uuid(dtree, uuid)) &&
- !_pool_register_callback(dm, node, lv))
- return_0;
+ if ((node = dm_tree_find_node_by_uuid(dtree, uuid))) {
+ if (origin_only) {
+ struct lv_activate_opts laopts = {
+ .origin_only = 1,
+ .send_messages = 1 /* Node with messages */
+ };
+ /*
+					 * Add some messages if the right node exists in the table only
+ * when building SUSPEND tree for origin-only thin-pool.
+ *
+ * TODO: Fix call of '_add_target_to_dtree()' to add message
+ * to thin-pool node as we already know the pool node exists
+ * in the table. Any better/cleaner API way ?
+ *
+ * Probably some 'new' target method to add messages for any node?
+ */
+ if (dm->suspend &&
+ !dm_list_empty(&(first_seg(lv)->thin_messages)) &&
+ !_add_target_to_dtree(dm, node, first_seg(lv), &laopts))
+ return_0;
+ } else {
+ /* Setup callback for non-activation partial tree */
+ /* Activation gets own callback when needed */
+ /* TODO: extend _cached_dm_info() to return dnode */
+ if (!_pool_register_callback(dm, node, lv))
+ return_0;
+ }
+ }
}
}
@@ -2164,7 +2277,7 @@ static struct dm_tree *_create_partial_dtree(struct dev_manager *dm, const struc
dm_tree_set_optional_uuid_suffixes(dtree, &uuid_suffix_list[0]);
- if (!_add_lv_to_dtree(dm, dtree, lv, (lv_is_origin(lv) || lv_is_thin_volume(lv)) ? origin_only : 0))
+ if (!_add_lv_to_dtree(dm, dtree, lv, (lv_is_origin(lv) || lv_is_thin_volume(lv) || lv_is_thin_pool(lv)) ? origin_only : 0))
goto_bad;
return dtree;
@@ -2183,7 +2296,7 @@ static char *_add_error_device(struct dev_manager *dm, struct dm_tree *dtree,
struct lv_segment *seg_i;
struct dm_info info;
int segno = -1, i = 0;
- uint64_t size = (uint64_t) (seg->len - seg->reshape_len) * seg->lv->vg->extent_size;
+ uint64_t size = (uint64_t) _seg_len(seg) * seg->lv->vg->extent_size;
dm_list_iterate_items(seg_i, &seg->lv->segments) {
if (seg == seg_i)
@@ -2301,7 +2414,7 @@ int add_areas_line(struct dev_manager *dm, struct lv_segment *seg,
* is used in the CTR table.
*/
if ((seg_type(seg, s) == AREA_UNASSIGNED) ||
- ((seg_lv(seg, s)->status & VISIBLE_LV) &&
+ (lv_is_visible(seg_lv(seg, s)) &&
!(seg_lv(seg, s)->status & LVM_WRITE))) {
/* One each for metadata area and data area */
if (!dm_tree_node_add_null_area(node, 0) ||
@@ -2462,7 +2575,7 @@ PFLA("%s seg->len=%u seg->reshape_len=%u", seg->lv->name ? seg->lv->name : "NOLV
return seg->segtype->ops->add_target_line(dm, dm->mem, dm->cmd,
&dm->target_state, seg,
laopts, dnode,
- extent_size * (seg->len - seg->reshape_len),
+ extent_size * _seg_len(seg),
&dm->pvmove_mirror_count);
}
@@ -2624,7 +2737,7 @@ static int _add_segment_to_dtree(struct dev_manager *dm,
return_0;
/* Add pool layer */
- if (seg->pool_lv &&
+ if (seg->pool_lv && !laopts->origin_only &&
!_add_new_lv_to_dtree(dm, dtree, seg->pool_lv, laopts,
lv_layer(seg->pool_lv)))
return_0;
@@ -2653,7 +2766,7 @@ static int _add_segment_to_dtree(struct dev_manager *dm,
/* Replace target and all its used devs with error mapping */
log_debug_activation("Using error for pending delete %s.",
seg->lv->name);
- if (!dm_tree_node_add_error_target(dnode, (uint64_t)seg->lv->vg->extent_size * (seg->len - seg->reshape_len)))
+ if (!dm_tree_node_add_error_target(dnode, (uint64_t) seg->lv->vg->extent_size * _seg_len(seg)))
return_0;
} else if (!_add_target_to_dtree(dm, dnode, seg, laopts))
return_0;
@@ -2831,6 +2944,7 @@ static int _add_new_lv_to_dtree(struct dev_manager *dm, struct dm_tree *dtree,
if (lv_is_origin(lv) && !layer) {
if (!_add_new_lv_to_dtree(dm, dtree, lv, laopts, "real"))
return_0;
+
if (!laopts->no_merging && lv_is_merging_origin(lv)) {
if (!_add_new_lv_to_dtree(dm, dtree,
find_snapshot(lv)->cow, laopts, "cow"))
@@ -2854,6 +2968,7 @@ static int _add_new_lv_to_dtree(struct dev_manager *dm, struct dm_tree *dtree,
} else if (lv_is_cow(lv) && !layer) {
if (!_add_new_lv_to_dtree(dm, dtree, lv, laopts, "cow"))
return_0;
+
if (!_add_snapshot_target_to_dtree(dm, dnode, lv, laopts))
return_0;
} else if (!layer && ((lv_is_thin_pool(lv) && !lv_is_new_thin_pool(lv)) ||
@@ -3054,7 +3169,10 @@ static int _tree_action(struct dev_manager *dm, const struct logical_volume *lv,
int r = 0;
if (action < DM_ARRAY_SIZE(_action_names))
- log_debug_activation("Creating %s tree for %s.", _action_names[action], lv->name);
+ log_debug_activation("Creating %s%s tree for %s.",
+ _action_names[action],
+ (laopts->origin_only) ? " origin-only" : "",
+ display_lvname(lv));
/* Some LV can be used for top level tree */
/* TODO: add more.... */
@@ -3064,6 +3182,7 @@ static int _tree_action(struct dev_manager *dm, const struct logical_volume *lv,
}
/* Some targets may build bigger tree for activation */
dm->activation = ((action == PRELOAD) || (action == ACTIVATE));
+ dm->suspend = (action == SUSPEND_WITH_LOCKFS) || (action == SUSPEND);
if (!(dtree = _create_partial_dtree(dm, lv, laopts->origin_only)))
return_0;
@@ -3108,7 +3227,9 @@ static int _tree_action(struct dev_manager *dm, const struct logical_volume *lv,
case PRELOAD:
case ACTIVATE:
/* Add all required new devices to tree */
- if (!_add_new_lv_to_dtree(dm, dtree, lv, laopts, (lv_is_origin(lv) && laopts->origin_only) ? "real" : NULL))
+ if (!_add_new_lv_to_dtree(dm, dtree, lv, laopts,
+ (lv_is_origin(lv) && laopts->origin_only) ? "real" :
+ (lv_is_thin_pool(lv) && laopts->origin_only) ? "tpool" : NULL))
goto_out;
/* Preload any devices required before any suspensions */
@@ -3146,7 +3267,6 @@ out_no_root:
int dev_manager_activate(struct dev_manager *dm, const struct logical_volume *lv,
struct lv_activate_opts *laopts)
{
- laopts->send_messages = 1;
if (!_tree_action(dm, lv, laopts, ACTIVATE))
return_0;
diff --git a/lib/cache/lvmcache.c b/lib/cache/lvmcache.c
index 416907e99..cfa1d5f22 100644
--- a/lib/cache/lvmcache.c
+++ b/lib/cache/lvmcache.c
@@ -56,6 +56,9 @@ struct lvmcache_vginfo {
char _padding[7];
struct lvmcache_vginfo *next; /* Another VG with same name? */
char *creation_host;
+ char *lock_type;
+ uint32_t mda_checksum;
+ size_t mda_size;
size_t vgmetadata_size;
char *vgmetadata; /* Copy of VG metadata as format_text string */
struct dm_config_tree *cft; /* Config tree created from vgmetadata */
@@ -65,6 +68,7 @@ struct lvmcache_vginfo {
unsigned vg_use_count; /* Counter of vg reusage */
unsigned precommitted; /* Is vgmetadata live or precommitted? */
unsigned cached_vg_invalidated; /* Signal to regenerate cached_vg */
+ unsigned preferred_duplicates; /* preferred duplicate pvs have been set */
};
static struct dm_hash_table *_pvid_hash = NULL;
@@ -76,6 +80,7 @@ static int _scanning_in_progress = 0;
static int _has_scanned = 0;
static int _vgs_locked = 0;
static int _vg_global_lock_held = 0; /* Global lock held when cache wiped? */
+static int _found_duplicate_pvs = 0; /* If we never see a duplicate PV we can skip checking for them later. */
int lvmcache_init(void)
{
@@ -112,6 +117,47 @@ int lvmcache_init(void)
return 1;
}
+/*
+ * Once PV info has been populated in lvmcache and
+ * lvmcache has chosen preferred duplicate devices,
+ * set this flag so that lvmcache will not try to
+ * compare and choose preferred duplicate devices
+ * again (which may result in different preferred
+ * devices.) PV info can be populated in lvmcache
+ * multiple times, each time causing lvmcache to
+ * compare the duplicate devices, so we need to
+ * record that the comparison/preferences have
+ * already been done, so the preferences from the
+ * first time through are not changed.
+ *
+ * This is something of a hack to work around the
+ * fact that the code isn't really designed to
+ * handle duplicate PVs, and the fact that lvmetad
+ * has its own way of picking a preferred duplicate
+ * and lvmcache has another way based on having
+ * more information than lvmetad does.
+ *
+ * If we come up with a better overall method to
+ * handle duplicate PVs, then this can probably be
+ * removed.
+ *
+ * FIXME: if we want to make lvmetad work with clvmd,
+ * then this may need to be changed to set
+ * preferred_duplicates back to 0.
+ */
+
+void lvmcache_set_preferred_duplicates(const char *vgid)
+{
+ struct lvmcache_vginfo *vginfo;
+
+ if (!(vginfo = lvmcache_vginfo_from_vgid(vgid))) {
+ stack;
+ return;
+ }
+
+ vginfo->preferred_duplicates = 1;
+}
+
void lvmcache_seed_infos_from_lvmetad(struct cmd_context *cmd)
{
if (!lvmetad_active() || _has_scanned)
@@ -284,6 +330,9 @@ void lvmcache_commit_metadata(const char *vgname)
void lvmcache_drop_metadata(const char *vgname, int drop_precommitted)
{
+ if (lvmcache_vgname_is_locked(VG_GLOBAL) && !vg_write_lock_held())
+ return;
+
/* For VG_ORPHANS, we need to invalidate all labels on orphan PVs. */
if (!strcmp(vgname, VG_ORPHANS)) {
_drop_metadata(FMT_TEXT_ORPHAN_VG_NAME, 0);
@@ -292,7 +341,7 @@ void lvmcache_drop_metadata(const char *vgname, int drop_precommitted)
/* Indicate that PVs could now be missing from the cache */
init_full_scan_done(0);
- } else if (!lvmcache_vgname_is_locked(VG_GLOBAL))
+ } else
_drop_metadata(vgname, drop_precommitted);
}
@@ -402,6 +451,16 @@ int lvmcache_vgs_locked(void)
return _vgs_locked;
}
+/*
+ * When lvmcache sees a duplicate PV, this is set.
+ * process_each_pv() can avoid searching for duplicates
+ * by checking this and seeing that no duplicate PVs exist.
+ */
+int lvmcache_found_duplicate_pvs(void)
+{
+ return _found_duplicate_pvs;
+}
+
static void _vginfo_attach_info(struct lvmcache_vginfo *vginfo,
struct lvmcache_info *info)
{
@@ -847,6 +906,37 @@ int lvmcache_vginfo_holders_dec_and_test_for_zero(struct lvmcache_vginfo *vginfo
}
// #endif
+int lvmcache_get_vgnameids(struct cmd_context *cmd, int include_internal,
+ struct dm_list *vgnameids)
+{
+ struct vgnameid_list *vgnl;
+ struct lvmcache_vginfo *vginfo;
+
+ lvmcache_label_scan(cmd, 0);
+
+ dm_list_iterate_items(vginfo, &_vginfos) {
+ if (!include_internal && is_orphan_vg(vginfo->vgname))
+ continue;
+
+ if (!(vgnl = dm_pool_alloc(cmd->mem, sizeof(*vgnl)))) {
+ log_error("vgnameid_list allocation failed.");
+ return 0;
+ }
+
+ vgnl->vgid = dm_pool_strdup(cmd->mem, vginfo->vgid);
+ vgnl->vg_name = dm_pool_strdup(cmd->mem, vginfo->vgname);
+
+ if (!vgnl->vgid || !vgnl->vg_name) {
+ log_error("vgnameid_list member allocation failed.");
+ return 0;
+ }
+
+ dm_list_add(vgnameids, &vgnl->list);
+ }
+
+ return 1;
+}
+
struct dm_list *lvmcache_get_vgids(struct cmd_context *cmd,
int include_internal)
{
@@ -1358,7 +1448,7 @@ static int _lvmcache_update_vgname(struct lvmcache_info *info,
}
static int _lvmcache_update_vgstatus(struct lvmcache_info *info, uint32_t vgstatus,
- const char *creation_host)
+ const char *creation_host, const char *lock_type)
{
if (!info || !info->vginfo)
return 1;
@@ -1371,11 +1461,11 @@ static int _lvmcache_update_vgstatus(struct lvmcache_info *info, uint32_t vgstat
info->vginfo->status = vgstatus;
if (!creation_host)
- return 1;
+ goto set_lock_type;
if (info->vginfo->creation_host && !strcmp(creation_host,
info->vginfo->creation_host))
- return 1;
+ goto set_lock_type;
if (info->vginfo->creation_host)
dm_free(info->vginfo->creation_host);
@@ -1389,6 +1479,44 @@ static int _lvmcache_update_vgstatus(struct lvmcache_info *info, uint32_t vgstat
log_debug_cache("lvmcache: %s: VG %s: Set creation host to %s.",
dev_name(info->dev), info->vginfo->vgname, creation_host);
+set_lock_type:
+
+ if (!lock_type)
+ goto out;
+
+ if (info->vginfo->lock_type && !strcmp(lock_type, info->vginfo->lock_type))
+ goto out;
+
+ if (info->vginfo->lock_type)
+ dm_free(info->vginfo->lock_type);
+
+ if (!(info->vginfo->lock_type = dm_strdup(lock_type))) {
+ log_error("cache creation host alloc failed for %s",
+ lock_type);
+ return 0;
+ }
+
+out:
+ return 1;
+}
+
+static int _lvmcache_update_vg_mda_info(struct lvmcache_info *info, uint32_t mda_checksum,
+ size_t mda_size)
+{
+ if (!info || !info->vginfo || !mda_size)
+ return 1;
+
+ if (info->vginfo->mda_checksum == mda_checksum || info->vginfo->mda_size == mda_size)
+ return 1;
+
+ info->vginfo->mda_checksum = mda_checksum;
+ info->vginfo->mda_size = mda_size;
+
+ /* FIXME Add checksum index */
+
+ log_debug_cache("lvmcache: %s: VG %s: Stored metadata checksum %" PRIu32 " with size %" PRIsize_t ".",
+ dev_name(info->dev), info->vginfo->vgname, mda_checksum, mda_size);
+
return 1;
}
@@ -1402,10 +1530,11 @@ int lvmcache_add_orphan_vginfo(const char *vgname, struct format_type *fmt)
return _lvmcache_update_vgname(NULL, vgname, vgname, 0, "", fmt);
}
-int lvmcache_update_vgname_and_id(struct lvmcache_info *info,
- const char *vgname, const char *vgid,
- uint32_t vgstatus, const char *creation_host)
+int lvmcache_update_vgname_and_id(struct lvmcache_info *info, struct lvmcache_vgsummary *vgsummary)
{
+ const char *vgname = vgsummary->vgname;
+ const char *vgid = (char *)&vgsummary->vgid;
+
if (!vgname && !info->vginfo) {
log_error(INTERNAL_ERROR "NULL vgname handed to cache");
/* FIXME Remove this */
@@ -1433,10 +1562,11 @@ int lvmcache_update_vgname_and_id(struct lvmcache_info *info,
if (!is_orphan_vg(vgname))
info->status &= ~CACHE_INVALID;
- if (!_lvmcache_update_vgname(info, vgname, vgid, vgstatus,
- creation_host, info->fmt) ||
+ if (!_lvmcache_update_vgname(info, vgname, vgid, vgsummary->vgstatus,
+ vgsummary->creation_host, info->fmt) ||
!_lvmcache_update_vgid(info, info->vginfo, vgid) ||
- !_lvmcache_update_vgstatus(info, vgstatus, creation_host))
+ !_lvmcache_update_vgstatus(info, vgsummary->vgstatus, vgsummary->creation_host, vgsummary->lock_type) ||
+ !_lvmcache_update_vg_mda_info(info, vgsummary->mda_checksum, vgsummary->mda_size))
return_0;
return 1;
@@ -1447,6 +1577,12 @@ int lvmcache_update_vg(struct volume_group *vg, unsigned precommitted)
struct pv_list *pvl;
struct lvmcache_info *info;
char pvid_s[ID_LEN + 1] __attribute__((aligned(8)));
+ struct lvmcache_vgsummary vgsummary = {
+ .vgname = vg->name,
+ .vgstatus = vg->status,
+ .vgid = vg->id,
+ .lock_type = vg->lock_type
+ };
pvid_s[sizeof(pvid_s) - 1] = '\0';
@@ -1454,9 +1590,7 @@ int lvmcache_update_vg(struct volume_group *vg, unsigned precommitted)
strncpy(pvid_s, (char *) &pvl->pv->id, sizeof(pvid_s) - 1);
/* FIXME Could pvl->pv->dev->pvid ever be different? */
if ((info = lvmcache_info_from_pvid(pvid_s, 0)) &&
- !lvmcache_update_vgname_and_id(info, vg->name,
- (char *) &vg->id,
- vg->status, NULL))
+ !lvmcache_update_vgname_and_id(info, &vgsummary))
return_0;
}
@@ -1467,6 +1601,85 @@ int lvmcache_update_vg(struct volume_group *vg, unsigned precommitted)
return 1;
}
+/*
+ * Replace pv->dev with dev so that dev will appear for reporting.
+ */
+
+void lvmcache_replace_dev(struct cmd_context *cmd, struct physical_volume *pv,
+ struct device *dev)
+{
+ struct lvmcache_info *info;
+ char pvid_s[ID_LEN + 1] __attribute__((aligned(8)));
+
+ strncpy(pvid_s, (char *) &pv->id, sizeof(pvid_s) - 1);
+ pvid_s[sizeof(pvid_s) - 1] = '\0';
+
+ if (!(info = lvmcache_info_from_pvid(pvid_s, 0)))
+ return;
+
+ info->dev = dev;
+ info->label->dev = dev;
+ pv->dev = dev;
+}
+
+/*
+ * We can see multiple different devices with the
+ * same pvid, i.e. duplicates.
+ *
+ * There may be different reasons for seeing two
+ * devices with the same pvid:
+ * - multipath showing two paths to the same thing
+ * - one device copied to another, e.g. with dd,
+ * also referred to as cloned devices.
+ * - a "subsystem" taking a device and creating
+ * another device of its own that represents the
+ * underlying device it is using, e.g. using dm
+ * to create an identity mapping of a PV.
+ *
+ * Given duplicate devices, we have to choose one
+ * of them to be the "preferred" dev, i.e. the one
+ * that will be referenced in lvmcache, by pv->dev.
+ * We can keep the existing dev, that's currently
+ * used in lvmcache, or we can replace the existing
+ * dev with the new duplicate.
+ *
+ * Regardless of which device is preferred, we need
+ * to print messages explaining which devices were
+ * found so that a user can sort out for themselves
+ * what has happened if the preferred device is not
+ * the one they are interested in.
+ *
+ * If a user wants to use the non-preferred device,
+ * they will need to filter out the device that
+ * lvm is preferring.
+ *
+ * The dev_subsystem calls check if the major number
+ * of the dev is part of a subsystem like DM/MD/DRBD.
+ * A dev that's part of a subsystem is preferred over a
+ * duplicate of that dev that is not part of a
+ * subsystem.
+ *
+ * The has_holders calls check if the device is being
+ * used by another, and prefers one that's being used.
+ *
+ * FIXME: why do we prefer a device without holders
+ * over a device with holders? We should understand
+ * the reason for that choice.
+ *
+ * FIXME: there may be other reasons to prefer one
+ * device over another:
+ *
+ * . are there other use/open counts we could check
+ * beyond the holders?
+ *
+ * . check if either is bad/usable and prefer
+ * the good one?
+ *
+ * . prefer the one with smaller minor number?
+ * Might avoid disturbing things due to a new
+ * transient duplicate?
+ */
+
struct lvmcache_info *lvmcache_add(struct labeller *labeller, const char *pvid,
struct device *dev,
const char *vgname, const char *vgid,
@@ -1477,6 +1690,14 @@ struct lvmcache_info *lvmcache_add(struct labeller *labeller, const char *pvid,
struct label *label;
struct lvmcache_info *existing, *info;
char pvid_s[ID_LEN + 1] __attribute__((aligned(8)));
+ struct lvmcache_vgsummary vgsummary = {
+ .vgname = vgname,
+ .vgstatus = vgstatus,
+ };
+
+ /* N.B. vgid is not NUL-terminated when called from _text_pv_write */
+ if (vgid)
+ strncpy((char *)&vgsummary.vgid, vgid, sizeof(vgsummary.vgid));
if (!_vgname_hash && !lvmcache_init()) {
log_error("Internal cache initialisation failed");
@@ -1506,49 +1727,166 @@ struct lvmcache_info *lvmcache_add(struct labeller *labeller, const char *pvid,
lvmcache_del_bas(info);
} else {
if (existing->dev != dev) {
- /* Is the existing entry a duplicate pvid e.g. md ? */
- if (dev_subsystem_part_major(dt, existing->dev) &&
- !dev_subsystem_part_major(dt, dev)) {
- log_very_verbose("Ignoring duplicate PV %s on "
- "%s - using %s %s",
- pvid, dev_name(dev),
- dev_subsystem_name(dt, existing->dev),
- dev_name(existing->dev));
+ int old_in_subsystem = 0;
+ int new_in_subsystem = 0;
+ int old_is_dm = 0;
+ int new_is_dm = 0;
+ int old_has_holders = 0;
+ int new_has_holders = 0;
+
+ /*
+ * Here are different devices with the same pvid:
+ * duplicates. See comment above.
+ */
+
+ /*
+ * This flag tells the process_each_pv code to search
+ * the devices list for duplicates, so that devices
+ * can be processed together with their duplicates
+ * (while processing the VG, rather than reporting
+ * pv->dev under the VG, and its duplicate outside
+ * the VG context.)
+ */
+ _found_duplicate_pvs = 1;
+
+ /*
+ * The new dev may not have pvid set.
+ * The process_each_pv code needs to have the pvid
+ * set in each device to detect that the devices
+ * are duplicates.
+ */
+ strncpy(dev->pvid, pvid_s, sizeof(dev->pvid));
+
+ /*
+ * Now decide if we are going to ignore the new
+ * device, or replace the existing/old device in
+ * lvmcache with the new one.
+ */
+ old_in_subsystem = dev_subsystem_part_major(dt, existing->dev);
+ new_in_subsystem = dev_subsystem_part_major(dt, dev);
+
+ old_is_dm = dm_is_dm_major(MAJOR(existing->dev->dev));
+ new_is_dm = dm_is_dm_major(MAJOR(dev->dev));
+
+ old_has_holders = dm_device_has_holders(MAJOR(existing->dev->dev), MINOR(existing->dev->dev));
+ new_has_holders = dm_device_has_holders(MAJOR(dev->dev), MINOR(dev->dev));
+
+ if (old_has_holders && new_has_holders) {
+ /*
+ * This is not a selection of old or new, but
+ * just a warning to be aware of.
+ */
+ log_warn("WARNING: duplicate PV %s is being used from both devices %s and %s",
+ pvid_s,
+ dev_name(existing->dev),
+ dev_name(dev));
+ }
+
+ if (existing->vginfo->preferred_duplicates) {
+ /*
+ * The preferred duplicate devs have already
+ * been chosen during a previous populating of
+ * lvmcache, so just use the existing preferences.
+ */
+ log_verbose("Found duplicate PV %s: using existing dev %s",
+ pvid_s,
+ dev_name(existing->dev));
+ return NULL;
+ }
+
+ if (old_in_subsystem && !new_in_subsystem) {
+ /* Use old, ignore new. */
+ log_warn("Found duplicate PV %s: using %s not %s",
+ pvid_s,
+ dev_name(existing->dev),
+ dev_name(dev));
+ log_warn("Using duplicate PV %s from subsystem %s, ignoring %s",
+ dev_name(existing->dev),
+ dev_subsystem_name(dt, existing->dev),
+ dev_name(dev));
return NULL;
- } else if (dm_is_dm_major(MAJOR(existing->dev->dev)) &&
- !dm_is_dm_major(MAJOR(dev->dev))) {
- log_very_verbose("Ignoring duplicate PV %s on "
- "%s - using dm %s",
- pvid, dev_name(dev),
- dev_name(existing->dev));
+
+ } else if (!old_in_subsystem && new_in_subsystem) {
+ /* Use new, replace old. */
+ log_warn("Found duplicate PV %s: using %s not %s",
+ pvid_s,
+ dev_name(dev),
+ dev_name(existing->dev));
+ log_warn("Using duplicate PV %s from subsystem %s, replacing %s",
+ dev_name(dev),
+ dev_subsystem_name(dt, dev),
+ dev_name(existing->dev));
+
+ } else if (old_has_holders && !new_has_holders) {
+ /* Use new, replace old. */
+				/* FIXME: why choose the one without holders? */
+ log_warn("Found duplicate PV %s: using %s not %s",
+ pvid_s,
+ dev_name(dev),
+ dev_name(existing->dev));
+ log_warn("Using duplicate PV %s without holders, replacing %s",
+ dev_name(dev),
+ dev_name(existing->dev));
+
+ } else if (!old_has_holders && new_has_holders) {
+ /* Use old, ignore new. */
+ log_warn("Found duplicate PV %s: using %s not %s",
+ pvid_s,
+ dev_name(existing->dev),
+ dev_name(dev));
+ log_warn("Using duplicate PV %s without holders, ignoring %s",
+ dev_name(existing->dev),
+ dev_name(dev));
return NULL;
- } else if (!dev_subsystem_part_major(dt, existing->dev) &&
- dev_subsystem_part_major(dt, dev))
- log_very_verbose("Duplicate PV %s on %s - "
- "using %s %s", pvid,
- dev_name(existing->dev),
- dev_subsystem_name(dt, existing->dev),
- dev_name(dev));
- else if (!dm_is_dm_major(MAJOR(existing->dev->dev)) &&
- dm_is_dm_major(MAJOR(dev->dev)))
- log_very_verbose("Duplicate PV %s on %s - "
- "using dm %s", pvid,
- dev_name(existing->dev),
- dev_name(dev));
- /* FIXME If both dm, check dependencies */
- //else if (dm_is_dm_major(MAJOR(existing->dev->dev)) &&
- //dm_is_dm_major(MAJOR(dev->dev)))
- //
- else if (!strcmp(pvid_s, existing->dev->pvid))
- log_error("Found duplicate PV %s: using %s not "
- "%s", pvid, dev_name(dev),
- dev_name(existing->dev));
+
+ } else if (old_is_dm && new_is_dm) {
+ /* Use new, replace old. */
+ /* FIXME: why choose the new instead of the old? */
+ log_warn("Found duplicate PV %s: using %s not %s",
+ pvid_s,
+ dev_name(dev),
+ dev_name(existing->dev));
+ log_warn("Using duplicate PV %s which is last seen, replacing %s",
+ dev_name(dev),
+ dev_name(existing->dev));
+
+ } else if (!strcmp(pvid_s, existing->dev->pvid)) {
+ /* No criteria to use for preferring old or new. */
+ /* FIXME: why choose the new instead of the old? */
+ /* FIXME: a transient duplicate would be a reason
+ * to select the old instead of the new. */
+ log_warn("Found duplicate PV %s: using %s not %s",
+ pvid_s,
+ dev_name(dev),
+ dev_name(existing->dev));
+ log_warn("Using duplicate PV %s which is last seen, replacing %s",
+ dev_name(dev),
+ dev_name(existing->dev));
+ }
+ } else {
+ /*
+ * The new dev is the same as the existing dev.
+ *
+ * FIXME: Why can't we just return NULL here if the
+ * device already exists? Things don't seem to work
+ * if we do that for some reason.
+ */
+ log_verbose("Found same device %s with same pvid %s",
+ dev_name(existing->dev), pvid_s);
}
- if (strcmp(pvid_s, existing->dev->pvid))
- log_debug_cache("Updating pvid cache to %s (%s) from %s (%s)",
- pvid_s, dev_name(dev),
- existing->dev->pvid, dev_name(existing->dev));
- /* Switch over to new preferred device */
+
+ /*
+ * This happens when running pvcreate on an existing PV.
+ */
+ if (strcmp(pvid_s, existing->dev->pvid)) {
+ log_verbose("Replacing dev %s pvid %s with dev %s pvid %s",
+ dev_name(existing->dev), existing->dev->pvid,
+ dev_name(dev), pvid_s);
+ }
+
+ /*
+ * Switch over to new preferred device.
+ */
existing->dev = dev;
info = existing;
/* Has labeller changed? */
@@ -1573,7 +1911,7 @@ struct lvmcache_info *lvmcache_add(struct labeller *labeller, const char *pvid,
return NULL;
}
- if (!lvmcache_update_vgname_and_id(info, vgname, vgid, vgstatus, NULL)) {
+ if (!lvmcache_update_vgname_and_id(info, &vgsummary)) {
if (!existing) {
dm_hash_remove(_pvid_hash, pvid_s);
strcpy(info->dev->pvid, "");
@@ -1982,3 +2320,41 @@ uint64_t lvmcache_smallest_mda_size(struct lvmcache_info *info)
const struct format_type *lvmcache_fmt(struct lvmcache_info *info) {
return info->fmt;
}
+
+int lvmcache_lookup_mda(struct lvmcache_vgsummary *vgsummary)
+{
+ struct lvmcache_vginfo *vginfo;
+
+ if (!vgsummary->mda_size)
+ return 0;
+
+ /* FIXME Index the checksums */
+ dm_list_iterate_items(vginfo, &_vginfos) {
+ if (vgsummary->mda_checksum == vginfo->mda_checksum &&
+ vgsummary->mda_size == vginfo->mda_size &&
+ !is_orphan_vg(vginfo->vgname)) {
+ vgsummary->vgname = vginfo->vgname;
+ vgsummary->creation_host = vginfo->creation_host;
+ vgsummary->vgstatus = vginfo->status;
+			/* vginfo->vgid has 1 extra byte compared to vgsummary->vgid */
+ memcpy(&vgsummary->vgid, vginfo->vgid, sizeof(vgsummary->vgid));
+
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+int lvmcache_contains_lock_type_sanlock(struct cmd_context *cmd)
+{
+ struct lvmcache_vginfo *vginfo;
+
+ dm_list_iterate_items(vginfo, &_vginfos) {
+ if (vginfo->lock_type && !strcmp(vginfo->lock_type, "sanlock"))
+ return 1;
+ }
+
+ return 0;
+}
+
diff --git a/lib/cache/lvmcache.h b/lib/cache/lvmcache.h
index d43866dca..76b9b10ae 100644
--- a/lib/cache/lvmcache.h
+++ b/lib/cache/lvmcache.h
@@ -39,6 +39,27 @@ struct disk_locn;
struct lvmcache_vginfo;
+/*
+ * vgsummary represents a summary of the VG that is read
+ * without a lock. The info does not come through vg_read(),
+ * but through reading mdas. It provides information about
+ * the VG that is needed to lock the VG and then read it fully
+ * with vg_read(), after which the VG summary should be checked
+ * against the full VG metadata to verify it was correct (since
+ * it was read without a lock.)
+ *
+ * Once read, vgsummary information is saved in lvmcache_vginfo.
+ */
+struct lvmcache_vgsummary {
+ const char *vgname;
+ struct id vgid;
+ uint64_t vgstatus;
+ char *creation_host;
+ const char *lock_type;
+ uint32_t mda_checksum;
+ size_t mda_size;
+};
+
int lvmcache_init(void);
void lvmcache_allow_reads_with_lvmetad(void);
@@ -58,8 +79,7 @@ void lvmcache_del(struct lvmcache_info *info);
/* Update things */
int lvmcache_update_vgname_and_id(struct lvmcache_info *info,
- const char *vgname, const char *vgid,
- uint32_t vgstatus, const char *hostname);
+ struct lvmcache_vgsummary *vgsummary);
int lvmcache_update_vg(struct volume_group *vg, unsigned precommitted);
void lvmcache_lock_vgname(const char *vgname, int read_only);
@@ -68,6 +88,7 @@ int lvmcache_verify_lock_order(const char *vgname);
/* Queries */
const struct format_type *lvmcache_fmt_from_vgname(struct cmd_context *cmd, const char *vgname, const char *vgid, unsigned revalidate_labels);
+int lvmcache_lookup_mda(struct lvmcache_vgsummary *vgsummary);
/* Decrement and test if there are still vg holders in vginfo. */
int lvmcache_vginfo_holders_dec_and_test_for_zero(struct lvmcache_vginfo *vginfo);
@@ -98,6 +119,9 @@ struct dm_list *lvmcache_get_vgnames(struct cmd_context *cmd,
struct dm_list *lvmcache_get_vgids(struct cmd_context *cmd,
int include_internal);
+int lvmcache_get_vgnameids(struct cmd_context *cmd, int include_internal,
+ struct dm_list *vgnameids);
+
/* Returns list of struct dm_str_list containing pool-allocated copy of pvids */
struct dm_list *lvmcache_get_pvids(struct cmd_context *cmd, const char *vgname,
const char *vgid);
@@ -157,4 +181,13 @@ unsigned lvmcache_mda_count(struct lvmcache_info *info);
int lvmcache_vgid_is_cached(const char *vgid);
uint64_t lvmcache_smallest_mda_size(struct lvmcache_info *info);
+void lvmcache_replace_dev(struct cmd_context *cmd, struct physical_volume *pv,
+ struct device *dev);
+
+int lvmcache_found_duplicate_pvs(void);
+
+void lvmcache_set_preferred_duplicates(const char *vgid);
+
+int lvmcache_contains_lock_type_sanlock(struct cmd_context *cmd);
+
#endif
diff --git a/lib/cache/lvmetad.c b/lib/cache/lvmetad.c
index 4e969cb48..856b30f1b 100644
--- a/lib/cache/lvmetad.c
+++ b/lib/cache/lvmetad.c
@@ -22,15 +22,7 @@
#include "format-text.h" // TODO for disk_locn, used as a DA representation
#include "crc.h"
#include "lvm-signal.h"
-
-#ifdef USE_PFL
-#define PFL() printf("%s %u\n", __func__, __LINE__);
-#define PFLA(format, arg...) printf("%s %u " format "\n", __func__, __LINE__, arg);
-#else
-#define PFL()
-#define PFLA(format, arg...)
-#endif
-
+#include "lvmlockd.h"
#define SCAN_TIMEOUT_SECONDS 80
#define MAX_RESCANS 10 /* Maximum number of times to scan all PVs and retry if the daemon returns a token mismatch error */
@@ -43,12 +35,13 @@ static char *_lvmetad_token = NULL;
static const char *_lvmetad_socket = NULL;
static struct cmd_context *_lvmetad_cmd = NULL;
+static struct volume_group *lvmetad_pvscan_vg(struct cmd_context *cmd, struct volume_group *vg);
+
void lvmetad_disconnect(void)
{
if (_lvmetad_connected)
daemon_close(_lvmetad);
_lvmetad_connected = 0;
- _lvmetad_cmd = NULL;
}
void lvmetad_init(struct cmd_context *cmd)
@@ -56,6 +49,10 @@ void lvmetad_init(struct cmd_context *cmd)
if (!_lvmetad_use && !access(getenv("LVM_LVMETAD_PIDFILE") ? : LVMETAD_PIDFILE, F_OK))
log_warn("WARNING: lvmetad is running but disabled."
" Restart lvmetad before enabling it!");
+
+ if (_lvmetad_connected)
+ log_debug(INTERNAL_ERROR "Refreshing lvmetad global handle while connection with the daemon is active");
+
_lvmetad_cmd = cmd;
}
@@ -145,10 +142,13 @@ void lvmetad_set_socket(const char *sock)
_lvmetad_socket = sock;
}
+static int _lvmetad_pvscan_all_devs(struct cmd_context *cmd, activation_handler handler,
+ int ignore_obsolete);
+
static daemon_reply _lvmetad_send(const char *id, ...)
{
va_list ap;
- daemon_reply repl;
+ daemon_reply repl = { 0 };
daemon_request req;
unsigned num_rescans = 0;
unsigned total_usecs_waited = 0;
@@ -158,8 +158,10 @@ static daemon_reply _lvmetad_send(const char *id, ...)
retry:
req = daemon_request_make(id);
- if (_lvmetad_token)
- daemon_request_extend(req, "token = %s", _lvmetad_token, NULL);
+ if (_lvmetad_token && !daemon_request_extend(req, "token = %s", _lvmetad_token, NULL)) {
+ repl.error = ENOMEM;
+ return repl;
+ }
va_start(ap, id);
daemon_request_extend_v(req, ap);
@@ -192,7 +194,7 @@ retry:
max_remaining_sleep_times--; /* Sleep once before rescanning the first time, then 5 times each time after that. */
} else {
/* If the re-scan fails here, we try again later. */
- (void) lvmetad_pvscan_all_devs(_lvmetad_cmd, NULL);
+ (void) _lvmetad_pvscan_all_devs(_lvmetad_cmd, NULL, 0);
num_rescans++;
max_remaining_sleep_times = 5;
}
@@ -271,19 +273,21 @@ static int _read_mda(struct lvmcache_info *info,
return 0;
}
-static struct lvmcache_info *_pv_populate_lvmcache(struct cmd_context *cmd,
- struct dm_config_node *cn,
- dev_t fallback)
+static int _pv_populate_lvmcache(struct cmd_context *cmd,
+ struct dm_config_node *cn,
+ struct format_type *fmt, dev_t fallback)
{
- struct device *dev;
+ struct device *dev, *dev_alternate, *dev_alternate_cache = NULL;
+ struct label *label;
struct id pvid, vgid;
char mda_id[32];
char da_id[32];
int i = 0;
- struct dm_config_node *mda = NULL;
- struct dm_config_node *da = NULL;
+ struct dm_config_node *mda, *da;
+ struct dm_config_node *alt_devices = dm_config_find_node(cn->child, "devices_alternate");
+ struct dm_config_value *alt_device = NULL;
uint64_t offset, size;
- struct lvmcache_info *info;
+ struct lvmcache_info *info, *info_alternate;
const char *pvid_txt = dm_config_find_str(cn->child, "id", NULL),
*vgid_txt = dm_config_find_str(cn->child, "vgid", NULL),
*vgname = dm_config_find_str(cn->child, "vgname", NULL),
@@ -292,11 +296,12 @@ static struct lvmcache_info *_pv_populate_lvmcache(struct cmd_context *cmd,
uint64_t devsize = dm_config_find_int64(cn->child, "dev_size", 0),
label_sector = dm_config_find_int64(cn->child, "label_sector", 0);
- struct format_type *fmt = fmt_name ? get_format_by_name(cmd, fmt_name) : NULL;
+ if (!fmt && fmt_name)
+ fmt = get_format_by_name(cmd, fmt_name);
if (!fmt) {
log_error("PV %s not recognised. Is the device missing?", pvid_txt);
- return NULL;
+ return 0;
}
dev = dev_cache_get_by_devt(devt, cmd->filter);
@@ -305,17 +310,17 @@ static struct lvmcache_info *_pv_populate_lvmcache(struct cmd_context *cmd,
if (!dev) {
log_warn("WARNING: Device for PV %s not found or rejected by a filter.", pvid_txt);
- return NULL;
+ return 0;
}
if (!pvid_txt || !id_read_format(&pvid, pvid_txt)) {
log_error("Missing or ill-formatted PVID for PV: %s.", pvid_txt);
- return NULL;
+ return 0;
}
if (vgid_txt) {
if (!id_read_format(&vgid, vgid_txt))
- return_NULL;
+ return_0;
} else
strcpy((char*)&vgid, fmt->orphan_vg_name);
@@ -324,7 +329,7 @@ static struct lvmcache_info *_pv_populate_lvmcache(struct cmd_context *cmd,
if (!(info = lvmcache_add(fmt->labeller, (const char *)&pvid, dev,
vgname, (const char *)&vgid, 0)))
- return_NULL;
+ return_0;
lvmcache_get_label(info)->sector = label_sector;
lvmcache_get_label(info)->dev = dev;
@@ -365,12 +370,59 @@ static struct lvmcache_info *_pv_populate_lvmcache(struct cmd_context *cmd,
++i;
} while (da);
- return info;
+ if (alt_devices)
+ alt_device = alt_devices->v;
+
+ while (alt_device) {
+ dev_alternate = dev_cache_get_by_devt(alt_device->v.i, cmd->filter);
+ if (dev_alternate) {
+ if ((info_alternate = lvmcache_add(fmt->labeller, (const char *)&pvid, dev_alternate,
+ vgname, (const char *)&vgid, 0))) {
+ dev_alternate_cache = dev_alternate;
+ info = info_alternate;
+ lvmcache_get_label(info)->dev = dev_alternate;
+ }
+ } else {
+ log_warn("Duplicate of PV %s dev %s exists on unknown device %"PRId64 ":%" PRId64,
+ pvid_txt, dev_name(dev), MAJOR(alt_device->v.i), MINOR(alt_device->v.i));
+ }
+ alt_device = alt_device->next;
+ }
+
+ /*
+ * Update lvmcache with the info about the alternate device by
+ * reading its label, which should update lvmcache.
+ */
+ if (dev_alternate_cache) {
+ if (!label_read(dev_alternate_cache, &label, 0)) {
+ log_warn("No PV label found on duplicate device %s.", dev_name(dev_alternate_cache));
+ }
+ }
+
+ lvmcache_set_preferred_duplicates((const char *)&vgid);
+ return 1;
+}
+
+static int _pv_update_struct_pv(struct physical_volume *pv, struct format_instance *fid)
+{
+ struct lvmcache_info *info;
+ if ((info = lvmcache_info_from_pvid((const char *)&pv->id, 0))) {
+ pv->label_sector = lvmcache_get_label(info)->sector;
+ pv->dev = lvmcache_device(info);
+ if (!pv->dev)
+ pv->status |= MISSING_PV;
+ if (!lvmcache_fid_add_mdas_pv(info, fid))
+ return_0;
+ pv->fid = fid;
+ } else
+ pv->status |= MISSING_PV; /* probably missing */
+ return 1;
}
struct volume_group *lvmetad_vg_lookup(struct cmd_context *cmd, const char *vgname, const char *vgid)
{
struct volume_group *vg = NULL;
+ struct volume_group *vg2 = NULL;
daemon_reply reply;
int found;
char uuid[64];
@@ -382,7 +434,6 @@ struct volume_group *lvmetad_vg_lookup(struct cmd_context *cmd, const char *vgna
struct format_type *fmt;
struct dm_config_node *pvcn;
struct pv_list *pvl;
- struct lvmcache_info *info;
if (!lvmetad_active())
return NULL;
@@ -431,24 +482,40 @@ struct volume_group *lvmetad_vg_lookup(struct cmd_context *cmd, const char *vgna
if ((pvcn = dm_config_find_node(top, "metadata/physical_volumes")))
for (pvcn = pvcn->child; pvcn; pvcn = pvcn->sib)
- _pv_populate_lvmcache(cmd, pvcn, 0);
+ _pv_populate_lvmcache(cmd, pvcn, fmt, 0);
+
+ if ((pvcn = dm_config_find_node(top, "metadata/outdated_pvs")))
+ for (pvcn = pvcn->child; pvcn; pvcn = pvcn->sib)
+ _pv_populate_lvmcache(cmd, pvcn, fmt, 0);
top->key = name;
- if (!(vg = import_vg_from_config_tree(reply.cft, fid)))
+ if (!(vg = import_vg_from_lvmetad_config_tree(reply.cft, fid)))
goto_out;
+ /*
+ * locking may have detected a newer vg version and
+ * invalidated the cached vg.
+ */
+ if (dm_config_find_node(reply.cft->root, "vg_invalid")) {
+ log_debug_lvmetad("Update invalid lvmetad cache for VG %s", vgname);
+ vg2 = lvmetad_pvscan_vg(cmd, vg);
+ release_vg(vg);
+ vg = vg2;
+ fid = vg->fid;
+ }
+
dm_list_iterate_items(pvl, &vg->pvs) {
- if ((info = lvmcache_info_from_pvid((const char *)&pvl->pv->id, 0))) {
- pvl->pv->label_sector = lvmcache_get_label(info)->sector;
- pvl->pv->dev = lvmcache_device(info);
- if (!pvl->pv->dev)
- pvl->pv->status |= MISSING_PV;
- if (!lvmcache_fid_add_mdas_pv(info, fid)) {
- vg = NULL;
- goto_out; /* FIXME error path */
- }
- } else
- pvl->pv->status |= MISSING_PV; /* probably missing */
+ if (!_pv_update_struct_pv(pvl->pv, fid)) {
+ vg = NULL;
+ goto_out; /* FIXME error path */
+ }
+ }
+
+ dm_list_iterate_items(pvl, &vg->pvs_outdated) {
+ if (!_pv_update_struct_pv(pvl->pv, fid)) {
+ vg = NULL;
+ goto_out; /* FIXME error path */
+ }
}
lvmcache_update_vg(vg, 0);
@@ -582,7 +649,7 @@ int lvmetad_pv_lookup(struct cmd_context *cmd, struct id pvid, int *found)
if (!(cn = dm_config_find_node(reply.cft->root, "physical_volume")))
goto_out;
- else if (!_pv_populate_lvmcache(cmd, cn, 0))
+ else if (!_pv_populate_lvmcache(cmd, cn, NULL, 0))
goto_out;
out_success:
@@ -612,7 +679,7 @@ int lvmetad_pv_lookup_by_dev(struct cmd_context *cmd, struct device *dev, int *f
goto out_success;
cn = dm_config_find_node(reply.cft->root, "physical_volume");
- if (!cn || !_pv_populate_lvmcache(cmd, cn, dev->dev))
+ if (!cn || !_pv_populate_lvmcache(cmd, cn, NULL, dev->dev))
goto_out;
out_success:
@@ -640,13 +707,63 @@ int lvmetad_pv_list_to_lvmcache(struct cmd_context *cmd)
if ((cn = dm_config_find_node(reply.cft->root, "physical_volumes")))
for (cn = cn->child; cn; cn = cn->sib)
- _pv_populate_lvmcache(cmd, cn, 0);
+ _pv_populate_lvmcache(cmd, cn, NULL, 0);
daemon_reply_destroy(reply);
return 1;
}
+int lvmetad_get_vgnameids(struct cmd_context *cmd, struct dm_list *vgnameids)
+{
+ struct vgnameid_list *vgnl;
+ struct id vgid;
+ const char *vgid_txt;
+ const char *vg_name;
+ daemon_reply reply;
+ struct dm_config_node *cn;
+
+ log_debug_lvmetad("Asking lvmetad for complete list of known VG ids/names");
+ reply = _lvmetad_send("vg_list", NULL);
+ if (!_lvmetad_handle_reply(reply, "list VGs", "", NULL)) {
+ daemon_reply_destroy(reply);
+ return_0;
+ }
+
+ if ((cn = dm_config_find_node(reply.cft->root, "volume_groups"))) {
+ for (cn = cn->child; cn; cn = cn->sib) {
+ vgid_txt = cn->key;
+ if (!id_read_format(&vgid, vgid_txt)) {
+ stack;
+ continue;
+ }
+
+ if (!(vgnl = dm_pool_alloc(cmd->mem, sizeof(*vgnl)))) {
+ log_error("vgnameid_list allocation failed.");
+ return 0;
+ }
+
+ if (!(vg_name = dm_config_find_str(cn->child, "name", NULL))) {
+ log_error("vg_list no name found.");
+ return 0;
+ }
+
+ vgnl->vgid = dm_pool_strdup(cmd->mem, (char *)&vgid);
+ vgnl->vg_name = dm_pool_strdup(cmd->mem, vg_name);
+
+ if (!vgnl->vgid || !vgnl->vg_name) {
+ log_error("vgnameid_list member allocation failed.");
+ return 0;
+ }
+
+ dm_list_add(vgnameids, &vgnl->list);
+ }
+ }
+
+ daemon_reply_destroy(reply);
+ return 1;
+}
+
int lvmetad_vg_list_to_lvmcache(struct cmd_context *cmd)
{
struct volume_group *tmp;
@@ -835,6 +952,51 @@ int lvmetad_pv_found(const struct id *pvid, struct device *dev, const struct for
daemon_reply_int(reply, "seqno_after", -1) != daemon_reply_int(reply, "seqno_before", -1)))
log_warn("WARNING: Inconsistent metadata found for VG %s", vg->name);
+ /*
+ * pvscan --cache does not perform any lvmlockd locking, and
+ * pvscan --cache -aay skips autoactivation in lockd VGs.
+ *
+ * pvscan --cache populates lvmetad with VG metadata from disk.
+ * No lvmlockd locking is needed. It is expected that lockd VG
+ * metadata that is read by pvscan and populated in lvmetad may
+ * be immediately stale due to changes to the VG from other hosts
+ * during or after this pvscan. This is normal and not a problem.
+ * When a subsequent lvm command uses the VG, it will lock the VG
+ * with lvmlockd, read the VG from lvmetad, and update the cached
+ * copy from disk if necessary.
+ *
+ * pvscan --cache -aay does not activate LVs in lockd VGs because
+ * activation requires locking, and a lock-start operation is needed
+ * on a lockd VG before any locking can be performed in it.
+ *
+ * An equivalent of pvscan --cache -aay for lockd VGs is:
+ * 1. pvscan --cache
+ * 2. vgchange --lock-start
+ * 3. vgchange -aay -S 'locktype=sanlock || locktype=dlm'
+ *
+ * [We could eventually add support for autoactivating lockd VGs
+ * using pvscan by incorporating the lock start step (which can
+ * take a long time), but there may be a better option than
+ * continuing to overload pvscan.]
+ *
+ * Stages of starting a lockd VG:
+ *
+ * . pvscan --cache populates lockd VGs in lvmetad without locks,
+ * and this initial cached copy may quickly become stale.
+ *
+ * . vgchange --lock-start VG reads the VG without the VG lock
+ * because no locks are available until the locking is started.
+ * It only uses the VG name and lock_type from the VG metadata,
+ * and then only uses it to start the VG lockspace in lvmlockd.
+ *
+ * . Further lvm commands, e.g. activation, can then lock the VG
+ * with lvmlockd and use current VG metdata.
+ */
+ if (handler && vg && is_lockd_type(vg->lock_type)) {
+ log_debug_lvmetad("Skip pvscan activation for lockd type VG %s", vg->name);
+ handler = NULL;
+ }
+
if (result && handler) {
status = daemon_reply_str(reply, "status", "<missing>");
vgname = daemon_reply_str(reply, "vgname", "<missing>");
@@ -900,7 +1062,10 @@ struct _lvmetad_pvscan_baton {
static int _lvmetad_pvscan_single(struct metadata_area *mda, void *baton)
{
struct _lvmetad_pvscan_baton *b = baton;
- struct volume_group *this = mda->ops->vg_read(b->fid, "", mda, 1);
+ struct volume_group *this;
+
+ if (!(this = mda_is_ignored(mda) ? NULL : mda->ops->vg_read(b->fid, "", mda, NULL, NULL, 1)))
+ return 1;
/* FIXME Also ensure contents match etc. */
if (!b->vg || this->seqno > b->vg->seqno)
@@ -911,8 +1076,102 @@ static int _lvmetad_pvscan_single(struct metadata_area *mda, void *baton)
return 1;
}
+/*
+ * The lock manager may detect that the vg cached in lvmetad is out of date,
+ * due to something like an lvcreate from another host.
+ * This is limited to changes that only affect the vg (not global state like
+ * orphan PVs), so we only need to reread mdas on the vg's existing pvs.
+ */
+
+static struct volume_group *lvmetad_pvscan_vg(struct cmd_context *cmd, struct volume_group *vg)
+{
+ struct volume_group *vg_ret = NULL;
+ struct dm_config_tree *vgmeta_ret = NULL;
+ struct dm_config_tree *vgmeta;
+ struct pv_list *pvl;
+ struct lvmcache_info *info;
+ struct format_instance *fid;
+ struct format_instance_ctx fic = { .type = 0 };
+ struct _lvmetad_pvscan_baton baton;
+
+ dm_list_iterate_items(pvl, &vg->pvs) {
+ /* missing pv */
+ if (!pvl->pv->dev)
+ continue;
+
+ if (!(info = lvmcache_info_from_pvid((const char *)&pvl->pv->id, 0))) {
+ log_error("Failed to find cached info for PV %s.", pv_dev_name(pvl->pv));
+ return NULL;
+ }
+
+ baton.vg = NULL;
+ baton.fid = lvmcache_fmt(info)->ops->create_instance(lvmcache_fmt(info), &fic);
+
+ if (!baton.fid)
+ return NULL;
+
+ if (baton.fid->fmt->features & FMT_OBSOLETE) {
+ log_error("WARNING: Ignoring obsolete format of metadata (%s) on device %s when using lvmetad",
+ baton.fid->fmt->name, dev_name(pvl->pv->dev));
+ lvmcache_fmt(info)->ops->destroy_instance(baton.fid);
+ return NULL;
+ }
+
+ lvmcache_foreach_mda(info, _lvmetad_pvscan_single, &baton);
+
+ if (!baton.vg) {
+ lvmcache_fmt(info)->ops->destroy_instance(baton.fid);
+ return NULL;
+ }
+
+ if (!(vgmeta = export_vg_to_config_tree(baton.vg))) {
+ log_error("VG export to config tree failed");
+ release_vg(baton.vg);
+ return NULL;
+ }
+
+ if (!vgmeta_ret) {
+ vgmeta_ret = vgmeta;
+ } else {
+ if (!compare_config(vgmeta_ret->root, vgmeta->root)) {
+ log_error("VG metadata comparison failed");
+ dm_config_destroy(vgmeta);
+ dm_config_destroy(vgmeta_ret);
+ release_vg(baton.vg);
+ return NULL;
+ }
+ dm_config_destroy(vgmeta);
+ }
+
+ release_vg(baton.vg);
+ }
+
+ if (vgmeta_ret) {
+ fid = lvmcache_fmt(info)->ops->create_instance(lvmcache_fmt(info), &fic);
+ if (!(vg_ret = import_vg_from_config_tree(vgmeta_ret, fid))) {
+ log_error("VG import from config tree failed");
+ lvmcache_fmt(info)->ops->destroy_instance(fid);
+ goto out;
+ }
+
+ /*
+ * Update lvmetad with the newly read version of the VG.
+ * The "precommitted" name is a misnomer in this case,
+ * but that is the field which lvmetad_vg_update() uses
+ * to send the metadata cft to lvmetad.
+ */
+ vg_ret->cft_precommitted = vgmeta_ret;
+ if (!lvmetad_vg_update(vg_ret))
+ log_error("Failed to update lvmetad with new VG meta");
+ vg_ret->cft_precommitted = NULL;
+ dm_config_destroy(vgmeta_ret);
+ }
+out:
+ return vg_ret;
+}
+
int lvmetad_pvscan_single(struct cmd_context *cmd, struct device *dev,
- activation_handler handler)
+ activation_handler handler, int ignore_obsolete)
{
struct label *label;
struct lvmcache_info *info;
@@ -941,9 +1200,16 @@ int lvmetad_pvscan_single(struct cmd_context *cmd, struct device *dev,
goto_bad;
if (baton.fid->fmt->features & FMT_OBSOLETE) {
- log_error("WARNING: Ignoring obsolete format of metadata (%s) on device %s when using lvmetad",
- baton.fid->fmt->name, dev_name(dev));
+ if (ignore_obsolete)
+ log_warn("WARNING: Ignoring obsolete format of metadata (%s) on device %s when using lvmetad",
+ baton.fid->fmt->name, dev_name(dev));
+ else
+ log_error("WARNING: Ignoring obsolete format of metadata (%s) on device %s when using lvmetad",
+ baton.fid->fmt->name, dev_name(dev));
lvmcache_fmt(info)->ops->destroy_instance(baton.fid);
+
+ if (ignore_obsolete)
+ return 1;
return 0;
}
@@ -956,7 +1222,7 @@ int lvmetad_pvscan_single(struct cmd_context *cmd, struct device *dev,
* can scan further devices.
*/
if (!baton.vg && !(baton.fid->fmt->features & FMT_MDAS))
- baton.vg = ((struct metadata_area *) dm_list_first(&baton.fid->metadata_areas_in_use))->ops->vg_read(baton.fid, lvmcache_vgname_from_info(info), NULL, 1);
+ baton.vg = ((struct metadata_area *) dm_list_first(&baton.fid->metadata_areas_in_use))->ops->vg_read(baton.fid, lvmcache_vgname_from_info(info), NULL, NULL, NULL, 1);
if (!baton.vg)
lvmcache_fmt(info)->ops->destroy_instance(baton.fid);
@@ -982,7 +1248,8 @@ bad:
return 0;
}
-int lvmetad_pvscan_all_devs(struct cmd_context *cmd, activation_handler handler)
+static int _lvmetad_pvscan_all_devs(struct cmd_context *cmd, activation_handler handler,
+ int ignore_obsolete)
{
struct dev_iter *iter;
struct device *dev;
@@ -1024,7 +1291,7 @@ int lvmetad_pvscan_all_devs(struct cmd_context *cmd, activation_handler handler)
stack;
break;
}
- if (!lvmetad_pvscan_single(cmd, dev, handler))
+ if (!lvmetad_pvscan_single(cmd, dev, handler, ignore_obsolete))
r = 0;
}
@@ -1039,3 +1306,340 @@ int lvmetad_pvscan_all_devs(struct cmd_context *cmd, activation_handler handler)
return r;
}
+int lvmetad_pvscan_all_devs(struct cmd_context *cmd, activation_handler handler)
+{
+ return _lvmetad_pvscan_all_devs(cmd, handler, 0);
+}
+
+/*
+ * FIXME Implement this function, skipping PVs known to belong to local or clustered,
+ * non-exported VGs.
+ */
+int lvmetad_pvscan_foreign_vgs(struct cmd_context *cmd, activation_handler handler)
+{
+ return _lvmetad_pvscan_all_devs(cmd, handler, 1);
+}
+
+int lvmetad_vg_clear_outdated_pvs(struct volume_group *vg)
+{
+ char uuid[64];
+ daemon_reply reply;
+ int result;
+
+ if (!id_write_format(&vg->id, uuid, sizeof(uuid)))
+ return_0;
+
+ reply = _lvmetad_send("vg_clear_outdated_pvs", "vgid = %s", uuid, NULL);
+ result = _lvmetad_handle_reply(reply, "clear the list of outdated PVs", vg->name, NULL);
+ daemon_reply_destroy(reply);
+
+ return result;
+}
+
+/*
+ * Records the state of cached PVs in lvmetad so we can look for changes
+ * after rescanning.
+ */
+struct pv_cache_list {
+ struct dm_list list;
+ dev_t devt;
+ struct id pvid;
+ const char *vgid;
+ unsigned found : 1;
+ unsigned update_udev : 1;
+};
+
+/*
+ * Get the list of PVs known to lvmetad.
+ */
+static int _lvmetad_get_pv_cache_list(struct cmd_context *cmd, struct dm_list *pvc_list)
+{
+ daemon_reply reply;
+ struct dm_config_node *cn;
+ struct pv_cache_list *pvcl;
+ const char *pvid_txt;
+ const char *vgid;
+
+ if (!lvmetad_active())
+ return 1;
+
+ log_debug_lvmetad("Asking lvmetad for complete list of known PVs");
+ reply = _lvmetad_send("pv_list", NULL);
+ if (!_lvmetad_handle_reply(reply, "list PVs", "", NULL)) {
+ log_error("lvmetad message failed.");
+ daemon_reply_destroy(reply);
+ return_0;
+ }
+
+ if ((cn = dm_config_find_node(reply.cft->root, "physical_volumes"))) {
+ for (cn = cn->child; cn; cn = cn->sib) {
+ if (!(pvcl = dm_pool_zalloc(cmd->mem, sizeof(*pvcl)))) {
+ log_error("pv_cache_list allocation failed.");
+ return 0;
+ }
+
+ pvid_txt = cn->key;
+ if (!id_read_format(&pvcl->pvid, pvid_txt)) {
+ stack;
+ continue;
+ }
+
+ pvcl->devt = dm_config_find_int(cn->child, "device", 0);
+
+ if ((vgid = dm_config_find_str(cn->child, "vgid", NULL)))
+ pvcl->vgid = dm_pool_strdup(cmd->mem, vgid);
+
+ dm_list_add(pvc_list, &pvcl->list);
+ }
+ }
+
+ daemon_reply_destroy(reply);
+
+ return 1;
+}
+
+/*
+ * Opening the device RDWR should trigger a udev db update.
+ * FIXME: is there a better way to update the udev db than
+ * doing an open/close of the device? - For example writing
+ * "change" to /sys/block/<device>/uevent?
+ */
+static void _update_pv_in_udev(struct cmd_context *cmd, dev_t devt)
+{
+ struct device *dev;
+
+ log_debug_devs("device %d:%d open to update udev",
+ (int)MAJOR(devt), (int)MINOR(devt));
+
+ if (!(dev = dev_cache_get_by_devt(devt, cmd->lvmetad_filter))) {
+ log_error("_update_pv_in_udev no dev found");
+ return;
+ }
+
+ if (!dev_open(dev)) {
+ stack;
+ return;
+ }
+
+ if (!dev_close(dev))
+ stack;
+}
+
+/*
+ * Compare before and after PV lists from before/after rescanning,
+ * and update udev db for changes.
+ *
+ * For PVs that have changed pvid or vgid in lvmetad from rescanning,
+ * there may be information in the udev database to update, so open
+ * these devices to trigger a udev update.
+ *
+ * "before" refers to the list of pvs from lvmetad before rescanning
+ * "after" refers to the list of pvs from lvmetad after rescanning
+ *
+ * Comparing both lists, we can see which PVs changed (pvid or vgid),
+ * and trigger a udev db update for those.
+ */
+static void _update_changed_pvs_in_udev(struct cmd_context *cmd,
+ struct dm_list *pvc_before,
+ struct dm_list *pvc_after)
+{
+ struct pv_cache_list *before;
+ struct pv_cache_list *after;
+ char id_before[ID_LEN + 1] __attribute__((aligned(8)));
+ char id_after[ID_LEN + 1] __attribute__((aligned(8)));
+ int found;
+
+ dm_list_iterate_items(before, pvc_before) {
+ found = 0;
+
+ dm_list_iterate_items(after, pvc_after) {
+ if (after->found)
+ continue;
+
+ if (before->devt != after->devt)
+ continue;
+
+ if (!id_equal(&before->pvid, &after->pvid)) {
+ memset(id_before, 0, sizeof(id_before));
+ memset(id_after, 0, sizeof(id_after));
+ strncpy(&id_before[0], (char *) &before->pvid, sizeof(id_before) - 1);
+ strncpy(&id_after[0], (char *) &after->pvid, sizeof(id_after) - 1);
+
+ log_debug_devs("device %d:%d changed pvid from %s to %s",
+ (int)MAJOR(before->devt), (int)MINOR(before->devt),
+ id_before, id_after);
+
+ before->update_udev = 1;
+
+ } else if ((before->vgid && !after->vgid) ||
+ (after->vgid && !before->vgid) ||
+ (before->vgid && after->vgid && strcmp(before->vgid, after->vgid))) {
+
+ log_debug_devs("device %d:%d changed vg from %s to %s",
+ (int)MAJOR(before->devt), (int)MINOR(before->devt),
+ before->vgid ?: "none", after->vgid ?: "none");
+
+ before->update_udev = 1;
+ }
+
+ after->found = 1;
+ before->found = 1;
+ found = 1;
+ break;
+ }
+
+ if (!found) {
+ memset(id_before, 0, sizeof(id_before));
+ strncpy(&id_before[0], (char *) &before->pvid, sizeof(id_before) - 1);
+
+ log_debug_devs("device %d:%d pvid %s vg %s is gone",
+ (int)MAJOR(before->devt), (int)MINOR(before->devt),
+ id_before, before->vgid ? before->vgid : "none");
+
+ before->update_udev = 1;
+ }
+ }
+
+ dm_list_iterate_items(before, pvc_before) {
+ if (before->update_udev)
+ _update_pv_in_udev(cmd, before->devt);
+ }
+
+ dm_list_iterate_items(after, pvc_after) {
+ if (after->update_udev)
+ _update_pv_in_udev(cmd, after->devt);
+ }
+}
+
+/*
+ * Before this command was run, some external entity may have
+ * invalidated lvmetad's cache of global information, e.g. lvmlockd.
+ *
+ * The global information includes things like a new VG, a
+ * VG that was removed, the assignment of a PV to a VG;
+ * any change that is not isolated within a single VG.
+ *
+ * The external entity, like a lock manager, would invalidate
+ * the lvmetad global cache if it detected that the global
+ * information had been changed on disk by something other
+ * than a local lvm command, e.g. an lvm command on another
+ * host with access to the same devices. (How it detects
+ * the change is specific to lock manager or other entity.)
+ *
+ * The effect is that metadata on disk is newer than the metadata
+ * in the local lvmetad daemon, and the local lvmetad's cache
+ * should be updated from disk before this command uses it.
+ *
+ * So, using this function, a command checks if lvmetad's global
+ * cache is valid. If so, it does nothing. If not, it rescans
+ * devices to update the lvmetad cache, then it notifies lvmetad
+ * that it's cache is valid again (consistent with what's on disk.)
+ * This command can then go ahead and use the newly refreshed metadata.
+ *
+ * 1. Check if the lvmetad global cache is invalid.
+ * 2. If so, reread metadata from all devices and update the lvmetad cache.
+ * 3. Tell lvmetad that the global cache is now valid.
+ */
+
+void lvmetad_validate_global_cache(struct cmd_context *cmd, int force)
+{
+ struct dm_list pvc_before; /* pv_cache_list */
+ struct dm_list pvc_after; /* pv_cache_list */
+ daemon_reply reply;
+ int global_invalid;
+
+ dm_list_init(&pvc_before);
+ dm_list_init(&pvc_after);
+
+ if (!lvmlockd_use()) {
+ log_error(INTERNAL_ERROR "validate global cache without lvmlockd");
+ return;
+ }
+
+ if (!lvmetad_used())
+ return;
+
+ log_debug_lvmetad("Validating global lvmetad cache");
+
+ if (force)
+ goto do_scan;
+
+ reply = daemon_send_simple(_lvmetad, "get_global_info",
+ "token = %s", "skip",
+ NULL);
+
+ if (reply.error) {
+ log_error("lvmetad_validate_global_cache get_global_info error %d", reply.error);
+ goto do_scan;
+ }
+
+ if (strcmp(daemon_reply_str(reply, "response", ""), "OK")) {
+ log_error("lvmetad_validate_global_cache get_global_info not ok");
+ goto do_scan;
+ }
+
+ global_invalid = daemon_reply_int(reply, "global_invalid", -1);
+
+ daemon_reply_destroy(reply);
+
+ if (!global_invalid) {
+ /* cache is valid */
+ return;
+ }
+
+ do_scan:
+ /*
+ * Save the current state of pvs from lvmetad so after devices are
+ * scanned, we can compare to the new state to see if pvs changed.
+ */
+ _lvmetad_get_pv_cache_list(cmd, &pvc_before);
+
+ /*
+ * Update the local lvmetad cache so it correctly reflects any
+ * changes made on remote hosts.
+ */
+ lvmetad_pvscan_all_devs(cmd, NULL);
+
+ /*
+ * Clear the global_invalid flag in lvmetad.
+ * Subsequent local commands that read global state
+ * from lvmetad will not see global_invalid until
+ * another host makes another global change.
+ */
+ reply = daemon_send_simple(_lvmetad, "set_global_info",
+ "token = %s", "skip",
+ "global_invalid = %d", 0,
+ NULL);
+ if (reply.error)
+ log_error("lvmetad_validate_global_cache set_global_info error %d", reply.error);
+
+ if (strcmp(daemon_reply_str(reply, "response", ""), "OK"))
+ log_error("lvmetad_validate_global_cache set_global_info not ok");
+
+ daemon_reply_destroy(reply);
+
+ /*
+ * Populate this command's lvmcache structures from lvmetad.
+ */
+ lvmcache_seed_infos_from_lvmetad(cmd);
+
+ /*
+ * Update the local udev database to reflect PV changes from
+ * other hosts.
+ *
+ * Compare the before and after PV lists, and if a PV's
+ * pvid or vgid has changed, then open that device to trigger
+ * a uevent to update the udev db.
+ *
+ * This has no direct benefit to lvm, but is just a best effort
+ * attempt to keep the udev db updated and reflecting current
+ * lvm information.
+ *
+ * FIXME: lvmcache_seed_infos_from_lvmetad() and _lvmetad_get_pv_cache_list()
+ * each get pv_list from lvmetad, and they could share a single pv_list reply.
+ */
+ if (!dm_list_empty(&pvc_before)) {
+ _lvmetad_get_pv_cache_list(cmd, &pvc_after);
+ _update_changed_pvs_in_udev(cmd, &pvc_before, &pvc_after);
+ }
+}
diff --git a/lib/cache/lvmetad.h b/lib/cache/lvmetad.h
index 2cd738ed9..af0d562fe 100644
--- a/lib/cache/lvmetad.h
+++ b/lib/cache/lvmetad.h
@@ -29,8 +29,7 @@ typedef int (*activation_handler) (struct cmd_context *cmd,
#ifdef LVMETAD_SUPPORT
/*
- * Initialise the communication with lvmetad. Normally called by
- * lvmcache_init. Sets up a global handle for our process.
+ * Sets up a global handle for our process.
*/
void lvmetad_init(struct cmd_context *);
@@ -59,7 +58,9 @@ int lvmetad_socket_present(void);
/*
* Check whether lvmetad is active (where active means both that it is running
- * and that we have a working connection with it).
+ * and that we have a working connection with it). It opens new connection
+ * with lvmetad in the process when lvmetad is supposed to be used and the
+ * connection is not open yet.
*/
int lvmetad_active(void);
@@ -70,8 +71,9 @@ int lvmetad_active(void);
void lvmetad_connect_or_warn(void);
/*
- * Drop connection to lvmetad. A subsequent lvmetad_init() will re-establish
- * the connection (possibly at a different socket path).
+ * Drop connection to lvmetad. A subsequent lvmetad_connect_or_warn or
+ * lvmetad_active will re-establish the connection (possibly at a
+ * different socket path).
*/
void lvmetad_disconnect(void);
@@ -143,6 +145,12 @@ int lvmetad_pv_lookup_by_dev(struct cmd_context *cmd, struct device *dev, int *f
int lvmetad_vg_list_to_lvmcache(struct cmd_context *cmd);
/*
+ * Request a list of vgid/vgname pairs for all VGs known to lvmetad.
+ * Does not do vg_lookup's on each VG, and does not populate lvmcache.
+ */
+int lvmetad_get_vgnameids(struct cmd_context *cmd, struct dm_list *vgnameids);
+
+/*
* Find a VG by its ID or its name in the lvmetad cache. Gives NULL if the VG is
* not found.
*/
@@ -153,9 +161,13 @@ struct volume_group *lvmetad_vg_lookup(struct cmd_context *cmd,
* Scan a single device and update lvmetad with the result(s).
*/
int lvmetad_pvscan_single(struct cmd_context *cmd, struct device *dev,
- activation_handler handler);
+ activation_handler handler, int ignore_obsolete);
int lvmetad_pvscan_all_devs(struct cmd_context *cmd, activation_handler handler);
+int lvmetad_pvscan_foreign_vgs(struct cmd_context *cmd, activation_handler handler);
+
+int lvmetad_vg_clear_outdated_pvs(struct volume_group *vg);
+void lvmetad_validate_global_cache(struct cmd_context *cmd, int force);
# else /* LVMETAD_SUPPORT */
@@ -178,9 +190,13 @@ int lvmetad_pvscan_all_devs(struct cmd_context *cmd, activation_handler handler)
# define lvmetad_pv_lookup(cmd, pvid, found) (0)
# define lvmetad_pv_lookup_by_dev(cmd, dev, found) (0)
# define lvmetad_vg_list_to_lvmcache(cmd) (1)
+# define lvmetad_get_vgnameids(cmd, vgnameids) do { } while (0)
# define lvmetad_vg_lookup(cmd, vgname, vgid) (NULL)
-# define lvmetad_pvscan_single(cmd, dev, handler) (0)
+# define lvmetad_pvscan_single(cmd, dev, handler, ignore_obsolete) (0)
# define lvmetad_pvscan_all_devs(cmd, handler) (0)
+# define lvmetad_pvscan_foreign_vgs(cmd, handler) (0)
+# define lvmetad_vg_clear_outdated_pvs(vg) (1)
+# define lvmetad_validate_global_cache(cmd, force) do { } while (0)
# endif /* LVMETAD_SUPPORT */
diff --git a/lib/cache_segtype/cache.c b/lib/cache_segtype/cache.c
index 73839f4f1..85cba1828 100644
--- a/lib/cache_segtype/cache.c
+++ b/lib/cache_segtype/cache.c
@@ -25,6 +25,11 @@
#include "lv_alloc.h"
#include "defaults.h"
+static const char _cache_module[] = "cache";
+
+/* TODO: using static field here, maybe should be a part of segment_type */
+static unsigned _feature_mask;
+
#define SEG_LOG_ERROR(t, p...) \
log_error(t " segment %s of logical volume %s.", ## p, \
dm_config_parent_name(sn), seg->lv->name), 0;
@@ -66,7 +71,7 @@ static int _cache_pool_text_import(struct lv_segment *seg,
if (dm_config_has_node(sn, "cache_mode")) {
if (!(str = dm_config_find_str(sn, "cache_mode", NULL)))
return SEG_LOG_ERROR("cache_mode must be a string in");
- if (!set_cache_pool_feature(&seg->feature_flags, str))
+ if (!cache_set_mode(seg, str))
return SEG_LOG_ERROR("Unknown cache_mode in");
}
@@ -75,9 +80,7 @@ static int _cache_pool_text_import(struct lv_segment *seg,
return SEG_LOG_ERROR("policy must be a string in");
if (!(seg->policy_name = dm_pool_strdup(mem, str)))
return SEG_LOG_ERROR("Failed to duplicate policy in");
- } else
- /* Cannot use 'just' default, so pick one */
- seg->policy_name = DEFAULT_CACHE_POOL_POLICY; /* FIXME make configurable */
+ }
/*
* Read in policy args:
@@ -97,6 +100,9 @@ static int _cache_pool_text_import(struct lv_segment *seg,
* If the policy is not present, default policy is used.
*/
if ((sn = dm_config_find_node(sn, "policy_settings"))) {
+ if (!seg->policy_name)
+ return SEG_LOG_ERROR("policy_settings must have a policy_name in");
+
if (sn->v)
return SEG_LOG_ERROR("policy_settings must be a section in");
@@ -125,24 +131,33 @@ static int _cache_pool_text_export(const struct lv_segment *seg,
{
const char *cache_mode;
- if (!(cache_mode = get_cache_pool_cachemode_name(seg)))
- return_0;
-
outf(f, "data = \"%s\"", seg_lv(seg, 0)->name);
outf(f, "metadata = \"%s\"", seg->metadata_lv->name);
outf(f, "chunk_size = %" PRIu32, seg->chunk_size);
- outf(f, "cache_mode = \"%s\"", cache_mode);
- if (seg->policy_name)
+ /*
+ * Cache pool used by a cache LV holds data. Not ideal,
+ * but not worth to break backward compatibility, by shifting
+ * content to cache segment
+ */
+ if (cache_mode_is_set(seg)) {
+ if (!(cache_mode = get_cache_mode_name(seg)))
+ return_0;
+ outf(f, "cache_mode = \"%s\"", cache_mode);
+ }
+
+ if (seg->policy_name) {
outf(f, "policy = \"%s\"", seg->policy_name);
- if (seg->policy_settings) {
- if (strcmp(seg->policy_settings->key, "policy_settings")) {
- log_error(INTERNAL_ERROR "Incorrect policy_settings tree, %s.",
- seg->policy_settings->key);
- return 0;
+ if (seg->policy_settings) {
+ if (strcmp(seg->policy_settings->key, "policy_settings")) {
+ log_error(INTERNAL_ERROR "Incorrect policy_settings tree, %s.",
+ seg->policy_settings->key);
+ return 0;
+ }
+ if (seg->policy_settings->child)
+ out_config_node(f, seg->policy_settings);
}
- out_config_node(f, seg->policy_settings);
}
return 1;
@@ -155,12 +170,29 @@ static void _destroy(struct segment_type *segtype)
#ifdef DEVMAPPER_SUPPORT
static int _target_present(struct cmd_context *cmd,
- const struct lv_segment *seg __attribute__((unused)),
- unsigned *attributes __attribute__((unused)))
+ const struct lv_segment *seg __attribute__((unused)),
+ unsigned *attributes __attribute__((unused)))
{
- uint32_t maj, min, patchlevel;
+ /* List of features with their kernel target version */
+ static const struct feature {
+ uint32_t maj;
+ uint32_t min;
+ unsigned cache_feature;
+ const char feature[12];
+ const char module[12]; /* check dm-%s */
+ } _features[] = {
+ { 1, 3, CACHE_FEATURE_POLICY_MQ, "policy_mq", "cache-mq" },
+ { 1, 8, CACHE_FEATURE_POLICY_SMQ, "policy_smq", "cache-smq" },
+ };
+ static const char _lvmconf[] = "global/cache_disabled_features";
+ static unsigned _attrs = 0;
static int _cache_checked = 0;
static int _cache_present = 0;
+ uint32_t maj, min, patchlevel;
+ unsigned i;
+ const struct dm_config_node *cn;
+ const struct dm_config_value *cv;
+ const char *str;
if (!_cache_checked) {
_cache_present = target_present(cmd, "cache", 1);
@@ -174,11 +206,53 @@ static int _target_present(struct cmd_context *cmd,
if ((maj < 1) ||
((maj == 1) && (min < 3))) {
- log_error("The cache kernel module is version %u.%u.%u."
- " Version 1.3.0+ is required.",
+ _cache_present = 0;
+ log_error("The cache kernel module is version %u.%u.%u. "
+ "Version 1.3.0+ is required.",
maj, min, patchlevel);
return 0;
}
+
+
+ for (i = 0; i < DM_ARRAY_SIZE(_features); ++i) {
+ if (((maj > _features[i].maj) ||
+ (maj == _features[i].maj && min >= _features[i].min)) &&
+ (!_features[i].module[0] || module_present(cmd, _features[i].module)))
+ _attrs |= _features[i].cache_feature;
+ else
+ log_very_verbose("Target %s does not support %s.",
+ _cache_module, _features[i].feature);
+ }
+ }
+
+ if (attributes) {
+ if (!_feature_mask) {
+ /* Support runtime lvm.conf changes, N.B. avoid 32 feature */
+ if ((cn = find_config_tree_array(cmd, global_cache_disabled_features_CFG, NULL))) {
+ for (cv = cn->v; cv; cv = cv->next) {
+ if (cv->type != DM_CFG_STRING) {
+ log_error("Ignoring invalid string in config file %s.",
+ _lvmconf);
+ continue;
+ }
+ str = cv->v.str;
+ if (!*str)
+ continue;
+ for (i = 0; i < DM_ARRAY_SIZE(_features); ++i)
+ if (strcasecmp(str, _features[i].feature) == 0)
+ _feature_mask |= _features[i].cache_feature;
+ }
+ }
+
+ _feature_mask = ~_feature_mask;
+
+ for (i = 0; i < DM_ARRAY_SIZE(_features); ++i)
+ if ((_attrs & _features[i].cache_feature) &&
+ !(_feature_mask & _features[i].cache_feature))
+ log_very_verbose("Target %s %s support disabled by %s",
+ _cache_module, _features[i].feature, _lvmconf);
+ }
+ *attributes = _attrs & _feature_mask;
}
return _cache_present;
@@ -280,9 +354,16 @@ static int _cache_add_target_line(struct dev_manager *dm,
struct dm_tree_node *node, uint64_t len,
uint32_t *pvmove_mirror_count __attribute__((unused)))
{
- struct lv_segment *cache_pool_seg = first_seg(seg->pool_lv);
+ struct lv_segment *cache_pool_seg;
char *metadata_uuid, *data_uuid, *origin_uuid;
+ if (!seg->pool_lv || !seg_is_cache(seg)) {
+ log_error(INTERNAL_ERROR "Passed segment is not cache.");
+ return 0;
+ }
+
+ cache_pool_seg = first_seg(seg->pool_lv);
+
if (!(metadata_uuid = build_dm_uuid(mem, cache_pool_seg->metadata_lv, NULL)))
return_0;
@@ -297,7 +378,9 @@ static int _cache_add_target_line(struct dev_manager *dm,
metadata_uuid,
data_uuid,
origin_uuid,
- seg->cleaner_policy ? "cleaner" : cache_pool_seg->policy_name,
+ seg->cleaner_policy ? "cleaner" :
+ /* undefined policy name -> likely an old "mq" */
+ cache_pool_seg->policy_name ? : "mq",
seg->cleaner_policy ? NULL : cache_pool_seg->policy_settings,
cache_pool_seg->chunk_size))
return_0;
@@ -359,5 +442,8 @@ int init_cache_segtypes(struct cmd_context *cmd,
return_0;
log_very_verbose("Initialised segtype: %s", segtype->name);
+ /* Reset mask for recalc */
+ _feature_mask = 0;
+
return 1;
}
diff --git a/lib/commands/toolcontext.c b/lib/commands/toolcontext.c
index 3900179c5..11affee6c 100644
--- a/lib/commands/toolcontext.c
+++ b/lib/commands/toolcontext.c
@@ -30,6 +30,7 @@
#include "lvmcache.h"
#include "lvmetad.h"
#include "archiver.h"
+#include "lvmpolld-client.h"
#ifdef HAVE_LIBDL
#include "sharedlib.h"
@@ -55,6 +56,128 @@
static const size_t linebuffer_size = 4096;
+/*
+ * Copy the input string, removing invalid characters.
+ */
+const char *system_id_from_string(struct cmd_context *cmd, const char *str)
+{
+ char *system_id;
+
+ if (!str || !*str) {
+ log_warn("WARNING: Empty system ID supplied.");
+ return "";
+ }
+
+ if (!(system_id = dm_pool_zalloc(cmd->libmem, strlen(str) + 1))) {
+ log_warn("WARNING: Failed to allocate system ID.");
+ return NULL;
+ }
+
+ copy_systemid_chars(str, system_id);
+
+ if (!*system_id) {
+ log_warn("WARNING: Invalid system ID format: %s", str);
+ return NULL;
+ }
+
+ if (!strncmp(system_id, "localhost", 9)) {
+ log_warn("WARNING: system ID may not begin with the string \"localhost\".");
+ return NULL;
+ }
+
+ return system_id;
+}
+
+static const char *_read_system_id_from_file(struct cmd_context *cmd, const char *file)
+{
+ char *line = NULL;
+ size_t line_size;
+ char *start, *end;
+ const char *system_id = NULL;
+ FILE *fp;
+
+ if (!file || !strlen(file) || !file[0])
+ return_NULL;
+
+ if (!(fp = fopen(file, "r"))) {
+ log_warn("WARNING: %s: fopen failed: %s", file, strerror(errno));
+ return NULL;
+ }
+
+ while (getline(&line, &line_size, fp) > 0) {
+ start = line;
+
+ /* Ignore leading whitespace */
+ while (*start && isspace(*start))
+ start++;
+
+ /* Ignore rest of line after # */
+ if (!*start || *start == '#')
+ continue;
+
+ if (system_id && *system_id) {
+ log_warn("WARNING: Ignoring extra line(s) in system ID file %s.", file);
+ break;
+ }
+
+ /* Remove any comments from end of line */
+ for (end = start; *end; end++)
+ if (*end == '#') {
+ *end = '\0';
+ break;
+ }
+
+ system_id = system_id_from_string(cmd, start);
+ }
+
+ free(line);
+
+ if (fclose(fp))
+ stack;
+
+ return system_id;
+}
+
+static const char *_system_id_from_source(struct cmd_context *cmd, const char *source)
+{
+ char filebuf[PATH_MAX];
+ const char *file;
+ const char *etc_str;
+ const char *str;
+ const char *system_id = NULL;
+
+ if (!strcasecmp(source, "uname")) {
+ if (cmd->hostname)
+ system_id = system_id_from_string(cmd, cmd->hostname);
+ goto out;
+ }
+
+ /* lvm.conf and lvmlocal.conf are merged into one config tree */
+ if (!strcasecmp(source, "lvmlocal")) {
+ if ((str = find_config_tree_str(cmd, local_system_id_CFG, NULL)))
+ system_id = system_id_from_string(cmd, str);
+ goto out;
+ }
+
+ if (!strcasecmp(source, "machineid") || !strcasecmp(source, "machine-id")) {
+ etc_str = find_config_tree_str(cmd, global_etc_CFG, NULL);
+ if (dm_snprintf(filebuf, sizeof(filebuf), "%s/machine-id", etc_str) != -1)
+ system_id = _read_system_id_from_file(cmd, filebuf);
+ goto out;
+ }
+
+ if (!strcasecmp(source, "file")) {
+ file = find_config_tree_str(cmd, global_system_id_file_CFG, NULL);
+ system_id = _read_system_id_from_file(cmd, file);
+ goto out;
+ }
+
+ log_warn("WARNING: Unrecognised system_id_source \"%s\".", source);
+
+out:
+ return system_id;
+}
+
static int _get_env_vars(struct cmd_context *cmd)
{
const char *e;
@@ -122,8 +245,10 @@ static int _parse_debug_classes(struct cmd_context *cmd)
const struct dm_config_value *cv;
int debug_classes = 0;
- if (!(cn = find_config_tree_node(cmd, log_debug_classes_CFG, NULL)))
- return DEFAULT_LOGGED_DEBUG_CLASSES;
+ if (!(cn = find_config_tree_array(cmd, log_debug_classes_CFG, NULL))) {
+ log_error(INTERNAL_ERROR "Unable to find configuration for log/debug_classes.");
+ return -1;
+ }
for (cv = cn->v; cv; cv = cv->next) {
if (cv->type != DM_CFG_STRING) {
@@ -151,6 +276,8 @@ static int _parse_debug_classes(struct cmd_context *cmd)
debug_classes |= LOG_CLASS_CACHE;
else if (!strcasecmp(cv->v.str, "locking"))
debug_classes |= LOG_CLASS_LOCKING;
+ else if (!strcasecmp(cv->v.str, "lvmpolld"))
+ debug_classes |= LOG_CLASS_LVMPOLLD;
else
log_verbose("Unrecognised value for log/debug_classes: %s", cv->v.str);
}
@@ -288,7 +415,59 @@ static int _check_config(struct cmd_context *cmd)
return 1;
}
-int process_profilable_config(struct cmd_context *cmd) {
+static const char *_set_time_format(struct cmd_context *cmd)
+{
+ /* Compared to strftime, we do not allow "newline" character - the %n in format. */
+ static const char *allowed_format_chars = "aAbBcCdDeFGghHIjklmMpPrRsStTuUVwWxXyYzZ%";
+ static const char *allowed_alternative_format_chars_e = "cCxXyY";
+ static const char *allowed_alternative_format_chars_o = "deHImMSuUVwWy";
+ static const char *chars_to_check;
+ const char *tf = find_config_tree_str(cmd, report_time_format_CFG, NULL);
+ const char *p_fmt;
+ size_t i;
+ char c;
+
+ if (!*tf) {
+ log_error("Configured time format is empty string.");
+ goto bad;
+ } else {
+ p_fmt = tf;
+ while ((c = *p_fmt)) {
+ if (c == '%') {
+ c = *++p_fmt;
+ if (c == 'E') {
+ c = *++p_fmt;
+ chars_to_check = allowed_alternative_format_chars_e;
+ } else if (c == 'O') {
+ c = *++p_fmt;
+ chars_to_check = allowed_alternative_format_chars_o;
+ } else
+ chars_to_check = allowed_format_chars;
+
+ for (i = 0; chars_to_check[i]; i++) {
+ if (c == chars_to_check[i])
+ break;
+ }
+ if (!chars_to_check[i])
+ goto_bad;
+ }
+ else if (isprint(c))
+ p_fmt++;
+ else {
+ log_error("Configured time format contains non-printable characters.");
+ goto bad;
+ }
+ }
+ }
+
+ return tf;
+bad:
+ log_error("Invalid time format \"%s\" supplied.", tf);
+ return NULL;
+}
+
+int process_profilable_config(struct cmd_context *cmd)
+{
if (!(cmd->default_settings.unit_factor =
dm_units_to_factor(find_config_tree_str(cmd, global_units_CFG, NULL),
&cmd->default_settings.unit_type, 1, NULL))) {
@@ -300,6 +479,46 @@ int process_profilable_config(struct cmd_context *cmd) {
cmd->report_binary_values_as_numeric = find_config_tree_bool(cmd, report_binary_values_as_numeric_CFG, NULL);
cmd->default_settings.suffix = find_config_tree_bool(cmd, global_suffix_CFG, NULL);
cmd->report_list_item_separator = find_config_tree_str(cmd, report_list_item_separator_CFG, NULL);
+ if (!(cmd->time_format = _set_time_format(cmd)))
+ return 0;
+
+ return 1;
+}
+
+static int _init_system_id(struct cmd_context *cmd)
+{
+ const char *source, *system_id;
+ int local_set = 0;
+
+ cmd->system_id = NULL;
+ cmd->unknown_system_id = 0;
+
+ system_id = find_config_tree_str_allow_empty(cmd, local_system_id_CFG, NULL);
+ if (system_id && *system_id)
+ local_set = 1;
+
+ source = find_config_tree_str(cmd, global_system_id_source_CFG, NULL);
+ if (!source)
+ source = "none";
+
+ /* Defining local system_id but not using it is probably a config mistake. */
+ if (local_set && strcmp(source, "lvmlocal"))
+ log_warn("WARNING: local/system_id is set, so should global/system_id_source be \"lvmlocal\" not \"%s\"?", source);
+
+ if (!strcmp(source, "none"))
+ return 1;
+
+ if ((system_id = _system_id_from_source(cmd, source)) && *system_id) {
+ cmd->system_id = system_id;
+ return 1;
+ }
+
+ /*
+ * The source failed to resolve a system_id. In this case allow
+ * VGs with no system_id to be accessed, but not VGs with a system_id.
+ */
+ log_warn("WARNING: No system ID found from system_id_source %s.", source);
+ cmd->unknown_system_id = 1;
return 1;
}
@@ -307,12 +526,12 @@ int process_profilable_config(struct cmd_context *cmd) {
static int _process_config(struct cmd_context *cmd)
{
mode_t old_umask;
+ const char *dev_ext_info_src;
const char *read_ahead;
struct stat st;
const struct dm_config_node *cn;
const struct dm_config_value *cv;
int64_t pv_min_kb;
- const char *lvmetad_socket;
int udev_disabled = 0;
char sysfs_dir[PATH_MAX];
@@ -340,6 +559,16 @@ static int _process_config(struct cmd_context *cmd)
return_0;
#endif
+ dev_ext_info_src = find_config_tree_str(cmd, devices_external_device_info_source_CFG, NULL);
+ if (!strcmp(dev_ext_info_src, "none"))
+ init_external_device_info_source(DEV_EXT_NONE);
+ else if (!strcmp(dev_ext_info_src, "udev"))
+ init_external_device_info_source(DEV_EXT_UDEV);
+ else {
+ log_error("Invalid external device info source specification.");
+ return 0;
+ }
+
/* proc dir */
if (dm_snprintf(cmd->proc_dir, sizeof(cmd->proc_dir), "%s",
find_config_tree_str(cmd, global_proc_CFG, NULL)) < 0) {
@@ -423,7 +652,7 @@ static int _process_config(struct cmd_context *cmd)
}
}
- if ((cn = find_config_tree_node(cmd, activation_mlock_filter_CFG, NULL)))
+ if ((cn = find_config_tree_array(cmd, activation_mlock_filter_CFG, NULL)))
for (cv = cn->v; cv; cv = cv->next)
if ((cv->type != DM_CFG_STRING) || !cv->v.str[0])
log_error("Ignoring invalid activation/mlock_filter entry in config file");
@@ -445,29 +674,8 @@ static int _process_config(struct cmd_context *cmd)
init_detect_internal_vg_cache_corruption
(find_config_tree_bool(cmd, global_detect_internal_vg_cache_corruption_CFG, NULL));
- lvmetad_disconnect();
-
- lvmetad_socket = getenv("LVM_LVMETAD_SOCKET");
- if (!lvmetad_socket)
- lvmetad_socket = DEFAULT_RUN_DIR "/lvmetad.socket";
-
- /* TODO?
- lvmetad_socket = find_config_tree_str(cmd, "lvmetad/socket_path",
- DEFAULT_RUN_DIR "/lvmetad.socket");
- */
- lvmetad_set_socket(lvmetad_socket);
- cn = find_config_tree_node(cmd, devices_global_filter_CFG, NULL);
- lvmetad_set_token(cn ? cn->v : NULL);
-
- if (find_config_tree_int(cmd, global_locking_type_CFG, NULL) == 3 &&
- find_config_tree_bool(cmd, global_use_lvmetad_CFG, NULL)) {
- log_warn("WARNING: configuration setting use_lvmetad overridden to 0 due to locking_type 3. "
- "Clustered environment not supported by lvmetad yet.");
- lvmetad_set_active(NULL, 0);
- } else
- lvmetad_set_active(NULL, find_config_tree_bool(cmd, global_use_lvmetad_CFG, NULL));
-
- lvmetad_init(cmd);
+ if (!_init_system_id(cmd))
+ return_0;
return 1;
}
@@ -526,11 +734,12 @@ static int _init_tags(struct cmd_context *cmd, struct dm_config_tree *cft)
const char *tag;
int passes;
- if (!(tn = find_config_tree_node(cmd, tags_CFG_SECTION, NULL)) || !tn->child)
+ /* Access tags section directly */
+ if (!(tn = find_config_node(cmd, cft, tags_CFG_SECTION)) || !tn->child)
return 1;
/* NB hosttags 0 when already 1 intentionally does not delete the tag */
- if (!cmd->hosttags && find_config_tree_bool(cmd, tags_hosttags_CFG, NULL)) {
+ if (!cmd->hosttags && find_config_bool(cmd, cft, tags_hosttags_CFG)) {
/* FIXME Strip out invalid chars: only A-Za-z0-9_+.- */
if (!_set_tag(cmd, cmd->hostname))
return_0;
@@ -561,7 +770,7 @@ static int _init_tags(struct cmd_context *cmd, struct dm_config_tree *cft)
return 1;
}
-static int _load_config_file(struct cmd_context *cmd, const char *tag)
+static int _load_config_file(struct cmd_context *cmd, const char *tag, int local)
{
static char config_file[PATH_MAX] = "";
const char *filler = "";
@@ -569,6 +778,10 @@ static int _load_config_file(struct cmd_context *cmd, const char *tag)
if (*tag)
filler = "_";
+ else if (local) {
+ filler = "";
+ tag = "local";
+ }
if (dm_snprintf(config_file, sizeof(config_file), "%s/lvm%s%s.conf",
cmd->system_dir, filler, tag) < 0) {
@@ -596,7 +809,9 @@ static int _load_config_file(struct cmd_context *cmd, const char *tag)
return 1;
}
-/* Find and read first config file */
+/*
+ * Find and read lvm.conf.
+ */
static int _init_lvm_conf(struct cmd_context *cmd)
{
/* No config file if LVM_SYSTEM_DIR is empty */
@@ -608,7 +823,7 @@ static int _init_lvm_conf(struct cmd_context *cmd)
return 1;
}
- if (!_load_config_file(cmd, ""))
+ if (!_load_config_file(cmd, "", 0))
return_0;
return 1;
@@ -621,7 +836,7 @@ static int _init_tag_configs(struct cmd_context *cmd)
/* Tag list may grow while inside this loop */
dm_list_iterate_items(sl, &cmd->tags) {
- if (!_load_config_file(cmd, sl->str))
+ if (!_load_config_file(cmd, sl->str, 0))
return_0;
}
@@ -768,15 +983,9 @@ static int _init_dev_cache(struct cmd_context *cmd)
init_obtain_device_list_from_udev(device_list_from_udev);
- if (!(cn = find_config_tree_node(cmd, devices_scan_CFG, NULL))) {
- if (!dev_cache_add_dir("/dev")) {
- log_error("Failed to add /dev to internal "
- "device cache");
- return 0;
- }
- log_verbose("device/scan not in config file: "
- "Defaulting to /dev");
- return 1;
+ if (!(cn = find_config_tree_array(cmd, devices_scan_CFG, NULL))) {
+ log_error(INTERNAL_ERROR "Unable to find configuration for devices/scan.");
+ return_0;
}
for (cv = cn->v; cv; cv = cv->next) {
@@ -814,7 +1023,7 @@ static int _init_dev_cache(struct cmd_context *cmd)
}
}
- if (!(cn = find_config_tree_node(cmd, devices_loopfiles_CFG, NULL)))
+ if (!(cn = find_config_tree_array(cmd, devices_loopfiles_CFG, NULL)))
return 1;
for (cv = cn->v; cv; cv = cv->next) {
@@ -835,7 +1044,7 @@ static int _init_dev_cache(struct cmd_context *cmd)
return 1;
}
-#define MAX_FILTERS 7
+#define MAX_FILTERS 8
static struct dev_filter *_init_lvmetad_filter_chain(struct cmd_context *cmd)
{
@@ -905,7 +1114,14 @@ static struct dev_filter *_init_lvmetad_filter_chain(struct cmd_context *cmd)
nr_filt++;
}
- if (!(composite = composite_filter_create(nr_filt, filters)))
+ /* firmware raid filter. Optional, non-critical. */
+ if (find_config_tree_bool(cmd, devices_fw_raid_component_detection_CFG, NULL)) {
+ init_fwraid_filtering(1);
+ if ((filters[nr_filt] = fwraid_filter_create(cmd->dev_types)))
+ nr_filt++;
+ }
+
+ if (!(composite = composite_filter_create(nr_filt, 1, filters)))
goto_bad;
return composite;
@@ -926,7 +1142,7 @@ bad:
* sysfs filter -> global regex filter -> type filter ->
* usable device filter(FILTER_MODE_PRE_LVMETAD) ->
* mpath component filter -> partitioned filter ->
- * md component filter
+ * md component filter -> fw raid filter
*
* - cmd->filter - the filter chain used for lvmetad responses:
* persistent filter -> usable device filter(FILTER_MODE_POST_LVMETAD) ->
@@ -942,15 +1158,21 @@ bad:
* global regex filter -> type filter ->
* usable device filter(FILTER_MODE_NO_LVMETAD) ->
* mpath component filter -> partitioned filter ->
- * md component filter
+ * md component filter -> fw raid filter
*
*/
-static int _init_filters(struct cmd_context *cmd, unsigned load_persistent_cache)
+int init_filters(struct cmd_context *cmd, unsigned load_persistent_cache)
{
const char *dev_cache;
struct dev_filter *filter = NULL, *filter_components[2] = {0};
struct stat st;
const struct dm_config_node *cn;
+ struct timespec ts, cts;
+
+ if (!cmd->initialized.connections) {
+ log_error(INTERNAL_ERROR "connections must be initialized before filters");
+ return 0;
+ }
cmd->dump_filter = 0;
@@ -981,11 +1203,11 @@ static int _init_filters(struct cmd_context *cmd, unsigned load_persistent_cache
}
/* filter component 1 */
- if ((cn = find_config_tree_node(cmd, devices_filter_CFG, NULL))) {
+ if ((cn = find_config_tree_array(cmd, devices_filter_CFG, NULL))) {
if (!(filter_components[1] = regex_filter_create(cn->v)))
goto_bad;
/* we have two filter components - create composite filter */
- if (!(filter = composite_filter_create(2, filter_components)))
+ if (!(filter = composite_filter_create(2, 0, filter_components)))
goto_bad;
} else
/* we have only one filter component - no need to create composite filter */
@@ -1004,7 +1226,7 @@ static int _init_filters(struct cmd_context *cmd, unsigned load_persistent_cache
if (lvmetad_used()) {
filter_components[0] = cmd->lvmetad_filter;
filter_components[1] = cmd->filter;
- if (!(cmd->full_filter = composite_filter_create(2, filter_components)))
+ if (!(cmd->full_filter = composite_filter_create(2, 0, filter_components)))
goto_bad;
} else
cmd->full_filter = filter;
@@ -1023,12 +1245,16 @@ static int _init_filters(struct cmd_context *cmd, unsigned load_persistent_cache
*/
if (!find_config_tree_bool(cmd, global_use_lvmetad_CFG, NULL) &&
load_persistent_cache && !cmd->is_long_lived &&
- !stat(dev_cache, &st) &&
- (st.st_ctime > config_file_timestamp(cmd->cft)) &&
- !persistent_filter_load(cmd->filter, NULL))
- log_verbose("Failed to load existing device cache from %s",
- dev_cache);
+ !stat(dev_cache, &st)) {
+ lvm_stat_ctim(&ts, &st);
+ cts = config_file_timestamp(cmd->cft);
+ if (timespeccmp(&ts, &cts, >) &&
+ !persistent_filter_load(cmd->filter, NULL))
+ log_verbose("Failed to load existing device cache from %s",
+ dev_cache);
+ }
+ cmd->initialized.filters = 1;
return 1;
bad:
if (!filter) {
@@ -1052,6 +1278,7 @@ bad:
if (cmd->lvmetad_filter)
cmd->lvmetad_filter->destroy(cmd->lvmetad_filter);
+ cmd->initialized.filters = 0;
return 0;
}
@@ -1095,7 +1322,7 @@ static int _init_formats(struct cmd_context *cmd)
#ifdef HAVE_LIBDL
/* Load any formats in shared libs if not static */
if (!is_static() &&
- (cn = find_config_tree_node(cmd, global_format_libraries_CFG, NULL))) {
+ (cn = find_config_tree_array(cmd, global_format_libraries_CFG, NULL))) {
const struct dm_config_value *cv;
struct format_type *(*init_format_fn) (struct cmd_context *);
@@ -1261,7 +1488,7 @@ static int _init_segtypes(struct cmd_context *cmd)
#ifdef HAVE_LIBDL
/* Load any formats in shared libs unless static */
if (!is_static() &&
- (cn = find_config_tree_node(cmd, global_segment_libraries_CFG, NULL))) {
+ (cn = find_config_tree_array(cmd, global_segment_libraries_CFG, NULL))) {
const struct dm_config_value *cv;
int (*init_multiple_segtypes_fn) (struct cmd_context *,
@@ -1425,11 +1652,80 @@ static int _reopen_stream(FILE *stream, int fd, const char *mode, const char *na
return 1;
}
+static int _init_lvmetad(struct cmd_context *cmd)
+{
+ const struct dm_config_node *cn;
+ const char *lvmetad_socket;
+
+ lvmetad_disconnect();
+
+ lvmetad_socket = getenv("LVM_LVMETAD_SOCKET");
+ if (!lvmetad_socket)
+ lvmetad_socket = DEFAULT_RUN_DIR "/lvmetad.socket";
+
+ /* TODO?
+ lvmetad_socket = find_config_tree_str(cmd, "lvmetad/socket_path",
+ DEFAULT_RUN_DIR "/lvmetad.socket");
+ */
+
+ lvmetad_set_socket(lvmetad_socket);
+ cn = find_config_tree_array(cmd, devices_global_filter_CFG, NULL);
+ lvmetad_set_token(cn ? cn->v : NULL);
+
+ if (find_config_tree_int(cmd, global_locking_type_CFG, NULL) == 3 &&
+ find_config_tree_bool(cmd, global_use_lvmetad_CFG, NULL)) {
+ log_warn("WARNING: configuration setting use_lvmetad overridden to 0 due to locking_type 3. "
+ "Clustered environment not supported by lvmetad yet.");
+ lvmetad_set_active(NULL, 0);
+ } else
+ lvmetad_set_active(NULL, find_config_tree_bool(cmd, global_use_lvmetad_CFG, NULL));
+
+ lvmetad_init(cmd);
+ return 1;
+}
+
+static int _init_lvmpolld(struct cmd_context *cmd)
+{
+ const char *lvmpolld_socket;
+
+ lvmpolld_disconnect();
+
+ lvmpolld_socket = getenv("LVM_LVMPOLLD_SOCKET");
+ if (!lvmpolld_socket)
+ lvmpolld_socket = DEFAULT_RUN_DIR "/lvmpolld.socket";
+ lvmpolld_set_socket(lvmpolld_socket);
+
+ lvmpolld_set_active(find_config_tree_bool(cmd, global_use_lvmpolld_CFG, NULL));
+ return 1;
+}
+
+int init_connections(struct cmd_context *cmd)
+{
+
+ if (!_init_lvmetad(cmd)) {
+ log_error("Failed to initialize lvmetad connection.");
+ goto bad;
+ }
+
+ if (!_init_lvmpolld(cmd)) {
+ log_error("Failed to initialize lvmpolld connection.");
+ goto bad;
+ }
+
+ cmd->initialized.connections = 1;
+ return 1;
+bad:
+ cmd->initialized.connections = 0;
+ return 0;
+}
+
/* Entry point */
struct cmd_context *create_toolcontext(unsigned is_long_lived,
const char *system_dir,
unsigned set_buffering,
- unsigned threaded)
+ unsigned threaded,
+ unsigned set_connections,
+ unsigned set_filters)
{
struct cmd_context *cmd;
FILE *new_stream;
@@ -1550,6 +1846,10 @@ struct cmd_context *create_toolcontext(unsigned is_long_lived,
if (!_init_tags(cmd, cmd->cft))
goto_out;
+ /* Load lvmlocal.conf */
+ if (*cmd->system_dir && !_load_config_file(cmd, "", 1))
+ goto_out;
+
if (!_init_tag_configs(cmd))
goto_out;
@@ -1563,15 +1863,12 @@ struct cmd_context *create_toolcontext(unsigned is_long_lived,
goto_out;
if (!(cmd->dev_types = create_dev_types(cmd->proc_dir,
- find_config_tree_node(cmd, devices_types_CFG, NULL))))
+ find_config_tree_array(cmd, devices_types_CFG, NULL))))
goto_out;
if (!_init_dev_cache(cmd))
goto_out;
- if (!_init_filters(cmd, 1))
- goto_out;
-
memlock_init(cmd);
if (!_init_formats(cmd))
@@ -1590,12 +1887,18 @@ struct cmd_context *create_toolcontext(unsigned is_long_lived,
_init_globals(cmd);
+ if (set_connections && !init_connections(cmd))
+ return_0;
+
+ if (set_filters && !init_filters(cmd, 1))
+ goto_out;
+
cmd->default_settings.cache_vgmetadata = 1;
cmd->current_settings = cmd->default_settings;
- cmd->config_initialized = 1;
+ cmd->initialized.config = 1;
out:
- if (!cmd->config_initialized) {
+ if (!cmd->initialized.config) {
destroy_toolcontext(cmd);
cmd = NULL;
}
@@ -1667,14 +1970,19 @@ static void _destroy_filters(struct cmd_context *cmd)
cmd->full_filter->destroy(cmd->full_filter);
cmd->lvmetad_filter = cmd->filter = cmd->full_filter = NULL;
}
+ cmd->initialized.filters = 0;
}
int refresh_filters(struct cmd_context *cmd)
{
int r, saved_ignore_suspended_devices = ignore_suspended_devices();
+ if (!cmd->initialized.filters)
+ /* if filters not initialized, there's nothing to refresh */
+ return 1;
+
_destroy_filters(cmd);
- if (!(r = _init_filters(cmd, 0)))
+ if (!(r = init_filters(cmd, 0)))
stack;
/*
@@ -1703,7 +2011,6 @@ int refresh_toolcontext(struct cmd_context *cmd)
label_exit();
_destroy_segtypes(&cmd->segtypes);
_destroy_formats(cmd, &cmd->formats);
- _destroy_filters(cmd);
if (!dev_cache_exit())
stack;
@@ -1721,7 +2028,7 @@ int refresh_toolcontext(struct cmd_context *cmd)
_destroy_config(cmd);
- cmd->config_initialized = 0;
+ cmd->initialized.config = 0;
cmd->hosttags = 0;
@@ -1754,6 +2061,10 @@ int refresh_toolcontext(struct cmd_context *cmd)
if (!_init_tags(cmd, cft_tmp))
return_0;
+ /* Load lvmlocal.conf */
+ if (*cmd->system_dir && !_load_config_file(cmd, "", 1))
+ return_0;
+
/* Doesn't change cmd->cft */
if (!_init_tag_configs(cmd))
return_0;
@@ -1774,15 +2085,12 @@ int refresh_toolcontext(struct cmd_context *cmd)
return_0;
if (!(cmd->dev_types = create_dev_types(cmd->proc_dir,
- find_config_tree_node(cmd, devices_types_CFG, NULL))))
+ find_config_tree_array(cmd, devices_types_CFG, NULL))))
return_0;
if (!_init_dev_cache(cmd))
return_0;
- if (!_init_filters(cmd, 0))
- return_0;
-
if (!_init_formats(cmd))
return_0;
@@ -1795,7 +2103,13 @@ int refresh_toolcontext(struct cmd_context *cmd)
if (!_init_backup(cmd))
return_0;
- cmd->config_initialized = 1;
+ cmd->initialized.config = 1;
+
+ if (cmd->initialized.connections && !init_connections(cmd))
+ return_0;
+
+ if (!refresh_filters(cmd))
+ return_0;
reset_lvm_errno(1);
return 1;
@@ -1864,6 +2178,7 @@ void destroy_toolcontext(struct cmd_context *cmd)
lvmetad_release_token();
lvmetad_disconnect();
+ lvmpolld_disconnect();
release_log_memory();
activation_exit();
diff --git a/lib/commands/toolcontext.h b/lib/commands/toolcontext.h
index d99cc280b..04401636f 100644
--- a/lib/commands/toolcontext.h
+++ b/lib/commands/toolcontext.h
@@ -60,28 +60,59 @@ struct config_tree_list {
struct dm_config_tree *cft;
};
+struct cmd_context_initialized_parts {
+ unsigned config:1; /* used to reinitialize config if previous init was not successful */
+ unsigned filters:1;
+ unsigned connections:1;
+};
+
/* FIXME Split into tool & library contexts */
/* command-instance-related variables needed by library */
struct cmd_context {
- struct dm_pool *libmem; /* For permanent config data */
- struct dm_pool *mem; /* Transient: Cleared between each command */
-
- const struct format_type *fmt; /* Current format to use by default */
- struct format_type *fmt_backup; /* Format to use for backups */
-
- struct dm_list formats; /* Available formats */
- struct dm_list segtypes; /* Available segment types */
- const char *hostname;
- const char *kernel_vsn;
+ /*
+ * Memory handlers.
+ */
+ struct dm_pool *libmem; /* for permanent config data */
+ struct dm_pool *mem; /* transient: cleared between each command */
- unsigned rand_seed;
- char *linebuffer;
+ /*
+ * Command line and arguments.
+ */
const char *cmd_line;
struct command *command;
char **argv;
struct arg_values *arg_values;
struct dm_list arg_value_groups;
- unsigned is_long_lived:1; /* Optimises persistent_filter handling */
+
+ /*
+ * Format handlers.
+ */
+ const struct format_type *fmt; /* current format to use by default */
+ struct format_type *fmt_backup; /* format to use for backups */
+ struct dm_list formats; /* available formats */
+ struct dm_list segtypes; /* available segment types */
+
+ /*
+ * Machine and system identification.
+ */
+ const char *system_id;
+ const char *hostname;
+ const char *kernel_vsn;
+
+ /*
+ * Device identification.
+ */
+ struct dev_types *dev_types; /* recognized extra device types. */
+
+ /*
+ * Initialization state.
+ */
+ struct cmd_context_initialized_parts initialized;
+
+ /*
+ * Switches.
+ */
+ unsigned is_long_lived:1; /* optimises persistent_filter handling */
unsigned handles_missing_pvs:1;
unsigned handles_unknown_segments:1;
unsigned use_linear_target:1;
@@ -92,55 +123,72 @@ struct cmd_context {
unsigned report_binary_values_as_numeric:1;
unsigned metadata_read_only:1;
unsigned ignore_clustered_vgs:1;
- unsigned threaded:1; /* Set if running within a thread e.g. clvmd */
-
- unsigned independent_metadata_areas:1; /* Active formats have MDAs outside PVs */
-
- struct dev_types *dev_types;
+ unsigned threaded:1; /* set if running within a thread e.g. clvmd */
+ unsigned independent_metadata_areas:1; /* active formats have MDAs outside PVs */
+ unsigned unknown_system_id:1;
+ unsigned include_foreign_vgs:1; /* report/display cmds can reveal foreign VGs */
+ unsigned include_shared_vgs:1; /* report/display cmds can reveal lockd VGs */
+ unsigned include_active_foreign_vgs:1; /* cmd should process foreign VGs with active LVs */
+ unsigned vg_read_print_access_error:1; /* print access errors from vg_read */
+ unsigned lockd_gl_disable:1;
+ unsigned lockd_vg_disable:1;
+ unsigned lockd_lv_disable:1;
+ unsigned lockd_gl_removed:1;
+ unsigned lockd_vg_default_sh:1;
+ unsigned lockd_vg_enforce_sh:1;
/*
- * Use of filters depends on whether lvmetad is used or not:
- *
- * - if lvmetad is used:
- * - cmd->lvmetad_filter used when scanning devices for lvmetad
- * - cmd->filter used when processing lvmetad responses
- * - cmd->full_filter used for remaining situations
- *
- * - if lvmetad is not used:
- * - cmd->lvmetad_filter is NULL
- * - cmd->filter == cmd->full_filter used for all situations
- *
+ * Filtering.
*/
- struct dev_filter *lvmetad_filter;
- struct dev_filter *filter;
- struct dev_filter *full_filter;
- int dump_filter; /* Dump filter when exiting? */
-
- struct dm_list config_files; /* master lvm config + any existing tag configs */
- struct profile_params *profile_params; /* profile handling params including loaded profile configs */
- struct dm_config_tree *cft; /* the whole cascade: CONFIG_STRING -> CONFIG_PROFILE -> CONFIG_FILE/CONFIG_MERGED_FILES */
- int config_initialized; /* used to reinitialize config if previous init was not successful */
-
- struct dm_hash_table *cft_def_hash; /* config definition hash used for validity check (item type + item recognized) */
+ struct dev_filter *lvmetad_filter; /* pre-lvmetad filter chain */
+ struct dev_filter *filter; /* post-lvmetad filter chain */
+ struct dev_filter *full_filter; /* lvmetad_filter + filter */
+ int dump_filter; /* Dump filter when exiting? */
- /* selected settings with original default/configured value which can be changed during cmd processing */
- struct config_info default_settings;
- /* may contain changed values compared to default_settings */
- struct config_info current_settings;
+ /*
+ * Configuration.
+ */
+ struct dm_list config_files; /* master lvm config + any existing tag configs */
+ struct profile_params *profile_params; /* profile handling params including loaded profile configs */
+ struct dm_config_tree *cft; /* the whole cascade: CONFIG_STRING -> CONFIG_PROFILE -> CONFIG_FILE/CONFIG_MERGED_FILES */
+ struct dm_hash_table *cft_def_hash; /* config definition hash used for validity check (item type + item recognized) */
+ struct config_info default_settings; /* selected settings with original default/configured value which can be changed during cmd processing */
+ struct config_info current_settings; /* may contain changed values compared to default_settings */
+ /*
+ * Archives and backups.
+ */
struct archive_params *archive_params;
struct backup_params *backup_params;
const char *stripe_filler;
- /* List of defined tags */
- struct dm_list tags;
- const char *report_list_item_separator;
+ /*
+ * Host tags.
+ */
+ struct dm_list tags; /* list of defined tags */
int hosttags;
- const char *lib_dir; /* Cache value global/library_dir */
+ /*
+ * Paths.
+ */
+ const char *lib_dir; /* cache value global/library_dir */
char system_dir[PATH_MAX];
char dev_dir[PATH_MAX];
char proc_dir[PATH_MAX];
+
+ /*
+ * Buffers.
+ */
+ char display_buffer[NAME_LEN * 10]; /* ring buffer for up to 10 longest vg/lv names */
+ unsigned display_lvname_idx; /* index to ring buffer */
+ char *linebuffer;
+
+ /*
+ * Others - unsorted.
+ */
+ const char *report_list_item_separator;
+ const char *time_format;
+ unsigned rand_seed;
};
/*
@@ -150,14 +198,20 @@ struct cmd_context {
struct cmd_context *create_toolcontext(unsigned is_long_lived,
const char *system_dir,
unsigned set_buffering,
- unsigned threaded);
+ unsigned threaded,
+ unsigned set_connections,
+ unsigned set_filters);
void destroy_toolcontext(struct cmd_context *cmd);
int refresh_toolcontext(struct cmd_context *cmd);
int refresh_filters(struct cmd_context *cmd);
int process_profilable_config(struct cmd_context *cmd);
int config_files_changed(struct cmd_context *cmd);
int init_lvmcache_orphans(struct cmd_context *cmd);
+int init_filters(struct cmd_context *cmd, unsigned load_persistent_cache);
+int init_connections(struct cmd_context *cmd);
struct format_type *get_format_by_name(struct cmd_context *cmd, const char *format);
+const char *system_id_from_string(struct cmd_context *cmd, const char *str);
+
#endif
diff --git a/lib/config/config.c b/lib/config/config.c
index 58efb3131..ac1049d1e 100644
--- a/lib/config/config.c
+++ b/lib/config/config.c
@@ -53,7 +53,7 @@ struct config_file {
struct config_source {
config_source_t type;
- time_t timestamp;
+ struct timespec timestamp;
union {
struct config_file *file;
struct config_file *profile;
@@ -65,11 +65,11 @@ struct config_source {
* Map each ID to respective definition of the configuration item.
*/
static struct cfg_def_item _cfg_def_items[CFG_COUNT + 1] = {
-#define cfg_section(id, name, parent, flags, since_version, comment) {id, parent, name, CFG_TYPE_SECTION, {0}, flags, since_version, comment},
-#define cfg(id, name, parent, flags, type, default_value, since_version, comment) {id, parent, name, type, {.v_##type = default_value}, flags, since_version, comment},
-#define cfg_runtime(id, name, parent, flags, type, since_version, comment) {id, parent, name, type, {.fn_##type = get_default_##id}, flags | CFG_DEFAULT_RUN_TIME, since_version, comment},
-#define cfg_array(id, name, parent, flags, types, default_value, since_version, comment) {id, parent, name, CFG_TYPE_ARRAY | types, {.v_CFG_TYPE_STRING = default_value}, flags, since_version, comment},
-#define cfg_array_runtime(id, name, parent, flags, types, since_version, comment) {id, parent, name, CFG_TYPE_ARRAY | types, {.fn_CFG_TYPE_STRING = get_default_##id}, flags | CFG_DEFAULT_RUN_TIME, since_version, comment},
+#define cfg_section(id, name, parent, flags, since_version, deprecated_since_version, deprecation_comment, comment) {id, parent, name, CFG_TYPE_SECTION, {0}, flags, since_version, {0}, deprecated_since_version, deprecation_comment, comment},
+#define cfg(id, name, parent, flags, type, default_value, since_version, unconfigured_value, deprecated_since_version, deprecation_comment, comment) {id, parent, name, type, {.v_##type = default_value}, flags, since_version, {.v_UNCONFIGURED = unconfigured_value}, deprecated_since_version, deprecation_comment, comment},
+#define cfg_runtime(id, name, parent, flags, type, since_version, deprecated_since_version, deprecation_comment, comment) {id, parent, name, type, {.fn_##type = get_default_##id}, flags | CFG_DEFAULT_RUN_TIME, since_version, {.fn_UNCONFIGURED = get_default_unconfigured_##id}, deprecated_since_version, deprecation_comment, comment},
+#define cfg_array(id, name, parent, flags, types, default_value, since_version, unconfigured_value, deprecated_since_version, deprecation_comment, comment) {id, parent, name, CFG_TYPE_ARRAY | types, {.v_CFG_TYPE_STRING = default_value}, flags, since_version, {.v_UNCONFIGURED = unconfigured_value}, deprecated_since_version, deprecation_comment, comment},
+#define cfg_array_runtime(id, name, parent, flags, types, since_version, deprecated_since_version, deprecation_comment, comment) {id, parent, name, CFG_TYPE_ARRAY | types, {.fn_CFG_TYPE_STRING = get_default_##id}, flags | CFG_DEFAULT_RUN_TIME, since_version, {.fn_UNCONFIGURED = get_default_unconfigured_##id}, deprecated_since_version, deprecation_comment, comment},
#include "config_settings.h"
#undef cfg_section
#undef cfg
@@ -173,7 +173,7 @@ int config_file_check(struct dm_config_tree *cft, const char **filename, struct
return 0;
}
- cs->timestamp = info->st_ctime;
+ lvm_stat_ctim(&cs->timestamp, info);
cf->exists = 1;
cf->st_size = info->st_size;
@@ -193,6 +193,7 @@ int config_file_changed(struct dm_config_tree *cft)
struct config_source *cs = dm_config_get_custom(cft);
struct config_file *cf;
struct stat info;
+ struct timespec ts;
if (cs->type != CONFIG_FILE) {
log_error(INTERNAL_ERROR "config_file_changed: expected file config source, "
@@ -226,7 +227,9 @@ int config_file_changed(struct dm_config_tree *cft)
}
/* Unchanged? */
- if (cs->timestamp == info.st_ctime && cf->st_size == info.st_size)
+ lvm_stat_ctim(&ts, &info);
+ if ((timespeccmp(&cs->timestamp, &ts, ==)) &&
+ cf->st_size == info.st_size)
return 0;
reload:
@@ -478,9 +481,15 @@ int override_config_tree_from_profile(struct cmd_context *cmd,
return 0;
}
+/*
+ * When checksum_only is set, only the checksum of the buffer is
+ * verified and the function skips parsing the mda into the config
+ * tree, which then remains unmodified and must not be used.
+ */
int config_file_read_fd(struct dm_config_tree *cft, struct device *dev,
off_t offset, size_t size, off_t offset2, size_t size2,
- checksum_fn_t checksum_fn, uint32_t checksum)
+ checksum_fn_t checksum_fn, uint32_t checksum,
+ int checksum_only)
{
char *fb, *fe;
int r = 0;
@@ -529,9 +538,11 @@ int config_file_read_fd(struct dm_config_tree *cft, struct device *dev,
goto out;
}
- fe = fb + size + size2;
- if (!dm_config_parse(cft, fb, fe))
- goto_out;
+ if (!checksum_only) {
+ fe = fb + size + size2;
+ if (!dm_config_parse(cft, fb, fe))
+ goto_out;
+ }
r = 1;
@@ -570,12 +581,15 @@ int config_file_read(struct dm_config_tree *cft)
if (!(cf->dev = dev_create_file(filename, NULL, NULL, 1)))
return_0;
- if (!dev_open_readonly_buffered(cf->dev))
+ if (!dev_open_readonly_buffered(cf->dev)) {
+ dev_destroy_file(cf->dev);
+ cf->dev = NULL;
return_0;
+ }
}
r = config_file_read_fd(cft, cf->dev, 0, (size_t) info.st_size, 0, 0,
- (checksum_fn_t) NULL, 0);
+ (checksum_fn_t) NULL, 0, 0);
if (!cf->keep_open) {
if (!dev_close(cf->dev))
@@ -586,13 +600,14 @@ int config_file_read(struct dm_config_tree *cft)
return r;
}
-time_t config_file_timestamp(struct dm_config_tree *cft)
+struct timespec config_file_timestamp(struct dm_config_tree *cft)
{
struct config_source *cs = dm_config_get_custom(cft);
return cs->timestamp;
}
#define cfg_def_get_item_p(id) (&_cfg_def_items[id])
+#define cfg_def_get_default_unconfigured_value_hint(cmd,item) ((item->flags & CFG_DEFAULT_RUN_TIME) ? item->default_unconfigured_value.fn_UNCONFIGURED(cmd) : item->default_unconfigured_value.v_UNCONFIGURED)
#define cfg_def_get_default_value_hint(cmd,item,type,profile) ((item->flags & CFG_DEFAULT_RUN_TIME) ? item->default_value.fn_##type(cmd,profile) : item->default_value.v_##type)
#define cfg_def_get_default_value(cmd,item,type,profile) (item->flags & CFG_DEFAULT_UNDEFINED ? 0 : cfg_def_get_default_value_hint(cmd,item,type,profile))
@@ -649,27 +664,33 @@ static void _log_type_error(const char *path, cfg_def_type_t actual,
_get_type_name(actual_type_name, sizeof(actual_type_name), actual);
_get_type_name(expected_type_name, sizeof(expected_type_name), expected);
- log_warn_suppress(suppress_messages, "Configuration setting \"%s\" has invalid type. "
- "Found%s, expected%s.", path,
+ log_warn_suppress(suppress_messages, "WARNING: Configuration setting \"%s\" has invalid type. "
+ "Found%s but expected%s.", path,
actual_type_name, expected_type_name);
}
-static struct dm_config_value *_get_def_array_values(struct dm_config_tree *cft,
- const cfg_def_item_t *def)
+static struct dm_config_value *_get_def_array_values(struct cmd_context *cmd,
+ struct dm_config_tree *cft,
+ const cfg_def_item_t *def,
+ uint32_t format_flags)
{
+ const char *def_enc_value;
char *enc_value, *token, *p, *r;
struct dm_config_value *array = NULL, *v = NULL, *oldv = NULL;
- if (!def->default_value.v_CFG_TYPE_STRING) {
+ def_enc_value = cfg_def_get_default_value(cmd, def, CFG_TYPE_ARRAY, NULL);
+
+ if (!def_enc_value) {
if (!(array = dm_config_create_value(cft))) {
log_error("Failed to create default empty array for %s.", def->name);
return NULL;
}
array->type = DM_CFG_EMPTY_ARRAY;
+ dm_config_value_set_format_flags(array, format_flags);
return array;
}
- if (!(p = token = enc_value = dm_strdup(def->default_value.v_CFG_TYPE_STRING))) {
+ if (!(p = token = enc_value = dm_strdup(def_enc_value))) {
log_error("_get_def_array_values: dm_strdup failed");
return NULL;
}
@@ -698,6 +719,9 @@ static struct dm_config_value *_get_def_array_values(struct dm_config_tree *cft,
dm_free(enc_value);
return NULL;
}
+
+ dm_config_value_set_format_flags(v, format_flags);
+
if (oldv)
oldv->next = v;
if (!array)
@@ -790,6 +814,11 @@ static int _config_def_check_node_single_value(struct cft_check_handle *handle,
} else if (!(def->type & CFG_TYPE_STRING)) {
_log_type_error(rp, CFG_TYPE_STRING, def->type, handle->suppress_messages);
return 0;
+ } else if (!(def->flags & CFG_ALLOW_EMPTY) && !*v->v.str) {
+ log_warn_suppress(handle->suppress_messages,
+ "Configuration setting \"%s\" invalid. "
+ "It cannot be set to an empty value.", rp);
+ return 0;
}
break;
default: ;
@@ -809,6 +838,12 @@ static int _check_value_differs_from_default(struct cft_check_handle *handle,
float f;
const char *str;
+ if ((handle->ignoreunsupported && (def->flags & CFG_UNSUPPORTED)) ||
+ (handle->ignoreadvanced && (def->flags & CFG_ADVANCED))) {
+ diff = 0;
+ goto out;
+ }
+
/* if default value is undefined, the value used differs from default */
if (def->flags & CFG_DEFAULT_UNDEFINED) {
diff = 1;
@@ -816,7 +851,7 @@ static int _check_value_differs_from_default(struct cft_check_handle *handle,
}
if (!v_def && (def->type & CFG_TYPE_ARRAY)) {
- if (!(v_def_array = v_def_iter = _get_def_array_values(handle->cft, def)))
+ if (!(v_def_array = v_def_iter = _get_def_array_values(handle->cmd, handle->cft, def, 0)))
return_0;
do {
/* iterate over each element of the array and check its value */
@@ -1008,9 +1043,14 @@ static int _config_def_check_tree(struct cft_check_handle *handle,
size_t buf_size, struct dm_config_node *root)
{
struct dm_config_node *cn;
+ cfg_def_item_t *def;
int valid, r = 1;
size_t len;
+ def = cfg_def_get_item_p(root->id);
+ if (def->flags & CFG_SECTION_NO_CHECK)
+ return 1;
+
for (cn = root->child; cn; cn = cn->sib) {
if ((valid = _config_def_check_node(handle, vp, pvp, rp, prp,
buf_size, cn)) && !cn->v) {
@@ -1140,6 +1180,29 @@ static int _apply_local_profile(struct cmd_context *cmd, struct profile *profile
return override_config_tree_from_profile(cmd, profile);
}
+static int _config_disabled(struct cmd_context *cmd, cfg_def_item_t *item, const char *path)
+{
+ if ((item->flags & CFG_DISABLED) && dm_config_tree_find_node(cmd->cft, path)) {
+ log_warn("WARNING: Configuration setting %s is disabled. Using default value.", path);
+ return 1;
+ }
+
+ return 0;
+}
+
+const struct dm_config_node *find_config_node(struct cmd_context *cmd, struct dm_config_tree *cft, int id)
+{
+ cfg_def_item_t *item = cfg_def_get_item_p(id);
+ char path[CFG_PATH_MAX_LEN];
+ const struct dm_config_node *cn;
+
+ _cfg_def_make_path(path, sizeof(path), item->id, item, 0);
+
+ cn = dm_config_tree_find_node(cft, path);
+
+ return cn;
+}
+
const struct dm_config_node *find_config_tree_node(struct cmd_context *cmd, int id, struct profile *profile)
{
cfg_def_item_t *item = cfg_def_get_item_p(id);
@@ -1171,7 +1234,8 @@ const char *find_config_tree_str(struct cmd_context *cmd, int id, struct profile
if (item->type != CFG_TYPE_STRING)
log_error(INTERNAL_ERROR "%s cfg tree element not declared as string.", path);
- str = dm_config_tree_find_str(cmd->cft, path, cfg_def_get_default_value(cmd, item, CFG_TYPE_STRING, profile));
+ str = _config_disabled(cmd, item, path) ? cfg_def_get_default_value(cmd, item, CFG_TYPE_STRING, profile)
+ : dm_config_tree_find_str(cmd->cft, path, cfg_def_get_default_value(cmd, item, CFG_TYPE_STRING, profile));
if (profile_applied)
remove_config_tree_by_source(cmd, profile->source);
@@ -1194,7 +1258,8 @@ const char *find_config_tree_str_allow_empty(struct cmd_context *cmd, int id, st
if (!(item->flags & CFG_ALLOW_EMPTY))
log_error(INTERNAL_ERROR "%s cfg tree element not declared to allow empty values.", path);
- str = dm_config_tree_find_str_allow_empty(cmd->cft, path, cfg_def_get_default_value(cmd, item, CFG_TYPE_STRING, profile));
+ str = _config_disabled(cmd, item, path) ? cfg_def_get_default_value(cmd, item, CFG_TYPE_STRING, profile)
+ : dm_config_tree_find_str_allow_empty(cmd->cft, path, cfg_def_get_default_value(cmd, item, CFG_TYPE_STRING, profile));
if (profile_applied)
remove_config_tree_by_source(cmd, profile->source);
@@ -1215,7 +1280,8 @@ int find_config_tree_int(struct cmd_context *cmd, int id, struct profile *profil
if (item->type != CFG_TYPE_INT)
log_error(INTERNAL_ERROR "%s cfg tree element not declared as integer.", path);
- i = dm_config_tree_find_int(cmd->cft, path, cfg_def_get_default_value(cmd, item, CFG_TYPE_INT, profile));
+ i = _config_disabled(cmd, item, path) ? cfg_def_get_default_value(cmd, item, CFG_TYPE_INT, profile)
+ : dm_config_tree_find_int(cmd->cft, path, cfg_def_get_default_value(cmd, item, CFG_TYPE_INT, profile));
if (profile_applied)
remove_config_tree_by_source(cmd, profile->source);
@@ -1236,7 +1302,8 @@ int64_t find_config_tree_int64(struct cmd_context *cmd, int id, struct profile *
if (item->type != CFG_TYPE_INT)
log_error(INTERNAL_ERROR "%s cfg tree element not declared as integer.", path);
- i64 = dm_config_tree_find_int64(cmd->cft, path, cfg_def_get_default_value(cmd, item, CFG_TYPE_INT, profile));
+ i64 = _config_disabled(cmd, item, path) ? cfg_def_get_default_value(cmd, item, CFG_TYPE_INT, profile)
+ : dm_config_tree_find_int64(cmd->cft, path, cfg_def_get_default_value(cmd, item, CFG_TYPE_INT, profile));
if (profile_applied)
remove_config_tree_by_source(cmd, profile->source);
@@ -1257,7 +1324,8 @@ float find_config_tree_float(struct cmd_context *cmd, int id, struct profile *pr
if (item->type != CFG_TYPE_FLOAT)
log_error(INTERNAL_ERROR "%s cfg tree element not declared as float.", path);
- f = dm_config_tree_find_float(cmd->cft, path, cfg_def_get_default_value(cmd, item, CFG_TYPE_FLOAT, profile));
+ f = _config_disabled(cmd, item, path) ? cfg_def_get_default_value(cmd, item, CFG_TYPE_FLOAT, profile)
+ : dm_config_tree_find_float(cmd->cft, path, cfg_def_get_default_value(cmd, item, CFG_TYPE_FLOAT, profile));
if (profile_applied)
remove_config_tree_by_source(cmd, profile->source);
@@ -1265,6 +1333,23 @@ float find_config_tree_float(struct cmd_context *cmd, int id, struct profile *pr
return f;
}
+int find_config_bool(struct cmd_context *cmd, struct dm_config_tree *cft, int id)
+{
+ cfg_def_item_t *item = cfg_def_get_item_p(id);
+ char path[CFG_PATH_MAX_LEN];
+ int b;
+
+ _cfg_def_make_path(path, sizeof(path), item->id, item, 0);
+
+ if (item->type != CFG_TYPE_BOOL)
+ log_error(INTERNAL_ERROR "%s cfg tree element not declared as boolean.", path);
+
+ b = _config_disabled(cmd, item, path) ? cfg_def_get_default_value(cmd, item, CFG_TYPE_BOOL, NULL)
+ : dm_config_tree_find_bool(cft, path, cfg_def_get_default_value(cmd, item, CFG_TYPE_BOOL, NULL));
+
+ return b;
+}
+
int find_config_tree_bool(struct cmd_context *cmd, int id, struct profile *profile)
{
cfg_def_item_t *item = cfg_def_get_item_p(id);
@@ -1278,7 +1363,8 @@ int find_config_tree_bool(struct cmd_context *cmd, int id, struct profile *profi
if (item->type != CFG_TYPE_BOOL)
log_error(INTERNAL_ERROR "%s cfg tree element not declared as boolean.", path);
- b = dm_config_tree_find_bool(cmd->cft, path, cfg_def_get_default_value(cmd, item, CFG_TYPE_BOOL, profile));
+ b = _config_disabled(cmd, item, path) ? cfg_def_get_default_value(cmd, item, CFG_TYPE_BOOL, profile)
+ : dm_config_tree_find_bool(cmd->cft, path, cfg_def_get_default_value(cmd, item, CFG_TYPE_BOOL, profile));
if (profile_applied)
remove_config_tree_by_source(cmd, profile->source);
@@ -1286,6 +1372,106 @@ int find_config_tree_bool(struct cmd_context *cmd, int id, struct profile *profi
return b;
}
+static struct dm_config_node *_get_array_def_node(struct cmd_context *cmd,
+ cfg_def_item_t *def,
+ struct profile *profile)
+{
+ struct dm_config_node *cn;
+
+ if (def->flags & CFG_DEFAULT_UNDEFINED)
+ return NULL;
+
+ if (!(cn = dm_config_create_node(cmd->cft, def->name))) {
+ log_error("Failed to create default array node for %s.", def->name);
+ return NULL;
+ }
+
+ if (!(cn->v = _get_def_array_values(cmd, cmd->cft, def, 0))) {
+ dm_pool_free(cmd->cft->mem, cn);
+ return_NULL;
+ }
+
+ return cn;
+}
+
+struct _config_array_out_handle {
+ struct dm_pool *mem;
+ char *str;
+};
+
+static int _config_array_line(const struct dm_config_node *cn, const char *line, void *baton)
+{
+ struct _config_array_out_handle *handle = (struct _config_array_out_handle *) baton;
+
+ if (!(handle->str = dm_pool_strdup(handle->mem, line))) {
+ log_error("_config_array_line: dm_pool_strdup failed");
+ return 0;
+ }
+
+ return 1;
+}
+
+static void _log_array_value_used(struct dm_pool *mem, const struct dm_config_node *cn,
+ const char *path, int default_used)
+{
+ struct _config_array_out_handle out_handle = { 0 };
+ struct dm_config_node_out_spec out_spec = { 0 };
+ uint32_t old_format_flags;
+
+ out_handle.mem = mem;
+ out_spec.line_fn = _config_array_line;
+
+ old_format_flags = dm_config_value_get_format_flags(cn->v);
+ dm_config_value_set_format_flags(cn->v,
+ DM_CONFIG_VALUE_FMT_COMMON_EXTRA_SPACES |
+ DM_CONFIG_VALUE_FMT_COMMON_ARRAY);
+
+ if (!dm_config_write_one_node_out(cn, &out_spec, &out_handle)) {
+ log_error("_log_array_value_used: failed to write node value");
+ out_handle.mem = NULL;
+ }
+
+ if (default_used)
+ log_very_verbose("%s not found in config: defaulting to %s",
+ path, out_handle.mem ? out_handle.str : "<unknown>");
+ else
+ log_very_verbose("Setting %s to %s",
+ path, out_handle.mem ? out_handle.str : "<unknown>");
+
+ if (out_handle.mem)
+ dm_pool_free(out_handle.mem, out_handle.str);
+ dm_config_value_set_format_flags(cn->v, old_format_flags);
+}
+
+const struct dm_config_node *find_config_tree_array(struct cmd_context *cmd, int id, struct profile *profile)
+{
+ cfg_def_item_t *item = cfg_def_get_item_p(id);
+ char path[CFG_PATH_MAX_LEN];
+ int profile_applied;
+ const struct dm_config_node *cn = NULL, *cn_def = NULL;
+ profile_applied = _apply_local_profile(cmd, profile);
+ _cfg_def_make_path(path, sizeof(path), item->id, item, 0);
+
+ if (!(item->type & CFG_TYPE_ARRAY))
+ log_error(INTERNAL_ERROR "%s cfg tree element not declared as array.", path);
+
+ if (_config_disabled(cmd, item, path) ||
+ !(cn = find_config_tree_node(cmd, id, profile)))
+ cn_def = _get_array_def_node(cmd, item, profile);
+
+ if (cn)
+ _log_array_value_used(cmd->cft->mem, cn, path, 0);
+ else if (cn_def) {
+ _log_array_value_used(cmd->cft->mem, cn_def, path, 1);
+ cn = cn_def;
+ }
+
+ if (profile_applied)
+ remove_config_tree_by_source(cmd, profile->source);
+
+ return cn;
+}
+
/* Insert cn2 after cn1 */
static void _insert_config_node(struct dm_config_node **cn1,
struct dm_config_node *cn2)
@@ -1414,7 +1600,7 @@ int merge_config_tree(struct cmd_context *cmd, struct dm_config_tree *cft,
cs = dm_config_get_custom(cft);
csn = dm_config_get_custom(newdata);
- if (cs && csn && (cs->timestamp < csn->timestamp))
+ if (cs && csn && timespeccmp(&cs->timestamp, &csn->timestamp, <))
cs->timestamp = csn->timestamp;
return 1;
@@ -1426,6 +1612,55 @@ struct out_baton {
struct dm_pool *mem;
};
+#define MAX_COMMENT_LINE 512
+
+static int _copy_one_line(const char *comment, char *line, int *pos, int len)
+{
+ int p;
+ int i = 0;
+ char c;
+
+ if (*pos >= len)
+ return 0;
+
+ memset(line, 0, MAX_COMMENT_LINE+1);
+
+ for (p = *pos; ; p++) {
+ c = comment[p];
+
+ (*pos)++;
+
+ if (c == '\n' || c == '\0')
+ break;
+
+ line[i++] = c;
+
+ if (i == MAX_COMMENT_LINE)
+ break;
+ }
+
+ return i;
+}
+
+static int _get_config_node_version(uint16_t version_enc, char *version)
+{
+ if (dm_snprintf(version, 9, "%u.%u.%u",
+ (version_enc & 0xE000) >> 13,
+ (version_enc & 0x1E00) >> 9,
+ (version_enc & 0x1FF)) == -1) {
+ log_error("_get_config_node_version: couldn't create version string");
+ return 0;
+ }
+
+ return 1;
+}
+
+static int _def_node_is_deprecated(cfg_def_item_t *def, struct config_def_tree_spec *spec)
+{
+ return def->deprecated_since_version &&
+ (spec->version >= def->deprecated_since_version);
+}
+
static int _out_prefix_fn(const struct dm_config_node *cn, const char *line, void *baton)
{
struct out_baton *out = baton;
@@ -1433,15 +1668,13 @@ static int _out_prefix_fn(const struct dm_config_node *cn, const char *line, voi
char version[9]; /* 8+1 chars for max version of 7.15.511 */
const char *node_type_name = cn->v ? "option" : "section";
char path[CFG_PATH_MAX_LEN];
+ char commentline[MAX_COMMENT_LINE+1];
-
- if (cn->id < 0)
+ if (cn->id <= 0)
return 1;
- if (!cn->id) {
- log_error(INTERNAL_ERROR "Configuration node %s has invalid id.", cn->key);
- return 0;
- }
+ if (out->tree_spec->type == CFG_DEF_TREE_LIST)
+ return 1;
if ((out->tree_spec->type == CFG_DEF_TREE_DIFF) &&
(!(out->tree_spec->check_status[cn->id] & CFG_DIFF)))
@@ -1449,12 +1682,32 @@ static int _out_prefix_fn(const struct dm_config_node *cn, const char *line, voi
cfg_def = cfg_def_get_item_p(cn->id);
- if (out->tree_spec->withcomments) {
+ if (out->tree_spec->withsummary || out->tree_spec->withcomments) {
_cfg_def_make_path(path, sizeof(path), cfg_def->id, cfg_def, 1);
+ fprintf(out->fp, "\n");
fprintf(out->fp, "%s# Configuration %s %s.\n", line, node_type_name, path);
- if (cfg_def->comment)
- fprintf(out->fp, "%s# %s\n", line, cfg_def->comment);
+ if (out->tree_spec->withcomments &&
+ _def_node_is_deprecated(cfg_def, out->tree_spec))
+ fprintf(out->fp, "%s# %s", line, cfg_def->deprecation_comment);
+
+ if (cfg_def->comment) {
+ int pos = 0;
+ while (_copy_one_line(cfg_def->comment, commentline, &pos, strlen(cfg_def->comment))) {
+ if ((commentline[0] == '#') && (strlen(commentline) == 1)) {
+ if (!out->tree_spec->withspaces)
+ continue;
+ commentline[0] = '\0';
+ }
+ fprintf(out->fp, "%s# %s\n", line, commentline);
+ /* withsummary prints only the first comment line. */
+ if (!out->tree_spec->withcomments)
+ break;
+ }
+ }
+
+ if (_def_node_is_deprecated(cfg_def, out->tree_spec))
+ fprintf(out->fp, "%s# This configuration %s is deprecated.\n", line, node_type_name);
if (cfg_def->flags & CFG_ADVANCED)
fprintf(out->fp, "%s# This configuration %s is advanced.\n", line, node_type_name);
@@ -1467,34 +1720,101 @@ static int _out_prefix_fn(const struct dm_config_node *cn, const char *line, voi
if (cfg_def->flags & CFG_DEFAULT_UNDEFINED)
fprintf(out->fp, "%s# This configuration %s does not have a default value defined.\n", line, node_type_name);
+
+ if (cfg_def->flags & CFG_DEFAULT_COMMENTED)
+ fprintf(out->fp, "%s# This configuration %s has an automatic default value.\n", line, node_type_name);
+
+ if ((out->tree_spec->type == CFG_DEF_TREE_FULL) &&
+ (out->tree_spec->check_status[cn->id] & CFG_USED))
+ fprintf(out->fp, "%s# Value defined in existing configuration has been used for this setting.\n", line);
}
if (out->tree_spec->withversions) {
- if (dm_snprintf(version, 9, "%u.%u.%u",
- (cfg_def->since_version & 0xE000) >> 13,
- (cfg_def->since_version & 0x1E00) >> 9,
- (cfg_def->since_version & 0x1FF)) == -1) {
- log_error("_out_prefix_fn: couldn't create version string");
- return 0;
+ if (!_get_config_node_version(cfg_def->since_version, version))
+ return_0;
+ fprintf(out->fp, "%s# Available since version %s.\n", line, version);
+
+ if (_def_node_is_deprecated(cfg_def, out->tree_spec)) {
+ if (!_get_config_node_version(cfg_def->deprecated_since_version, version))
+ return_0;
+ fprintf(out->fp, "%s# Deprecated since version %s.\n", line, version);
}
- fprintf(out->fp, "%s# Since version %s.\n", line, version);
}
return 1;
}
+static int _should_print_cfg_with_undef_def_val(struct out_baton *out, cfg_def_item_t *cfg_def,
+ const struct dm_config_node *cn)
+{
+ if (!(cfg_def->flags & CFG_DEFAULT_UNDEFINED))
+ return 1;
+
+ /* print it only if the value is directly defined in some config = it's used */
+ return out->tree_spec->check_status && (out->tree_spec->check_status[cn->id] & CFG_USED);
+}
+
static int _out_line_fn(const struct dm_config_node *cn, const char *line, void *baton)
{
struct out_baton *out = baton;
- struct cfg_def_item *cfg_def = cfg_def_get_item_p(cn->id);
+ struct cfg_def_item *cfg_def;
+ char config_path[CFG_PATH_MAX_LEN];
+ char summary[MAX_COMMENT_LINE+1];
+ char version[9];
+ int pos = 0;
+ size_t len;
+ char *space_prefix;
if ((out->tree_spec->type == CFG_DEF_TREE_DIFF) &&
(!(out->tree_spec->check_status[cn->id] & CFG_DIFF)))
return 1;
- fprintf(out->fp, "%s%s\n", (out->tree_spec->type != CFG_DEF_TREE_CURRENT) &&
- (out->tree_spec->type != CFG_DEF_TREE_DIFF) &&
- (cfg_def->flags & CFG_DEFAULT_UNDEFINED) ? "#" : "", line);
+ cfg_def = cfg_def_get_item_p(cn->id);
+
+ if (out->tree_spec->type == CFG_DEF_TREE_LIST) {
+ /* List view with node paths and summary. */
+ if (cfg_def->type & CFG_TYPE_SECTION)
+ return 1;
+ if (!_cfg_def_make_path(config_path, CFG_PATH_MAX_LEN, cfg_def->id, cfg_def, 1))
+ return_0;
+ if (out->tree_spec->withversions && !_get_config_node_version(cfg_def->since_version, version))
+ return_0;
+
+ summary[0] = '\0';
+ if (out->tree_spec->withsummary && cfg_def->comment)
+ _copy_one_line(cfg_def->comment, summary, &pos, strlen(cfg_def->comment));
+
+ fprintf(out->fp, "%s%s%s%s%s%s%s\n", config_path,
+ *summary || out->tree_spec->withversions ? " - ": "",
+ *summary ? summary : "",
+ *summary ? " " : "",
+ out->tree_spec->withversions ? "[" : "",
+ out->tree_spec->withversions ? version : "",
+ out->tree_spec->withversions ? "]" : "");
+
+ return 1;
+ }
+
+ /* Usual tree view with nodes and their values. */
+
+ if ((out->tree_spec->type != CFG_DEF_TREE_CURRENT) &&
+ (out->tree_spec->type != CFG_DEF_TREE_DIFF) &&
+ (out->tree_spec->type != CFG_DEF_TREE_FULL) &&
+ (cfg_def->flags & (CFG_DEFAULT_UNDEFINED | CFG_DEFAULT_COMMENTED))) {
+ /* print with # at the front to comment out the line */
+ if (_should_print_cfg_with_undef_def_val(out, cfg_def, cn)) {
+ space_prefix = ((len = strspn(line, "\t "))) ? dm_pool_strndup(out->mem, line, len) : NULL;
+ fprintf(out->fp, "%s%s%s\n", space_prefix ? : "", "# ", line + len);
+ if (space_prefix)
+ dm_pool_free(out->mem, space_prefix);
+ }
+ return 1;
+ }
+
+ /* print the line as it is */
+ if (_should_print_cfg_with_undef_def_val(out, cfg_def, cn))
+ fprintf(out->fp, "%s\n", line);
+
return 1;
}
@@ -1562,20 +1882,31 @@ static struct dm_config_node *_add_def_node(struct dm_config_tree *cft,
{
struct dm_config_node *cn;
const char *str;
+ uint32_t format_flags = 0;
if (!(cn = dm_config_create_node(cft, def->name))) {
log_error("Failed to create default config setting node.");
return NULL;
}
- if (!(def->type & CFG_TYPE_SECTION) && (!(cn->v = dm_config_create_value(cft)))) {
- log_error("Failed to create default config setting node value.");
- return NULL;
+ if (!(def->type & CFG_TYPE_SECTION) && !(def->type & CFG_TYPE_ARRAY)) {
+ if (!(cn->v = dm_config_create_value(cft))) {
+ log_error("Failed to create default config setting node value.");
+ return NULL;
+ }
+ if (spec->withspaces)
+ format_flags |= DM_CONFIG_VALUE_FMT_COMMON_EXTRA_SPACES;
}
cn->id = def->id;
- if (!(def->type & CFG_TYPE_ARRAY)) {
+ if (spec->unconfigured && def->default_unconfigured_value.v_UNCONFIGURED) {
+ cn->v->type = DM_CFG_STRING;
+ cn->v->v.str = cfg_def_get_default_unconfigured_value_hint(spec->cmd, def);
+ if (def->type != CFG_TYPE_STRING)
+ format_flags |= DM_CONFIG_VALUE_FMT_STRING_NO_QUOTES;
+ dm_config_value_set_format_flags(cn->v, format_flags);
+ } else if (!(def->type & CFG_TYPE_ARRAY)) {
switch (def->type) {
case CFG_TYPE_SECTION:
cn->v = NULL;
@@ -1587,6 +1918,8 @@ static struct dm_config_node *_add_def_node(struct dm_config_tree *cft,
case CFG_TYPE_INT:
cn->v->type = DM_CFG_INT;
cn->v->v.i = cfg_def_get_default_value_hint(spec->cmd, def, CFG_TYPE_INT, NULL);
+ if (def->flags & CFG_FORMAT_INT_OCTAL)
+ format_flags |= DM_CONFIG_VALUE_FMT_INT_OCTAL;
break;
case CFG_TYPE_FLOAT:
cn->v->type = DM_CFG_FLOAT;
@@ -1603,8 +1936,13 @@ static struct dm_config_node *_add_def_node(struct dm_config_tree *cft,
return NULL;
break;
}
- } else
- cn->v = _get_def_array_values(cft, def);
+ dm_config_value_set_format_flags(cn->v, format_flags);
+ } else {
+ if (spec->withspaces)
+ format_flags |= DM_CONFIG_VALUE_FMT_COMMON_EXTRA_SPACES;
+ format_flags |= DM_CONFIG_VALUE_FMT_COMMON_ARRAY;
+ cn->v = _get_def_array_values(spec->cmd, cft, def, format_flags);
+ }
cn->child = NULL;
if (parent) {
@@ -1620,6 +1958,11 @@ static struct dm_config_node *_add_def_node(struct dm_config_tree *cft,
return cn;
}
+static int _should_skip_deprecated_def_node(cfg_def_item_t *def, struct config_def_tree_spec *spec)
+{
+ return spec->ignoredeprecated && _def_node_is_deprecated(def, spec);
+}
+
static int _should_skip_def_node(struct config_def_tree_spec *spec, int section_id, int id)
{
cfg_def_item_t *def = cfg_def_get_item_p(id);
@@ -1631,6 +1974,8 @@ static int _should_skip_def_node(struct config_def_tree_spec *spec, int section_
return 1;
switch (spec->type) {
+ case CFG_DEF_TREE_FULL:
+ /* fall through */
case CFG_DEF_TREE_MISSING:
if (!spec->check_status) {
log_error_once(INTERNAL_ERROR "couldn't determine missing "
@@ -1638,19 +1983,27 @@ static int _should_skip_def_node(struct config_def_tree_spec *spec, int section_
return 1;
}
if ((spec->check_status[id] & CFG_USED) ||
- (def->flags & CFG_NAME_VARIABLE) ||
- (def->since_version > spec->version))
+ (def->flags & CFG_NAME_VARIABLE))
+ return 1;
+
+ if ((spec->type == CFG_DEF_TREE_MISSING) &&
+ ((def->since_version > spec->version) ||
+ _should_skip_deprecated_def_node(def, spec)))
return 1;
break;
case CFG_DEF_TREE_NEW:
- if (def->since_version != spec->version)
+ if ((def->since_version != spec->version) ||
+ _should_skip_deprecated_def_node(def, spec))
return 1;
break;
case CFG_DEF_TREE_PROFILABLE:
+ /* fall through */
case CFG_DEF_TREE_PROFILABLE_CMD:
+ /* fall through */
case CFG_DEF_TREE_PROFILABLE_MDA:
if (!(def->flags & CFG_PROFILABLE) ||
- (def->since_version > spec->version))
+ (def->since_version > spec->version) ||
+ _should_skip_deprecated_def_node(def, spec))
return 1;
flags = def->flags & ~CFG_PROFILABLE;
if (spec->type == CFG_DEF_TREE_PROFILABLE_CMD) {
@@ -1662,7 +2015,8 @@ static int _should_skip_def_node(struct config_def_tree_spec *spec, int section_
}
break;
default:
- if (def->since_version > spec->version)
+ if ((def->since_version > spec->version) ||
+ _should_skip_deprecated_def_node(def, spec))
return 1;
break;
}
@@ -1701,7 +2055,7 @@ bad:
struct dm_config_tree *config_def_create_tree(struct config_def_tree_spec *spec)
{
- struct dm_config_tree *cft;
+ struct dm_config_tree *cft = NULL, *tmp_cft = NULL;
struct dm_config_node *root = NULL, *relay = NULL, *tmp;
int id;
@@ -1714,6 +2068,9 @@ struct dm_config_tree *config_def_create_tree(struct config_def_tree_spec *spec)
if (cfg_def_get_item_p(id)->parent != root_CFG_SECTION)
continue;
+ if (spec->ignorelocal && (id == local_CFG_SECTION))
+ continue;
+
if ((tmp = _add_def_section_subtree(cft, spec, root, relay, id))) {
relay = tmp;
if (!root)
@@ -1722,7 +2079,33 @@ struct dm_config_tree *config_def_create_tree(struct config_def_tree_spec *spec)
}
cft->root = root;
+
+ if (spec->type == CFG_DEF_TREE_FULL) {
+ if (!(tmp_cft = dm_config_create())) {
+ log_error("Failed to create temporary config tree while creating full tree.");
+ goto bad;
+ }
+
+ if (!(tmp_cft->root = dm_config_clone_node_with_mem(cft->mem, spec->current_cft->root, 1))) {
+ log_error("Failed to clone current config tree.");
+ goto bad;
+ }
+
+ if (!merge_config_tree(spec->cmd, cft, tmp_cft, CONFIG_MERGE_TYPE_RAW)) {
+ log_error("Failed to merge default and current config tree.");
+ goto bad;
+ }
+
+ dm_config_destroy(tmp_cft);
+ }
+
return cft;
+bad:
+ if (cft)
+ dm_config_destroy(cft);
+ if (tmp_cft)
+ dm_config_destroy(tmp_cft);
+ return NULL;
}
static int _check_profile(struct cmd_context *cmd, struct profile *profile)
@@ -1901,6 +2284,11 @@ const char *get_default_devices_cache_dir_CFG(struct cmd_context *cmd, struct pr
return dm_pool_strdup(cmd->mem, buf);
}
+const char *get_default_unconfigured_devices_cache_dir_CFG(struct cmd_context *cmd)
+{
+ return "@DEFAULT_SYS_DIR@/@DEFAULT_CACHE_SUBDIR@";
+}
+
const char *get_default_devices_cache_CFG(struct cmd_context *cmd, struct profile *profile)
{
const char *cache_dir = NULL, *cache_file_prefix = NULL;
@@ -1935,6 +2323,24 @@ const char *get_default_devices_cache_CFG(struct cmd_context *cmd, struct profil
return dm_pool_strdup(cmd->mem, buf);
}
+const char *get_default_unconfigured_devices_cache_CFG(struct cmd_context *cmd)
+{
+ const char *cache_file_prefix = NULL;
+ static char buf[PATH_MAX];
+
+ if (find_config_tree_node(cmd, devices_cache_file_prefix_CFG, NULL))
+ cache_file_prefix = find_config_tree_str_allow_empty(cmd, devices_cache_file_prefix_CFG, NULL);
+
+ if (dm_snprintf(buf, sizeof(buf), "%s/%s.cache",
+ get_default_unconfigured_devices_cache_dir_CFG(cmd),
+ cache_file_prefix ? : DEFAULT_CACHE_FILE_PREFIX) < 0) {
+ log_error("Persistent cache filename too long.");
+ return NULL;
+ }
+
+ return dm_pool_strdup(cmd->mem, buf);
+}
+
const char *get_default_backup_backup_dir_CFG(struct cmd_context *cmd, struct profile *profile)
{
static char buf[PATH_MAX];
@@ -1948,6 +2354,11 @@ const char *get_default_backup_backup_dir_CFG(struct cmd_context *cmd, struct pr
return dm_pool_strdup(cmd->mem, buf);
}
+const char *get_default_unconfigured_backup_backup_dir_CFG(struct cmd_context *cmd)
+{
+ return "@DEFAULT_SYS_DIR@/@DEFAULT_BACKUP_SUBDIR@";
+}
+
const char *get_default_backup_archive_dir_CFG(struct cmd_context *cmd, struct profile *profile)
{
static char buf[PATH_MAX];
@@ -1961,6 +2372,11 @@ const char *get_default_backup_archive_dir_CFG(struct cmd_context *cmd, struct p
return dm_pool_strdup(cmd->mem, buf);
}
+const char *get_default_unconfigured_backup_archive_dir_CFG(struct cmd_context *cmd)
+{
+ return "@DEFAULT_SYS_DIR@/@DEFAULT_ARCHIVE_SUBDIR@";
+}
+
const char *get_default_config_profile_dir_CFG(struct cmd_context *cmd, struct profile *profile)
{
static char buf[PATH_MAX];
@@ -1974,6 +2390,11 @@ const char *get_default_config_profile_dir_CFG(struct cmd_context *cmd, struct p
return dm_pool_strdup(cmd->mem, buf);
}
+const char *get_default_unconfigured_config_profile_dir_CFG(struct cmd_context *cmd)
+{
+ return "@DEFAULT_SYS_DIR@/@DEFAULT_PROFILE_SUBDIR@";
+}
+
const char *get_default_activation_mirror_image_fault_policy_CFG(struct cmd_context *cmd, struct profile *profile)
{
return find_config_tree_str(cmd, activation_mirror_device_fault_policy_CFG, profile);
diff --git a/lib/config/config.h b/lib/config/config.h
index d095519b5..e112223e7 100644
--- a/lib/config/config.h
+++ b/lib/config/config.h
@@ -50,7 +50,7 @@ struct profile_params {
struct dm_list profiles; /* list of profiles which are loaded already and which are ready for use */
};
-#define CFG_PATH_MAX_LEN 64
+#define CFG_PATH_MAX_LEN 128
/*
* Structures used for definition of a configuration tree.
@@ -72,6 +72,7 @@ typedef int (*t_fn_CFG_TYPE_INT) (struct cmd_context *cmd, struct profile *profi
typedef float (*t_fn_CFG_TYPE_FLOAT) (struct cmd_context *cmd, struct profile *profile);
typedef const char* (*t_fn_CFG_TYPE_STRING) (struct cmd_context *cmd, struct profile *profile);
typedef const char* (*t_fn_CFG_TYPE_ARRAY) (struct cmd_context *cmd, struct profile *profile);
+typedef const char* (*t_fn_UNCONFIGURED) (struct cmd_context *cmd);
/* configuration definition item value (for item's default value) */
typedef union {
@@ -88,62 +89,86 @@ typedef union {
t_fn_CFG_TYPE_ARRAY fn_CFG_TYPE_ARRAY;
} cfg_def_value_t;
+typedef union {
+ const char *v_UNCONFIGURED;
+ t_fn_UNCONFIGURED fn_UNCONFIGURED;
+} cfg_def_unconfigured_value_t;
+
/* configuration definition item flags: */
+
/* whether the configuration item name is variable */
-#define CFG_NAME_VARIABLE 0x01
+#define CFG_NAME_VARIABLE 0x001
/* whether empty value is allowed */
-#define CFG_ALLOW_EMPTY 0x02
+#define CFG_ALLOW_EMPTY 0x002
/* whether the configuration item is for advanced use only */
-#define CFG_ADVANCED 0x04
+#define CFG_ADVANCED 0x004
/* whether the configuration item is not officially supported */
-#define CFG_UNSUPPORTED 0x08
+#define CFG_UNSUPPORTED 0x008
/* whether the configuration item is customizable by a profile */
-#define CFG_PROFILABLE 0x10
+#define CFG_PROFILABLE 0x010
/* whether the configuration item is customizable by a profile */
/* and whether it can be attached to VG/LV metadata at the same time
* The CFG_PROFILABLE_METADATA flag incorporates CFG_PROFILABLE flag!!! */
-#define CFG_PROFILABLE_METADATA 0x30
+#define CFG_PROFILABLE_METADATA 0x030
/* whether the default value is undefned */
-#define CFG_DEFAULT_UNDEFINED 0x40
-/* whether the defualt value is calculated during run time */
-#define CFG_DEFAULT_RUN_TIME 0x80
+#define CFG_DEFAULT_UNDEFINED 0x040
+/* whether the default value is commented out on output */
+#define CFG_DEFAULT_COMMENTED 0x080
+/* whether the default value is calculated during run time */
+#define CFG_DEFAULT_RUN_TIME 0x100
+/* whether the configuration setting is disabled (and hence defaults always used) */
+#define CFG_DISABLED 0x200
+/* whether to print integers in octal form (prefixed by "0") */
+#define CFG_FORMAT_INT_OCTAL 0x400
+/* whether to disable checks for the whole config section subtree */
+#define CFG_SECTION_NO_CHECK 0x800
/* configuration definition item structure */
typedef struct cfg_def_item {
- int id; /* ID of this item */
- int parent; /* ID of parent item */
- const char *name; /* name of the item in configuration tree */
- int type; /* configuration item type (bits of cfg_def_type_t) */
- cfg_def_value_t default_value; /* default value (only for settings) */
- uint16_t flags; /* configuration item definition flags */
- uint16_t since_version; /* version this item appeared in */
- const char *comment; /* brief comment */
+ int id; /* ID of this item */
+ int parent; /* ID of parent item */
+ const char *name; /* name of the item in configuration tree */
+ int type; /* configuration item type (bits of cfg_def_type_t) */
+ cfg_def_value_t default_value; /* default value (only for settings) */
+ uint16_t flags; /* configuration item definition flags */
+ uint16_t since_version; /* version this item appeared in */
+ cfg_def_unconfigured_value_t default_unconfigured_value; /* default value in terms of @FOO@, pre-configured (only for settings) */
+ uint16_t deprecated_since_version; /* version since this item is deprecated */
+ const char *deprecation_comment; /* comment about reasons for deprecation and settings that supersede this one */
+ const char *comment; /* comment */
} cfg_def_item_t;
/* configuration definition tree types */
typedef enum {
CFG_DEF_TREE_CURRENT, /* tree of nodes with values currently set in the config */
CFG_DEF_TREE_MISSING, /* tree of nodes missing in current config using default values */
- CFG_DEF_TREE_COMPLETE, /* CURRENT + MISSING, the tree actually used within execution, not implemented yet */
+ CFG_DEF_TREE_FULL, /* CURRENT + MISSING, the tree actually used within execution */
CFG_DEF_TREE_DEFAULT, /* tree of all possible config nodes with default values */
CFG_DEF_TREE_NEW, /* tree of all new nodes that appeared in given version */
CFG_DEF_TREE_PROFILABLE, /* tree of all nodes that are customizable by profiles */
CFG_DEF_TREE_PROFILABLE_CMD, /* tree of all nodes that are customizable by command profiles (subset of PROFILABLE) */
CFG_DEF_TREE_PROFILABLE_MDA, /* tree of all nodes that are customizable by metadata profiles (subset of PROFILABLE) */
CFG_DEF_TREE_DIFF, /* tree of all nodes that differ from defaults */
+ CFG_DEF_TREE_LIST, /* list all nodes */
} cfg_def_tree_t;
/* configuration definition tree specification */
struct config_def_tree_spec {
- struct cmd_context *cmd; /* command context (for run-time defaults */
- cfg_def_tree_t type; /* tree type */
- uint16_t version; /* tree at this LVM2 version */
+	struct cmd_context *cmd;		/* command context (for run-time defaults) */
+ struct dm_config_tree *current_cft; /* current config tree which is defined explicitly - defaults are not used */
+ cfg_def_tree_t type; /* tree type */
+ uint16_t version; /* tree at this LVM2 version */
unsigned ignoreadvanced:1; /* do not include advanced configs */
- unsigned ignoreunsupported:1; /* do not include unsupported configs */
- unsigned withcomments:1; /* include comments */
+ unsigned ignoreunsupported:1; /* do not include unsupported configs */
+ unsigned ignoredeprecated:1; /* do not include deprecated configs */
+ unsigned ignorelocal:1; /* do not include the local section */
+ unsigned withsummary:1; /* include first line of comments - a summary */
+ unsigned withcomments:1; /* include all comment lines */
unsigned withversions:1; /* include versions */
- uint8_t *check_status; /* status of last tree check (currently needed for CFG_DEF_TREE_MISSING only) */
+ unsigned withspaces:1; /* add more spaces in output for better readability */
+ unsigned unconfigured:1; /* use unconfigured path strings */
+ uint8_t *check_status; /* status of last tree check (currently needed for CFG_DEF_TREE_MISSING only) */
};
@@ -158,11 +183,11 @@ struct config_def_tree_spec {
* Register ID for each possible item in the configuration tree.
*/
enum {
-#define cfg_section(id, name, parent, flags, since_version, comment) id,
-#define cfg(id, name, parent, flags, type, default_value, since_version, comment) id,
-#define cfg_runtime(id, name, parent, flags, type, since_version, comment) id,
-#define cfg_array(id, name, parent, flags, types, default_value, since_version, comment) id,
-#define cfg_array_runtime(id, name, parent, flags, types, since_version, comment) id,
+#define cfg_section(id, name, parent, flags, since_version, deprecated_since_version, deprecation_comment, comment) id,
+#define cfg(id, name, parent, flags, type, default_value, since_version, unconfigured_value, deprecated_since_version, deprecation_comment, comment) id,
+#define cfg_runtime(id, name, parent, flags, type, since_version, deprecated_since_version, deprecation_comment, comment) id,
+#define cfg_array(id, name, parent, flags, types, default_value, since_version, unconfigured_value, deprecated_since_version, deprecation_comment, comment) id,
+#define cfg_array_runtime(id, name, parent, flags, types, since_version, deprecated_since_version, deprecation_comment, comment) id,
#include "config_settings.h"
#undef cfg_section
#undef cfg
@@ -184,6 +209,8 @@ struct cft_check_handle {
unsigned skip_if_checked:1; /* skip the check if already done before - return last state */
unsigned suppress_messages:1; /* suppress messages during the check if config item is found invalid */
unsigned check_diff:1; /* check if the value used differs from default one */
+	unsigned ignoreadvanced:1;	/* do not include advanced configs */
+ unsigned ignoreunsupported:1; /* do not include unsupported configs */
uint8_t status[CFG_COUNT]; /* flags for each configuration item - the result of the check */
};
@@ -202,7 +229,8 @@ typedef uint32_t (*checksum_fn_t) (uint32_t initial, const uint8_t *buf, uint32_
struct dm_config_tree *config_open(config_source_t source, const char *filename, int keep_open);
int config_file_read_fd(struct dm_config_tree *cft, struct device *dev,
off_t offset, size_t size, off_t offset2, size_t size2,
- checksum_fn_t checksum_fn, uint32_t checksum);
+ checksum_fn_t checksum_fn, uint32_t checksum,
+ int skip_parse);
int config_file_read(struct dm_config_tree *cft);
struct dm_config_tree *config_file_open_and_read(const char *config_file, config_source_t source,
struct cmd_context *cmd);
@@ -211,7 +239,7 @@ int config_write(struct dm_config_tree *cft, struct config_def_tree_spec *tree_s
struct dm_config_tree *config_def_create_tree(struct config_def_tree_spec *spec);
void config_destroy(struct dm_config_tree *cft);
-time_t config_file_timestamp(struct dm_config_tree *cft);
+struct timespec config_file_timestamp(struct dm_config_tree *cft);
int config_file_changed(struct dm_config_tree *cft);
int config_file_check(struct dm_config_tree *cft, const char **filename, struct stat *info);
@@ -231,6 +259,12 @@ int merge_config_tree(struct cmd_context *cmd, struct dm_config_tree *cft,
struct dm_config_tree *newdata, config_merge_t);
/*
+ * The next two do not check config overrides and must only be used for the tags section.
+ */
+const struct dm_config_node *find_config_node(struct cmd_context *cmd, struct dm_config_tree *cft, int id);
+int find_config_bool(struct cmd_context *cmd, struct dm_config_tree *cft, int id);
+
+/*
* These versions check an override tree, if present, first.
*/
const struct dm_config_node *find_config_tree_node(struct cmd_context *cmd, int id, struct profile *profile);
@@ -240,18 +274,29 @@ int find_config_tree_int(struct cmd_context *cmd, int id, struct profile *profil
int64_t find_config_tree_int64(struct cmd_context *cmd, int id, struct profile *profile);
float find_config_tree_float(struct cmd_context *cmd, int id, struct profile *profile);
int find_config_tree_bool(struct cmd_context *cmd, int id, struct profile *profile);
+const struct dm_config_node *find_config_tree_array(struct cmd_context *cmd, int id, struct profile *profile);
/*
* Functions for configuration settings for which the default
* value is evaluated at runtime based on command context.
*/
const char *get_default_devices_cache_dir_CFG(struct cmd_context *cmd, struct profile *profile);
+const char *get_default_unconfigured_devices_cache_dir_CFG(struct cmd_context *cmd);
const char *get_default_devices_cache_CFG(struct cmd_context *cmd, struct profile *profile);
+const char *get_default_unconfigured_devices_cache_CFG(struct cmd_context *cmd);
const char *get_default_backup_backup_dir_CFG(struct cmd_context *cmd, struct profile *profile);
+const char *get_default_unconfigured_backup_backup_dir_CFG(struct cmd_context *cmd);
const char *get_default_backup_archive_dir_CFG(struct cmd_context *cmd, struct profile *profile);
+const char *get_default_unconfigured_backup_archive_dir_CFG(struct cmd_context *cmd);
const char *get_default_config_profile_dir_CFG(struct cmd_context *cmd, struct profile *profile);
+const char *get_default_unconfigured_config_profile_dir_CFG(struct cmd_context *cmd);
const char *get_default_activation_mirror_image_fault_policy_CFG(struct cmd_context *cmd, struct profile *profile);
+#define get_default_unconfigured_activation_mirror_image_fault_policy_CFG NULL
int get_default_allocation_thin_pool_chunk_size_CFG(struct cmd_context *cmd, struct profile *profile);
+#define get_default_unconfigured_allocation_thin_pool_chunk_size_CFG NULL
int get_default_allocation_cache_pool_chunk_size_CFG(struct cmd_context *cmd, struct profile *profile);
+#define get_default_unconfigured_allocation_cache_pool_chunk_size_CFG NULL
+const char *get_default_allocation_cache_policy_CFG(struct cmd_context *cmd, struct profile *profile);
+#define get_default_unconfigured_allocation_cache_policy_CFG NULL
#endif
diff --git a/lib/config/config_settings.h b/lib/config/config_settings.h
index c6e57c44e..90170435f 100644
--- a/lib/config/config_settings.h
+++ b/lib/config/config_settings.h
@@ -15,13 +15,14 @@
/*
* MACROS:
* - define a configuration section:
- * cfg_section(id, name, parent, flags, since_version, comment)
+ * cfg_section(id, name, parent, flags, since_version, deprecated_since_version, deprecation_comment, comment)
*
* - define a configuration setting of simple type:
- * cfg(id, name, parent, flags, type, default_value, since_version, comment)
+ * cfg(id, name, parent, flags, type, default_value, since_version, unconfigured_default_value, deprecated_since_version, deprecation_comment, comment)
*
* - define a configuration array of one or more types:
- * cfg_array(id, name, parent, flags, types, default_value, since_version, comment)
+ * cfg_array(id, name, parent, flags, types, default_value, since_version, unconfigured_default_value, deprecated_since_version, deprecation_comment, comment)
+ *
*
* If default value can't be assigned statically because it depends on some
* run-time checks or if it depends on other settings already defined,
@@ -32,247 +33,1710 @@
*
*
* VARIABLES:
- * id: unique identifier
- * name: configuration node name
- * parent: id of parent configuration node
- * flags: configuration item flags:
- * CFG_NAME_VARIABLE - configuration node name is variable
- * CFG_ALLOW_EMPTY - node value can be emtpy
- * CFG_ADVANCED - this node belongs to advanced config set
- * CFG_UNSUPPORTED - this node belongs to unsupported config set
- * CFG_PROFILABLE - this node is customizable by a profile
- * CFG_PROFILABLE_METADATA - profilable and attachable to VG/LV metadata
- * CFG_DEFAULT_UNDEFINED - node's default value is undefined
- * type: allowed type for the value of simple configuation setting, one of:
- * CFG_TYPE_BOOL
- * CFG_TYPE_INT
- * CFG_TYPE_FLOAT
- * CFG_TYPE_STRING
- * types: allowed types for the values of array configuration setting
- * (use logical "OR" to define more than one allowed type,
- * e.g. CFG_TYPE_STRING | CFG_TYPE_INT)
- * default_value: default value of type 'type' for the configuration node,
- * if this is an array with several 'types' defined then
- * default value is a string where each string representation
- * of each value is prefixed by '#X' where X is one of:
- * 'B' for boolean value
- * 'I' for integer value
- * 'F' for float value
- * 'S' for string value
- * '#' for the '#' character itself
- * For example, "#Sfd#I16" means default value [ "fd", 16 ].
- * comment: brief comment used in configuration dumps
- * since_version: the version this configuration node first appeared in (be sure
- * that parent nodes are consistent with versioning, no check done
- * if parent node is older or the same age as any child node!)
+ *
+ * id: Unique identifier.
+ *
+ * name: Configuration node name.
+ *
+ * parent: Id of parent configuration node.
+ *
+ * flags: Configuration item flags:
+ * CFG_NAME_VARIABLE - configuration node name is variable
+ *               CFG_ALLOW_EMPTY - node value can be empty
+ * CFG_ADVANCED - this node belongs to advanced config set
+ * CFG_UNSUPPORTED - this node is not officially supported and it's used primarily by developers
+ * CFG_PROFILABLE - this node is customizable by a profile
+ * CFG_PROFILABLE_METADATA - profilable and attachable to VG/LV metadata
+ * CFG_DEFAULT_UNDEFINED - node's default value is undefined (depends on other system/kernel values outside of lvm)
+ * CFG_DEFAULT_COMMENTED - node's default value is commented out on output
+ * CFG_DISABLED - configuration is disabled (defaults always used)
+ * CFG_FORMAT_INT_OCTAL - print integer number in octal form (also prefixed by "0")
+ * CFG_SECTION_NO_CHECK - do not check content of the section at all - use with care!!!
+ *
+ * type:          Allowed type for the value of simple configuration setting, one of:
+ * CFG_TYPE_BOOL
+ * CFG_TYPE_INT
+ * CFG_TYPE_FLOAT
+ * CFG_TYPE_STRING
+ *
+ * types: Allowed types for the values of array configuration setting
+ * (use logical "OR" to define more than one allowed type,
+ * e.g. CFG_TYPE_STRING | CFG_TYPE_INT).
+ *
+ * default_value: Default value of type 'type' for the configuration node,
+ * if this is an array with several 'types' defined then
+ * default value is a string where each string representation
+ * of each value is prefixed by '#X' where X is one of:
+ * 'B' for boolean value
+ * 'I' for integer value
+ * 'F' for float value
+ * 'S' for string value
+ * '#' for the '#' character itself
+ * For example, "#Sfd#I16" means default value [ "fd", 16 ].
+ *
+ * since_version: The version this configuration node first appeared in (be sure
+ * that parent nodes are consistent with versioning, no check done
+ * if parent node is older or the same age as any child node!)
+ * Use "vsn" macro to translate the "major.minor.release" version
+ * into a single number that is being stored internally in memory.
+ * (see also lvmconfig ... --withversions)
+ *
+ * unconfigured_default_value: Unconfigured default value used as a default value which is
+ * in "@...@" form and which is then substituted with concrete value
+ * while running configure.
+ * (see also 'lvmconfig --type default --unconfigured')
+ *
+ * deprecated_since_version: The version since this configuration node is deprecated.
+ *
+ * deprecation_comment: Comment about deprecation reason and related info (e.g. which
+ * configuration is used now instead).
+ *
+ * comment: Comment used in configuration dumps. The very first line is the
+ * summarizing comment.
+ * (see also lvmconfig ... --withcomments and --withsummary)
+ *
+ *
+ * Difference between CFG_DEFAULT_COMMENTED and CFG_DEFAULT_UNDEFINED:
+ *
+ * UNDEFINED is used if default value is NULL or the value
+ * depends on other system/kernel values outside of lvm.
+ * The most common case is when dm-thin or dm-cache have
+ * built-in default settings in the kernel, and lvm will use
+ * those built-in default values unless the corresponding lvm
+ * config setting is set.
+ *
+ * COMMENTED is used to comment out the default setting in
+ * lvm.conf. The effect is that if the LVM version is
+ * upgraded, and the new version of LVM has new built-in
+ * default values, the new defaults are used by LVM unless
+ * the previous default value was set (uncommented) in lvm.conf.
*/
#include "defaults.h"
-cfg_section(root_CFG_SECTION, "(root)", root_CFG_SECTION, 0, vsn(0, 0, 0), NULL)
-
-cfg_section(config_CFG_SECTION, "config", root_CFG_SECTION, 0, vsn(2, 2, 99), "Configuration handling.")
-cfg_section(devices_CFG_SECTION, "devices", root_CFG_SECTION, 0, vsn(1, 0, 0), NULL)
-cfg_section(allocation_CFG_SECTION, "allocation", root_CFG_SECTION, CFG_PROFILABLE, vsn(2, 2, 77), NULL)
-cfg_section(log_CFG_SECTION, "log", root_CFG_SECTION, 0, vsn(1, 0, 0), NULL)
-cfg_section(backup_CFG_SECTION, "backup", root_CFG_SECTION, 0, vsn(1, 0, 0), NULL)
-cfg_section(shell_CFG_SECTION, "shell", root_CFG_SECTION, 0, vsn(1, 0, 0), NULL)
-cfg_section(global_CFG_SECTION, "global", root_CFG_SECTION, CFG_PROFILABLE, vsn(1, 0, 0), NULL)
-cfg_section(activation_CFG_SECTION, "activation", root_CFG_SECTION, CFG_PROFILABLE, vsn(1, 0, 0), NULL)
-cfg_section(metadata_CFG_SECTION, "metadata", root_CFG_SECTION, CFG_ADVANCED, vsn(1, 0, 0), NULL)
-cfg_section(report_CFG_SECTION, "report", root_CFG_SECTION, CFG_ADVANCED | CFG_PROFILABLE, vsn(1, 0, 0), NULL)
-cfg_section(dmeventd_CFG_SECTION, "dmeventd", root_CFG_SECTION, 0, vsn(1, 2, 3), NULL)
-cfg_section(tags_CFG_SECTION, "tags", root_CFG_SECTION, 0, vsn(1, 0, 18), NULL)
-
-cfg(config_checks_CFG, "checks", config_CFG_SECTION, 0, CFG_TYPE_BOOL, 1, vsn(2, 2, 99), "Configuration tree check on each LVM command execution.")
-cfg(config_abort_on_errors_CFG, "abort_on_errors", config_CFG_SECTION, 0, CFG_TYPE_BOOL, 0, vsn(2,2,99), "Abort LVM command execution if configuration is invalid.")
-cfg_runtime(config_profile_dir_CFG, "profile_dir", config_CFG_SECTION, 0, CFG_TYPE_STRING, vsn(2, 2, 99), "Directory with configuration profiles.")
-
-cfg(devices_dir_CFG, "dir", devices_CFG_SECTION, 0, CFG_TYPE_STRING, DEFAULT_DEV_DIR, vsn(1, 0, 0), NULL)
-cfg_array(devices_scan_CFG, "scan", devices_CFG_SECTION, 0, CFG_TYPE_STRING, "#S/dev", vsn(1, 0, 0), NULL)
-cfg_array(devices_loopfiles_CFG, "loopfiles", devices_CFG_SECTION, CFG_DEFAULT_UNDEFINED, CFG_TYPE_STRING, NULL, vsn(1, 2, 0), NULL)
-cfg(devices_obtain_device_list_from_udev_CFG, "obtain_device_list_from_udev", devices_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_OBTAIN_DEVICE_LIST_FROM_UDEV, vsn(2, 2, 85), NULL)
-cfg_array(devices_preferred_names_CFG, "preferred_names", devices_CFG_SECTION, CFG_ALLOW_EMPTY | CFG_DEFAULT_UNDEFINED, CFG_TYPE_STRING, NULL, vsn(1, 2, 19), NULL)
-cfg_array(devices_filter_CFG, "filter", devices_CFG_SECTION, CFG_DEFAULT_UNDEFINED, CFG_TYPE_STRING, NULL, vsn(1, 0, 0), NULL)
-cfg_array(devices_global_filter_CFG, "global_filter", devices_CFG_SECTION, CFG_DEFAULT_UNDEFINED, CFG_TYPE_STRING, NULL, vsn(2, 2, 98), NULL)
-cfg_runtime(devices_cache_CFG, "cache", devices_CFG_SECTION, 0, CFG_TYPE_STRING, vsn(1, 0, 0), NULL)
-cfg_runtime(devices_cache_dir_CFG, "cache_dir", devices_CFG_SECTION, 0, CFG_TYPE_STRING, vsn(1, 2, 19), NULL)
-cfg(devices_cache_file_prefix_CFG, "cache_file_prefix", devices_CFG_SECTION, CFG_ALLOW_EMPTY, CFG_TYPE_STRING, DEFAULT_CACHE_FILE_PREFIX, vsn(1, 2, 19), NULL)
-cfg(devices_write_cache_state_CFG, "write_cache_state", devices_CFG_SECTION, 0, CFG_TYPE_BOOL, 1, vsn(1, 0, 0), NULL)
-cfg_array(devices_types_CFG, "types", devices_CFG_SECTION, CFG_DEFAULT_UNDEFINED, CFG_TYPE_INT | CFG_TYPE_STRING, NULL, vsn(1, 0, 0), NULL)
-cfg(devices_sysfs_scan_CFG, "sysfs_scan", devices_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_SYSFS_SCAN, vsn(1, 0, 8), NULL)
-cfg(devices_multipath_component_detection_CFG, "multipath_component_detection", devices_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_MULTIPATH_COMPONENT_DETECTION, vsn(2, 2, 89), NULL)
-cfg(devices_md_component_detection_CFG, "md_component_detection", devices_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_MD_COMPONENT_DETECTION, vsn(1, 0, 18), NULL)
-cfg(devices_md_chunk_alignment_CFG, "md_chunk_alignment", devices_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_MD_CHUNK_ALIGNMENT, vsn(2, 2, 48), NULL)
-cfg(devices_default_data_alignment_CFG, "default_data_alignment", devices_CFG_SECTION, 0, CFG_TYPE_INT, DEFAULT_DATA_ALIGNMENT, vsn(2, 2, 75), NULL)
-cfg(devices_data_alignment_detection_CFG, "data_alignment_detection", devices_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_DATA_ALIGNMENT_DETECTION, vsn(2, 2, 51), NULL)
-cfg(devices_data_alignment_CFG, "data_alignment", devices_CFG_SECTION, 0, CFG_TYPE_INT, 0, vsn(2, 2, 45), NULL)
-cfg(devices_data_alignment_offset_detection_CFG, "data_alignment_offset_detection", devices_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_DATA_ALIGNMENT_OFFSET_DETECTION, vsn(2, 2, 50), NULL)
-cfg(devices_ignore_suspended_devices_CFG, "ignore_suspended_devices", devices_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_IGNORE_SUSPENDED_DEVICES, vsn(1, 2, 19), NULL)
-cfg(devices_ignore_lvm_mirrors_CFG, "ignore_lvm_mirrors", devices_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_IGNORE_LVM_MIRRORS, vsn(2, 2, 104), NULL)
-cfg(devices_disable_after_error_count_CFG, "disable_after_error_count", devices_CFG_SECTION, 0, CFG_TYPE_INT, DEFAULT_DISABLE_AFTER_ERROR_COUNT, vsn(2, 2, 75), NULL)
-cfg(devices_require_restorefile_with_uuid_CFG, "require_restorefile_with_uuid", devices_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_REQUIRE_RESTOREFILE_WITH_UUID, vsn(2, 2, 73), NULL)
-cfg(devices_pv_min_size_CFG, "pv_min_size", devices_CFG_SECTION, 0, CFG_TYPE_INT, DEFAULT_PV_MIN_SIZE_KB, vsn(2, 2, 85), NULL)
-cfg(devices_issue_discards_CFG, "issue_discards", devices_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_ISSUE_DISCARDS, vsn(2, 2, 85), NULL)
-
-cfg_array(allocation_cling_tag_list_CFG, "cling_tag_list", allocation_CFG_SECTION, CFG_DEFAULT_UNDEFINED, CFG_TYPE_STRING, NULL, vsn(2, 2, 77), NULL)
-cfg(allocation_maximise_cling_CFG, "maximise_cling", allocation_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_MAXIMISE_CLING, vsn(2, 2, 85), NULL)
-cfg(allocation_use_blkid_wiping_CFG, "use_blkid_wiping", allocation_CFG_SECTION, 0, CFG_TYPE_BOOL, 1, vsn(2, 2, 105), NULL)
-cfg(allocation_wipe_signatures_when_zeroing_new_lvs_CFG, "wipe_signatures_when_zeroing_new_lvs", allocation_CFG_SECTION, 0, CFG_TYPE_BOOL, 1, vsn(2, 2, 105), NULL)
-cfg(allocation_mirror_logs_require_separate_pvs_CFG, "mirror_logs_require_separate_pvs", allocation_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_MIRROR_LOGS_REQUIRE_SEPARATE_PVS, vsn(2, 2, 85), NULL)
-cfg(allocation_cache_pool_metadata_require_separate_pvs_CFG, "cache_pool_metadata_require_separate_pvs", allocation_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_CACHE_POOL_METADATA_REQUIRE_SEPARATE_PVS, vsn(2, 2, 106), NULL)
-cfg(allocation_cache_pool_cachemode_CFG, "cache_pool_cachemode", allocation_CFG_SECTION, 0, CFG_TYPE_STRING, DEFAULT_CACHE_POOL_CACHEMODE, vsn(2, 2, 113), NULL)
-cfg_runtime(allocation_cache_pool_chunk_size_CFG, "cache_pool_chunk_size", allocation_CFG_SECTION, CFG_DEFAULT_UNDEFINED, CFG_TYPE_INT, vsn(2, 2, 106), NULL)
-cfg(allocation_thin_pool_metadata_require_separate_pvs_CFG, "thin_pool_metadata_require_separate_pvs", allocation_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_THIN_POOL_METADATA_REQUIRE_SEPARATE_PVS, vsn(2, 2, 89), NULL)
-cfg(allocation_thin_pool_zero_CFG, "thin_pool_zero", allocation_CFG_SECTION, CFG_PROFILABLE | CFG_PROFILABLE_METADATA, CFG_TYPE_BOOL, DEFAULT_THIN_POOL_ZERO, vsn(2, 2, 99), NULL)
-cfg(allocation_thin_pool_discards_CFG, "thin_pool_discards", allocation_CFG_SECTION, CFG_PROFILABLE | CFG_PROFILABLE_METADATA, CFG_TYPE_STRING, DEFAULT_THIN_POOL_DISCARDS, vsn(2, 2, 99), NULL)
-cfg(allocation_thin_pool_chunk_size_policy_CFG, "thin_pool_chunk_size_policy", allocation_CFG_SECTION, CFG_PROFILABLE | CFG_PROFILABLE_METADATA, CFG_TYPE_STRING, DEFAULT_THIN_POOL_CHUNK_SIZE_POLICY, vsn(2, 2, 101), NULL)
-cfg_runtime(allocation_thin_pool_chunk_size_CFG, "thin_pool_chunk_size", allocation_CFG_SECTION, CFG_PROFILABLE | CFG_PROFILABLE_METADATA | CFG_DEFAULT_UNDEFINED, CFG_TYPE_INT, vsn(2, 2, 99), NULL)
-cfg(allocation_physical_extent_size_CFG, "physical_extent_size", allocation_CFG_SECTION, 0, CFG_TYPE_INT, DEFAULT_EXTENT_SIZE, vsn(2, 2, 112), NULL)
-
-cfg(log_verbose_CFG, "verbose", log_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_VERBOSE, vsn(1, 0, 0), NULL)
-cfg(log_silent_CFG, "silent", log_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_SILENT, vsn(2, 2, 98), NULL)
-cfg(log_syslog_CFG, "syslog", log_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_SYSLOG, vsn(1, 0, 0), NULL)
-cfg(log_file_CFG, "file", log_CFG_SECTION, CFG_DEFAULT_UNDEFINED, CFG_TYPE_STRING, NULL, vsn(1, 0, 0), NULL)
-cfg(log_overwrite_CFG, "overwrite", log_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_OVERWRITE, vsn(1, 0, 0), NULL)
-cfg(log_level_CFG, "level", log_CFG_SECTION, 0, CFG_TYPE_INT, DEFAULT_LOGLEVEL, vsn(1, 0, 0), NULL)
-cfg(log_indent_CFG, "indent", log_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_INDENT, vsn(1, 0, 0), NULL)
-cfg(log_command_names_CFG, "command_names", log_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_CMD_NAME, vsn(1, 0, 0), NULL)
-cfg(log_prefix_CFG, "prefix", log_CFG_SECTION, CFG_ALLOW_EMPTY, CFG_TYPE_STRING, DEFAULT_MSG_PREFIX, vsn(1, 0, 0), NULL)
-cfg(log_activation_CFG, "activation", log_CFG_SECTION, 0, CFG_TYPE_BOOL, 0, vsn(1, 0, 0), NULL)
-cfg(log_activate_file_CFG, "activate_file", log_CFG_SECTION, CFG_DEFAULT_UNDEFINED, CFG_TYPE_STRING, NULL, vsn(1, 0, 0), NULL)
-cfg_array(log_debug_classes_CFG, "debug_classes", log_CFG_SECTION, CFG_ALLOW_EMPTY, CFG_TYPE_STRING, "#Smemory#Sdevices#Sactivation#Sallocation#Slvmetad#Smetadata#Scache#Slocking", vsn(2, 2, 99), NULL)
-
-cfg(backup_backup_CFG, "backup", backup_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_BACKUP_ENABLED, vsn(1, 0, 0), NULL)
-cfg_runtime(backup_backup_dir_CFG, "backup_dir", backup_CFG_SECTION, 0, CFG_TYPE_STRING, vsn(1, 0, 0), NULL)
-cfg(backup_archive_CFG, "archive", backup_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_ARCHIVE_ENABLED, vsn(1, 0, 0), NULL)
-cfg_runtime(backup_archive_dir_CFG, "archive_dir", backup_CFG_SECTION, 0, CFG_TYPE_STRING, vsn(1, 0, 0), NULL)
-cfg(backup_retain_min_CFG, "retain_min", backup_CFG_SECTION, 0, CFG_TYPE_INT, DEFAULT_ARCHIVE_NUMBER, vsn(1, 0, 0), NULL)
-cfg(backup_retain_days_CFG, "retain_days", backup_CFG_SECTION, 0, CFG_TYPE_INT, DEFAULT_ARCHIVE_DAYS, vsn(1, 0, 0), NULL)
-
-cfg(shell_history_size_CFG, "history_size", shell_CFG_SECTION, 0, CFG_TYPE_INT, DEFAULT_MAX_HISTORY, vsn(1, 0, 0), NULL)
-
-cfg(global_umask_CFG, "umask", global_CFG_SECTION, 0, CFG_TYPE_INT, DEFAULT_UMASK, vsn(1, 0, 0), NULL)
-cfg(global_test_CFG, "test", global_CFG_SECTION, 0, CFG_TYPE_BOOL, 0, vsn(1, 0, 0), NULL)
-cfg(global_units_CFG, "units", global_CFG_SECTION, CFG_PROFILABLE, CFG_TYPE_STRING, DEFAULT_UNITS, vsn(1, 0, 0), NULL)
-cfg(global_si_unit_consistency_CFG, "si_unit_consistency", global_CFG_SECTION, CFG_PROFILABLE, CFG_TYPE_BOOL, DEFAULT_SI_UNIT_CONSISTENCY, vsn(2, 2, 54), NULL)
-cfg(global_activation_CFG, "activation", global_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_ACTIVATION, vsn(1, 0, 0), NULL)
-cfg(global_suffix_CFG, "suffix", global_CFG_SECTION, CFG_PROFILABLE, CFG_TYPE_BOOL, DEFAULT_SUFFIX, vsn(1, 0, 0), NULL)
-cfg(global_fallback_to_lvm1_CFG, "fallback_to_lvm1", global_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_FALLBACK_TO_LVM1, vsn(1, 0, 18), NULL)
-cfg(global_format_CFG, "format", global_CFG_SECTION, 0, CFG_TYPE_STRING, DEFAULT_FORMAT, vsn(1, 0, 0), NULL)
-cfg_array(global_format_libraries_CFG, "format_libraries", global_CFG_SECTION, CFG_DEFAULT_UNDEFINED, CFG_TYPE_STRING, NULL, vsn(1, 0, 0), NULL)
-cfg_array(global_segment_libraries_CFG, "segment_libraries", global_CFG_SECTION, CFG_DEFAULT_UNDEFINED, CFG_TYPE_STRING, NULL, vsn(1, 0, 18), NULL)
-cfg(global_proc_CFG, "proc", global_CFG_SECTION, 0, CFG_TYPE_STRING, DEFAULT_PROC_DIR, vsn(1, 0, 0), NULL)
-cfg(global_locking_type_CFG, "locking_type", global_CFG_SECTION, 0, CFG_TYPE_INT, 1, vsn(1, 0, 0), NULL)
-cfg(global_wait_for_locks_CFG, "wait_for_locks", global_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_WAIT_FOR_LOCKS, vsn(2, 2, 50), NULL)
-cfg(global_fallback_to_clustered_locking_CFG, "fallback_to_clustered_locking", global_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_FALLBACK_TO_CLUSTERED_LOCKING, vsn(2, 2, 42), NULL)
-cfg(global_fallback_to_local_locking_CFG, "fallback_to_local_locking", global_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_FALLBACK_TO_LOCAL_LOCKING, vsn(2, 2, 42), NULL)
-cfg(global_locking_dir_CFG, "locking_dir", global_CFG_SECTION, 0, CFG_TYPE_STRING, DEFAULT_LOCK_DIR, vsn(1, 0, 0), NULL)
-cfg(global_prioritise_write_locks_CFG, "prioritise_write_locks", global_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_PRIORITISE_WRITE_LOCKS, vsn(2, 2, 52), NULL)
-cfg(global_library_dir_CFG, "library_dir", global_CFG_SECTION, CFG_DEFAULT_UNDEFINED, CFG_TYPE_STRING, NULL, vsn(1, 0, 0), NULL)
-cfg(global_locking_library_CFG, "locking_library", global_CFG_SECTION, CFG_ALLOW_EMPTY, CFG_TYPE_STRING, DEFAULT_LOCKING_LIB, vsn(1, 0, 0), NULL)
-cfg(global_abort_on_internal_errors_CFG, "abort_on_internal_errors", global_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_ABORT_ON_INTERNAL_ERRORS, vsn(2, 2, 57), NULL)
-cfg(global_detect_internal_vg_cache_corruption_CFG, "detect_internal_vg_cache_corruption", global_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_DETECT_INTERNAL_VG_CACHE_CORRUPTION, vsn(2, 2, 96), NULL)
-cfg(global_metadata_read_only_CFG, "metadata_read_only", global_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_METADATA_READ_ONLY, vsn(2, 2, 75), NULL)
-cfg(global_mirror_segtype_default_CFG, "mirror_segtype_default", global_CFG_SECTION, 0, CFG_TYPE_STRING, DEFAULT_MIRROR_SEGTYPE, vsn(2, 2, 87), NULL)
-cfg(global_raid10_segtype_default_CFG, "raid10_segtype_default", global_CFG_SECTION, 0, CFG_TYPE_STRING, DEFAULT_RAID10_SEGTYPE, vsn(2, 2, 99), NULL)
-cfg(global_raid_stripe_size_default_CFG, "raid_stripe_size_default", global_CFG_SECTION, 0, CFG_TYPE_INT, DEFAULT_RAID_STRIPE_SIZE, vsn(2, 2, 120), NULL)
-cfg(global_sparse_segtype_default_CFG, "sparse_segtype_default", global_CFG_SECTION, 0, CFG_TYPE_STRING, DEFAULT_SPARSE_SEGTYPE, vsn(2, 2, 112), NULL)
-cfg(global_lvdisplay_shows_full_device_path_CFG, "lvdisplay_shows_full_device_path", global_CFG_SECTION, CFG_PROFILABLE, CFG_TYPE_BOOL, DEFAULT_LVDISPLAY_SHOWS_FULL_DEVICE_PATH, vsn(2, 2, 89), NULL)
-cfg(global_use_lvmetad_CFG, "use_lvmetad", global_CFG_SECTION, 0, CFG_TYPE_BOOL, 0, vsn(2, 2, 93), NULL)
-cfg(global_thin_check_executable_CFG, "thin_check_executable", global_CFG_SECTION, CFG_ALLOW_EMPTY, CFG_TYPE_STRING, THIN_CHECK_CMD, vsn(2, 2, 94), NULL)
-cfg_array(global_thin_check_options_CFG, "thin_check_options", global_CFG_SECTION, 0, CFG_TYPE_STRING, "#S" DEFAULT_THIN_CHECK_OPTIONS, vsn(2, 2, 96), NULL)
-cfg_array(global_thin_disabled_features_CFG, "thin_disabled_features", global_CFG_SECTION, CFG_ALLOW_EMPTY, CFG_TYPE_STRING, NULL, vsn(2, 2, 99), NULL)
-cfg(global_thin_dump_executable_CFG, "thin_dump_executable", global_CFG_SECTION, CFG_ALLOW_EMPTY, CFG_TYPE_STRING, THIN_DUMP_CMD, vsn(2, 2, 100), NULL)
-cfg(global_thin_repair_executable_CFG, "thin_repair_executable", global_CFG_SECTION, CFG_ALLOW_EMPTY, CFG_TYPE_STRING, THIN_REPAIR_CMD, vsn(2, 2, 100), NULL)
-cfg_array(global_thin_repair_options_CFG, "thin_repair_options", global_CFG_SECTION, 0, CFG_TYPE_STRING, "#S" DEFAULT_THIN_REPAIR_OPTIONS, vsn(2, 2, 100), NULL)
-cfg(global_cache_check_executable_CFG, "cache_check_executable", global_CFG_SECTION, CFG_ALLOW_EMPTY, CFG_TYPE_STRING, CACHE_CHECK_CMD, vsn(2, 2, 108), NULL)
-cfg_array(global_cache_check_options_CFG, "cache_check_options", global_CFG_SECTION, 0, CFG_TYPE_STRING, "#S" DEFAULT_CACHE_CHECK_OPTIONS, vsn(2, 2, 108), NULL)
-cfg(global_cache_dump_executable_CFG, "cache_dump_executable", global_CFG_SECTION, CFG_ALLOW_EMPTY, CFG_TYPE_STRING, CACHE_DUMP_CMD, vsn(2, 2, 108), NULL)
-cfg(global_cache_repair_executable_CFG, "cache_repair_executable", global_CFG_SECTION, CFG_ALLOW_EMPTY, CFG_TYPE_STRING, CACHE_REPAIR_CMD, vsn(2, 2, 108), NULL)
-cfg_array(global_cache_repair_options_CFG, "cache_repair_options", global_CFG_SECTION, 0, CFG_TYPE_STRING, "#S" DEFAULT_CACHE_REPAIR_OPTIONS, vsn(2, 2, 108), NULL)
-
-cfg(activation_checks_CFG, "checks", activation_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_ACTIVATION_CHECKS, vsn(2, 2, 86), NULL)
-cfg(activation_udev_sync_CFG, "udev_sync", activation_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_UDEV_SYNC, vsn(2, 2, 51), NULL)
-cfg(activation_udev_rules_CFG, "udev_rules", activation_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_UDEV_RULES, vsn(2, 2, 57), NULL)
-cfg(activation_verify_udev_operations_CFG, "verify_udev_operations", activation_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_VERIFY_UDEV_OPERATIONS, vsn(2, 2, 86), NULL)
-cfg(activation_retry_deactivation_CFG, "retry_deactivation", activation_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_RETRY_DEACTIVATION, vsn(2, 2, 89), NULL)
-cfg(activation_missing_stripe_filler_CFG, "missing_stripe_filler", activation_CFG_SECTION, 0, CFG_TYPE_STRING, DEFAULT_STRIPE_FILLER, vsn(1, 0, 0), NULL)
-cfg(activation_use_linear_target_CFG, "use_linear_target", activation_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_USE_LINEAR_TARGET, vsn(2, 2, 89), NULL)
-cfg(activation_reserved_stack_CFG, "reserved_stack", activation_CFG_SECTION, 0, CFG_TYPE_INT, DEFAULT_RESERVED_STACK, vsn(1, 0, 0), NULL)
-cfg(activation_reserved_memory_CFG, "reserved_memory", activation_CFG_SECTION, 0, CFG_TYPE_INT, DEFAULT_RESERVED_MEMORY, vsn(1, 0, 0), NULL)
-cfg(activation_process_priority_CFG, "process_priority", activation_CFG_SECTION, 0, CFG_TYPE_INT, DEFAULT_PROCESS_PRIORITY, vsn(1, 0, 0), NULL)
-cfg_array(activation_volume_list_CFG, "volume_list", activation_CFG_SECTION, CFG_ALLOW_EMPTY|CFG_DEFAULT_UNDEFINED, CFG_TYPE_STRING, NULL, vsn(1, 0, 18), NULL)
-cfg_array(activation_auto_activation_volume_list_CFG, "auto_activation_volume_list", activation_CFG_SECTION, CFG_ALLOW_EMPTY | CFG_DEFAULT_UNDEFINED, CFG_TYPE_STRING, NULL, vsn(2, 2, 97), NULL)
-cfg_array(activation_read_only_volume_list_CFG, "read_only_volume_list", activation_CFG_SECTION, CFG_ALLOW_EMPTY | CFG_DEFAULT_UNDEFINED, CFG_TYPE_STRING, NULL, vsn(2, 2, 89), NULL)
-cfg(activation_mirror_region_size_CFG, "mirror_region_size", activation_CFG_SECTION, 0, CFG_TYPE_INT, DEFAULT_RAID_REGION_SIZE, vsn(1, 0, 0), NULL)
-cfg(activation_raid_region_size_CFG, "raid_region_size", activation_CFG_SECTION, 0, CFG_TYPE_INT, DEFAULT_RAID_REGION_SIZE, vsn(2, 2, 99), NULL)
-cfg(activation_readahead_CFG, "readahead", activation_CFG_SECTION, 0, CFG_TYPE_STRING, DEFAULT_READ_AHEAD, vsn(1, 0, 23), NULL)
-cfg(activation_raid_fault_policy_CFG, "raid_fault_policy", activation_CFG_SECTION, 0, CFG_TYPE_STRING, DEFAULT_RAID_FAULT_POLICY, vsn(2, 2, 89), NULL)
-cfg(activation_mirror_device_fault_policy_CFG, "mirror_device_fault_policy", activation_CFG_SECTION, 0, CFG_TYPE_STRING, DEFAULT_MIRROR_DEVICE_FAULT_POLICY, vsn(1, 2, 10), NULL)
-cfg(activation_mirror_log_fault_policy_CFG, "mirror_log_fault_policy", activation_CFG_SECTION, 0, CFG_TYPE_STRING, DEFAULT_MIRROR_LOG_FAULT_POLICY, vsn(1, 2, 18), NULL)
-cfg_runtime(activation_mirror_image_fault_policy_CFG, "mirror_image_fault_policy", activation_CFG_SECTION, 0, CFG_TYPE_STRING, vsn(2, 2, 57), NULL)
-cfg(activation_snapshot_autoextend_threshold_CFG, "snapshot_autoextend_threshold", activation_CFG_SECTION, 0, CFG_TYPE_INT, DEFAULT_SNAPSHOT_AUTOEXTEND_THRESHOLD, vsn(2, 2, 75), NULL)
-cfg(activation_snapshot_autoextend_percent_CFG, "snapshot_autoextend_percent", activation_CFG_SECTION, 0, CFG_TYPE_INT, DEFAULT_SNAPSHOT_AUTOEXTEND_PERCENT, vsn(2, 2, 75), NULL)
-cfg(activation_thin_pool_autoextend_threshold_CFG, "thin_pool_autoextend_threshold", activation_CFG_SECTION, CFG_PROFILABLE | CFG_PROFILABLE_METADATA, CFG_TYPE_INT, DEFAULT_THIN_POOL_AUTOEXTEND_THRESHOLD, vsn(2, 2, 89), NULL)
-cfg(activation_thin_pool_autoextend_percent_CFG, "thin_pool_autoextend_percent", activation_CFG_SECTION, CFG_PROFILABLE | CFG_PROFILABLE_METADATA, CFG_TYPE_INT, DEFAULT_THIN_POOL_AUTOEXTEND_PERCENT, vsn(2, 2, 89), NULL)
-cfg_array(activation_mlock_filter_CFG, "mlock_filter", activation_CFG_SECTION, CFG_DEFAULT_UNDEFINED, CFG_TYPE_STRING, NULL, vsn(2, 2, 62), NULL)
-cfg(activation_use_mlockall_CFG, "use_mlockall", activation_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_USE_MLOCKALL, vsn(2, 2, 62), NULL)
-cfg(activation_monitoring_CFG, "monitoring", activation_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_DMEVENTD_MONITOR, vsn(2, 2, 63), NULL)
-cfg(activation_polling_interval_CFG, "polling_interval", activation_CFG_SECTION, 0, CFG_TYPE_INT, DEFAULT_INTERVAL, vsn(2, 2, 63), NULL)
-cfg(activation_auto_set_activation_skip_CFG, "auto_set_activation_skip", activation_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_AUTO_SET_ACTIVATION_SKIP, vsn(2,2,99), NULL)
-cfg(activation_mode_CFG, "activation_mode", activation_CFG_SECTION, 0, CFG_TYPE_STRING, DEFAULT_ACTIVATION_MODE, vsn(2,2,108), NULL)
-
-cfg(metadata_pvmetadatacopies_CFG, "pvmetadatacopies", metadata_CFG_SECTION, CFG_ADVANCED, CFG_TYPE_INT, DEFAULT_PVMETADATACOPIES, vsn(1, 0, 0), NULL)
-cfg(metadata_vgmetadatacopies_CFG, "vgmetadatacopies", metadata_CFG_SECTION, CFG_ADVANCED, CFG_TYPE_INT, DEFAULT_VGMETADATACOPIES, vsn(2, 2, 69), NULL)
-cfg(metadata_pvmetadatasize_CFG, "pvmetadatasize", metadata_CFG_SECTION, CFG_ADVANCED, CFG_TYPE_INT, DEFAULT_PVMETADATASIZE, vsn(1, 0, 0), NULL)
-cfg(metadata_pvmetadataignore_CFG, "pvmetadataignore", metadata_CFG_SECTION, CFG_ADVANCED, CFG_TYPE_BOOL, DEFAULT_PVMETADATAIGNORE, vsn(2, 2, 69), NULL)
-cfg(metadata_stripesize_CFG, "stripesize", metadata_CFG_SECTION, CFG_ADVANCED, CFG_TYPE_INT, DEFAULT_STRIPESIZE, vsn(1, 0, 0), NULL)
-cfg_array(metadata_dirs_CFG, "dirs", metadata_CFG_SECTION, CFG_ADVANCED | CFG_DEFAULT_UNDEFINED, CFG_TYPE_STRING, NULL, vsn(1, 0, 0), NULL)
-
-cfg_section(metadata_disk_areas_CFG_SUBSECTION, "disk_areas", metadata_CFG_SECTION, CFG_ADVANCED | CFG_UNSUPPORTED | CFG_DEFAULT_UNDEFINED, vsn(1, 0, 0), NULL)
-cfg_section(disk_area_CFG_SUBSECTION, "disk_area", metadata_disk_areas_CFG_SUBSECTION, CFG_NAME_VARIABLE | CFG_ADVANCED | CFG_UNSUPPORTED | CFG_DEFAULT_UNDEFINED, vsn(1, 0, 0), NULL)
-cfg(disk_area_start_sector_CFG, "start_sector", disk_area_CFG_SUBSECTION, CFG_ADVANCED | CFG_UNSUPPORTED | CFG_DEFAULT_UNDEFINED, CFG_TYPE_INT, 0, vsn(1, 0, 0), NULL)
-cfg(disk_area_size_CFG, "size", disk_area_CFG_SUBSECTION, CFG_ADVANCED | CFG_UNSUPPORTED | CFG_DEFAULT_UNDEFINED, CFG_TYPE_INT, 0, vsn(1, 0, 0), NULL)
-cfg(disk_area_id_CFG, "id", disk_area_CFG_SUBSECTION, CFG_ADVANCED | CFG_UNSUPPORTED | CFG_DEFAULT_UNDEFINED, CFG_TYPE_STRING, NULL, vsn(1, 0, 0), NULL)
-
-cfg(report_compact_output_CFG, "compact_output", report_CFG_SECTION, CFG_PROFILABLE, CFG_TYPE_BOOL, DEFAULT_REP_COMPACT_OUTPUT, vsn(2, 2, 115), NULL)
-cfg(report_aligned_CFG, "aligned", report_CFG_SECTION, CFG_PROFILABLE, CFG_TYPE_BOOL, DEFAULT_REP_ALIGNED, vsn(1, 0, 0), NULL)
-cfg(report_buffered_CFG, "buffered", report_CFG_SECTION, CFG_PROFILABLE, CFG_TYPE_BOOL, DEFAULT_REP_BUFFERED, vsn(1, 0, 0), NULL)
-cfg(report_headings_CFG, "headings", report_CFG_SECTION, CFG_PROFILABLE, CFG_TYPE_BOOL, DEFAULT_REP_HEADINGS, vsn(1, 0, 0), NULL)
-cfg(report_separator_CFG, "separator", report_CFG_SECTION, CFG_PROFILABLE, CFG_TYPE_STRING, DEFAULT_REP_SEPARATOR, vsn(1, 0, 0), NULL)
-cfg(report_list_item_separator_CFG, "list_item_separator", report_CFG_SECTION, CFG_PROFILABLE, CFG_TYPE_STRING, DEFAULT_REP_LIST_ITEM_SEPARATOR, vsn(2, 2, 108), NULL)
-cfg(report_prefixes_CFG, "prefixes", report_CFG_SECTION, CFG_PROFILABLE, CFG_TYPE_BOOL, DEFAULT_REP_PREFIXES, vsn(2, 2, 36), NULL)
-cfg(report_quoted_CFG, "quoted", report_CFG_SECTION, CFG_PROFILABLE, CFG_TYPE_BOOL, DEFAULT_REP_QUOTED, vsn(2, 2, 39), NULL)
-cfg(report_colums_as_rows_CFG, "colums_as_rows", report_CFG_SECTION, CFG_PROFILABLE, CFG_TYPE_BOOL, DEFAULT_REP_COLUMNS_AS_ROWS, vsn(1, 0, 0), NULL)
-cfg(report_binary_values_as_numeric_CFG, "binary_values_as_numeric", report_CFG_SECTION, CFG_PROFILABLE, CFG_TYPE_BOOL, 0, vsn(2, 2, 108), NULL)
-cfg(report_devtypes_sort_CFG, "devtypes_sort", report_CFG_SECTION, CFG_PROFILABLE, CFG_TYPE_STRING, DEFAULT_DEVTYPES_SORT, vsn(2, 2, 101), NULL)
-cfg(report_devtypes_cols_CFG, "devtypes_cols", report_CFG_SECTION, CFG_PROFILABLE, CFG_TYPE_STRING, DEFAULT_DEVTYPES_COLS, vsn(2, 2, 101), NULL)
-cfg(report_devtypes_cols_verbose_CFG, "devtypes_cols_verbose", report_CFG_SECTION, CFG_PROFILABLE, CFG_TYPE_STRING, DEFAULT_DEVTYPES_COLS_VERB, vsn(2, 2, 101), NULL)
-cfg(report_lvs_sort_CFG, "lvs_sort", report_CFG_SECTION, CFG_PROFILABLE, CFG_TYPE_STRING, DEFAULT_LVS_SORT, vsn(1, 0, 0), NULL)
-cfg(report_lvs_cols_CFG, "lvs_cols", report_CFG_SECTION, CFG_PROFILABLE, CFG_TYPE_STRING, DEFAULT_LVS_COLS, vsn(1, 0, 0), NULL)
-cfg(report_lvs_cols_verbose_CFG, "lvs_cols_verbose", report_CFG_SECTION, CFG_PROFILABLE, CFG_TYPE_STRING, DEFAULT_LVS_COLS_VERB, vsn(1, 0, 0), NULL)
-cfg(report_vgs_sort_CFG, "vgs_sort", report_CFG_SECTION, CFG_PROFILABLE, CFG_TYPE_STRING, DEFAULT_VGS_SORT, vsn(1, 0, 0), NULL)
-cfg(report_vgs_cols_CFG, "vgs_cols", report_CFG_SECTION, CFG_PROFILABLE, CFG_TYPE_STRING, DEFAULT_VGS_COLS, vsn(1, 0, 0), NULL)
-cfg(report_vgs_cols_verbose_CFG, "vgs_cols_verbose", report_CFG_SECTION, CFG_PROFILABLE, CFG_TYPE_STRING, DEFAULT_VGS_COLS_VERB, vsn(1, 0, 0), NULL)
-cfg(report_pvs_sort_CFG, "pvs_sort", report_CFG_SECTION, CFG_PROFILABLE, CFG_TYPE_STRING, DEFAULT_PVS_SORT, vsn(1, 0, 0), NULL)
-cfg(report_pvs_cols_CFG, "pvs_cols", report_CFG_SECTION, CFG_PROFILABLE, CFG_TYPE_STRING, DEFAULT_PVS_COLS, vsn(1, 0, 0), NULL)
-cfg(report_pvs_cols_verbose_CFG, "pvs_cols_verbose", report_CFG_SECTION, CFG_PROFILABLE, CFG_TYPE_STRING, DEFAULT_PVS_COLS_VERB, vsn(1, 0, 0), NULL)
-cfg(report_segs_sort_CFG, "segs_sort", report_CFG_SECTION, CFG_PROFILABLE, CFG_TYPE_STRING, DEFAULT_SEGS_SORT, vsn(1, 0, 0), NULL)
-cfg(report_segs_cols_CFG, "segs_cols", report_CFG_SECTION, CFG_PROFILABLE, CFG_TYPE_STRING, DEFAULT_SEGS_COLS, vsn(1, 0, 0), NULL)
-cfg(report_segs_cols_verbose_CFG, "segs_cols_verbose", report_CFG_SECTION, CFG_PROFILABLE, CFG_TYPE_STRING, DEFAULT_SEGS_COLS_VERB, vsn(1, 0, 0), NULL)
-cfg(report_pvsegs_sort_CFG, "pvsegs_sort", report_CFG_SECTION, CFG_PROFILABLE, CFG_TYPE_STRING, DEFAULT_PVSEGS_SORT, vsn(1, 1, 3), NULL)
-cfg(report_pvsegs_cols_CFG, "pvsegs_cols", report_CFG_SECTION, CFG_PROFILABLE, CFG_TYPE_STRING, DEFAULT_PVSEGS_COLS, vsn(1, 1, 3), NULL)
-cfg(report_pvsegs_cols_verbose_CFG, "pvsegs_cols_verbose", report_CFG_SECTION, CFG_PROFILABLE, CFG_TYPE_STRING, DEFAULT_PVSEGS_COLS_VERB, vsn(1, 1, 3), NULL)
-
-cfg(dmeventd_mirror_library_CFG, "mirror_library", dmeventd_CFG_SECTION, 0, CFG_TYPE_STRING, DEFAULT_DMEVENTD_MIRROR_LIB, vsn(1, 2, 3), NULL)
-cfg(dmeventd_raid_library_CFG, "raid_library", dmeventd_CFG_SECTION, 0, CFG_TYPE_STRING, DEFAULT_DMEVENTD_RAID_LIB, vsn(2, 2, 87), NULL)
-cfg(dmeventd_snapshot_library_CFG, "snapshot_library", dmeventd_CFG_SECTION, 0, CFG_TYPE_STRING, DEFAULT_DMEVENTD_SNAPSHOT_LIB, vsn(1, 2, 26), NULL)
-cfg(dmeventd_thin_library_CFG, "thin_library", dmeventd_CFG_SECTION, 0, CFG_TYPE_STRING, DEFAULT_DMEVENTD_THIN_LIB, vsn(2, 2, 89), NULL)
-cfg(dmeventd_executable_CFG, "executable", dmeventd_CFG_SECTION, 0, CFG_TYPE_STRING, DEFAULT_DMEVENTD_PATH, vsn(2, 2, 73), NULL)
-
-cfg(tags_hosttags_CFG, "hosttags", tags_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_HOSTTAGS, vsn(1, 0, 18), NULL)
-
-cfg_section(tag_CFG_SUBSECTION, "tag", tags_CFG_SECTION, CFG_NAME_VARIABLE | CFG_DEFAULT_UNDEFINED, vsn(1, 0, 18), NULL)
-cfg(tag_host_list_CFG, "host_list", tag_CFG_SUBSECTION, CFG_ALLOW_EMPTY | CFG_DEFAULT_UNDEFINED, CFG_TYPE_STRING, NULL, vsn(1, 0, 18), NULL)
-
-cfg(CFG_COUNT, NULL, root_CFG_SECTION, 0, CFG_TYPE_INT, 0, vsn(0, 0, 0), NULL)
+cfg_section(root_CFG_SECTION, "(root)", root_CFG_SECTION, 0, vsn(0, 0, 0), 0, NULL, NULL)
+
+cfg_section(config_CFG_SECTION, "config", root_CFG_SECTION, 0, vsn(2, 2, 99), 0, NULL,
+ "How LVM configuration settings are handled.\n")
+
+cfg_section(devices_CFG_SECTION, "devices", root_CFG_SECTION, 0, vsn(1, 0, 0), 0, NULL,
+ "How LVM uses block devices.\n")
+
+cfg_section(allocation_CFG_SECTION, "allocation", root_CFG_SECTION, CFG_PROFILABLE, vsn(2, 2, 77), 0, NULL,
+ "How LVM selects space and applies properties to LVs.\n")
+
+cfg_section(log_CFG_SECTION, "log", root_CFG_SECTION, 0, vsn(1, 0, 0), 0, NULL,
+ "How LVM log information is reported.\n")
+
+cfg_section(backup_CFG_SECTION, "backup", root_CFG_SECTION, 0, vsn(1, 0, 0), 0, NULL,
+ "How LVM metadata is backed up and archived.\n"
+ "In LVM, a 'backup' is a copy of the metadata for the current system,\n"
+ "and an 'archive' contains old metadata configurations. They are\n"
+ "stored in a human readable text format.\n")
+
+cfg_section(shell_CFG_SECTION, "shell", root_CFG_SECTION, 0, vsn(1, 0, 0), 0, NULL,
+ "Settings for running LVM in shell (readline) mode.\n")
+
+cfg_section(global_CFG_SECTION, "global", root_CFG_SECTION, CFG_PROFILABLE, vsn(1, 0, 0), 0, NULL,
+ "Miscellaneous global LVM settings.\n")
+
+cfg_section(activation_CFG_SECTION, "activation", root_CFG_SECTION, CFG_PROFILABLE, vsn(1, 0, 0), 0, NULL, NULL)
+
+cfg_section(metadata_CFG_SECTION, "metadata", root_CFG_SECTION, CFG_DEFAULT_COMMENTED, vsn(1, 0, 0), 0, NULL, NULL)
+
+cfg_section(report_CFG_SECTION, "report", root_CFG_SECTION, CFG_PROFILABLE | CFG_DEFAULT_COMMENTED, vsn(1, 0, 0), 0, NULL,
+ "LVM report command output formatting.\n")
+
+cfg_section(dmeventd_CFG_SECTION, "dmeventd", root_CFG_SECTION, 0, vsn(1, 2, 3), 0, NULL,
+ "Settings for the LVM event daemon.\n")
+
+cfg_section(tags_CFG_SECTION, "tags", root_CFG_SECTION, CFG_DEFAULT_COMMENTED, vsn(1, 0, 18), 0, NULL,
+ "Host tag settings.\n")
+
+cfg_section(local_CFG_SECTION, "local", root_CFG_SECTION, 0, vsn(2, 2, 117), 0, NULL,
+ "LVM settings that are specific to the local host.\n")
+
+cfg(config_checks_CFG, "checks", config_CFG_SECTION, 0, CFG_TYPE_BOOL, 1, vsn(2, 2, 99), NULL, 0, NULL,
+ "If enabled, any LVM configuration mismatch is reported.\n"
+ "This implies checking that the configuration key is understood by\n"
+ "LVM and that the value of the key is the proper type. If disabled,\n"
+ "any configuration mismatch is ignored and the default value is used\n"
+ "without any warning (a message about the configuration key not being\n"
+ "found is issued in verbose mode only).\n")
+
+cfg(config_abort_on_errors_CFG, "abort_on_errors", config_CFG_SECTION, 0, CFG_TYPE_BOOL, 0, vsn(2,2,99), NULL, 0, NULL,
+ "Abort the LVM process if a configuration mismatch is found.\n")
+
+cfg_runtime(config_profile_dir_CFG, "profile_dir", config_CFG_SECTION, 0, CFG_TYPE_STRING, vsn(2, 2, 99), 0, NULL,
+ "Directory where LVM looks for configuration profiles.\n")
+
+cfg(devices_dir_CFG, "dir", devices_CFG_SECTION, CFG_ADVANCED, CFG_TYPE_STRING, DEFAULT_DEV_DIR, vsn(1, 0, 0), NULL, 0, NULL,
+ "Directory in which to create volume group device nodes.\n"
+ "Commands also accept this as a prefix on volume group names.\n")
+
+cfg_array(devices_scan_CFG, "scan", devices_CFG_SECTION, CFG_ADVANCED, CFG_TYPE_STRING, "#S/dev", vsn(1, 0, 0), NULL, 0, NULL,
+ "Directories containing device nodes to use with LVM.\n")
+
+cfg_array(devices_loopfiles_CFG, "loopfiles", devices_CFG_SECTION, CFG_DEFAULT_UNDEFINED | CFG_UNSUPPORTED, CFG_TYPE_STRING, NULL, vsn(1, 2, 0), NULL, 0, NULL, NULL)
+
+cfg(devices_obtain_device_list_from_udev_CFG, "obtain_device_list_from_udev", devices_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_OBTAIN_DEVICE_LIST_FROM_UDEV, vsn(2, 2, 85), NULL, 0, NULL,
+ "Obtain the list of available devices from udev.\n"
+ "This avoids opening or using any inapplicable non-block devices or\n"
+ "subdirectories found in the udev directory. Any device node or\n"
+ "symlink not managed by udev in the udev directory is ignored. This\n"
+ "setting applies only to the udev-managed device directory; other\n"
+ "directories will be scanned fully. LVM needs to be compiled with\n"
+ "udev support for this setting to apply.\n")
+
+cfg(devices_external_device_info_source_CFG, "external_device_info_source", devices_CFG_SECTION, 0, CFG_TYPE_STRING, DEFAULT_EXTERNAL_DEVICE_INFO_SOURCE, vsn(2, 2, 116), NULL, 0, NULL,
+ "Select an external device information source.\n"
+ "Some information may already be available in the system and LVM can\n"
+ "use this information to determine the exact type or use of devices it\n"
+ "processes. Using an existing external device information source can\n"
+ "speed up device processing as LVM does not need to run its own native\n"
+ "routines to acquire this information. For example, this information\n"
+ "is used to drive LVM filtering like MD component detection, multipath\n"
+ "component detection, partition detection and others.\n"
+ "#\n"
+ "Accepted values:\n"
+ " none\n"
+ " No external device information source is used.\n"
+ " udev\n"
+ " Reuse existing udev database records. Applicable only if LVM is\n"
+ " compiled with udev support.\n"
+ "#\n")
+
+cfg_array(devices_preferred_names_CFG, "preferred_names", devices_CFG_SECTION, CFG_ALLOW_EMPTY | CFG_DEFAULT_UNDEFINED , CFG_TYPE_STRING, NULL, vsn(1, 2, 19), NULL, 0, NULL,
+ "Select which path name to display for a block device.\n"
+ "If multiple path names exist for a block device, and LVM needs to\n"
+ "display a name for the device, the path names are matched against\n"
+ "each item in this list of regular expressions. The first match is\n"
+ "used. Try to avoid using undescriptive /dev/dm-N names, if present.\n"
+ "If no preferred name matches, or if preferred_names are not defined,\n"
+ "the following built-in preferences are applied in order until one\n"
+ "produces a preferred name:\n"
+ "Prefer names with path prefixes in the order of:\n"
+ "/dev/mapper, /dev/disk, /dev/dm-*, /dev/block.\n"
+ "Prefer the name with the least number of slashes.\n"
+ "Prefer a name that is a symlink.\n"
+ "Prefer the path with least value in lexicographical order.\n"
+ "#\n"
+ "Example\n"
+ "preferred_names = [ \"^/dev/mpath/\", \"^/dev/mapper/mpath\", \"^/dev/[hs]d\" ]\n"
+ "#\n")
+
+cfg_array(devices_filter_CFG, "filter", devices_CFG_SECTION, CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, "#Sa|.*/|", vsn(1, 0, 0), NULL, 0, NULL,
+ "Limit the block devices that are used by LVM commands.\n"
+ "This is a list of regular expressions used to accept or reject block\n"
+ "device path names. Each regex is delimited by a vertical bar '|'\n"
+ "(or any character) and is preceded by 'a' to accept the path, or\n"
+ "by 'r' to reject the path. The first regex in the list to match the\n"
+ "path is used, producing the 'a' or 'r' result for the device.\n"
+ "When multiple path names exist for a block device, if any path name\n"
+ "matches an 'a' pattern before an 'r' pattern, then the device is\n"
+ "accepted. If all the path names match an 'r' pattern first, then the\n"
+ "device is rejected. Unmatching path names do not affect the accept\n"
+ "or reject decision. If no path names for a device match a pattern,\n"
+ "then the device is accepted. Be careful mixing 'a' and 'r' patterns,\n"
+ "as the combination might produce unexpected results (test changes.)\n"
+ "Run vgscan after changing the filter to regenerate the cache.\n"
+ "See the use_lvmetad comment for a special case regarding filters.\n"
+ "#\n"
+ "Example\n"
+ "Accept every block device:\n"
+ "filter = [ \"a|.*/|\" ]\n"
+ "Reject the cdrom drive:\n"
+ "filter = [ \"r|/dev/cdrom|\" ]\n"
+ "Work with just loopback devices, e.g. for testing:\n"
+ "filter = [ \"a|loop|\", \"r|.*|\" ]\n"
+ "Accept all loop devices and ide drives except hdc:\n"
+ "filter = [ \"a|loop|\", \"r|/dev/hdc|\", \"a|/dev/ide|\", \"r|.*|\" ]\n"
+ "Use anchors to be very specific:\n"
+ "filter = [ \"a|^/dev/hda8$|\", \"r|.*/|\" ]\n"
+ "#\n")
+
+cfg_array(devices_global_filter_CFG, "global_filter", devices_CFG_SECTION, CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, "#Sa|.*/|", vsn(2, 2, 98), NULL, 0, NULL,
+ "Limit the block devices that are used by LVM system components.\n"
+ "Because devices/filter may be overridden from the command line, it is\n"
+ "not suitable for system-wide device filtering, e.g. udev and lvmetad.\n"
+ "Use global_filter to hide devices from these LVM system components.\n"
+ "The syntax is the same as devices/filter. Devices rejected by\n"
+ "global_filter are not opened by LVM.\n")
+
+cfg_runtime(devices_cache_CFG, "cache", devices_CFG_SECTION, 0, CFG_TYPE_STRING, vsn(1, 0, 0), vsn(1, 2, 19),
+ "This has been replaced by the devices/cache_dir setting.\n",
+ "Cache file path.\n")
+
+cfg_runtime(devices_cache_dir_CFG, "cache_dir", devices_CFG_SECTION, 0, CFG_TYPE_STRING, vsn(1, 2, 19), 0, NULL,
+ "Directory in which to store the device cache file.\n"
+ "The results of filtering are cached on disk to avoid rescanning dud\n"
+ "devices (which can take a very long time). By default this cache is\n"
+ "stored in a file named .cache. It is safe to delete this file; the\n"
+ "tools regenerate it. If obtain_device_list_from_udev is enabled, the\n"
+ "list of devices is obtained from udev and any existing .cache file\n"
+ "is removed.\n")
+
+cfg(devices_cache_file_prefix_CFG, "cache_file_prefix", devices_CFG_SECTION, CFG_ALLOW_EMPTY, CFG_TYPE_STRING, DEFAULT_CACHE_FILE_PREFIX, vsn(1, 2, 19), NULL, 0, NULL,
+ "A prefix used before the .cache file name. See devices/cache_dir.\n")
+
+cfg(devices_write_cache_state_CFG, "write_cache_state", devices_CFG_SECTION, 0, CFG_TYPE_BOOL, 1, vsn(1, 0, 0), NULL, 0, NULL,
+ "Enable/disable writing the cache file. See devices/cache_dir.\n")
+
+cfg_array(devices_types_CFG, "types", devices_CFG_SECTION, CFG_DEFAULT_UNDEFINED | CFG_ADVANCED, CFG_TYPE_INT | CFG_TYPE_STRING, NULL, vsn(1, 0, 0), NULL, 0, NULL,
+ "List of additional acceptable block device types.\n"
+ "These are device type names from /proc/devices, followed by the\n"
+ "maximum number of partitions.\n"
+ "#\n"
+ "Example\n"
+ "types = [ \"fd\", 16 ]\n"
+ "#\n")
+
+cfg(devices_sysfs_scan_CFG, "sysfs_scan", devices_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_SYSFS_SCAN, vsn(1, 0, 8), NULL, 0, NULL,
+ "Restrict device scanning to block devices appearing in sysfs.\n"
+ "This is a quick way of filtering out block devices that are not\n"
+ "present on the system. sysfs must be part of the kernel and mounted.\n")
+
+cfg(devices_multipath_component_detection_CFG, "multipath_component_detection", devices_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_MULTIPATH_COMPONENT_DETECTION, vsn(2, 2, 89), NULL, 0, NULL,
+ "Ignore devices that are components of DM multipath devices.\n")
+
+cfg(devices_md_component_detection_CFG, "md_component_detection", devices_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_MD_COMPONENT_DETECTION, vsn(1, 0, 18), NULL, 0, NULL,
+ "Ignore devices that are components of software RAID (md) devices.\n")
+
+cfg(devices_fw_raid_component_detection_CFG, "fw_raid_component_detection", devices_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_FW_RAID_COMPONENT_DETECTION, vsn(2, 2, 112), NULL, 0, NULL,
+ "Ignore devices that are components of firmware RAID devices.\n"
+ "LVM must use an external_device_info_source other than none for this\n"
+ "detection to execute.\n")
+
+cfg(devices_md_chunk_alignment_CFG, "md_chunk_alignment", devices_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_MD_CHUNK_ALIGNMENT, vsn(2, 2, 48), NULL, 0, NULL,
+ "Align PV data blocks with md device's stripe-width.\n"
+ "This applies if a PV is placed directly on an md device.\n")
+
+cfg(devices_default_data_alignment_CFG, "default_data_alignment", devices_CFG_SECTION, CFG_DEFAULT_COMMENTED, CFG_TYPE_INT, DEFAULT_DATA_ALIGNMENT, vsn(2, 2, 75), NULL, 0, NULL,
+ "Default alignment of the start of a PV data area in MB.\n"
+ "If set to 0, a value of 64KiB will be used.\n"
+ "Set to 1 for 1MiB, 2 for 2MiB, etc.\n")
+
+cfg(devices_data_alignment_detection_CFG, "data_alignment_detection", devices_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_DATA_ALIGNMENT_DETECTION, vsn(2, 2, 51), NULL, 0, NULL,
+ "Detect PV data alignment based on sysfs device information.\n"
+ "The start of a PV data area will be a multiple of minimum_io_size or\n"
+ "optimal_io_size exposed in sysfs. minimum_io_size is the smallest\n"
+ "request the device can perform without incurring a read-modify-write\n"
+ "penalty, e.g. MD chunk size. optimal_io_size is the device's\n"
+ "preferred unit of receiving I/O, e.g. MD stripe width.\n"
+ "minimum_io_size is used if optimal_io_size is undefined (0).\n"
+ "If md_chunk_alignment is enabled, that detects the optimal_io_size.\n"
+ "This setting takes precedence over md_chunk_alignment.\n")
+
+cfg(devices_data_alignment_CFG, "data_alignment", devices_CFG_SECTION, 0, CFG_TYPE_INT, 0, vsn(2, 2, 45), NULL, 0, NULL,
+ "Alignment of the start of a PV data area in KiB.\n"
+ "If a PV is placed directly on an md device and md_chunk_alignment or\n"
+ "data_alignment_detection are enabled, then this setting is ignored.\n"
+ "Otherwise, md_chunk_alignment and data_alignment_detection are\n"
+ "disabled if this is set. Set to 0 to use the default alignment or the\n"
+ "page size, if larger.\n")
+
+cfg(devices_data_alignment_offset_detection_CFG, "data_alignment_offset_detection", devices_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_DATA_ALIGNMENT_OFFSET_DETECTION, vsn(2, 2, 50), NULL, 0, NULL,
+ "Detect PV data alignment offset based on sysfs device information.\n"
+ "The start of a PV aligned data area will be shifted by the\n"
+ "alignment_offset exposed in sysfs. This offset is often 0, but may\n"
+ "be non-zero. Certain 4KiB sector drives that compensate for windows\n"
+ "partitioning will have an alignment_offset of 3584 bytes (sector 7\n"
+ "is the lowest aligned logical block, the 4KiB sectors start at\n"
+ "LBA -1, and consequently sector 63 is aligned on a 4KiB boundary).\n"
+ "pvcreate --dataalignmentoffset will skip this detection.\n")
+
+cfg(devices_ignore_suspended_devices_CFG, "ignore_suspended_devices", devices_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_IGNORE_SUSPENDED_DEVICES, vsn(1, 2, 19), NULL, 0, NULL,
+ "Ignore DM devices that have I/O suspended while scanning devices.\n"
+ "Otherwise, LVM waits for a suspended device to become accessible.\n"
+ "This should only be needed in recovery situations.\n")
+
+cfg(devices_ignore_lvm_mirrors_CFG, "ignore_lvm_mirrors", devices_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_IGNORE_LVM_MIRRORS, vsn(2, 2, 104), NULL, 0, NULL,
+ "Do not scan 'mirror' LVs to avoid possible deadlocks.\n"
+ "This avoids possible deadlocks when using the 'mirror' segment type.\n"
+ "This setting determines whether LVs using the 'mirror' segment type\n"
+ "are scanned for LVM labels. This affects the ability of mirrors to\n"
+ "be used as physical volumes. If this setting is enabled, it is\n"
+ "impossible to create VGs on top of mirror LVs, i.e. to stack VGs on\n"
+ "mirror LVs. If this setting is disabled, allowing mirror LVs to be\n"
+ "scanned, it may cause LVM processes and I/O to the mirror to become\n"
+ "blocked. This is due to the way that the mirror segment type handles\n"
+ "failures. In order for the hang to occur, an LVM command must be run\n"
+ "just after a failure and before the automatic LVM repair process\n"
+ "takes place, or there must be failures in multiple mirrors in the\n"
+ "same VG at the same time with write failures occurring moments before\n"
+ "a scan of the mirror's labels. The 'mirror' scanning problems do not\n"
+ "apply to LVM RAID types like 'raid1' which handle failures in a\n"
+ "different way, making them a better choice for VG stacking.\n")
+
+cfg(devices_disable_after_error_count_CFG, "disable_after_error_count", devices_CFG_SECTION, 0, CFG_TYPE_INT, DEFAULT_DISABLE_AFTER_ERROR_COUNT, vsn(2, 2, 75), NULL, 0, NULL,
+ "Number of I/O errors after which a device is skipped.\n"
+ "During each LVM operation, errors received from each device are\n"
+ "counted. If the counter of a device exceeds the limit set here,\n"
+ "no further I/O is sent to that device for the remainder of the\n"
+ "operation. Setting this to 0 disables the counters altogether.\n")
+
+cfg(devices_require_restorefile_with_uuid_CFG, "require_restorefile_with_uuid", devices_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_REQUIRE_RESTOREFILE_WITH_UUID, vsn(2, 2, 73), NULL, 0, NULL,
+ "Allow use of pvcreate --uuid without requiring --restorefile.\n")
+
+cfg(devices_pv_min_size_CFG, "pv_min_size", devices_CFG_SECTION, 0, CFG_TYPE_INT, DEFAULT_PV_MIN_SIZE_KB, vsn(2, 2, 85), NULL, 0, NULL,
+ "Minimum size in KiB of block devices which can be used as PVs.\n"
+ "In a clustered environment all nodes must use the same value.\n"
+ "Any value smaller than 512KiB is ignored. The previous built-in\n"
+ "value was 512.\n")
+
+cfg(devices_issue_discards_CFG, "issue_discards", devices_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_ISSUE_DISCARDS, vsn(2, 2, 85), NULL, 0, NULL,
+ "Issue discards to PVs that are no longer used by an LV.\n"
+ "Discards are sent to an LV's underlying physical volumes when the LV\n"
+ "is no longer using the physical volumes' space, e.g. lvremove,\n"
+ "lvreduce. Discards inform the storage that a region is no longer\n"
+ "used. Storage that supports discards advertise the protocol-specific\n"
+ "way discards should be issued by the kernel (TRIM, UNMAP, or\n"
+ "WRITE SAME with UNMAP bit set). Not all storage will support or\n"
+ "benefit from discards, but SSDs and thinly provisioned LUNs\n"
+ "generally do. If enabled, discards will only be issued if both the\n"
+ "storage and kernel provide support.\n")
+
+cfg_array(allocation_cling_tag_list_CFG, "cling_tag_list", allocation_CFG_SECTION, CFG_DEFAULT_UNDEFINED, CFG_TYPE_STRING, NULL, vsn(2, 2, 77), NULL, 0, NULL,
+ "Advise LVM which PVs to use when searching for new space.\n"
+ "When searching for free space to extend an LV, the 'cling' allocation\n"
+ "policy will choose space on the same PVs as the last segment of the\n"
+ "existing LV. If there is insufficient space and a list of tags is\n"
+ "defined here, it will check whether any of them are attached to the\n"
+ "PVs concerned and then seek to match those PV tags between existing\n"
+ "extents and new extents.\n"
+ "#\n"
+ "Example\n"
+ "Use the special tag \"@*\" as a wildcard to match any PV tag:\n"
+ "cling_tag_list = [ \"@*\" ]\n"
+ "LVs are mirrored between two sites within a single VG, and\n"
+ "PVs are tagged with either @site1 or @site2 to indicate where\n"
+ "they are situated:\n"
+ "cling_tag_list = [ \"@site1\", \"@site2\" ]\n"
+ "#\n")
+
+cfg(allocation_maximise_cling_CFG, "maximise_cling", allocation_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_MAXIMISE_CLING, vsn(2, 2, 85), NULL, 0, NULL,
+ "Use a previous allocation algorithm.\n"
+ "Changes made in version 2.02.85 extended the reach of the 'cling'\n"
+ "policies to detect more situations where data can be grouped onto\n"
+ "the same disks. This setting can be used to disable the changes\n"
+ "and revert to the previous algorithm.\n")
+
+cfg(allocation_use_blkid_wiping_CFG, "use_blkid_wiping", allocation_CFG_SECTION, 0, CFG_TYPE_BOOL, 1, vsn(2, 2, 105), "@DEFAULT_USE_BLKID_WIPING@", 0, NULL,
+ "Use blkid to detect existing signatures on new PVs and LVs.\n"
+ "The blkid library can detect more signatures than the native LVM\n"
+ "detection code, but may take longer. LVM needs to be compiled with\n"
+ "blkid wiping support for this setting to apply. LVM native detection\n"
+ "code is currently able to recognize: MD device signatures,\n"
+ "swap signature, and LUKS signatures. To see the list of signatures\n"
+ "recognized by blkid, check the output of the 'blkid -k' command.\n")
+
+cfg(allocation_wipe_signatures_when_zeroing_new_lvs_CFG, "wipe_signatures_when_zeroing_new_lvs", allocation_CFG_SECTION, 0, CFG_TYPE_BOOL, 1, vsn(2, 2, 105), NULL, 0, NULL,
+ "Look for and erase any signatures while zeroing a new LV.\n"
+ "The --wipesignatures option overrides this setting.\n"
+ "Zeroing is controlled by the -Z/--zero option, and if not specified,\n"
+ "zeroing is used by default if possible. Zeroing simply overwrites the\n"
+ "first 4KiB of a new LV with zeroes and does no signature detection or\n"
+ "wiping. Signature wiping goes beyond zeroing and detects exact types\n"
+ "and positions of signatures within the whole LV. It provides a\n"
+ "cleaner LV after creation as all known signatures are wiped. The LV\n"
+ "is not claimed incorrectly by other tools because of old signatures\n"
+ "from previous use. The number of signatures that LVM can detect\n"
+ "depends on the detection code that is selected (see\n"
+ "use_blkid_wiping.) Wiping each detected signature must be confirmed.\n"
+ "When this setting is disabled, signatures on new LVs are not detected\n"
+ "or erased unless the --wipesignatures option is used directly.\n")
+
+cfg(allocation_mirror_logs_require_separate_pvs_CFG, "mirror_logs_require_separate_pvs", allocation_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_MIRROR_LOGS_REQUIRE_SEPARATE_PVS, vsn(2, 2, 85), NULL, 0, NULL,
+ "Mirror logs and images will always use different PVs.\n"
+ "The default setting changed in version 2.02.85.\n")
+
+cfg(allocation_cache_pool_metadata_require_separate_pvs_CFG, "cache_pool_metadata_require_separate_pvs", allocation_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_CACHE_POOL_METADATA_REQUIRE_SEPARATE_PVS, vsn(2, 2, 106), NULL, 0, NULL,
+ "Cache pool metadata and data will always use different PVs.\n")
+
+cfg(allocation_cache_pool_cachemode_CFG, "cache_pool_cachemode", allocation_CFG_SECTION, CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, DEFAULT_CACHE_MODE, vsn(2, 2, 113), NULL, vsn(2, 2, 128),
+ "This has been replaced by the allocation/cache_mode setting.\n",
+ "Cache mode.\n")
+
+cfg(allocation_cache_mode_CFG, "cache_mode", allocation_CFG_SECTION, CFG_PROFILABLE | CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, DEFAULT_CACHE_MODE, vsn(2, 2, 128), NULL, 0, NULL,
+ "The default cache mode used for new cache.\n"
+ "#\n"
+ "Accepted values:\n"
+ " writethrough\n"
+ " Data blocks are immediately written from the cache to disk.\n"
+ " writeback\n"
+ " Data blocks are written from the cache back to disk after some\n"
+ " delay to improve performance.\n"
+ "#\n"
+ "This setting replaces allocation/cache_pool_cachemode.\n")
+
+cfg(allocation_cache_policy_CFG, "cache_policy", allocation_CFG_SECTION, CFG_PROFILABLE | CFG_DEFAULT_UNDEFINED, CFG_TYPE_STRING, 0, vsn(2, 2, 128), NULL, 0, NULL,
+ "The default cache policy used for new cache volumes.\n"
+ "Since kernel 4.2 the default policy is smq (Stochastic multiqueue),\n"
+ "otherwise the older mq (Multiqueue) policy is selected.\n")
+
+cfg_section(allocation_cache_settings_CFG_SECTION, "cache_settings", allocation_CFG_SECTION, CFG_PROFILABLE | CFG_DEFAULT_COMMENTED, vsn(2, 2, 128), 0, NULL,
+ "Individual settings for policies.\n"
+ "See the help for individual policies for more info.\n")
+
+cfg_section(policy_settings_CFG_SUBSECTION, "policy_settings", allocation_cache_settings_CFG_SECTION, CFG_NAME_VARIABLE | CFG_SECTION_NO_CHECK | CFG_PROFILABLE | CFG_DEFAULT_COMMENTED, vsn(2, 2, 128), 0, NULL,
+ "Replace this subsection name with a policy name.\n"
+ "Multiple subsections for different policies can be created.\n")
+
+cfg_runtime(allocation_cache_pool_chunk_size_CFG, "cache_pool_chunk_size", allocation_CFG_SECTION, CFG_PROFILABLE | CFG_DEFAULT_UNDEFINED, CFG_TYPE_INT, vsn(2, 2, 106), 0, NULL,
+ "The minimal chunk size in KiB for cache pool volumes.\n"
+ "Using a chunk_size that is too large can result in wasteful use of\n"
+ "the cache, where small reads and writes can cause large sections of\n"
+ "an LV to be mapped into the cache. However, choosing a chunk_size\n"
+ "that is too small can result in more overhead trying to manage the\n"
+ "numerous chunks that become mapped into the cache. The former is\n"
+ "more of a problem than the latter in most cases, so the default is\n"
+ "on the smaller end of the spectrum. Supported values range from\n"
+ "32KiB to 1GiB in multiples of 32.\n")
+
+cfg(allocation_thin_pool_metadata_require_separate_pvs_CFG, "thin_pool_metadata_require_separate_pvs", allocation_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_THIN_POOL_METADATA_REQUIRE_SEPARATE_PVS, vsn(2, 2, 89), NULL, 0, NULL,
+ "Thin pool metadata and data will always use different PVs.\n")
+
+cfg(allocation_thin_pool_zero_CFG, "thin_pool_zero", allocation_CFG_SECTION, CFG_PROFILABLE | CFG_PROFILABLE_METADATA | CFG_DEFAULT_COMMENTED, CFG_TYPE_BOOL, DEFAULT_THIN_POOL_ZERO, vsn(2, 2, 99), NULL, 0, NULL,
+ "Thin pool data chunks are zeroed before they are first used.\n"
+ "Zeroing with a larger thin pool chunk size reduces performance.\n")
+
+cfg(allocation_thin_pool_discards_CFG, "thin_pool_discards", allocation_CFG_SECTION, CFG_PROFILABLE | CFG_PROFILABLE_METADATA | CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, DEFAULT_THIN_POOL_DISCARDS, vsn(2, 2, 99), NULL, 0, NULL,
+ "The discards behaviour of thin pool volumes.\n"
+ "#\n"
+ "Accepted values:\n"
+ " ignore\n"
+ " nopassdown\n"
+ " passdown\n"
+ "#\n")
+
+cfg(allocation_thin_pool_chunk_size_policy_CFG, "thin_pool_chunk_size_policy", allocation_CFG_SECTION, CFG_PROFILABLE | CFG_PROFILABLE_METADATA | CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, DEFAULT_THIN_POOL_CHUNK_SIZE_POLICY, vsn(2, 2, 101), NULL, 0, NULL,
+ "The chunk size calculation policy for thin pool volumes.\n"
+ "#\n"
+ "Accepted values:\n"
+ " generic\n"
+ " If thin_pool_chunk_size is defined, use it. Otherwise, calculate\n"
+ " the chunk size based on estimation and device hints exposed in\n"
+ " sysfs - the minimum_io_size. The chunk size is always at least\n"
+ " 64KiB.\n"
+ " performance\n"
+ " If thin_pool_chunk_size is defined, use it. Otherwise, calculate\n"
+ " the chunk size for performance based on device hints exposed in\n"
+ " sysfs - the optimal_io_size. The chunk size is always at least\n"
+ " 512KiB.\n"
+ "#\n")
+
+cfg_runtime(allocation_thin_pool_chunk_size_CFG, "thin_pool_chunk_size", allocation_CFG_SECTION, CFG_PROFILABLE | CFG_PROFILABLE_METADATA | CFG_DEFAULT_UNDEFINED, CFG_TYPE_INT, vsn(2, 2, 99), 0, NULL,
+ "The minimal chunk size in KiB for thin pool volumes.\n"
+ "Larger chunk sizes may improve performance for plain thin volumes,\n"
+ "however using them for snapshot volumes is less efficient, as it\n"
+ "consumes more space and takes extra time for copying. When unset,\n"
+ "lvm tries to estimate chunk size starting from 64KiB. Supported\n"
+ "values are in the range 64KiB to 1GiB.\n")
+
+cfg(allocation_physical_extent_size_CFG, "physical_extent_size", allocation_CFG_SECTION, CFG_DEFAULT_COMMENTED, CFG_TYPE_INT, DEFAULT_EXTENT_SIZE, vsn(2, 2, 112), NULL, 0, NULL,
+ "Default physical extent size in KiB to use for new VGs.\n")
+
+cfg(log_verbose_CFG, "verbose", log_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_VERBOSE, vsn(1, 0, 0), NULL, 0, NULL,
+ "Controls the messages sent to stdout or stderr.\n")
+
+cfg(log_silent_CFG, "silent", log_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_SILENT, vsn(2, 2, 98), NULL, 0, NULL,
+ "Suppress all non-essential messages from stdout.\n"
+ "This has the same effect as -qq. When enabled, the following commands\n"
+ "still produce output: dumpconfig, lvdisplay, lvmdiskscan, lvs, pvck,\n"
+ "pvdisplay, pvs, version, vgcfgrestore -l, vgdisplay, vgs.\n"
+ "Non-essential messages are shifted from log level 4 to log level 5\n"
+ "for syslog and lvm2_log_fn purposes.\n"
+ "Any 'yes' or 'no' questions not overridden by other arguments are\n"
+ "suppressed and default to 'no'.\n")
+
+cfg(log_syslog_CFG, "syslog", log_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_SYSLOG, vsn(1, 0, 0), NULL, 0, NULL,
+ "Send log messages through syslog.\n")
+
+cfg(log_file_CFG, "file", log_CFG_SECTION, CFG_DEFAULT_UNDEFINED, CFG_TYPE_STRING, NULL, vsn(1, 0, 0), NULL, 0, NULL,
+ "Write error and debug log messages to a file specified here.\n")
+
+cfg(log_overwrite_CFG, "overwrite", log_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_OVERWRITE, vsn(1, 0, 0), NULL, 0, NULL,
+ "Overwrite the log file each time the program is run.\n")
+
+cfg(log_level_CFG, "level", log_CFG_SECTION, 0, CFG_TYPE_INT, DEFAULT_LOGLEVEL, vsn(1, 0, 0), NULL, 0, NULL,
+ "The level of log messages that are sent to the log file or syslog.\n"
+ "There are 6 syslog-like log levels currently in use: 2 to 7 inclusive.\n"
+ "7 is the most verbose (LOG_DEBUG).\n")
+
+cfg(log_indent_CFG, "indent", log_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_INDENT, vsn(1, 0, 0), NULL, 0, NULL,
+ "Indent messages according to their severity.\n")
+
+cfg(log_command_names_CFG, "command_names", log_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_CMD_NAME, vsn(1, 0, 0), NULL, 0, NULL,
+ "Display the command name on each line of output.\n")
+
+cfg(log_prefix_CFG, "prefix", log_CFG_SECTION, CFG_ALLOW_EMPTY, CFG_TYPE_STRING, DEFAULT_MSG_PREFIX, vsn(1, 0, 0), NULL, 0, NULL,
+ "A prefix to use before the log message text.\n"
+ "(After the command name, if selected).\n"
+ "Two spaces allows you to see/grep the severity of each message.\n"
+ "To make the messages look similar to the original LVM tools use:\n"
+ "indent = 0, command_names = 1, prefix = \" -- \"\n")
+
+cfg(log_activation_CFG, "activation", log_CFG_SECTION, 0, CFG_TYPE_BOOL, 0, vsn(1, 0, 0), NULL, 0, NULL,
+ "Log messages during activation.\n"
+ "Don't use this in low memory situations (can deadlock).\n")
+
+cfg(log_activate_file_CFG, "activate_file", log_CFG_SECTION, CFG_DEFAULT_UNDEFINED | CFG_UNSUPPORTED, CFG_TYPE_STRING, NULL, vsn(1, 0, 0), NULL, 0, NULL, NULL)
+
+cfg_array(log_debug_classes_CFG, "debug_classes", log_CFG_SECTION, CFG_ALLOW_EMPTY, CFG_TYPE_STRING, "#Smemory#Sdevices#Sactivation#Sallocation#Slvmetad#Smetadata#Scache#Slocking#Slvmpolld", vsn(2, 2, 99), NULL, 0, NULL,
+ "Select log messages by class.\n"
+ "Some debugging messages are assigned to a class and only appear in\n"
+ "debug output if the class is listed here. Classes currently\n"
+ "available: memory, devices, activation, allocation, lvmetad,\n"
+ "metadata, cache, locking, lvmpolld. Use \"all\" to see everything.\n")
+
+cfg(backup_backup_CFG, "backup", backup_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_BACKUP_ENABLED, vsn(1, 0, 0), NULL, 0, NULL,
+ "Maintain a backup of the current metadata configuration.\n"
+ "Think very hard before turning this off!\n")
+
+cfg_runtime(backup_backup_dir_CFG, "backup_dir", backup_CFG_SECTION, 0, CFG_TYPE_STRING, vsn(1, 0, 0), 0, NULL,
+ "Location of the metadata backup files.\n"
+ "Remember to back up this directory regularly!\n")
+
+cfg(backup_archive_CFG, "archive", backup_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_ARCHIVE_ENABLED, vsn(1, 0, 0), NULL, 0, NULL,
+ "Maintain an archive of old metadata configurations.\n"
+ "Think very hard before turning this off.\n")
+
+cfg_runtime(backup_archive_dir_CFG, "archive_dir", backup_CFG_SECTION, 0, CFG_TYPE_STRING, vsn(1, 0, 0), 0, NULL,
+ "Location of the metadata archive files.\n"
+ "Remember to back up this directory regularly!\n")
+
+cfg(backup_retain_min_CFG, "retain_min", backup_CFG_SECTION, 0, CFG_TYPE_INT, DEFAULT_ARCHIVE_NUMBER, vsn(1, 0, 0), NULL, 0, NULL,
+ "Minimum number of archives to keep.\n")
+
+cfg(backup_retain_days_CFG, "retain_days", backup_CFG_SECTION, 0, CFG_TYPE_INT, DEFAULT_ARCHIVE_DAYS, vsn(1, 0, 0), NULL, 0, NULL,
+ "Minimum number of days to keep archive files.\n")
+
+cfg(shell_history_size_CFG, "history_size", shell_CFG_SECTION, 0, CFG_TYPE_INT, DEFAULT_MAX_HISTORY, vsn(1, 0, 0), NULL, 0, NULL,
+ "Number of lines of history to store in ~/.lvm_history.\n")
+
+cfg(global_umask_CFG, "umask", global_CFG_SECTION, CFG_FORMAT_INT_OCTAL, CFG_TYPE_INT, DEFAULT_UMASK, vsn(1, 0, 0), NULL, 0, NULL,
+ "The file creation mask for any files and directories created.\n"
+ "Interpreted as octal if the first digit is zero.\n")
+
+cfg(global_test_CFG, "test", global_CFG_SECTION, 0, CFG_TYPE_BOOL, 0, vsn(1, 0, 0), NULL, 0, NULL,
+ "No on-disk metadata changes will be made in test mode.\n"
+ "Equivalent to having the -t option on every command.\n")
+
+cfg(global_units_CFG, "units", global_CFG_SECTION, CFG_PROFILABLE, CFG_TYPE_STRING, DEFAULT_UNITS, vsn(1, 0, 0), NULL, 0, NULL,
+ "Default value for --units argument.\n")
+
+cfg(global_si_unit_consistency_CFG, "si_unit_consistency", global_CFG_SECTION, CFG_PROFILABLE, CFG_TYPE_BOOL, DEFAULT_SI_UNIT_CONSISTENCY, vsn(2, 2, 54), NULL, 0, NULL,
+ "Distinguish between powers of 1024 and 1000 bytes.\n"
+ "The LVM commands distinguish between powers of 1024 bytes,\n"
+ "e.g. KiB, MiB, GiB, and powers of 1000 bytes, e.g. KB, MB, GB.\n"
+ "If scripts depend on the old behaviour, disable this setting\n"
+ "temporarily until they are updated.\n")
+
+cfg(global_suffix_CFG, "suffix", global_CFG_SECTION, CFG_PROFILABLE, CFG_TYPE_BOOL, DEFAULT_SUFFIX, vsn(1, 0, 0), NULL, 0, NULL,
+ "Display unit suffix for sizes.\n"
+ "This setting has no effect if the units are in human-readable form\n"
+ "(global/units = \"h\") in which case the suffix is always displayed.\n")
+
+cfg(global_activation_CFG, "activation", global_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_ACTIVATION, vsn(1, 0, 0), NULL, 0, NULL,
+ "Enable/disable communication with the kernel device-mapper.\n"
+ "Disable to use the tools to manipulate LVM metadata without\n"
+ "activating any logical volumes. If the device-mapper driver\n"
+ "is not present in the kernel, disabling this should suppress\n"
+ "the error messages.\n")
+
+cfg(global_fallback_to_lvm1_CFG, "fallback_to_lvm1", global_CFG_SECTION, CFG_DEFAULT_COMMENTED, CFG_TYPE_BOOL, DEFAULT_FALLBACK_TO_LVM1, vsn(1, 0, 18), "@DEFAULT_FALLBACK_TO_LVM1@", 0, NULL,
+ "Try running LVM1 tools if LVM cannot communicate with DM.\n"
+ "This option only applies to 2.4 kernels and is provided to help\n"
+ "switch between device-mapper kernels and LVM1 kernels. The LVM1\n"
+ "tools need to be installed with .lvm1 suffices, e.g. vgscan.lvm1.\n"
+ "They will stop working once the lvm2 on-disk metadata format is used.\n")
+
+cfg(global_format_CFG, "format", global_CFG_SECTION, CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, DEFAULT_FORMAT, vsn(1, 0, 0), NULL, 0, NULL,
+ "The default metadata format that commands should use.\n"
+ "The -M 1|2 option overrides this setting.\n"
+ "#\n"
+ "Accepted values:\n"
+ " lvm1\n"
+ " lvm2\n"
+ "#\n")
+
+cfg_array(global_format_libraries_CFG, "format_libraries", global_CFG_SECTION, CFG_DEFAULT_UNDEFINED, CFG_TYPE_STRING, NULL, vsn(1, 0, 0), NULL, 0, NULL,
+ "Shared libraries that process different metadata formats.\n"
+ "If support for LVM1 metadata was compiled as a shared library use\n"
+ "format_libraries = \"liblvm2format1.so\"\n")
+
+cfg_array(global_segment_libraries_CFG, "segment_libraries", global_CFG_SECTION, CFG_DEFAULT_UNDEFINED, CFG_TYPE_STRING, NULL, vsn(1, 0, 18), NULL, 0, NULL, NULL)
+
+cfg(global_proc_CFG, "proc", global_CFG_SECTION, CFG_ADVANCED, CFG_TYPE_STRING, DEFAULT_PROC_DIR, vsn(1, 0, 0), NULL, 0, NULL,
+ "Location of proc filesystem.\n")
+
+cfg(global_etc_CFG, "etc", global_CFG_SECTION, 0, CFG_TYPE_STRING, DEFAULT_ETC_DIR, vsn(2, 2, 117), "@CONFDIR@", 0, NULL,
+ "Location of /etc system configuration directory.\n")
+
+cfg(global_locking_type_CFG, "locking_type", global_CFG_SECTION, 0, CFG_TYPE_INT, 1, vsn(1, 0, 0), NULL, 0, NULL,
+ "Type of locking to use.\n"
+ "#\n"
+ "Accepted values:\n"
+ " 0\n"
+ " Turns off locking. Warning: this risks metadata corruption if\n"
+ " commands run concurrently.\n"
+ " 1\n"
+ " LVM uses local file-based locking, the standard mode.\n"
+ " 2\n"
+ " LVM uses the external shared library locking_library.\n"
+ " 3\n"
+ " LVM uses built-in clustered locking with clvmd.\n"
+ " This is incompatible with lvmetad. If use_lvmetad is enabled,\n"
+ " LVM prints a warning and disables lvmetad use.\n"
+ " 4\n"
+ " LVM uses read-only locking which forbids any operations that\n"
+ " might change metadata.\n"
+ " 5\n"
+ " Offers dummy locking for tools that do not need any locks.\n"
+ " You should not need to set this directly; the tools will select\n"
+ " when to use it instead of the configured locking_type.\n"
+ " Do not use lvmetad or the kernel device-mapper driver with this\n"
+ " locking type. It is used by the --readonly option that offers\n"
+ " read-only access to Volume Group metadata that cannot be locked\n"
+ " safely because it belongs to an inaccessible domain and might be\n"
+ " in use, for example a virtual machine image or a disk that is\n"
+ " shared by a clustered machine.\n"
+ "#\n")
+
+cfg(global_wait_for_locks_CFG, "wait_for_locks", global_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_WAIT_FOR_LOCKS, vsn(2, 2, 50), NULL, 0, NULL,
+ "When disabled, fail if a lock request would block.\n")
+
+cfg(global_fallback_to_clustered_locking_CFG, "fallback_to_clustered_locking", global_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_FALLBACK_TO_CLUSTERED_LOCKING, vsn(2, 2, 42), NULL, 0, NULL,
+ "Attempt to use built-in cluster locking if locking_type 2 fails.\n"
+ "If using external locking (type 2) and initialisation fails, with\n"
+ "this enabled, an attempt will be made to use the built-in clustered\n"
+ "locking. Disable this if using a customised locking_library.\n")
+
+cfg(global_fallback_to_local_locking_CFG, "fallback_to_local_locking", global_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_FALLBACK_TO_LOCAL_LOCKING, vsn(2, 2, 42), NULL, 0, NULL,
+ "Use locking_type 1 (local) if locking_type 2 or 3 fail.\n"
+ "If an attempt to initialise type 2 or type 3 locking failed, perhaps\n"
+ "because cluster components such as clvmd are not running, with this\n"
+ "enabled, an attempt will be made to use local file-based locking\n"
+ "(type 1). If this succeeds, only commands against local VGs will\n"
+ "proceed. VGs marked as clustered will be ignored.\n")
+
+cfg(global_locking_dir_CFG, "locking_dir", global_CFG_SECTION, 0, CFG_TYPE_STRING, DEFAULT_LOCK_DIR, vsn(1, 0, 0), "@DEFAULT_LOCK_DIR@", 0, NULL,
+ "Directory to use for LVM command file locks.\n"
+ "Local non-LV directory that holds file-based locks while commands are\n"
+ "in progress. A directory like /tmp that may get wiped on reboot is OK.\n")
+
+cfg(global_prioritise_write_locks_CFG, "prioritise_write_locks", global_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_PRIORITISE_WRITE_LOCKS, vsn(2, 2, 52), NULL, 0, NULL,
+ "Allow quicker VG write access during high volume read access.\n"
+ "When there are competing read-only and read-write access requests for\n"
+ "a volume group's metadata, instead of always granting the read-only\n"
+ "requests immediately, delay them to allow the read-write requests to\n"
+ "be serviced. Without this setting, write access may be stalled by a\n"
+ "high volume of read-only requests. This option only affects\n"
+ "locking_type 1 viz. local file-based locking.\n")
+
+cfg(global_library_dir_CFG, "library_dir", global_CFG_SECTION, CFG_DEFAULT_UNDEFINED, CFG_TYPE_STRING, NULL, vsn(1, 0, 0), NULL, 0, NULL,
+ "Search this directory first for shared libraries.\n")
+
+cfg(global_locking_library_CFG, "locking_library", global_CFG_SECTION, CFG_ALLOW_EMPTY | CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, DEFAULT_LOCKING_LIB, vsn(1, 0, 0), NULL, 0, NULL,
+ "The external locking library to use for locking_type 2.\n")
+
+cfg(global_abort_on_internal_errors_CFG, "abort_on_internal_errors", global_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_ABORT_ON_INTERNAL_ERRORS, vsn(2, 2, 57), NULL, 0, NULL,
+ "Abort a command that encounters an internal error.\n"
+ "Treat any internal errors as fatal errors, aborting the process that\n"
+ "encountered the internal error. Please only enable for debugging.\n")
+
+cfg(global_detect_internal_vg_cache_corruption_CFG, "detect_internal_vg_cache_corruption", global_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_DETECT_INTERNAL_VG_CACHE_CORRUPTION, vsn(2, 2, 96), NULL, 0, NULL,
+ "Internal verification of VG structures.\n"
+ "Check if CRC matches when a parsed VG is used multiple times. This\n"
+ "is useful to catch unexpected changes to cached VG structures.\n"
+ "Please only enable for debugging.\n")
+
+cfg(global_metadata_read_only_CFG, "metadata_read_only", global_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_METADATA_READ_ONLY, vsn(2, 2, 75), NULL, 0, NULL,
+ "No operations that change on-disk metadata are permitted.\n"
+ "Additionally, read-only commands that encounter metadata in need of\n"
+ "repair will still be allowed to proceed exactly as if the repair had\n"
+ "been performed (except for the unchanged vg_seqno). Inappropriate\n"
+ "use could mess up your system, so seek advice first!\n")
+
+cfg(global_mirror_segtype_default_CFG, "mirror_segtype_default", global_CFG_SECTION, 0, CFG_TYPE_STRING, DEFAULT_MIRROR_SEGTYPE, vsn(2, 2, 87), "@DEFAULT_MIRROR_SEGTYPE@", 0, NULL,
+ "The segment type used by the short mirroring option -m.\n"
+ "The --type mirror|raid1 option overrides this setting.\n"
+ "#\n"
+ "Accepted values:\n"
+ " mirror\n"
+ " The original RAID1 implementation from LVM/DM. It is\n"
+ " characterized by a flexible log solution (core, disk, mirrored),\n"
+ " and by the necessity to block I/O while handling a failure.\n"
+ " There is an inherent race in the dmeventd failure handling logic\n"
+ " with snapshots of devices using this type of RAID1 that in the\n"
+ " worst case could cause a deadlock. (Also see\n"
+ " devices/ignore_lvm_mirrors.)\n"
+ " raid1\n"
+ " This is a newer RAID1 implementation using the MD RAID1\n"
+ " personality through device-mapper. It is characterized by a\n"
+ " lack of log options. (A log is always allocated for every\n"
+ " device and they are placed on the same device as the image,\n"
+ " so no separate devices are required.) This mirror\n"
+ " implementation does not require I/O to be blocked while\n"
+ " handling a failure. This mirror implementation is not\n"
+ " cluster-aware and cannot be used in a shared (active/active)\n"
+ " fashion in a cluster.\n"
+ "#\n")
+
+cfg(global_raid10_segtype_default_CFG, "raid10_segtype_default", global_CFG_SECTION, 0, CFG_TYPE_STRING, DEFAULT_RAID10_SEGTYPE, vsn(2, 2, 99), "@DEFAULT_RAID10_SEGTYPE@", 0, NULL,
+ "The segment type used by the -i -m combination.\n"
+ "The --type raid10|mirror option overrides this setting.\n"
+ "The --stripes/-i and --mirrors/-m options can both be specified\n"
+ "during the creation of a logical volume to use both striping and\n"
+ "mirroring for the LV. There are two different implementations.\n"
+ "#\n"
+ "Accepted values:\n"
+ " raid10\n"
+ " LVM uses MD's RAID10 personality through DM. This is the\n"
+ " preferred option.\n"
+ " mirror\n"
+ " LVM layers the 'mirror' and 'stripe' segment types. The layering\n"
+ " is done by creating a mirror LV on top of striped sub-LVs,\n"
+ " effectively creating a RAID 0+1 array. The layering is suboptimal\n"
+ " in terms of providing redundancy and performance.\n"
+ "#\n")
+
+cfg(global_sparse_segtype_default_CFG, "sparse_segtype_default", global_CFG_SECTION, 0, CFG_TYPE_STRING, DEFAULT_SPARSE_SEGTYPE, vsn(2, 2, 112), "@DEFAULT_SPARSE_SEGTYPE@", 0, NULL,
+ "The segment type used by the -V -L combination.\n"
+ "The --type snapshot|thin option overrides this setting.\n"
+ "The combination of -V and -L options creates a sparse LV. There are\n"
+ "two different implementations.\n"
+ "#\n"
+ "Accepted values:\n"
+ " snapshot\n"
+ " The original snapshot implementation from LVM/DM. It uses an old\n"
+ " snapshot that mixes data and metadata within a single COW\n"
+ " storage volume and performs poorly when the size of stored data\n"
+ " passes hundreds of MB.\n"
+ " thin\n"
+ " A newer implementation that uses thin provisioning. It has a\n"
+ " bigger minimal chunk size (64KiB) and uses a separate volume for\n"
+ " metadata. It has better performance, especially when more data\n"
+ " is used. It also supports full snapshots.\n"
+ "#\n")
+
+cfg(global_lvdisplay_shows_full_device_path_CFG, "lvdisplay_shows_full_device_path", global_CFG_SECTION, CFG_PROFILABLE | CFG_DEFAULT_COMMENTED, CFG_TYPE_BOOL, DEFAULT_LVDISPLAY_SHOWS_FULL_DEVICE_PATH, vsn(2, 2, 89), NULL, 0, NULL,
+ "Enable this to reinstate the previous lvdisplay name format.\n"
+ "The default format for displaying LV names in lvdisplay was changed\n"
+ "in version 2.02.89 to show the LV name and path separately.\n"
+ "Previously this was always shown as /dev/vgname/lvname even when that\n"
+ "was never a valid path in the /dev filesystem.\n")
+
+cfg(global_use_lvmetad_CFG, "use_lvmetad", global_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_USE_LVMETAD, vsn(2, 2, 93), "@DEFAULT_USE_LVMETAD@", 0, NULL,
+ "Use lvmetad to cache metadata and reduce disk scanning.\n"
+ "When enabled (and running), lvmetad provides LVM commands with VG\n"
+ "metadata and PV state. LVM commands then avoid reading this\n"
+ "information from disks which can be slow. When disabled (or not\n"
+ "running), LVM commands fall back to scanning disks to obtain VG\n"
+ "metadata. lvmetad is kept updated via udev rules which must be set\n"
+ "up for LVM to work correctly. (The udev rules should be installed\n"
+ "by default.) Without a proper udev setup, changes in the system's\n"
+ "block device configuration will be unknown to LVM, and ignored\n"
+ "until a manual 'pvscan --cache' is run. If lvmetad was running\n"
+ "while use_lvmetad was disabled, it must be stopped, use_lvmetad\n"
+ "enabled, and then started. When using lvmetad, LV activation is\n"
+ "switched to an automatic, event-based mode. In this mode, LVs are\n"
+ "activated based on incoming udev events that inform lvmetad when\n"
+ "PVs appear on the system. When a VG is complete (all PVs present),\n"
+ "it is auto-activated. The auto_activation_volume_list setting\n"
+ "controls which LVs are auto-activated (all by default.)\n"
+ "When lvmetad is updated (automatically by udev events, or directly\n"
+ "by pvscan --cache), devices/filter is ignored and all devices are\n"
+ "scanned by default. lvmetad always keeps unfiltered information\n"
+ "which is provided to LVM commands. Each LVM command then filters\n"
+ "based on devices/filter. This does not apply to other, non-regexp,\n"
+ "filtering settings: component filters such as multipath and MD\n"
+ "are checked during pvscan --cache. To filter a device and prevent\n"
+ "scanning from the LVM system entirely, including lvmetad, use\n"
+ "devices/global_filter.\n")
+
+cfg(global_use_lvmlockd_CFG, "use_lvmlockd", global_CFG_SECTION, 0, CFG_TYPE_BOOL, 0, vsn(2, 2, 124), NULL, 0, NULL,
+ "Use lvmlockd for locking among hosts using LVM on shared storage.\n"
+ "See lvmlockd(8) for more information.\n")
+
+cfg(global_lvmlockd_lock_retries_CFG, "lvmlockd_lock_retries", global_CFG_SECTION, CFG_DEFAULT_COMMENTED, CFG_TYPE_INT, DEFAULT_LVMLOCKD_LOCK_RETRIES, vsn(2, 2, 125), NULL, 0, NULL,
+ "Retry lvmlockd lock requests this many times.\n")
+
+cfg(global_sanlock_lv_extend_CFG, "sanlock_lv_extend", global_CFG_SECTION, CFG_DEFAULT_COMMENTED, CFG_TYPE_INT, DEFAULT_SANLOCK_LV_EXTEND_MB, vsn(2, 2, 124), NULL, 0, NULL,
+ "Size in MiB to extend the internal LV holding sanlock locks.\n"
+ "The internal LV holds locks for each LV in the VG, and after enough\n"
+ "LVs have been created, the internal LV needs to be extended. lvcreate\n"
+ "will automatically extend the internal LV when needed by the amount\n"
+ "specified here. Setting this to 0 disables the automatic extension\n"
+ "and can cause lvcreate to fail.\n")
+
+cfg(global_thin_check_executable_CFG, "thin_check_executable", global_CFG_SECTION, CFG_ALLOW_EMPTY | CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, THIN_CHECK_CMD, vsn(2, 2, 94), "@THIN_CHECK_CMD@", 0, NULL,
+ "The full path to the thin_check command.\n"
+ "LVM uses this command to check that a thin metadata device is in a\n"
+ "usable state. When a thin pool is activated and after it is\n"
+ "deactivated, this command is run. Activation will only proceed if\n"
+ "the command has an exit status of 0. Set to \"\" to skip this check.\n"
+ "(Not recommended.) Also see thin_check_options.\n"
+ "(See package device-mapper-persistent-data or thin-provisioning-tools)\n")
+
+cfg(global_thin_dump_executable_CFG, "thin_dump_executable", global_CFG_SECTION, CFG_ALLOW_EMPTY | CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, THIN_DUMP_CMD, vsn(2, 2, 100), "@THIN_DUMP_CMD@", 0, NULL,
+ "The full path to the thin_dump command.\n"
+ "LVM uses this command to dump thin pool metadata.\n"
+ "(See package device-mapper-persistent-data or thin-provisioning-tools)\n")
+
+cfg(global_thin_repair_executable_CFG, "thin_repair_executable", global_CFG_SECTION, CFG_ALLOW_EMPTY | CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, THIN_REPAIR_CMD, vsn(2, 2, 100), "@THIN_REPAIR_CMD@", 0, NULL,
+ "The full path to the thin_repair command.\n"
+ "LVM uses this command to repair a thin metadata device if it is in\n"
+ "an unusable state. Also see thin_repair_options.\n"
+ "(See package device-mapper-persistent-data or thin-provisioning-tools)\n")
+
+cfg_array(global_thin_check_options_CFG, "thin_check_options", global_CFG_SECTION, CFG_ALLOW_EMPTY | CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, DEFAULT_THIN_CHECK_OPTIONS_CONFIG, vsn(2, 2, 96), NULL, 0, NULL,
+ "List of options passed to the thin_check command.\n"
+ "With thin_check version 2.1 or newer you can add the option\n"
+ "--ignore-non-fatal-errors to let it pass through ignorable errors\n"
+ "and fix them later. With thin_check version 3.2 or newer you should\n"
+ "include the option --clear-needs-check-flag.\n")
+
+cfg_array(global_thin_repair_options_CFG, "thin_repair_options", global_CFG_SECTION, CFG_ALLOW_EMPTY | CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, DEFAULT_THIN_REPAIR_OPTIONS_CONFIG, vsn(2, 2, 100), NULL, 0, NULL,
+ "List of options passed to the thin_repair command.\n")
+
+cfg_array(global_thin_disabled_features_CFG, "thin_disabled_features", global_CFG_SECTION, CFG_ALLOW_EMPTY | CFG_DEFAULT_UNDEFINED, CFG_TYPE_STRING, NULL, vsn(2, 2, 99), NULL, 0, NULL,
+ "Features to not use in the thin driver.\n"
+ "This can be helpful for testing, or to avoid using a feature that is\n"
+ "causing problems. Features include: block_size, discards,\n"
+ "discards_non_power_2, external_origin, metadata_resize,\n"
+ "external_origin_extend, error_if_no_space.\n"
+ "#\n"
+ "Example\n"
+ "thin_disabled_features = [ \"discards\", \"block_size\" ]\n"
+ "#\n")
+
+cfg_array(global_cache_disabled_features_CFG, "cache_disabled_features", global_CFG_SECTION, CFG_ALLOW_EMPTY | CFG_DEFAULT_UNDEFINED, CFG_TYPE_STRING, NULL, vsn(2, 2, 128), NULL, 0, NULL,
+ "Features to not use in the cache driver.\n"
+ "This can be helpful for testing, or to avoid using a feature that is\n"
+ "causing problems. Features include: policy_mq, policy_smq.\n"
+ "#\n"
+ "Example\n"
+ "cache_disabled_features = [ \"policy_smq\" ]\n"
+ "#\n")
+
+cfg(global_cache_check_executable_CFG, "cache_check_executable", global_CFG_SECTION, CFG_ALLOW_EMPTY | CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, CACHE_CHECK_CMD, vsn(2, 2, 108), "@CACHE_CHECK_CMD@", 0, NULL,
+ "The full path to the cache_check command.\n"
+ "LVM uses this command to check that a cache metadata device is in a\n"
+ "usable state. When a cached LV is activated and after it is\n"
+ "deactivated, this command is run. Activation will only proceed if the\n"
+ "command has an exit status of 0. Set to \"\" to skip this check.\n"
+ "(Not recommended.) Also see cache_check_options.\n"
+ "(See package device-mapper-persistent-data or thin-provisioning-tools)\n")
+
+cfg(global_cache_dump_executable_CFG, "cache_dump_executable", global_CFG_SECTION, CFG_ALLOW_EMPTY | CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, CACHE_DUMP_CMD, vsn(2, 2, 108), "@CACHE_DUMP_CMD@", 0, NULL,
+ "The full path to the cache_dump command.\n"
+ "LVM uses this command to dump cache pool metadata.\n"
+ "(See package device-mapper-persistent-data or thin-provisioning-tools)\n")
+
+cfg(global_cache_repair_executable_CFG, "cache_repair_executable", global_CFG_SECTION, CFG_ALLOW_EMPTY | CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, CACHE_REPAIR_CMD, vsn(2, 2, 108), "@CACHE_REPAIR_CMD@", 0, NULL,
+ "The full path to the cache_repair command.\n"
+ "LVM uses this command to repair a cache metadata device if it is in\n"
+ "an unusable state. Also see cache_repair_options.\n"
+ "(See package device-mapper-persistent-data or thin-provisioning-tools)\n")
+
+cfg_array(global_cache_check_options_CFG, "cache_check_options", global_CFG_SECTION, CFG_ALLOW_EMPTY | CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, DEFAULT_CACHE_CHECK_OPTIONS_CONFIG, vsn(2, 2, 108), NULL, 0, NULL,
+ "List of options passed to the cache_check command.\n"
+ "With cache_check version 5.0 or newer you should include the option\n"
+ "--clear-needs-check-flag.\n")
+
+cfg_array(global_cache_repair_options_CFG, "cache_repair_options", global_CFG_SECTION, CFG_ALLOW_EMPTY | CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, DEFAULT_CACHE_REPAIR_OPTIONS_CONFIG, vsn(2, 2, 108), NULL, 0, NULL,
+ "List of options passed to the cache_repair command.\n")
+
+cfg(global_system_id_source_CFG, "system_id_source", global_CFG_SECTION, 0, CFG_TYPE_STRING, DEFAULT_SYSTEM_ID_SOURCE, vsn(2, 2, 117), NULL, 0, NULL,
+ "The method LVM uses to set the local system ID.\n"
+ "Volume Groups can also be given a system ID (by vgcreate, vgchange,\n"
+ "or vgimport.) A VG on shared storage devices is accessible only to\n"
+ "the host with a matching system ID. See 'man lvmsystemid' for\n"
+ "information on limitations and correct usage.\n"
+ "#\n"
+ "Accepted values:\n"
+ " none\n"
+ " The host has no system ID.\n"
+ " lvmlocal\n"
+ " Obtain the system ID from the system_id setting in the 'local'\n"
+ " section of an lvm configuration file, e.g. lvmlocal.conf.\n"
+ " uname\n"
+ " Set the system ID from the hostname (uname) of the system.\n"
+ " System IDs beginning localhost are not permitted.\n"
+ " machineid\n"
+ " Use the contents of the machine-id file to set the system ID.\n"
+ " Some systems create this file at installation time.\n"
+ " See 'man machine-id' and global/etc.\n"
+ " file\n"
+ " Use the contents of another file (system_id_file) to set the\n"
+ " system ID.\n"
+ "#\n")
+
+cfg(global_system_id_file_CFG, "system_id_file", global_CFG_SECTION, CFG_DEFAULT_UNDEFINED, CFG_TYPE_STRING, NULL, vsn(2, 2, 117), NULL, 0, NULL,
+ "The full path to the file containing a system ID.\n"
+ "This is used when system_id_source is set to 'file'.\n"
+ "Comments starting with the character # are ignored.\n")
+
+cfg(activation_checks_CFG, "checks", activation_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_ACTIVATION_CHECKS, vsn(2, 2, 86), NULL, 0, NULL,
+ "Perform internal checks of libdevmapper operations.\n"
+ "Useful for debugging problems with activation. Some of the checks may\n"
+ "be expensive, so it's best to use this only when there seems to be a\n"
+ "problem.\n")
+
+cfg(global_use_lvmpolld_CFG, "use_lvmpolld", global_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_USE_LVMPOLLD, vsn(2, 2, 120), "@DEFAULT_USE_LVMPOLLD@", 0, NULL,
+ "Use lvmpolld to supervise long running LVM commands.\n"
+ "When enabled, control of long running LVM commands is transferred\n"
+ "from the original LVM command to the lvmpolld daemon. This allows\n"
+ "the operation to continue independent of the original LVM command.\n"
+ "After lvmpolld takes over, the LVM command displays the progress\n"
+ "of the ongoing operation. lvmpolld itself runs LVM commands to\n"
+ "manage the progress of ongoing operations. lvmpolld can be used as\n"
+ "a native systemd service, which allows it to be started on demand,\n"
+ "and to use its own control group. When this option is disabled, LVM\n"
+ "commands will supervise long running operations by forking themselves.\n")
+
+cfg(activation_udev_sync_CFG, "udev_sync", activation_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_UDEV_SYNC, vsn(2, 2, 51), NULL, 0, NULL,
+ "Use udev notifications to synchronize udev and LVM.\n"
+	"The --noudevsync option overrides this setting.\n"
+ "When disabled, LVM commands will not wait for notifications from\n"
+ "udev, but continue irrespective of any possible udev processing in\n"
+ "the background. Only use this if udev is not running or has rules\n"
+ "that ignore the devices LVM creates. If enabled when udev is not\n"
+ "running, and LVM processes are waiting for udev, run the command\n"
+ "'dmsetup udevcomplete_all' to wake them up.\n")
+
+cfg(activation_udev_rules_CFG, "udev_rules", activation_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_UDEV_RULES, vsn(2, 2, 57), NULL, 0, NULL,
+ "Use udev rules to manage LV device nodes and symlinks.\n"
+ "When disabled, LVM will manage the device nodes and symlinks for\n"
+ "active LVs itself. Manual intervention may be required if this\n"
+ "setting is changed while LVs are active.\n")
+
+cfg(activation_verify_udev_operations_CFG, "verify_udev_operations", activation_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_VERIFY_UDEV_OPERATIONS, vsn(2, 2, 86), NULL, 0, NULL,
+ "Use extra checks in LVM to verify udev operations.\n"
+ "This enables additional checks (and if necessary, repairs) on entries\n"
+ "in the device directory after udev has completed processing its\n"
+ "events. Useful for diagnosing problems with LVM/udev interactions.\n")
+
+cfg(activation_retry_deactivation_CFG, "retry_deactivation", activation_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_RETRY_DEACTIVATION, vsn(2, 2, 89), NULL, 0, NULL,
+ "Retry failed LV deactivation.\n"
+ "If LV deactivation fails, LVM will retry for a few seconds before\n"
+ "failing. This may happen because a process run from a quick udev rule\n"
+ "temporarily opened the device.\n")
+
+cfg(activation_missing_stripe_filler_CFG, "missing_stripe_filler", activation_CFG_SECTION, CFG_ADVANCED, CFG_TYPE_STRING, DEFAULT_STRIPE_FILLER, vsn(1, 0, 0), NULL, 0, NULL,
+ "Method to fill missing stripes when activating an incomplete LV.\n"
+ "Using 'error' will make inaccessible parts of the device return I/O\n"
+ "errors on access. You can instead use a device path, in which case,\n"
+ "that device will be used in place of missing stripes. Using anything\n"
+ "other than 'error' with mirrored or snapshotted volumes is likely to\n"
+ "result in data corruption.\n")
+
+cfg(activation_use_linear_target_CFG, "use_linear_target", activation_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_USE_LINEAR_TARGET, vsn(2, 2, 89), NULL, 0, NULL,
+ "Use the linear target to optimize single stripe LVs.\n"
+ "When disabled, the striped target is used. The linear target is an\n"
+ "optimised version of the striped target that only handles a single\n"
+ "stripe.\n")
+
+cfg(activation_reserved_stack_CFG, "reserved_stack", activation_CFG_SECTION, 0, CFG_TYPE_INT, DEFAULT_RESERVED_STACK, vsn(1, 0, 0), NULL, 0, NULL,
+ "Stack size in KiB to reserve for use while devices are suspended.\n"
+	"Insufficient reserve risks I/O deadlock during device suspension.\n")
+
+cfg(activation_reserved_memory_CFG, "reserved_memory", activation_CFG_SECTION, 0, CFG_TYPE_INT, DEFAULT_RESERVED_MEMORY, vsn(1, 0, 0), NULL, 0, NULL,
+ "Memory size in KiB to reserve for use while devices are suspended.\n"
+	"Insufficient reserve risks I/O deadlock during device suspension.\n")
+
+cfg(activation_process_priority_CFG, "process_priority", activation_CFG_SECTION, 0, CFG_TYPE_INT, DEFAULT_PROCESS_PRIORITY, vsn(1, 0, 0), NULL, 0, NULL,
+ "Nice value used while devices are suspended.\n"
+ "Use a high priority so that LVs are suspended\n"
+ "for the shortest possible time.\n")
+
+cfg_array(activation_volume_list_CFG, "volume_list", activation_CFG_SECTION, CFG_ALLOW_EMPTY | CFG_DEFAULT_UNDEFINED, CFG_TYPE_STRING, NULL, vsn(1, 0, 18), NULL, 0, NULL,
+ "Only LVs selected by this list are activated.\n"
+ "If this list is defined, an LV is only activated if it matches an\n"
+ "entry in this list. If this list is undefined, it imposes no limits\n"
+ "on LV activation (all are allowed).\n"
+ "#\n"
+ "Accepted values:\n"
+ " vgname\n"
+ " The VG name is matched exactly and selects all LVs in the VG.\n"
+ " vgname/lvname\n"
+ " The VG name and LV name are matched exactly and selects the LV.\n"
+ " @tag\n"
+ " Selects an LV if the specified tag matches a tag set on the LV\n"
+ " or VG.\n"
+ " @*\n"
+ " Selects an LV if a tag defined on the host is also set on the LV\n"
+ " or VG. See tags/hosttags. If any host tags exist but volume_list\n"
+ " is not defined, a default single-entry list containing '@*' is\n"
+ " assumed.\n"
+ "#\n"
+ "Example\n"
+ "volume_list = [ \"vg1\", \"vg2/lvol1\", \"@tag1\", \"@*\" ]\n"
+ "#\n")
+
+cfg_array(activation_auto_activation_volume_list_CFG, "auto_activation_volume_list", activation_CFG_SECTION, CFG_ALLOW_EMPTY | CFG_DEFAULT_UNDEFINED, CFG_TYPE_STRING, NULL, vsn(2, 2, 97), NULL, 0, NULL,
+ "Only LVs selected by this list are auto-activated.\n"
+ "This list works like volume_list, but it is used only by\n"
+ "auto-activation commands. It does not apply to direct activation\n"
+ "commands. If this list is defined, an LV is only auto-activated\n"
+ "if it matches an entry in this list. If this list is undefined, it\n"
+ "imposes no limits on LV auto-activation (all are allowed.) If this\n"
+ "list is defined and empty, i.e. \"[]\", then no LVs are selected for\n"
+ "auto-activation. An LV that is selected by this list for\n"
+ "auto-activation, must also be selected by volume_list (if defined)\n"
+ "before it is activated. Auto-activation is an activation command that\n"
+ "includes the 'a' argument: --activate ay or -a ay. The 'a' (auto)\n"
+ "argument for auto-activation is meant to be used by activation\n"
+ "commands that are run automatically by the system, as opposed to LVM\n"
+ "commands run directly by a user. A user may also use the 'a' flag\n"
+ "directly to perform auto-activation. Also see pvscan(8) for more\n"
+ "information about auto-activation.\n"
+ "#\n"
+ "Accepted values:\n"
+ " vgname\n"
+ " The VG name is matched exactly and selects all LVs in the VG.\n"
+ " vgname/lvname\n"
+ " The VG name and LV name are matched exactly and selects the LV.\n"
+ " @tag\n"
+ " Selects an LV if the specified tag matches a tag set on the LV\n"
+ " or VG.\n"
+ " @*\n"
+ " Selects an LV if a tag defined on the host is also set on the LV\n"
+ " or VG. See tags/hosttags. If any host tags exist but volume_list\n"
+ " is not defined, a default single-entry list containing '@*' is\n"
+ " assumed.\n"
+ "#\n"
+ "Example\n"
+	"auto_activation_volume_list = [ \"vg1\", \"vg2/lvol1\", \"@tag1\", \"@*\" ]\n"
+ "#\n")
+
+cfg_array(activation_read_only_volume_list_CFG, "read_only_volume_list", activation_CFG_SECTION, CFG_ALLOW_EMPTY | CFG_DEFAULT_UNDEFINED, CFG_TYPE_STRING, NULL, vsn(2, 2, 89), NULL, 0, NULL,
+ "LVs in this list are activated in read-only mode.\n"
+ "If this list is defined, each LV that is to be activated is checked\n"
+ "against this list, and if it matches, it is activated in read-only\n"
+ "mode. This overrides the permission setting stored in the metadata,\n"
+ "e.g. from --permission rw.\n"
+ "#\n"
+ "Accepted values:\n"
+ " vgname\n"
+ " The VG name is matched exactly and selects all LVs in the VG.\n"
+ " vgname/lvname\n"
+ " The VG name and LV name are matched exactly and selects the LV.\n"
+ " @tag\n"
+ " Selects an LV if the specified tag matches a tag set on the LV\n"
+ " or VG.\n"
+ " @*\n"
+ " Selects an LV if a tag defined on the host is also set on the LV\n"
+ " or VG. See tags/hosttags. If any host tags exist but volume_list\n"
+ " is not defined, a default single-entry list containing '@*' is\n"
+ " assumed.\n"
+ "#\n"
+ "Example\n"
+	"read_only_volume_list = [ \"vg1\", \"vg2/lvol1\", \"@tag1\", \"@*\" ]\n"
+ "#\n")
+
+cfg(activation_mirror_region_size_CFG, "mirror_region_size", activation_CFG_SECTION, 0, CFG_TYPE_INT, DEFAULT_RAID_REGION_SIZE, vsn(1, 0, 0), NULL, vsn(2, 2, 99),
+ "This has been replaced by the activation/raid_region_size setting.\n",
+ "Size in KiB of each copy operation when mirroring.\n")
+
+cfg(activation_raid_region_size_CFG, "raid_region_size", activation_CFG_SECTION, 0, CFG_TYPE_INT, DEFAULT_RAID_REGION_SIZE, vsn(2, 2, 99), NULL, 0, NULL,
+ "Size in KiB of each raid or mirror synchronization region.\n"
+ "For raid or mirror segment types, this is the amount of data that is\n"
+ "copied at once when initializing, or moved at once by pvmove.\n")
+
+cfg(activation_error_when_full_CFG, "error_when_full", activation_CFG_SECTION, CFG_DEFAULT_COMMENTED, CFG_TYPE_BOOL, DEFAULT_ERROR_WHEN_FULL, vsn(2, 2, 115), NULL, 0, NULL,
+ "Return errors if a thin pool runs out of space.\n"
+ "The --errorwhenfull option overrides this setting.\n"
+ "When enabled, writes to thin LVs immediately return an error if the\n"
+ "thin pool is out of data space. When disabled, writes to thin LVs\n"
+ "are queued if the thin pool is out of space, and processed when the\n"
+ "thin pool data space is extended. New thin pools are assigned the\n"
+ "behavior defined here.\n")
+
+cfg(activation_readahead_CFG, "readahead", activation_CFG_SECTION, 0, CFG_TYPE_STRING, DEFAULT_READ_AHEAD, vsn(1, 0, 23), NULL, 0, NULL,
+ "Setting to use when there is no readahead setting in metadata.\n"
+ "#\n"
+ "Accepted values:\n"
+ " none\n"
+ " Disable readahead.\n"
+ " auto\n"
+ " Use default value chosen by kernel.\n"
+ "#\n")
+
+cfg(activation_raid_fault_policy_CFG, "raid_fault_policy", activation_CFG_SECTION, 0, CFG_TYPE_STRING, DEFAULT_RAID_FAULT_POLICY, vsn(2, 2, 89), NULL, 0, NULL,
+ "Defines how a device failure in a RAID LV is handled.\n"
+ "This includes LVs that have the following segment types:\n"
+ "raid1, raid4, raid5*, and raid6*.\n"
+ "If a device in the LV fails, the policy determines the steps\n"
+	"performed by dmeventd automatically, and the steps performed by the\n"
+ "manual command lvconvert --repair --use-policies.\n"
+ "Automatic handling requires dmeventd to be monitoring the LV.\n"
+ "#\n"
+ "Accepted values:\n"
+ " warn\n"
+ " Use the system log to warn the user that a device in the RAID LV\n"
+ " has failed. It is left to the user to run lvconvert --repair\n"
+ " manually to remove or replace the failed device. As long as the\n"
+ " number of failed devices does not exceed the redundancy of the LV\n"
+ " (1 device for raid4/5, 2 for raid6), the LV will remain usable.\n"
+ " allocate\n"
+ " Attempt to use any extra physical volumes in the VG as spares and\n"
+ " replace faulty devices.\n"
+ "#\n")
+
+cfg_runtime(activation_mirror_image_fault_policy_CFG, "mirror_image_fault_policy", activation_CFG_SECTION, 0, CFG_TYPE_STRING, vsn(2, 2, 57), 0, NULL,
+ "Defines how a device failure in a 'mirror' LV is handled.\n"
+ "An LV with the 'mirror' segment type is composed of mirror images\n"
+ "(copies) and a mirror log. A disk log ensures that a mirror LV does\n"
+ "not need to be re-synced (all copies made the same) every time a\n"
+ "machine reboots or crashes. If a device in the LV fails, this policy\n"
+	"determines the steps performed by dmeventd automatically, and the steps\n"
+ "performed by the manual command lvconvert --repair --use-policies.\n"
+ "Automatic handling requires dmeventd to be monitoring the LV.\n"
+ "#\n"
+ "Accepted values:\n"
+ " remove\n"
+ " Simply remove the faulty device and run without it. If the log\n"
+ " device fails, the mirror would convert to using an in-memory log.\n"
+ " This means the mirror will not remember its sync status across\n"
+ " crashes/reboots and the entire mirror will be re-synced. If a\n"
+ " mirror image fails, the mirror will convert to a non-mirrored\n"
+ " device if there is only one remaining good copy.\n"
+ " allocate\n"
+ " Remove the faulty device and try to allocate space on a new\n"
+ " device to be a replacement for the failed device. Using this\n"
+ " policy for the log is fast and maintains the ability to remember\n"
+ " sync state through crashes/reboots. Using this policy for a\n"
+ " mirror device is slow, as it requires the mirror to resynchronize\n"
+ " the devices, but it will preserve the mirror characteristic of\n"
+ " the device. This policy acts like 'remove' if no suitable device\n"
+ " and space can be allocated for the replacement.\n"
+ " allocate_anywhere\n"
+ " Not yet implemented. Useful to place the log device temporarily\n"
+ " on the same physical volume as one of the mirror images. This\n"
+ " policy is not recommended for mirror devices since it would break\n"
+ " the redundant nature of the mirror. This policy acts like\n"
+ " 'remove' if no suitable device and space can be allocated for the\n"
+ " replacement.\n"
+ "#\n")
+
+cfg(activation_mirror_log_fault_policy_CFG, "mirror_log_fault_policy", activation_CFG_SECTION, 0, CFG_TYPE_STRING, DEFAULT_MIRROR_LOG_FAULT_POLICY, vsn(1, 2, 18), NULL, 0, NULL,
+ "Defines how a device failure in a 'mirror' log LV is handled.\n"
+ "The mirror_image_fault_policy description for mirrored LVs also\n"
+ "applies to mirrored log LVs.\n")
+
+cfg(activation_mirror_device_fault_policy_CFG, "mirror_device_fault_policy", activation_CFG_SECTION, 0, CFG_TYPE_STRING, DEFAULT_MIRROR_DEVICE_FAULT_POLICY, vsn(1, 2, 10), NULL, vsn(2, 2, 57),
+ "This has been replaced by the activation/mirror_image_fault_policy setting.\n",
+ "Define how a device failure affecting a mirror is handled.\n")
+
+cfg(activation_snapshot_autoextend_threshold_CFG, "snapshot_autoextend_threshold", activation_CFG_SECTION, 0, CFG_TYPE_INT, DEFAULT_SNAPSHOT_AUTOEXTEND_THRESHOLD, vsn(2, 2, 75), NULL, 0, NULL,
+ "Auto-extend a snapshot when its usage exceeds this percent.\n"
+ "Setting this to 100 disables automatic extension.\n"
+ "The minimum value is 50 (a smaller value is treated as 50.)\n"
+ "Also see snapshot_autoextend_percent.\n"
+ "Automatic extension requires dmeventd to be monitoring the LV.\n"
+ "#\n"
+ "Example\n"
+ "Using 70% autoextend threshold and 20% autoextend size, when a 1G\n"
+ "snapshot exceeds 700M, it is extended to 1.2G, and when it exceeds\n"
+ "840M, it is extended to 1.44G:\n"
+ "snapshot_autoextend_threshold = 70\n"
+ "#\n")
+
+cfg(activation_snapshot_autoextend_percent_CFG, "snapshot_autoextend_percent", activation_CFG_SECTION, 0, CFG_TYPE_INT, DEFAULT_SNAPSHOT_AUTOEXTEND_PERCENT, vsn(2, 2, 75), NULL, 0, NULL,
+ "Auto-extending a snapshot adds this percent extra space.\n"
+ "The amount of additional space added to a snapshot is this\n"
+ "percent of its current size.\n"
+ "#\n"
+ "Example\n"
+ "Using 70% autoextend threshold and 20% autoextend size, when a 1G\n"
+ "snapshot exceeds 700M, it is extended to 1.2G, and when it exceeds\n"
+ "840M, it is extended to 1.44G:\n"
+ "snapshot_autoextend_percent = 20\n"
+ "#\n")
+
+cfg(activation_thin_pool_autoextend_threshold_CFG, "thin_pool_autoextend_threshold", activation_CFG_SECTION, CFG_PROFILABLE | CFG_PROFILABLE_METADATA, CFG_TYPE_INT, DEFAULT_THIN_POOL_AUTOEXTEND_THRESHOLD, vsn(2, 2, 89), NULL, 0, NULL,
+ "Auto-extend a thin pool when its usage exceeds this percent.\n"
+ "Setting this to 100 disables automatic extension.\n"
+ "The minimum value is 50 (a smaller value is treated as 50.)\n"
+ "Also see thin_pool_autoextend_percent.\n"
+ "Automatic extension requires dmeventd to be monitoring the LV.\n"
+ "#\n"
+ "Example\n"
+ "Using 70% autoextend threshold and 20% autoextend size, when a 1G\n"
+ "thin pool exceeds 700M, it is extended to 1.2G, and when it exceeds\n"
+ "840M, it is extended to 1.44G:\n"
+ "thin_pool_autoextend_threshold = 70\n"
+ "#\n")
+
+cfg(activation_thin_pool_autoextend_percent_CFG, "thin_pool_autoextend_percent", activation_CFG_SECTION, CFG_PROFILABLE | CFG_PROFILABLE_METADATA, CFG_TYPE_INT, DEFAULT_THIN_POOL_AUTOEXTEND_PERCENT, vsn(2, 2, 89), NULL, 0, NULL,
+ "Auto-extending a thin pool adds this percent extra space.\n"
+ "The amount of additional space added to a thin pool is this\n"
+ "percent of its current size.\n"
+ "#\n"
+ "Example\n"
+ "Using 70% autoextend threshold and 20% autoextend size, when a 1G\n"
+ "thin pool exceeds 700M, it is extended to 1.2G, and when it exceeds\n"
+ "840M, it is extended to 1.44G:\n"
+ "thin_pool_autoextend_percent = 20\n"
+ "#\n")
+
+cfg_array(activation_mlock_filter_CFG, "mlock_filter", activation_CFG_SECTION, CFG_DEFAULT_UNDEFINED | CFG_ADVANCED, CFG_TYPE_STRING, NULL, vsn(2, 2, 62), NULL, 0, NULL,
+ "Do not mlock these memory areas.\n"
+ "While activating devices, I/O to devices being (re)configured is\n"
+ "suspended. As a precaution against deadlocks, LVM pins memory it is\n"
+ "using so it is not paged out, and will not require I/O to reread.\n"
+ "Groups of pages that are known not to be accessed during activation\n"
+ "do not need to be pinned into memory. Each string listed in this\n"
+ "setting is compared against each line in /proc/self/maps, and the\n"
+ "pages corresponding to lines that match are not pinned. On some\n"
+ "systems, locale-archive was found to make up over 80% of the memory\n"
+ "used by the process.\n"
+ "#\n"
+ "Example\n"
+ "mlock_filter = [ \"locale/locale-archive\", \"gconv/gconv-modules.cache\" ]\n"
+ "#\n")
+
+cfg(activation_use_mlockall_CFG, "use_mlockall", activation_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_USE_MLOCKALL, vsn(2, 2, 62), NULL, 0, NULL,
+ "Use the old behavior of mlockall to pin all memory.\n"
+ "Prior to version 2.02.62, LVM used mlockall() to pin the whole\n"
+ "process's memory while activating devices.\n")
+
+cfg(activation_monitoring_CFG, "monitoring", activation_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_DMEVENTD_MONITOR, vsn(2, 2, 63), NULL, 0, NULL,
+ "Monitor LVs that are activated.\n"
+ "The --ignoremonitoring option overrides this setting.\n"
+ "When enabled, LVM will ask dmeventd to monitor activated LVs.\n")
+
+cfg(activation_polling_interval_CFG, "polling_interval", activation_CFG_SECTION, 0, CFG_TYPE_INT, DEFAULT_INTERVAL, vsn(2, 2, 63), NULL, 0, NULL,
+ "Check pvmove or lvconvert progress at this interval (seconds).\n"
+ "When pvmove or lvconvert must wait for the kernel to finish\n"
+ "synchronising or merging data, they check and report progress at\n"
+ "intervals of this number of seconds. If this is set to 0 and there\n"
+ "is only one thing to wait for, there are no progress reports, but\n"
+ "the process is awoken immediately once the operation is complete.\n")
+
+cfg(activation_auto_set_activation_skip_CFG, "auto_set_activation_skip", activation_CFG_SECTION, CFG_DEFAULT_COMMENTED, CFG_TYPE_BOOL, DEFAULT_AUTO_SET_ACTIVATION_SKIP, vsn(2,2,99), NULL, 0, NULL,
+ "Set the activation skip flag on new thin snapshot LVs.\n"
+ "The --setactivationskip option overrides this setting.\n"
+ "An LV can have a persistent 'activation skip' flag. The flag causes\n"
+ "the LV to be skipped during normal activation. The lvchange/vgchange\n"
+ "-K option is required to activate LVs that have the activation skip\n"
+ "flag set. When this setting is enabled, the activation skip flag is\n"
+ "set on new thin snapshot LVs.\n")
+
+cfg(activation_mode_CFG, "activation_mode", activation_CFG_SECTION, 0, CFG_TYPE_STRING, DEFAULT_ACTIVATION_MODE, vsn(2,2,108), NULL, 0, NULL,
+ "How LVs with missing devices are activated.\n"
+ "The --activationmode option overrides this setting.\n"
+ "#\n"
+ "Accepted values:\n"
+ " complete\n"
+ " Only allow activation of an LV if all of the Physical Volumes it\n"
+ " uses are present. Other PVs in the Volume Group may be missing.\n"
+ " degraded\n"
+ " Like complete, but additionally RAID LVs of segment type raid1,\n"
+ "      raid4, raid5, raid6 and raid10 will be activated if there is no\n"
+ " data loss, i.e. they have sufficient redundancy to present the\n"
+ " entire addressable range of the Logical Volume.\n"
+ " partial\n"
+ " Allows the activation of any LV even if a missing or failed PV\n"
+ " could cause data loss with a portion of the LV inaccessible.\n"
+ " This setting should not normally be used, but may sometimes\n"
+ " assist with data recovery.\n"
+ "#\n")
+
+cfg_array(activation_lock_start_list_CFG, "lock_start_list", activation_CFG_SECTION, CFG_ALLOW_EMPTY|CFG_DEFAULT_UNDEFINED, CFG_TYPE_STRING, NULL, vsn(2, 2, 124), NULL, 0, NULL,
+ "Locking is started only for VGs selected by this list.\n"
+ "The rules are the same as those for volume_list.\n")
+
+cfg_array(activation_auto_lock_start_list_CFG, "auto_lock_start_list", activation_CFG_SECTION, CFG_ALLOW_EMPTY|CFG_DEFAULT_UNDEFINED, CFG_TYPE_STRING, NULL, vsn(2, 2, 124), NULL, 0, NULL,
+ "Locking is auto-started only for VGs selected by this list.\n"
+ "The rules are the same as those for auto_activation_volume_list.\n")
+
+cfg(metadata_pvmetadatacopies_CFG, "pvmetadatacopies", metadata_CFG_SECTION, CFG_ADVANCED | CFG_DEFAULT_COMMENTED, CFG_TYPE_INT, DEFAULT_PVMETADATACOPIES, vsn(1, 0, 0), NULL, 0, NULL,
+ "Number of copies of metadata to store on each PV.\n"
+ "The --pvmetadatacopies option overrides this setting.\n"
+ "#\n"
+ "Accepted values:\n"
+ " 2\n"
+ " Two copies of the VG metadata are stored on the PV, one at the\n"
+ " front of the PV, and one at the end.\n"
+ " 1\n"
+ " One copy of VG metadata is stored at the front of the PV.\n"
+ " 0\n"
+ " No copies of VG metadata are stored on the PV. This may be\n"
+ " useful for VGs containing large numbers of PVs.\n"
+ "#\n")
+
+cfg(metadata_vgmetadatacopies_CFG, "vgmetadatacopies", metadata_CFG_SECTION, CFG_DEFAULT_COMMENTED, CFG_TYPE_INT, DEFAULT_VGMETADATACOPIES, vsn(2, 2, 69), NULL, 0, NULL,
+ "Number of copies of metadata to maintain for each VG.\n"
+ "The --vgmetadatacopies option overrides this setting.\n"
+ "If set to a non-zero value, LVM automatically chooses which of the\n"
+ "available metadata areas to use to achieve the requested number of\n"
+ "copies of the VG metadata. If you set a value larger than the\n"
+ "total number of metadata areas available, then metadata is stored in\n"
+ "them all. The value 0 (unmanaged) disables this automatic management\n"
+ "and allows you to control which metadata areas are used at the\n"
+ "individual PV level using pvchange --metadataignore y|n.\n")
+
+cfg(metadata_pvmetadatasize_CFG, "pvmetadatasize", metadata_CFG_SECTION, CFG_DEFAULT_COMMENTED, CFG_TYPE_INT, DEFAULT_PVMETADATASIZE, vsn(1, 0, 0), NULL, 0, NULL,
+ "Approximate number of sectors to use for each metadata copy.\n"
+ "VGs with large numbers of PVs or LVs, or VGs containing complex LV\n"
+ "structures, may need additional space for VG metadata. The metadata\n"
+ "areas are treated as circular buffers, so unused space becomes filled\n"
+ "with an archive of the most recent previous versions of the metadata.\n")
+
+cfg(metadata_pvmetadataignore_CFG, "pvmetadataignore", metadata_CFG_SECTION, CFG_ADVANCED | CFG_DEFAULT_COMMENTED, CFG_TYPE_BOOL, DEFAULT_PVMETADATAIGNORE, vsn(2, 2, 69), NULL, 0, NULL,
+ "Ignore metadata areas on a new PV.\n"
+ "The --metadataignore option overrides this setting.\n"
+ "If metadata areas on a PV are ignored, LVM will not store metadata\n"
+ "in them.\n")
+
+cfg(metadata_stripesize_CFG, "stripesize", metadata_CFG_SECTION, CFG_ADVANCED | CFG_DEFAULT_COMMENTED, CFG_TYPE_INT, DEFAULT_STRIPESIZE, vsn(1, 0, 0), NULL, 0, NULL, NULL)
+
+cfg_array(metadata_dirs_CFG, "dirs", metadata_CFG_SECTION, CFG_ADVANCED | CFG_DEFAULT_UNDEFINED, CFG_TYPE_STRING, NULL, vsn(1, 0, 0), NULL, 0, NULL,
+ "Directories holding live copies of text format metadata.\n"
+ "These directories must not be on logical volumes!\n"
+ "It's possible to use LVM with a couple of directories here,\n"
+ "preferably on different (non-LV) filesystems, and with no other\n"
+ "on-disk metadata (pvmetadatacopies = 0). Or this can be in addition\n"
+ "to on-disk metadata areas. The feature was originally added to\n"
+ "simplify testing and is not supported under low memory situations -\n"
+ "the machine could lock up. Never edit any files in these directories\n"
+ "by hand unless you are absolutely sure you know what you are doing!\n"
+ "Use the supplied toolset to make changes (e.g. vgcfgrestore).\n"
+ "#\n"
+ "Example\n"
+ "dirs = [ \"/etc/lvm/metadata\", \"/mnt/disk2/lvm/metadata2\" ]\n"
+ "#\n")
+
+cfg_section(metadata_disk_areas_CFG_SUBSECTION, "disk_areas", metadata_CFG_SECTION, CFG_UNSUPPORTED | CFG_DEFAULT_COMMENTED, vsn(1, 0, 0), 0, NULL, NULL)
+cfg_section(disk_area_CFG_SUBSECTION, "disk_area", metadata_disk_areas_CFG_SUBSECTION, CFG_NAME_VARIABLE | CFG_UNSUPPORTED | CFG_DEFAULT_COMMENTED, vsn(1, 0, 0), 0, NULL, NULL)
+cfg(disk_area_start_sector_CFG, "start_sector", disk_area_CFG_SUBSECTION, CFG_UNSUPPORTED | CFG_DEFAULT_COMMENTED, CFG_TYPE_INT, 0, vsn(1, 0, 0), NULL, 0, NULL, NULL)
+cfg(disk_area_size_CFG, "size", disk_area_CFG_SUBSECTION, CFG_UNSUPPORTED | CFG_DEFAULT_COMMENTED, CFG_TYPE_INT, 0, vsn(1, 0, 0), NULL, 0, NULL, NULL)
+cfg(disk_area_id_CFG, "id", disk_area_CFG_SUBSECTION, CFG_UNSUPPORTED | CFG_DEFAULT_UNDEFINED, CFG_TYPE_STRING, NULL, vsn(1, 0, 0), NULL, 0, NULL, NULL)
+
+cfg(report_compact_output_CFG, "compact_output", report_CFG_SECTION, CFG_PROFILABLE | CFG_DEFAULT_COMMENTED, CFG_TYPE_BOOL, DEFAULT_REP_COMPACT_OUTPUT, vsn(2, 2, 115), NULL, 0, NULL,
+ "Do not print empty report fields.\n"
+ "Fields that don't have a value set for any of the rows reported are\n"
+ "skipped and not printed. Compact output is applicable only if\n"
+ "report/buffered is enabled.\n")
+
+cfg(report_aligned_CFG, "aligned", report_CFG_SECTION, CFG_PROFILABLE | CFG_DEFAULT_COMMENTED, CFG_TYPE_BOOL, DEFAULT_REP_ALIGNED, vsn(1, 0, 0), NULL, 0, NULL,
+ "Align columns in report output.\n")
+
+cfg(report_buffered_CFG, "buffered", report_CFG_SECTION, CFG_PROFILABLE | CFG_DEFAULT_COMMENTED, CFG_TYPE_BOOL, DEFAULT_REP_BUFFERED, vsn(1, 0, 0), NULL, 0, NULL,
+ "Buffer report output.\n"
+ "When buffered reporting is used, the report's content is appended\n"
+ "incrementally to include each object being reported until the report\n"
+ "is flushed to output which normally happens at the end of command\n"
+ "execution. Otherwise, if buffering is not used, each object is\n"
+ "reported as soon as its processing is finished.\n")
+
+cfg(report_headings_CFG, "headings", report_CFG_SECTION, CFG_PROFILABLE | CFG_DEFAULT_COMMENTED, CFG_TYPE_BOOL, DEFAULT_REP_HEADINGS, vsn(1, 0, 0), NULL, 0, NULL,
+ "Show headings for columns on report.\n")
+
+cfg(report_separator_CFG, "separator", report_CFG_SECTION, CFG_PROFILABLE | CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, DEFAULT_REP_SEPARATOR, vsn(1, 0, 0), NULL, 0, NULL,
+ "A separator to use on report after each field.\n")
+
+cfg(report_list_item_separator_CFG, "list_item_separator", report_CFG_SECTION, CFG_PROFILABLE | CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, DEFAULT_REP_LIST_ITEM_SEPARATOR, vsn(2, 2, 108), NULL, 0, NULL,
+ "A separator to use for list items when reported.\n")
+
+cfg(report_prefixes_CFG, "prefixes", report_CFG_SECTION, CFG_PROFILABLE | CFG_DEFAULT_COMMENTED, CFG_TYPE_BOOL, DEFAULT_REP_PREFIXES, vsn(2, 2, 36), NULL, 0, NULL,
+ "Use a field name prefix for each field reported.\n")
+
+cfg(report_quoted_CFG, "quoted", report_CFG_SECTION, CFG_PROFILABLE | CFG_DEFAULT_COMMENTED, CFG_TYPE_BOOL, DEFAULT_REP_QUOTED, vsn(2, 2, 39), NULL, 0, NULL,
+ "Quote field values when using field name prefixes.\n")
+
+cfg(report_colums_as_rows_CFG, "colums_as_rows", report_CFG_SECTION, CFG_PROFILABLE | CFG_DEFAULT_COMMENTED, CFG_TYPE_BOOL, DEFAULT_REP_COLUMNS_AS_ROWS, vsn(1, 0, 0), NULL, 0, NULL,
+ "Output each column as a row.\n"
+ "If set, this also implies report/prefixes=1.\n")
+
+cfg(report_binary_values_as_numeric_CFG, "binary_values_as_numeric", report_CFG_SECTION, CFG_PROFILABLE | CFG_DEFAULT_COMMENTED, CFG_TYPE_BOOL, 0, vsn(2, 2, 108), NULL, 0, NULL,
+ "Use binary values 0 or 1 instead of descriptive literal values.\n"
+ "For columns that have exactly two valid values to report\n"
+ "(not counting the 'unknown' value which denotes that the\n"
+ "value could not be determined).\n")
+
+cfg(report_time_format_CFG, "time_format", report_CFG_SECTION, CFG_PROFILABLE | CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, DEFAULT_TIME_FORMAT, vsn(2, 2, 123), NULL, 0, NULL,
+ "Set time format for fields reporting time values.\n"
+ "Format specification is a string which may contain special character\n"
+ "sequences and ordinary character sequences. Ordinary character\n"
+ "sequences are copied verbatim. Each special character sequence is\n"
+ "introduced by the '%' character and such sequence is then\n"
+ "substituted with a value as described below.\n"
+ "#\n"
+ "Accepted values:\n"
+ " %a\n"
+ " The abbreviated name of the day of the week according to the\n"
+ " current locale.\n"
+ " %A\n"
+ " The full name of the day of the week according to the current\n"
+ " locale.\n"
+ " %b\n"
+ " The abbreviated month name according to the current locale.\n"
+ " %B\n"
+ " The full month name according to the current locale.\n"
+ " %c\n"
+ " The preferred date and time representation for the current\n"
+ " locale (alt E)\n"
+ " %C\n"
+ " The century number (year/100) as a 2-digit integer. (alt E)\n"
+ " %d\n"
+ " The day of the month as a decimal number (range 01 to 31).\n"
+ " (alt O)\n"
+ " %D\n"
+ " Equivalent to %m/%d/%y. (For Americans only. Americans should\n"
+ "    note that in other countries %d/%m/%y is rather common. This\n"
+ " means that in international context this format is ambiguous and\n"
+ "    should not be used.)\n"
+ " %e\n"
+ " Like %d, the day of the month as a decimal number, but a leading\n"
+ " zero is replaced by a space. (alt O)\n"
+ " %E\n"
+ " Modifier: use alternative local-dependent representation if\n"
+ " available.\n"
+ " %F\n"
+ " Equivalent to %Y-%m-%d (the ISO 8601 date format).\n"
+ " %G\n"
+ "    The ISO 8601 week-based year with century as a decimal number.\n"
+ " The 4-digit year corresponding to the ISO week number (see %V).\n"
+ " This has the same format and value as %Y, except that if the\n"
+ " ISO week number belongs to the previous or next year, that year\n"
+ " is used instead.\n"
+ " %g\n"
+ " Like %G, but without century, that is, with a 2-digit year\n"
+ " (00-99).\n"
+ " %h\n"
+ " Equivalent to %b.\n"
+ " %H\n"
+ " The hour as a decimal number using a 24-hour clock\n"
+ " (range 00 to 23). (alt O)\n"
+ " %I\n"
+ " The hour as a decimal number using a 12-hour clock\n"
+ " (range 01 to 12). (alt O)\n"
+ " %j\n"
+ " The day of the year as a decimal number (range 001 to 366).\n"
+ " %k\n"
+ " The hour (24-hour clock) as a decimal number (range 0 to 23);\n"
+ " single digits are preceded by a blank. (See also %H.)\n"
+ " %l\n"
+ " The hour (12-hour clock) as a decimal number (range 1 to 12);\n"
+ " single digits are preceded by a blank. (See also %I.)\n"
+ " %m\n"
+ " The month as a decimal number (range 01 to 12). (alt O)\n"
+ " %M\n"
+ " The minute as a decimal number (range 00 to 59). (alt O)\n"
+ " %O\n"
+ " Modifier: use alternative numeric symbols.\n"
+ " %p\n"
+ " Either \"AM\" or \"PM\" according to the given time value,\n"
+ " or the corresponding strings for the current locale. Noon is\n"
+ " treated as \"PM\" and midnight as \"AM\".\n"
+ " %P\n"
+ " Like %p but in lowercase: \"am\" or \"pm\" or a corresponding\n"
+ " string for the current locale.\n"
+ " %r\n"
+ " The time in a.m. or p.m. notation. In the POSIX locale this is\n"
+ " equivalent to %I:%M:%S %p.\n"
+ " %R\n"
+ " The time in 24-hour notation (%H:%M). For a version including\n"
+ " the seconds, see %T below.\n"
+ " %s\n"
+ " The number of seconds since the Epoch,\n"
+ " 1970-01-01 00:00:00 +0000 (UTC)\n"
+ " %S\n"
+ " The second as a decimal number (range 00 to 60). (The range is\n"
+ " up to 60 to allow for occasional leap seconds.) (alt O)\n"
+ " %t\n"
+ " A tab character.\n"
+ " %T\n"
+ " The time in 24-hour notation (%H:%M:%S).\n"
+ " %u\n"
+ " The day of the week as a decimal, range 1 to 7, Monday being 1.\n"
+ " See also %w. (alt O)\n"
+ " %U\n"
+ " The week number of the current year as a decimal number,\n"
+ " range 00 to 53, starting with the first Sunday as the first\n"
+ " day of week 01. See also %V and %W. (alt O)\n"
+ " %V\n"
+ " The ISO 8601 week number of the current year as a decimal number,\n"
+ " range 01 to 53, where week 1 is the first week that has at least\n"
+ " 4 days in the new year. See also %U and %W. (alt O)\n"
+ " %w\n"
+ " The day of the week as a decimal, range 0 to 6, Sunday being 0.\n"
+ " See also %u. (alt O)\n"
+ " %W\n"
+ " The week number of the current year as a decimal number,\n"
+ " range 00 to 53, starting with the first Monday as the first day\n"
+ " of week 01. (alt O)\n"
+ " %x\n"
+ " The preferred date representation for the current locale without\n"
+ " the time. (alt E)\n"
+ " %X\n"
+ " The preferred time representation for the current locale without\n"
+ " the date. (alt E)\n"
+ " %y\n"
+ " The year as a decimal number without a century (range 00 to 99).\n"
+ " (alt E, alt O)\n"
+ " %Y\n"
+ " The year as a decimal number including the century. (alt E)\n"
+ " %z\n"
+ " The +hhmm or -hhmm numeric timezone (that is, the hour and minute\n"
+ " offset from UTC).\n"
+ " %Z\n"
+ " The timezone name or abbreviation.\n"
+ " %%\n"
+ " A literal '%' character.\n"
+ "#\n")
+
+cfg(report_devtypes_sort_CFG, "devtypes_sort", report_CFG_SECTION, CFG_PROFILABLE | CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, DEFAULT_DEVTYPES_SORT, vsn(2, 2, 101), NULL, 0, NULL,
+ "List of columns to sort by when reporting 'lvm devtypes' command.\n"
+ "See 'lvm devtypes -o help' for the list of possible fields.\n")
+
+cfg(report_devtypes_cols_CFG, "devtypes_cols", report_CFG_SECTION, CFG_PROFILABLE | CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, DEFAULT_DEVTYPES_COLS, vsn(2, 2, 101), NULL, 0, NULL,
+ "List of columns to report for 'lvm devtypes' command.\n"
+ "See 'lvm devtypes -o help' for the list of possible fields.\n")
+
+cfg(report_devtypes_cols_verbose_CFG, "devtypes_cols_verbose", report_CFG_SECTION, CFG_PROFILABLE | CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, DEFAULT_DEVTYPES_COLS_VERB, vsn(2, 2, 101), NULL, 0, NULL,
+ "List of columns to report for 'lvm devtypes' command in verbose mode.\n"
+ "See 'lvm devtypes -o help' for the list of possible fields.\n")
+
+cfg(report_lvs_sort_CFG, "lvs_sort", report_CFG_SECTION, CFG_PROFILABLE | CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, DEFAULT_LVS_SORT, vsn(1, 0, 0), NULL, 0, NULL,
+ "List of columns to sort by when reporting 'lvs' command.\n"
+ "See 'lvs -o help' for the list of possible fields.\n")
+
+cfg(report_lvs_cols_CFG, "lvs_cols", report_CFG_SECTION, CFG_PROFILABLE | CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, DEFAULT_LVS_COLS, vsn(1, 0, 0), NULL, 0, NULL,
+ "List of columns to report for 'lvs' command.\n"
+ "See 'lvs -o help' for the list of possible fields.\n")
+
+cfg(report_lvs_cols_verbose_CFG, "lvs_cols_verbose", report_CFG_SECTION, CFG_PROFILABLE | CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, DEFAULT_LVS_COLS_VERB, vsn(1, 0, 0), NULL, 0, NULL,
+ "List of columns to report for 'lvs' command in verbose mode.\n"
+ "See 'lvs -o help' for the list of possible fields.\n")
+
+cfg(report_vgs_sort_CFG, "vgs_sort", report_CFG_SECTION, CFG_PROFILABLE | CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, DEFAULT_VGS_SORT, vsn(1, 0, 0), NULL, 0, NULL,
+ "List of columns to sort by when reporting 'vgs' command.\n"
+ "See 'vgs -o help' for the list of possible fields.\n")
+
+cfg(report_vgs_cols_CFG, "vgs_cols", report_CFG_SECTION, CFG_PROFILABLE | CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, DEFAULT_VGS_COLS, vsn(1, 0, 0), NULL, 0, NULL,
+ "List of columns to report for 'vgs' command.\n"
+ "See 'vgs -o help' for the list of possible fields.\n")
+
+cfg(report_vgs_cols_verbose_CFG, "vgs_cols_verbose", report_CFG_SECTION, CFG_PROFILABLE | CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, DEFAULT_VGS_COLS_VERB, vsn(1, 0, 0), NULL, 0, NULL,
+ "List of columns to report for 'vgs' command in verbose mode.\n"
+ "See 'vgs -o help' for the list of possible fields.\n")
+
+cfg(report_pvs_sort_CFG, "pvs_sort", report_CFG_SECTION, CFG_PROFILABLE | CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, DEFAULT_PVS_SORT, vsn(1, 0, 0), NULL, 0, NULL,
+ "List of columns to sort by when reporting 'pvs' command.\n"
+ "See 'pvs -o help' for the list of possible fields.\n")
+
+cfg(report_pvs_cols_CFG, "pvs_cols", report_CFG_SECTION, CFG_PROFILABLE | CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, DEFAULT_PVS_COLS, vsn(1, 0, 0), NULL, 0, NULL,
+ "List of columns to report for 'pvs' command.\n"
+ "See 'pvs -o help' for the list of possible fields.\n")
+
+cfg(report_pvs_cols_verbose_CFG, "pvs_cols_verbose", report_CFG_SECTION, CFG_PROFILABLE | CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, DEFAULT_PVS_COLS_VERB, vsn(1, 0, 0), NULL, 0, NULL,
+ "List of columns to report for 'pvs' command in verbose mode.\n"
+ "See 'pvs -o help' for the list of possible fields.\n")
+
+cfg(report_segs_sort_CFG, "segs_sort", report_CFG_SECTION, CFG_PROFILABLE | CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, DEFAULT_SEGS_SORT, vsn(1, 0, 0), NULL, 0, NULL,
+ "List of columns to sort by when reporting 'lvs --segments' command.\n"
+ "See 'lvs --segments -o help' for the list of possible fields.\n")
+
+cfg(report_segs_cols_CFG, "segs_cols", report_CFG_SECTION, CFG_PROFILABLE | CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, DEFAULT_SEGS_COLS, vsn(1, 0, 0), NULL, 0, NULL,
+ "List of columns to report for 'lvs --segments' command.\n"
+ "See 'lvs --segments -o help' for the list of possible fields.\n")
+
+cfg(report_segs_cols_verbose_CFG, "segs_cols_verbose", report_CFG_SECTION, CFG_PROFILABLE | CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, DEFAULT_SEGS_COLS_VERB, vsn(1, 0, 0), NULL, 0, NULL,
+ "List of columns to report for 'lvs --segments' command in verbose mode.\n"
+ "See 'lvs --segments -o help' for the list of possible fields.\n")
+
+cfg(report_pvsegs_sort_CFG, "pvsegs_sort", report_CFG_SECTION, CFG_PROFILABLE | CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, DEFAULT_PVSEGS_SORT, vsn(1, 1, 3), NULL, 0, NULL,
+ "List of columns to sort by when reporting 'pvs --segments' command.\n"
+ "See 'pvs --segments -o help' for the list of possible fields.\n")
+
+cfg(report_pvsegs_cols_CFG, "pvsegs_cols", report_CFG_SECTION, CFG_PROFILABLE | CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, DEFAULT_PVSEGS_COLS, vsn(1, 1, 3), NULL, 0, NULL,
+ "List of columns to report for 'pvs --segments' command.\n"
+ "See 'pvs --segments -o help' for the list of possible fields.\n")
+
+cfg(report_pvsegs_cols_verbose_CFG, "pvsegs_cols_verbose", report_CFG_SECTION, CFG_PROFILABLE | CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, DEFAULT_PVSEGS_COLS_VERB, vsn(1, 1, 3), NULL, 0, NULL,
+ "List of columns to report for 'pvs --segments' command in verbose mode.\n"
+ "See 'pvs --segments -o help' for the list of possible fields.\n")
+
+cfg(dmeventd_mirror_library_CFG, "mirror_library", dmeventd_CFG_SECTION, 0, CFG_TYPE_STRING, DEFAULT_DMEVENTD_MIRROR_LIB, vsn(1, 2, 3), NULL, 0, NULL,
+ "The library dmeventd uses when monitoring a mirror device.\n"
+ "libdevmapper-event-lvm2mirror.so attempts to recover from\n"
+ "failures. It removes failed devices from a volume group and\n"
+ "reconfigures a mirror as necessary. If no mirror library is\n"
+ "provided, mirrors are not monitored through dmeventd.\n")
+
+cfg(dmeventd_raid_library_CFG, "raid_library", dmeventd_CFG_SECTION, CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, DEFAULT_DMEVENTD_RAID_LIB, vsn(2, 2, 87), NULL, 0, NULL, NULL)
+
+cfg(dmeventd_snapshot_library_CFG, "snapshot_library", dmeventd_CFG_SECTION, 0, CFG_TYPE_STRING, DEFAULT_DMEVENTD_SNAPSHOT_LIB, vsn(1, 2, 26), NULL, 0, NULL,
+ "The library dmeventd uses when monitoring a snapshot device.\n"
+ "libdevmapper-event-lvm2snapshot.so monitors the filling of snapshots\n"
+ "and emits a warning through syslog when the usage exceeds 80%. The\n"
+ "warning is repeated when 85%, 90% and 95% of the snapshot is filled.\n")
+
+cfg(dmeventd_thin_library_CFG, "thin_library", dmeventd_CFG_SECTION, 0, CFG_TYPE_STRING, DEFAULT_DMEVENTD_THIN_LIB, vsn(2, 2, 89), NULL, 0, NULL,
+ "The library dmeventd uses when monitoring a thin device.\n"
+ "libdevmapper-event-lvm2thin.so monitors the filling of a pool\n"
+ "and emits a warning through syslog when the usage exceeds 80%. The\n"
+ "warning is repeated when 85%, 90% and 95% of the pool is filled.\n")
+
+cfg(dmeventd_executable_CFG, "executable", dmeventd_CFG_SECTION, CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, DEFAULT_DMEVENTD_PATH, vsn(2, 2, 73), "@DMEVENTD_PATH@", 0, NULL,
+ "The full path to the dmeventd binary.\n")
+
+cfg(tags_hosttags_CFG, "hosttags", tags_CFG_SECTION, CFG_DEFAULT_COMMENTED, CFG_TYPE_BOOL, DEFAULT_HOSTTAGS, vsn(1, 0, 18), NULL, 0, NULL,
+ "Create a host tag using the machine name.\n"
+ "The machine name is nodename returned by uname(2).\n")
+
+cfg_section(tag_CFG_SUBSECTION, "tag", tags_CFG_SECTION, CFG_NAME_VARIABLE | CFG_DEFAULT_COMMENTED, vsn(1, 0, 18), 0, NULL,
+ "Replace this subsection name with a custom tag name.\n"
+ "Multiple subsections like this can be created. The '@' prefix for\n"
+ "tags is optional. This subsection can contain host_list, which is a\n"
+ "list of machine names. If the name of the local machine is found in\n"
+ "host_list, then the name of this subsection is used as a tag and is\n"
+ "applied to the local machine as a 'host tag'. If this subsection is\n"
+ "empty (has no host_list), then the subsection name is always applied\n"
+ "as a 'host tag'.\n"
+ "#\n"
+ "Example\n"
+ "The host tag foo is given to all hosts, and the host tag\n"
+ "bar is given to the hosts named machine1 and machine2.\n"
+ "tags { foo { } bar { host_list = [ \"machine1\", \"machine2\" ] } }\n"
+ "#\n")
+
+cfg_array(tag_host_list_CFG, "host_list", tag_CFG_SUBSECTION, CFG_ALLOW_EMPTY | CFG_DEFAULT_UNDEFINED, CFG_TYPE_STRING, NULL, vsn(1, 0, 18), NULL, 0, NULL,
+ "A list of machine names.\n"
+ "These machine names are compared to the nodename returned\n"
+ "by uname(2). If the local machine name matches an entry in\n"
+ "this list, the name of the subsection is applied to the\n"
+ "machine as a 'host tag'.\n")
+
+cfg(local_system_id_CFG, "system_id", local_CFG_SECTION, CFG_ALLOW_EMPTY | CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, NULL, vsn(2, 2, 117), NULL, 0, NULL,
+ "Defines the local system ID for lvmlocal mode.\n"
+ "This is used when global/system_id_source is set to 'lvmlocal' in the\n"
+ "main configuration file, e.g. lvm.conf. When used, it must be set to\n"
+ "a unique value among all hosts sharing access to the storage,\n"
+ "e.g. a host name.\n"
+ "#\n"
+ "Example\n"
+ "Set no system ID:\n"
+ "system_id = \"\"\n"
+ "Set the system_id to a specific name:\n"
+ "system_id = \"host1\"\n"
+ "#\n")
+
+cfg_array(local_extra_system_ids_CFG, "extra_system_ids", local_CFG_SECTION, CFG_ALLOW_EMPTY | CFG_DEFAULT_UNDEFINED, CFG_TYPE_STRING, NULL, vsn(2, 2, 117), NULL, 0, NULL,
+ "A list of extra VG system IDs the local host can access.\n"
+ "VGs with the system IDs listed here (in addition to the host's own\n"
+ "system ID) can be fully accessed by the local host. (These are\n"
+ "system IDs that the host sees in VGs, not system IDs that identify\n"
+ "the local host, which is determined by system_id_source.)\n"
+ "Use this only after consulting 'man lvmsystemid' to be certain of\n"
+ "correct usage and possible dangers.\n")
+
+cfg(local_host_id_CFG, "host_id", local_CFG_SECTION, CFG_DEFAULT_COMMENTED, CFG_TYPE_INT, 0, vsn(2, 2, 124), NULL, 0, NULL,
+ "The lvmlockd sanlock host_id.\n"
+ "This must be unique among all hosts, and must be between 1 and 2000.\n")
+
+cfg(CFG_COUNT, NULL, root_CFG_SECTION, 0, CFG_TYPE_INT, 0, vsn(0, 0, 0), NULL, 0, NULL, NULL)
diff --git a/lib/config/defaults.h b/lib/config/defaults.h
index 47710f476..2b6040480 100644
--- a/lib/config/defaults.h
+++ b/lib/config/defaults.h
@@ -29,9 +29,12 @@
#define DEFAULT_DEV_DIR "/dev"
#define DEFAULT_PROC_DIR "/proc"
+#define DEFAULT_SYSTEM_ID_SOURCE "none"
#define DEFAULT_OBTAIN_DEVICE_LIST_FROM_UDEV 1
+#define DEFAULT_EXTERNAL_DEVICE_INFO_SOURCE "none"
#define DEFAULT_SYSFS_SCAN 1
#define DEFAULT_MD_COMPONENT_DETECTION 1
+#define DEFAULT_FW_RAID_COMPONENT_DETECTION 0
#define DEFAULT_MD_CHUNK_ALIGNMENT 1
#define DEFAULT_IGNORE_LVM_MIRRORS 1
#define DEFAULT_MULTIPATH_COMPONENT_DETECTION 1
@@ -44,14 +47,18 @@
#define DEFAULT_PV_MIN_SIZE_KB 2048
#define DEFAULT_LOCKING_LIB "liblvm2clusterlock.so"
+#define DEFAULT_ERROR_WHEN_FULL 0
#define DEFAULT_FALLBACK_TO_LOCAL_LOCKING 1
#define DEFAULT_FALLBACK_TO_CLUSTERED_LOCKING 1
#define DEFAULT_WAIT_FOR_LOCKS 1
+#define DEFAULT_LVMLOCKD_LOCK_RETRIES 3
#define DEFAULT_PRIORITISE_WRITE_LOCKS 1
#define DEFAULT_USE_MLOCKALL 0
#define DEFAULT_METADATA_READ_ONLY 0
#define DEFAULT_LVDISPLAY_SHOWS_FULL_DEVICE_PATH 0
+#define DEFAULT_SANLOCK_LV_EXTEND_MB 256
+
#define DEFAULT_MIRRORLOG MIRROR_LOG_DISK
#define DEFAULT_MIRROR_LOG_FAULT_POLICY "allocate"
#define DEFAULT_MIRROR_IMAGE_FAULT_POLICY "remove"
@@ -74,12 +81,17 @@
#endif
#ifdef THIN_CHECK_NEEDS_CHECK
-# define DEFAULT_THIN_CHECK_OPTIONS "-q --clear-needs-check-flag"
+# define DEFAULT_THIN_CHECK_OPTION1 "-q"
+# define DEFAULT_THIN_CHECK_OPTION2 "--clear-needs-check-flag"
+# define DEFAULT_THIN_CHECK_OPTIONS_CONFIG "#S" DEFAULT_THIN_CHECK_OPTION1 "#S" DEFAULT_THIN_CHECK_OPTION2
#else
-# define DEFAULT_THIN_CHECK_OPTIONS "-q"
+# define DEFAULT_THIN_CHECK_OPTION1 "-q"
+# define DEFAULT_THIN_CHECK_OPTION2 ""
+# define DEFAULT_THIN_CHECK_OPTIONS_CONFIG "#S" DEFAULT_THIN_CHECK_OPTION1
#endif
-#define DEFAULT_THIN_REPAIR_OPTIONS ""
+#define DEFAULT_THIN_REPAIR_OPTION1 ""
+#define DEFAULT_THIN_REPAIR_OPTIONS_CONFIG "#S" DEFAULT_THIN_REPAIR_OPTION1
#define DEFAULT_THIN_POOL_METADATA_REQUIRE_SEPARATE_PVS 0
#define DEFAULT_THIN_POOL_MAX_METADATA_SIZE (16 * 1024 * 1024) /* KB */
#define DEFAULT_THIN_POOL_MIN_METADATA_SIZE 2048 /* KB */
@@ -91,23 +103,27 @@
#define DEFAULT_THIN_POOL_ZERO 1
#define DEFAULT_POOL_METADATA_SPARE 1 /* thin + cache */
-#define DEFAULT_CACHE_CHECK_OPTIONS "-q"
-#define DEFAULT_CACHE_REPAIR_OPTIONS ""
+#ifdef CACHE_CHECK_NEEDS_CHECK
+# define DEFAULT_CACHE_CHECK_OPTION1 "-q"
+# define DEFAULT_CACHE_CHECK_OPTION2 "--clear-needs-check-flag"
+# define DEFAULT_CACHE_CHECK_OPTIONS_CONFIG "#S" DEFAULT_CACHE_CHECK_OPTION1 "#S" DEFAULT_CACHE_CHECK_OPTION2
+#else
+# define DEFAULT_CACHE_CHECK_OPTION1 "-q"
+# define DEFAULT_CACHE_CHECK_OPTION2 ""
+# define DEFAULT_CACHE_CHECK_OPTIONS_CONFIG "#S" DEFAULT_CACHE_CHECK_OPTION1
+#endif
+
+#define DEFAULT_CACHE_REPAIR_OPTION1 ""
+#define DEFAULT_CACHE_REPAIR_OPTIONS_CONFIG "#S" DEFAULT_CACHE_REPAIR_OPTION1
#define DEFAULT_CACHE_POOL_METADATA_REQUIRE_SEPARATE_PVS 0
#define DEFAULT_CACHE_POOL_CHUNK_SIZE 64 /* KB */
#define DEFAULT_CACHE_POOL_MIN_METADATA_SIZE 2048 /* KB */
#define DEFAULT_CACHE_POOL_MAX_METADATA_SIZE (16 * 1024 * 1024) /* KB */
-#define DEFAULT_CACHE_POOL_CACHEMODE "writethrough"
-#define DEFAULT_CACHE_POOL_POLICY "mq"
+#define DEFAULT_CACHE_POLICY "mq"
+#define DEFAULT_CACHE_MODE "writethrough"
#define DEFAULT_UMASK 0077
-#ifdef LVM1_FALLBACK
-# define DEFAULT_FALLBACK_TO_LVM1 1
-#else
-# define DEFAULT_FALLBACK_TO_LVM1 0
-#endif
-
#define DEFAULT_FORMAT "lvm2"
#define DEFAULT_STRIPESIZE 64 /* KB */
@@ -138,10 +154,6 @@
# define DEFAULT_LOG_FACILITY LOG_USER
#endif
-#define DEFAULT_LOGGED_DEBUG_CLASSES (LOG_CLASS_MEM | LOG_CLASS_DEVS | \
- LOG_CLASS_ACTIVATION | LOG_CLASS_ALLOC | LOG_CLASS_LVMETAD | \
- LOG_CLASS_METADATA | LOG_CLASS_CACHE | LOG_CLASS_LOCKING)
-
#define DEFAULT_SYSLOG 1
#define DEFAULT_VERBOSE 0
#define DEFAULT_SILENT 0
@@ -187,18 +199,19 @@
#define DEFAULT_REP_QUOTED 1
#define DEFAULT_REP_SEPARATOR " "
#define DEFAULT_REP_LIST_ITEM_SEPARATOR ","
+#define DEFAULT_TIME_FORMAT "%Y-%m-%d %T %z"
#define DEFAULT_LVS_COLS "lv_name,vg_name,lv_attr,lv_size,pool_lv,origin,data_percent,metadata_percent,move_pv,mirror_log,copy_percent,convert_lv"
#define DEFAULT_VGS_COLS "vg_name,pv_count,lv_count,snap_count,vg_attr,vg_size,vg_free"
#define DEFAULT_PVS_COLS "pv_name,vg_name,pv_fmt,pv_attr,pv_size,pv_free"
-#define DEFAULT_SEGS_COLS "lv_name,vg_name,lv_attr,stripes,segtype,seg_size"
+#define DEFAULT_SEGS_COLS "lv_name,vg_name,lv_attr,stripes,data_copies,segtype,seg_size"
#define DEFAULT_PVSEGS_COLS "pv_name,vg_name,pv_fmt,pv_attr,pv_size,pv_free,pvseg_start,pvseg_size"
#define DEFAULT_DEVTYPES_COLS "devtype_name,devtype_max_partitions,devtype_description"
#define DEFAULT_LVS_COLS_VERB "lv_name,vg_name,seg_count,lv_attr,lv_size,lv_major,lv_minor,lv_kernel_major,lv_kernel_minor,pool_lv,origin,data_percent,metadata_percent,move_pv,copy_percent,mirror_log,convert_lv,lv_uuid,lv_profile"
#define DEFAULT_VGS_COLS_VERB "vg_name,vg_attr,vg_extent_size,pv_count,lv_count,snap_count,vg_size,vg_free,vg_uuid,vg_profile"
#define DEFAULT_PVS_COLS_VERB "pv_name,vg_name,pv_fmt,pv_attr,pv_size,pv_free,dev_size,pv_uuid"
-#define DEFAULT_SEGS_COLS_VERB "lv_name,vg_name,lv_attr,seg_start,seg_size,stripes,segtype,stripesize,chunksize"
+#define DEFAULT_SEGS_COLS_VERB "lv_name,vg_name,lv_attr,seg_start,seg_size,stripes,data_copies,segtype,stripesize,chunksize"
#define DEFAULT_PVSEGS_COLS_VERB "pv_name,vg_name,pv_fmt,pv_attr,pv_size,pv_free,pvseg_start,pvseg_size,lv_name,seg_start_pe,segtype,seg_pe_ranges"
#define DEFAULT_DEVTYPES_COLS_VERB "devtype_name,devtype_max_partitions,devtype_description"
@@ -216,4 +229,6 @@
#define DEFAULT_THIN_POOL_AUTOEXTEND_THRESHOLD 100
#define DEFAULT_THIN_POOL_AUTOEXTEND_PERCENT 20
+#define DEFAULT_CY_LOCK_TYPE "sanlock"
+
#endif /* _LVM_DEFAULTS_H */
diff --git a/lib/datastruct/str_list.c b/lib/datastruct/str_list.c
index feec8b6f7..1d3f08ad1 100644
--- a/lib/datastruct/str_list.c
+++ b/lib/datastruct/str_list.c
@@ -71,6 +71,21 @@ int str_list_add(struct dm_pool *mem, struct dm_list *sll, const char *str)
return str_list_add_no_dup_check(mem, sll, str);
}
+/* Add contents of sll2 to sll */
+int str_list_add_list(struct dm_pool *mem, struct dm_list *sll, struct dm_list *sll2)
+{
+ struct dm_str_list *sl;
+
+ if (!sll2)
+ return_0;
+
+ dm_list_iterate_items(sl, sll2)
+ if (!str_list_add(mem, sll, sl->str))
+ return_0;
+
+ return 1;
+}
+
void str_list_del(struct dm_list *sll, const char *str)
{
struct dm_list *slh, *slht;
diff --git a/lib/datastruct/str_list.h b/lib/datastruct/str_list.h
index 0046fe42a..3121a28ce 100644
--- a/lib/datastruct/str_list.h
+++ b/lib/datastruct/str_list.h
@@ -21,6 +21,7 @@ struct dm_pool;
struct dm_list *str_list_create(struct dm_pool *mem);
int str_list_add(struct dm_pool *mem, struct dm_list *sll, const char *str);
+int str_list_add_list(struct dm_pool *mem, struct dm_list *sll, struct dm_list *sll2);
int str_list_add_no_dup_check(struct dm_pool *mem, struct dm_list *sll, const char *str);
int str_list_add_h_no_dup_check(struct dm_pool *mem, struct dm_list *sll, const char *str);
void str_list_del(struct dm_list *sll, const char *str);
diff --git a/lib/device/dev-cache.c b/lib/device/dev-cache.c
index ba4ee5462..043a722e1 100644
--- a/lib/device/dev-cache.c
+++ b/lib/device/dev-cache.c
@@ -64,10 +64,23 @@ static void _dev_init(struct device *dev, int max_error_count)
dev->read_ahead = -1;
dev->max_error_count = max_error_count;
+ dev->ext.enabled = 0;
+ dev->ext.src = DEV_EXT_NONE;
+
dm_list_init(&dev->aliases);
dm_list_init(&dev->open_list);
}
+void dev_destroy_file(struct device *dev)
+{
+ if (!(dev->flags & DEV_ALLOCED))
+ return;
+
+ dm_free((void *) dm_list_item(dev->aliases.n, struct dm_str_list)->str);
+ dm_free(dev->aliases.n);
+ dm_free(dev);
+}
+
struct device *dev_create_file(const char *filename, struct device *dev,
struct dm_str_list *alias, int use_malloc)
{
@@ -678,10 +691,12 @@ static int _init_preferred_names(struct cmd_context *cmd)
_cache.preferred_names_matcher = NULL;
- if (!(cn = find_config_tree_node(cmd, devices_preferred_names_CFG, NULL)) ||
+ if (!(cn = find_config_tree_array(cmd, devices_preferred_names_CFG, NULL)) ||
cn->v->type == DM_CFG_EMPTY_ARRAY) {
- log_very_verbose("devices/preferred_names not found in config file: "
- "using built-in preferences");
+ log_very_verbose("devices/preferred_names %s: "
+ "using built-in preferences",
+ cn && cn->v->type == DM_CFG_EMPTY_ARRAY ? "is empty"
+ : "not found in config");
return 1;
}
@@ -940,7 +955,7 @@ struct device *dev_cache_get(const char *name, struct dev_filter *f)
if (d)
dm_hash_remove(_cache.names, name);
log_sys_very_verbose("stat", name);
- return NULL;
+ d = NULL;
}
if (d && (buf.st_rdev != d->dev)) {
@@ -983,12 +998,31 @@ static struct device *_dev_cache_seek_devt(dev_t dev)
*/
struct device *dev_cache_get_by_devt(dev_t dev, struct dev_filter *f)
{
+ char path[PATH_MAX];
+ const char *sysfs_dir;
+ struct stat info;
struct device *d = _dev_cache_seek_devt(dev);
if (d && (d->flags & DEV_REGULAR))
return d;
if (!d) {
+ sysfs_dir = dm_sysfs_dir();
+ if (sysfs_dir && *sysfs_dir) {
+ /* First check if dev is sysfs to avoid useless scan */
+ if (dm_snprintf(path, sizeof(path), "%s/dev/block/%d:%d",
+ sysfs_dir, (int)MAJOR(dev), (int)MINOR(dev)) < 0) {
+ log_error("dm_snprintf partition failed.");
+ return NULL;
+ }
+
+ if (lstat(path, &info)) {
+ log_debug("No sysfs entry for %d:%d.",
+ (int)MAJOR(dev), (int)MINOR(dev));
+ return NULL;
+ }
+ }
+
_full_scan(0);
d = _dev_cache_seek_devt(dev);
}
diff --git a/lib/device/dev-ext-udev-constants.h b/lib/device/dev-ext-udev-constants.h
new file mode 100644
index 000000000..a84d7bc3d
--- /dev/null
+++ b/lib/device/dev-ext-udev-constants.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2015 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/**************************************************************************
+ * Properties saved in udev db and accessible via libudev and used by LVM *
+ **************************************************************************/
+
+/*
+ * DEV_EXT_UDEV_BLKID_TYPE property with various DEV_EXT_UDEV_BLKID_TYPE_*
+ * values that is saved in udev db via blkid call in udev rules
+ */
+#define DEV_EXT_UDEV_BLKID_TYPE "ID_FS_TYPE"
+/*
+ * mpath_member is forced by multipath - it's set in udev db via
+ * multipath call overwriting any existing ID_FS_TYPE value for
+ * a device which is a multipath component which prevents incorrect
+ * claim of the device by any other block device subsystem
+ */
+#define DEV_EXT_UDEV_BLKID_TYPE_MPATH "mpath_member"
+/* FW RAIDs are all *_raid_member types except linux_raid_member which denotes SW RAID */
+#define DEV_EXT_UDEV_BLKID_TYPE_RAID_SUFFIX "_raid_member"
+#define DEV_EXT_UDEV_BLKID_TYPE_SW_RAID "linux_raid_member"
+#define DEV_EXT_UDEV_BLKID_PART_TABLE_TYPE "ID_PART_TABLE_TYPE"
+#define DEV_EXT_UDEV_BLKID_PART_ENTRY_DISK "ID_PART_ENTRY_DISK"
+
+/*
+ * DEV_EXT_UDEV_MPATH_DEVICE_PATH is set by multipath in udev db
+ * with value either 0 or 1. The same functionality as
+ * DEV_EXT_UDEV_BLKID_TYPE_MPATH actually, but introduced later
+ * for some reason.
+ */
+#define DEV_EXT_UDEV_MPATH_DEVICE_PATH "DM_MULTIPATH_DEVICE_PATH"
+
+
+/***********************************************************
+ * Sysfs attributes accessible via libudev and used by LVM *
+ ***********************************************************/
+
+/* the value of size sysfs attribute is size in bytes */
+#define DEV_EXT_UDEV_SYSFS_ATTR_SIZE "size"
+
diff --git a/lib/device/dev-ext.c b/lib/device/dev-ext.c
new file mode 100644
index 000000000..8f2024e92
--- /dev/null
+++ b/lib/device/dev-ext.c
@@ -0,0 +1,164 @@
+/*
+ * Copyright (C) 2014 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "lib.h"
+#include "device.h"
+
+#ifdef UDEV_SYNC_SUPPORT
+#include <libudev.h>
+#endif
+
+struct ext_registry_item {
+ const char *name;
+ struct dev_ext *(* dev_ext_get) (struct device *dev);
+ int (*dev_ext_release) (struct device *dev);
+};
+
+#define EXT_REGISTER(id,name) [id] = { #name, &_dev_ext_get_ ## name, &_dev_ext_release_ ## name }
+
+/*
+ * DEV_EXT_NONE
+ */
+static struct dev_ext *_dev_ext_get_none(struct device *dev)
+{
+ dev->ext.handle = NULL;
+ return &dev->ext;
+}
+
+static int _dev_ext_release_none(struct device *dev)
+{
+ dev->ext.handle = NULL;
+ return 1;
+}
+
+/*
+ * DEV_EXT_UDEV
+ */
+static struct dev_ext *_dev_ext_get_udev(struct device *dev)
+{
+#ifdef UDEV_SYNC_SUPPORT
+ struct udev *udev;
+ struct udev_device *udev_device;
+
+ if (dev->ext.handle)
+ return &dev->ext;
+
+ if (!(udev = udev_get_library_context()))
+ return_NULL;
+
+ if (!(udev_device = udev_device_new_from_devnum(udev, 'b', dev->dev)))
+ return_NULL;
+
+ dev->ext.handle = (void *) udev_device;
+ return &dev->ext;
+#else
+ return NULL;
+#endif
+}
+
+static int _dev_ext_release_udev(struct device *dev)
+{
+#ifdef UDEV_SYNC_SUPPORT
+ if (!dev->ext.handle)
+ return 1;
+
+ /* udev_device_unref can't fail - it has no return value */
+ udev_device_unref((struct udev_device *) dev->ext.handle);
+ dev->ext.handle = NULL;
+ return 1;
+#else
+ return 0;
+#endif
+}
+
+static struct ext_registry_item _ext_registry[DEV_EXT_NUM] = {
+ EXT_REGISTER(DEV_EXT_NONE, none),
+ EXT_REGISTER(DEV_EXT_UDEV, udev)
+};
+
+const char *dev_ext_name(struct device *dev)
+{
+ return _ext_registry[dev->ext.src].name;
+}
+
+static const char *_ext_attached_msg = "External handle attached to device";
+
+struct dev_ext *dev_ext_get(struct device *dev)
+{
+ struct dev_ext *ext;
+ void *handle_ptr;
+
+ handle_ptr = dev->ext.handle;
+
+ if (!(ext = _ext_registry[dev->ext.src].dev_ext_get(dev)))
+ log_error("Failed to get external handle for device %s [%s].",
+ dev_name(dev), dev_ext_name(dev));
+ else if (handle_ptr != dev->ext.handle)
+ log_debug_devs("%s %s [%s:%p]", _ext_attached_msg, dev_name(dev),
+ dev_ext_name(dev), dev->ext.handle);
+
+ return ext;
+}
+
+int dev_ext_release(struct device *dev)
+{
+ int r;
+ void *handle_ptr;
+
+ if (!dev->ext.enabled ||
+ !dev->ext.handle)
+ return 1;
+
+ handle_ptr = dev->ext.handle;
+
+ if (!(r = _ext_registry[dev->ext.src].dev_ext_release(dev)))
+ log_error("Failed to release external handle for device %s [%s:%p].",
+ dev_name(dev), dev_ext_name(dev), dev->ext.handle);
+ else
+ log_debug_devs("External handle detached from device %s [%s:%p]",
+ dev_name(dev), dev_ext_name(dev), handle_ptr);
+
+ return r;
+}
+
+int dev_ext_enable(struct device *dev, dev_ext_t src)
+{
+ if (dev->ext.enabled && (dev->ext.src != src) && !dev_ext_release(dev)) {
+ log_error("Failed to enable external handle for device %s [%s].",
+ dev_name(dev), _ext_registry[src].name);
+ return 0;
+ }
+
+ dev->ext.src = src;
+ dev->ext.enabled = 1;
+
+ return 1;
+}
+
+int dev_ext_disable(struct device *dev)
+{
+ if (!dev->ext.enabled)
+ return 1;
+
+ if (!dev_ext_release(dev)) {
+ log_error("Failed to disable external handle for device %s [%s].",
+ dev_name(dev), dev_ext_name(dev));
+ return 0;
+ }
+
+ dev->ext.enabled = 0;
+ dev->ext.src = DEV_EXT_NONE;
+
+ return 1;
+}
diff --git a/lib/device/dev-io.c b/lib/device/dev-io.c
index 122b616fb..8c2388bb0 100644
--- a/lib/device/dev-io.c
+++ b/lib/device/dev-io.c
@@ -289,25 +289,22 @@ static int _dev_get_size_file(const struct device *dev, uint64_t *size)
return 1;
}
-static int _dev_get_size_dev(const struct device *dev, uint64_t *size)
+static int _dev_get_size_dev(struct device *dev, uint64_t *size)
{
- int fd;
const char *name = dev_name(dev);
- if ((fd = open(name, O_RDONLY)) < 0) {
- log_sys_error("open", name);
- return 0;
- }
+ if (!dev_open_readonly(dev))
+ return_0;
- if (ioctl(fd, BLKGETSIZE64, size) < 0) {
+ if (ioctl(dev_fd(dev), BLKGETSIZE64, size) < 0) {
log_sys_error("ioctl BLKGETSIZE64", name);
- if (close(fd))
+ if (!dev_close(dev))
log_sys_error("close", name);
return 0;
}
*size >>= BLKSIZE_SHIFT; /* Convert to sectors */
- if (close(fd))
+ if (!dev_close(dev))
log_sys_error("close", name);
log_very_verbose("%s: size is %" PRIu64 " sectors", name, *size);
@@ -377,7 +374,7 @@ static int _dev_discard_blocks(struct device *dev, uint64_t offset_bytes, uint64
* Public functions
*---------------------------------------------------------------*/
-int dev_get_size(const struct device *dev, uint64_t *size)
+int dev_get_size(struct device *dev, uint64_t *size)
{
if (!dev)
return 0;
@@ -589,12 +586,8 @@ static void _close(struct device *dev)
log_debug_devs("Closed %s", dev_name(dev));
- if (dev->flags & DEV_ALLOCED) {
- dm_free((void *) dm_list_item(dev->aliases.n, struct dm_str_list)->
- str);
- dm_free(dev->aliases.n);
- dm_free(dev);
- }
+ if (dev->flags & DEV_ALLOCED)
+ dev_destroy_file(dev);
}
static int _dev_close(struct device *dev, int immediate)
diff --git a/lib/device/dev-md.c b/lib/device/dev-md.c
index 76e30939d..603010cc2 100644
--- a/lib/device/dev-md.c
+++ b/lib/device/dev-md.c
@@ -16,6 +16,10 @@
#include "lib.h"
#include "dev-type.h"
#include "xlate.h"
+#ifdef UDEV_SYNC_SUPPORT
+#include <libudev.h> /* for MD detection using udev db records */
+#include "dev-ext-udev-constants.h"
+#endif
#ifdef __linux__
@@ -81,10 +85,31 @@ static uint64_t _v1_sb_offset(uint64_t size, md_minor_version_t minor_version)
return sb_offset;
}
+#ifdef UDEV_SYNC_SUPPORT
+static int _udev_dev_is_md(struct device *dev)
+{
+ const char *value;
+ struct dev_ext *ext;
+
+ if (!(ext = dev_ext_get(dev)))
+ return_0;
+
+ if (!(value = udev_device_get_property_value((struct udev_device *)ext->handle, DEV_EXT_UDEV_BLKID_TYPE)))
+ return 0;
+
+ return !strcmp(value, DEV_EXT_UDEV_BLKID_TYPE_SW_RAID);
+}
+#else
+static int _udev_dev_is_md(struct device *dev)
+{
+ return 0;
+}
+#endif
+
/*
* Returns -1 on error
*/
-int dev_is_md(struct device *dev, uint64_t *offset_found)
+static int _native_dev_is_md(struct device *dev, uint64_t *offset_found)
{
int ret = 1;
md_minor_version_t minor;
@@ -129,6 +154,27 @@ out:
return ret;
}
+int dev_is_md(struct device *dev, uint64_t *offset_found)
+{
+
+ /*
+ * If non-native device status source is selected, use it
+ * only if offset_found is not requested as this
+ * information is not in udev db.
+ */
+ if ((dev->ext.src == DEV_EXT_NONE) || offset_found)
+ return _native_dev_is_md(dev, offset_found);
+
+ if (dev->ext.src == DEV_EXT_UDEV)
+ return _udev_dev_is_md(dev);
+
+ log_error(INTERNAL_ERROR "Missing hook for MD device recognition "
+ "using external device info source %s", dev_ext_name(dev));
+
+ return -1;
+
+}
+
static int _md_sysfs_attribute_snprintf(char *path, size_t size,
struct dev_types *dt,
struct device *blkdev,
diff --git a/lib/device/dev-type.c b/lib/device/dev-type.c
index 001248d45..db4079225 100644
--- a/lib/device/dev-type.c
+++ b/lib/device/dev-type.c
@@ -25,6 +25,11 @@
#include <blkid.h>
#endif
+#ifdef UDEV_SYNC_SUPPORT
+#include <libudev.h>
+#include "dev-ext-udev-constants.h"
+#endif
+
#include "device-types.h"
struct dev_types *create_dev_types(const char *proc_dir,
@@ -112,6 +117,10 @@ struct dev_types *create_dev_types(const char *proc_dir,
if (!strncmp("drbd", line + i, 4) && isspace(*(line + i + 4)))
dt->drbd_major = line_maj;
+ /* Look for DASD */
+ if (!strncmp("dasd", line + i, 4) && isspace(*(line + i + 4)))
+ dt->dasd_major = line_maj;
+
/* Look for EMC powerpath */
if (!strncmp("emcpower", line + i, 8) && isspace(*(line + i + 8)))
dt->emcpower_major = line_maj;
@@ -216,12 +225,18 @@ int dev_subsystem_part_major(struct dev_types *dt, struct device *dev)
const char *dev_subsystem_name(struct dev_types *dt, struct device *dev)
{
+ if (MAJOR(dev->dev) == dt->device_mapper_major)
+ return "DM";
+
if (MAJOR(dev->dev) == dt->md_major)
return "MD";
if (MAJOR(dev->dev) == dt->drbd_major)
return "DRBD";
+ if (MAJOR(dev->dev) == dt->dasd_major)
+ return "DASD";
+
if (MAJOR(dev->dev) == dt->emcpower_major)
return "EMCPOWER";
@@ -272,6 +287,9 @@ static int _is_partitionable(struct dev_types *dt, struct device *dev)
{
int parts = major_max_partitions(dt, MAJOR(dev->dev));
+ if (MAJOR(dev->dev) == dt->device_mapper_major)
+ return 1;
+
/* All MD devices are partitionable via blkext (as of 2.6.28) */
if (MAJOR(dev->dev) == dt->md_major)
return 1;
@@ -314,12 +332,66 @@ static int _has_partition_table(struct device *dev)
return ret;
}
-int dev_is_partitioned(struct dev_types *dt, struct device *dev)
+#ifdef UDEV_SYNC_SUPPORT
+static int _udev_dev_is_partitioned(struct device *dev)
+{
+ struct dev_ext *ext;
+
+ if (!(ext = dev_ext_get(dev)))
+ return_0;
+
+ if (!udev_device_get_property_value((struct udev_device *)ext->handle, DEV_EXT_UDEV_BLKID_PART_TABLE_TYPE))
+ return 0;
+
+ if (udev_device_get_property_value((struct udev_device *)ext->handle, DEV_EXT_UDEV_BLKID_PART_ENTRY_DISK))
+ return 0;
+
+ return 1;
+}
+#else
+static int _udev_dev_is_partitioned(struct device *dev)
{
+ return 0;
+}
+#endif
+
+static int _native_dev_is_partitioned(struct dev_types *dt, struct device *dev)
+{
+ int r;
+
if (!_is_partitionable(dt, dev))
return 0;
- return _has_partition_table(dev);
+ /* Unpartitioned DASD devices are not supported. */
+ if (MAJOR(dev->dev) == dt->dasd_major)
+ return 1;
+
+ if (!dev_open_readonly_quiet(dev)) {
+ log_debug_devs("%s: failed to open device, considering device "
+ "is partitioned", dev_name(dev));
+ return 1;
+ }
+
+ r = _has_partition_table(dev);
+
+ if (!dev_close(dev))
+ stack;
+
+ return r;
+}
+
+int dev_is_partitioned(struct dev_types *dt, struct device *dev)
+{
+ if (dev->ext.src == DEV_EXT_NONE)
+ return _native_dev_is_partitioned(dt, dev);
+
+ if (dev->ext.src == DEV_EXT_UDEV)
+ return _udev_dev_is_partitioned(dev);
+
+ log_error(INTERNAL_ERROR "Missing hook for partition table recognition "
+ "using external device info source %s", dev_ext_name(dev));
+
+ return 0;
}
/*
@@ -361,7 +433,7 @@ int dev_get_primary_dev(struct dev_types *dt, struct device *dev, dev_t *result)
*/
if ((parts = dt->dev_type_array[major].max_partitions) > 1) {
if ((residue = minor % parts)) {
- *result = MKDEV((dev_t)major, (minor - residue));
+ *result = MKDEV((dev_t)major, (dev_t)(minor - residue));
ret = 2;
} else {
*result = dev->dev;
@@ -438,7 +510,7 @@ int dev_get_primary_dev(struct dev_types *dt, struct device *dev, dev_t *result)
path, buffer);
goto out;
}
- *result = MKDEV((dev_t)major, minor);
+ *result = MKDEV((dev_t)major, (dev_t)minor);
ret = 2;
out:
if (fp && fclose(fp))
@@ -456,12 +528,14 @@ static inline int _type_in_flag_list(const char *type, uint32_t flag_list)
((flag_list & TYPE_DM_SNAPSHOT_COW) && !strcmp(type, "DM_snapshot_cow")));
}
+#define MSG_FAILED_SIG_OFFSET "Failed to get offset of the %s signature on %s."
+#define MSG_FAILED_SIG_LENGTH "Failed to get length of the %s signature on %s."
+#define MSG_WIPING_SKIPPED " Wiping skipped."
+
static int _blkid_wipe(blkid_probe probe, struct device *dev, const char *name,
uint32_t types_to_exclude, uint32_t types_no_prompt,
int yes, force_t force)
{
- static const char _msg_failed_offset[] = "Failed to get offset of the %s signature on %s.";
- static const char _msg_failed_length[] = "Failed to get length of the %s signature on %s.";
static const char _msg_wiping[] = "Wiping %s signature on %s.";
const char *offset = NULL, *type = NULL, *magic = NULL,
*usage = NULL, *label = NULL, *uuid = NULL;
@@ -470,23 +544,43 @@ static int _blkid_wipe(blkid_probe probe, struct device *dev, const char *name,
if (!blkid_probe_lookup_value(probe, "TYPE", &type, NULL)) {
if (_type_in_flag_list(type, types_to_exclude))
- return 1;
+ return 2;
if (blkid_probe_lookup_value(probe, "SBMAGIC_OFFSET", &offset, NULL)) {
- log_error(_msg_failed_offset, type, name);
- return 0;
+ if (force < DONT_PROMPT) {
+ log_error(MSG_FAILED_SIG_OFFSET, type, name);
+ return 0;
+ } else {
+ log_error("WARNING: " MSG_FAILED_SIG_OFFSET MSG_WIPING_SKIPPED, type, name);
+ return 2;
+ }
}
if (blkid_probe_lookup_value(probe, "SBMAGIC", &magic, &len)) {
- log_error(_msg_failed_length, type, name);
- return 0;
+ if (force < DONT_PROMPT) {
+ log_error(MSG_FAILED_SIG_LENGTH, type, name);
+ return 0;
+ } else {
+ log_warn("WARNING: " MSG_FAILED_SIG_LENGTH MSG_WIPING_SKIPPED, type, name);
+ return 2;
+ }
}
} else if (!blkid_probe_lookup_value(probe, "PTTYPE", &type, NULL)) {
if (blkid_probe_lookup_value(probe, "PTMAGIC_OFFSET", &offset, NULL)) {
- log_error(_msg_failed_offset, type, name);
- return 0;
+ if (force < DONT_PROMPT) {
+ log_error(MSG_FAILED_SIG_OFFSET, type, name);
+ return 0;
+ } else {
+ log_warn("WARNING: " MSG_FAILED_SIG_OFFSET MSG_WIPING_SKIPPED, type, name);
+ return 2;
+ }
}
if (blkid_probe_lookup_value(probe, "PTMAGIC", &magic, &len)) {
- log_error(_msg_failed_length, type, name);
- return 0;
+ if (force < DONT_PROMPT) {
+ log_error(MSG_FAILED_SIG_LENGTH, type, name);
+ return 0;
+ } else {
+ log_warn("WARNING: " MSG_FAILED_SIG_LENGTH MSG_WIPING_SKIPPED, type, name);
+ return 2;
+ }
}
usage = "partition table";
} else
@@ -526,12 +620,17 @@ static int _blkid_wipe(blkid_probe probe, struct device *dev, const char *name,
static int _wipe_known_signatures_with_blkid(struct device *dev, const char *name,
uint32_t types_to_exclude,
uint32_t types_no_prompt,
- int yes, force_t force)
+ int yes, force_t force, int *wiped)
{
blkid_probe probe = NULL;
- int found = 0, wiped = 0, left = 0;
+ int found = 0, left = 0, wiped_tmp;
+ int r_wipe;
int r = 0;
+ if (!wiped)
+ wiped = &wiped_tmp;
+ *wiped = 0;
+
/* TODO: Should we check for valid dev - _dev_is_valid(dev)? */
if (!(probe = blkid_new_probe_from_filename(dev_name(dev)))) {
@@ -552,15 +651,17 @@ static int _wipe_known_signatures_with_blkid(struct device *dev, const char *nam
BLKID_SUBLKS_BADCSUM);
while (!blkid_do_probe(probe)) {
- found++;
- if (_blkid_wipe(probe, dev, name, types_to_exclude, types_no_prompt, yes, force))
- wiped++;
+ if ((r_wipe = _blkid_wipe(probe, dev, name, types_to_exclude, types_no_prompt, yes, force)) == 1)
+ (*wiped)++;
+ /* do not count excluded types */
+ if (r_wipe != 2)
+ found++;
}
if (!found)
r = 1;
- left = found - wiped;
+ left = found - *wiped;
if (!left)
r = 1;
else
@@ -575,7 +676,7 @@ out:
#endif /* BLKID_WIPING_SUPPORT */
static int _wipe_signature(struct device *dev, const char *type, const char *name,
- int wipe_len, int yes, force_t force,
+ int wipe_len, int yes, force_t force, int *wiped,
int (*signature_detection_fn)(struct device *dev, uint64_t *offset_found))
{
int wipe;
@@ -605,17 +706,24 @@ static int _wipe_signature(struct device *dev, const char *type, const char *nam
return 0;
}
+ (*wiped)++;
return 1;
}
static int _wipe_known_signatures_with_lvm(struct device *dev, const char *name,
uint32_t types_to_exclude __attribute__((unused)),
uint32_t types_no_prompt __attribute__((unused)),
- int yes, force_t force)
+ int yes, force_t force, int *wiped)
{
- if (!_wipe_signature(dev, "software RAID md superblock", name, 4, yes, force, dev_is_md) ||
- !_wipe_signature(dev, "swap signature", name, 10, yes, force, dev_is_swap) ||
- !_wipe_signature(dev, "LUKS signature", name, 8, yes, force, dev_is_luks))
+ int wiped_tmp;
+
+ if (!wiped)
+ wiped = &wiped_tmp;
+ *wiped = 0;
+
+ if (!_wipe_signature(dev, "software RAID md superblock", name, 4, yes, force, wiped, dev_is_md) ||
+ !_wipe_signature(dev, "swap signature", name, 10, yes, force, wiped, dev_is_swap) ||
+ !_wipe_signature(dev, "LUKS signature", name, 8, yes, force, wiped, dev_is_luks))
return 0;
return 1;
@@ -623,19 +731,20 @@ static int _wipe_known_signatures_with_lvm(struct device *dev, const char *name,
int wipe_known_signatures(struct cmd_context *cmd, struct device *dev,
const char *name, uint32_t types_to_exclude,
- uint32_t types_no_prompt, int yes, force_t force)
+ uint32_t types_no_prompt, int yes, force_t force,
+ int *wiped)
{
#ifdef BLKID_WIPING_SUPPORT
if (find_config_tree_bool(cmd, allocation_use_blkid_wiping_CFG, NULL))
return _wipe_known_signatures_with_blkid(dev, name,
types_to_exclude,
types_no_prompt,
- yes, force);
+ yes, force, wiped);
#endif
return _wipe_known_signatures_with_lvm(dev, name,
types_to_exclude,
types_no_prompt,
- yes, force);
+ yes, force, wiped);
}
#ifdef __linux__
@@ -715,7 +824,7 @@ static unsigned long _dev_topology_attribute(struct dev_types *dt,
}
log_very_verbose("Device %s: %s is %lu%s.",
- dev_name(dev), attribute, result, default_value ? "" : " bytes");
+ dev_name(dev), attribute, value, default_value ? "" : " bytes");
result = value >> SECTOR_SHIFT;
diff --git a/lib/device/dev-type.h b/lib/device/dev-type.h
index 8dcd4c84a..2a49b4b80 100644
--- a/lib/device/dev-type.h
+++ b/lib/device/dev-type.h
@@ -44,6 +44,7 @@ struct dev_types {
int device_mapper_major;
int emcpower_major;
int power2_major;
+ int dasd_major;
struct dev_type_def dev_type_array[NUMBER_OF_MAJORS];
};
@@ -65,7 +66,7 @@ int dev_is_luks(struct device *dev, uint64_t *signature);
#define TYPE_DM_SNAPSHOT_COW 0x004
int wipe_known_signatures(struct cmd_context *cmd, struct device *dev, const char *name,
uint32_t types_to_exclude, uint32_t types_no_prompt,
- int yes, force_t force);
+ int yes, force_t force, int *wiped);
/* Type-specific device properties */
unsigned long dev_md_stripe_width(struct dev_types *dt, struct device *dev);
diff --git a/lib/device/device.h b/lib/device/device.h
index c916a4bdb..733b0d04e 100644
--- a/lib/device/device.h
+++ b/lib/device/device.h
@@ -29,6 +29,23 @@
#define DEV_O_DIRECT_TESTED 0x00000040 /* DEV_O_DIRECT is reliable */
/*
+ * Support for external device info.
+ * Any new external device info source needs to be
+ * registered using EXT_REGISTER macro in dev-ext.c.
+ */
+typedef enum {
+ DEV_EXT_NONE,
+ DEV_EXT_UDEV,
+ DEV_EXT_NUM
+} dev_ext_t;
+
+struct dev_ext {
+ int enabled;
+ dev_ext_t src;
+ void *handle;
+};
+
+/*
* All devices in LVM will be represented by one of these.
* pointer comparisons are valid.
*/
@@ -47,6 +64,7 @@ struct device {
uint32_t flags;
uint64_t end;
struct dm_list open_list;
+ struct dev_ext ext;
char pvid[ID_LEN + 1];
char _padding[7];
@@ -64,10 +82,19 @@ struct device_area {
};
/*
+ * Support for external device info.
+ */
+const char *dev_ext_name(struct device *dev);
+int dev_ext_enable(struct device *dev, dev_ext_t src);
+int dev_ext_disable(struct device *dev);
+struct dev_ext *dev_ext_get(struct device *dev);
+int dev_ext_release(struct device *dev);
+
+/*
* All io should use these routines.
*/
int dev_get_block_size(struct device *dev, unsigned int *phys_block_size, unsigned int *block_size);
-int dev_get_size(const struct device *dev, uint64_t *size);
+int dev_get_size(struct device *dev, uint64_t *size);
int dev_get_read_ahead(struct device *dev, uint32_t *read_ahead);
int dev_discard_blocks(struct device *dev, uint64_t offset_bytes, uint64_t size_bytes);
@@ -96,6 +123,7 @@ void dev_flush(struct device *dev);
struct device *dev_create_file(const char *filename, struct device *dev,
struct dm_str_list *alias, int use_malloc);
+void dev_destroy_file(struct device *dev);
/* Return a valid device name from the alias list; NULL otherwise */
const char *dev_name_confirmed(struct device *dev, int quiet);
diff --git a/lib/display/display.c b/lib/display/display.c
index f86b67f47..308ce3191 100644
--- a/lib/display/display.c
+++ b/lib/display/display.c
@@ -24,10 +24,6 @@
#include <stdarg.h>
-#define SIZE_BUF 128
-
-typedef enum { SIZE_LONG = 0, SIZE_SHORT = 1, SIZE_UNIT = 2 } size_len_t;
-
static const struct {
alloc_policy_t alloc;
const char str[14]; /* must be changed when size extends 13 chars */
@@ -86,177 +82,90 @@ alloc_policy_t get_alloc_from_string(const char *str)
return ALLOC_INVALID;
}
-static const char *_percent_types[7] = { "NONE", "VG", "FREE", "LV", "PVS", "ORIGIN" };
-
-const char *get_percent_string(percent_type_t def)
+const char *get_lock_type_string(lock_type_t lock_type)
{
- return _percent_types[def];
+ switch (lock_type) {
+ case LOCK_TYPE_INVALID:
+ return "invalid";
+ case LOCK_TYPE_NONE:
+ return "none";
+ case LOCK_TYPE_CLVM:
+ return "clvm";
+ case LOCK_TYPE_DLM:
+ return "dlm";
+ case LOCK_TYPE_SANLOCK:
+ return "sanlock";
+ }
+ return "invalid";
}
-const char *display_lvname(const struct logical_volume *lv)
+lock_type_t get_lock_type_from_string(const char *str)
{
- /* On allocation failure, just return the LV name. */
- return lv_fullname_dup(lv->vg->cmd->mem, lv) ? : lv->name;
+ if (!str)
+ return LOCK_TYPE_NONE;
+ if (!strcmp(str, "none"))
+ return LOCK_TYPE_NONE;
+ if (!strcmp(str, "clvm"))
+ return LOCK_TYPE_CLVM;
+ if (!strcmp(str, "dlm"))
+ return LOCK_TYPE_DLM;
+ if (!strcmp(str, "sanlock"))
+ return LOCK_TYPE_SANLOCK;
+ return LOCK_TYPE_INVALID;
}
-#define BASE_UNKNOWN 0
-#define BASE_SHARED 1
-#define BASE_1024 8
-#define BASE_1000 15
-#define BASE_SPECIAL 21
-#define NUM_UNIT_PREFIXES 6
-#define NUM_SPECIAL 3
+static const char *_percent_types[7] = { "NONE", "VG", "FREE", "LV", "PVS", "ORIGIN" };
-/* Size supplied in sectors */
-static const char *_display_size(const struct cmd_context *cmd,
- uint64_t size, size_len_t sl)
+const char *get_percent_string(percent_type_t def)
{
- unsigned base = BASE_UNKNOWN;
- unsigned s;
- int suffix, precision;
- uint64_t byte = UINT64_C(0);
- uint64_t units = UINT64_C(1024);
- char *size_buf = NULL;
- const char * const size_str[][3] = {
- /* BASE_UNKNOWN */
- {" ", " ", " "}, /* [0] */
-
- /* BASE_SHARED - Used if cmd->si_unit_consistency = 0 */
- {" Exabyte", " EB", "E"}, /* [1] */
- {" Petabyte", " PB", "P"}, /* [2] */
- {" Terabyte", " TB", "T"}, /* [3] */
- {" Gigabyte", " GB", "G"}, /* [4] */
- {" Megabyte", " MB", "M"}, /* [5] */
- {" Kilobyte", " KB", "K"}, /* [6] */
- {" Byte ", " B", "B"}, /* [7] */
-
- /* BASE_1024 - Used if cmd->si_unit_consistency = 1 */
- {" Exbibyte", " EiB", "e"}, /* [8] */
- {" Pebibyte", " PiB", "p"}, /* [9] */
- {" Tebibyte", " TiB", "t"}, /* [10] */
- {" Gibibyte", " GiB", "g"}, /* [11] */
- {" Mebibyte", " MiB", "m"}, /* [12] */
- {" Kibibyte", " KiB", "k"}, /* [13] */
- {" Byte ", " B", "b"}, /* [14] */
-
- /* BASE_1000 - Used if cmd->si_unit_consistency = 1 */
- {" Exabyte", " EB", "E"}, /* [15] */
- {" Petabyte", " PB", "P"}, /* [16] */
- {" Terabyte", " TB", "T"}, /* [17] */
- {" Gigabyte", " GB", "G"}, /* [18] */
- {" Megabyte", " MB", "M"}, /* [19] */
- {" Kilobyte", " kB", "K"}, /* [20] */
-
- /* BASE_SPECIAL */
- {" Byte ", " B ", "B"}, /* [21] (shared with BASE_1000) */
- {" Units ", " Un", "U"}, /* [22] */
- {" Sectors ", " Se", "S"}, /* [23] */
- };
-
- if (!(size_buf = dm_pool_alloc(cmd->mem, SIZE_BUF))) {
- log_error("no memory for size display buffer");
- return "";
- }
-
- suffix = cmd->current_settings.suffix;
-
- if (!cmd->si_unit_consistency) {
- /* Case-independent match */
- for (s = 0; s < NUM_UNIT_PREFIXES; s++)
- if (toupper((int) cmd->current_settings.unit_type) ==
- *size_str[BASE_SHARED + s][2]) {
- base = BASE_SHARED;
- break;
- }
- } else {
- /* Case-dependent match for powers of 1000 */
- for (s = 0; s < NUM_UNIT_PREFIXES; s++)
- if (cmd->current_settings.unit_type ==
- *size_str[BASE_1000 + s][2]) {
- base = BASE_1000;
- break;
- }
-
- /* Case-dependent match for powers of 1024 */
- if (base == BASE_UNKNOWN)
- for (s = 0; s < NUM_UNIT_PREFIXES; s++)
- if (cmd->current_settings.unit_type ==
- *size_str[BASE_1024 + s][2]) {
- base = BASE_1024;
- break;
- }
- }
-
- if (base == BASE_UNKNOWN)
- /* Check for special units - s, b or u */
- for (s = 0; s < NUM_SPECIAL; s++)
- if (toupper((int) cmd->current_settings.unit_type) ==
- *size_str[BASE_SPECIAL + s][2]) {
- base = BASE_SPECIAL;
- break;
- }
-
- if (size == UINT64_C(0)) {
- if (base == BASE_UNKNOWN)
- s = 0;
- sprintf(size_buf, "0%s", suffix ? size_str[base + s][sl] : "");
- return size_buf;
- }
-
- size *= UINT64_C(512);
-
- if (base != BASE_UNKNOWN)
- byte = cmd->current_settings.unit_factor;
- else {
- /* Human-readable style */
- if (cmd->current_settings.unit_type == 'H') {
- units = UINT64_C(1000);
- base = BASE_1000;
- } else {
- units = UINT64_C(1024);
- base = BASE_1024;
- }
+ return _percent_types[def];
+}
- if (!cmd->si_unit_consistency)
- base = BASE_SHARED;
+const char *display_lvname(const struct logical_volume *lv)
+{
+ char *name;
+ int r;
- byte = units * units * units * units * units * units;
+ if ((lv->vg->cmd->display_lvname_idx + NAME_LEN) >= sizeof((lv->vg->cmd->display_buffer)))
+ lv->vg->cmd->display_lvname_idx = 0;
- for (s = 0; s < NUM_UNIT_PREFIXES && size < byte; s++)
- byte /= units;
+ name = lv->vg->cmd->display_buffer + lv->vg->cmd->display_lvname_idx;
+ r = dm_snprintf(name, NAME_LEN, "%s/%s", lv->vg->name, lv->name);
- suffix = 1;
+ if (r < 0) {
+ log_error("Full LV name \"%s/%s\" is too long.", lv->vg->name, lv->name);
+ return NULL;
}
- /* FIXME Make precision configurable */
- switch (toupper(*size_str[base + s][SIZE_UNIT])) {
- case 'B':
- case 'S':
- precision = 0;
- break;
- default:
- precision = 2;
- }
+ lv->vg->cmd->display_lvname_idx += r + 1;
- snprintf(size_buf, SIZE_BUF - 1, "%.*f%s", precision,
- (double) size / byte, suffix ? size_str[base + s][sl] : "");
+ return name;
+}
- return size_buf;
+/* Size supplied in sectors */
+static const char *_display_size(const struct cmd_context *cmd,
+ uint64_t size, dm_size_suffix_t suffix_type)
+{
+ return dm_size_to_string(cmd->mem, size, cmd->current_settings.unit_type,
+ cmd->si_unit_consistency,
+ cmd->current_settings.unit_factor,
+ cmd->current_settings.suffix,
+ suffix_type);
}
const char *display_size_long(const struct cmd_context *cmd, uint64_t size)
{
- return _display_size(cmd, size, SIZE_LONG);
+ return _display_size(cmd, size, DM_SIZE_LONG);
}
const char *display_size_units(const struct cmd_context *cmd, uint64_t size)
{
- return _display_size(cmd, size, SIZE_UNIT);
+ return _display_size(cmd, size, DM_SIZE_UNIT);
}
const char *display_size(const struct cmd_context *cmd, uint64_t size)
{
- return _display_size(cmd, size, SIZE_SHORT);
+ return _display_size(cmd, size, DM_SIZE_SHORT);
}
void pvdisplay_colons(const struct physical_volume *pv)
@@ -385,7 +294,7 @@ int pvdisplay_short(const struct cmd_context *cmd __attribute__((unused)),
char uuid[64] __attribute__((aligned(8)));
if (!pv)
- return 0;
+ return_0;
if (!id_write_format(&pv->id, uuid, sizeof(uuid)))
return_0;
@@ -399,7 +308,8 @@ int pvdisplay_short(const struct cmd_context *cmd __attribute__((unused)),
pv->pe_count, pv->pe_count - pv->pe_alloc_count);
log_print(" ");
- return 0;
+
+ return 1; /* ECMD_PROCESSED */
}
void lvdisplay_colons(const struct logical_volume *lv)
@@ -473,7 +383,7 @@ int lvdisplay_full(struct cmd_context *cmd,
log_print("LV UUID %s", uuid);
log_print("LV Write Access %s", access_str);
log_print("LV Creation host, time %s, %s",
- lv_host_dup(cmd->mem, lv), lv_time_dup(cmd->mem, lv));
+ lv_host_dup(cmd->mem, lv), lv_time_dup(cmd->mem, lv, 1));
if (lv_is_origin(lv)) {
log_print("LV snapshot status source of");
@@ -623,7 +533,7 @@ int lvdisplay_full(struct cmd_context *cmd,
log_print(" ");
- return 0;
+ return 1; /* ECMD_PROCESSED */
}
void display_stripe(const struct lv_segment *seg, uint32_t s, const char *pre)
@@ -695,7 +605,7 @@ void vgdisplay_full(const struct volume_group *vg)
log_print("--- Volume group ---");
log_print("VG Name %s", vg->name);
- log_print("System ID %s", vg->system_id);
+ log_print("System ID %s", (vg->system_id && *vg->system_id) ? vg->system_id : vg->lvm1_system_id ? : "");
log_print("Format %s", vg->fid->fmt->name);
if (vg->fid->fmt->features & FMT_MDAS) {
log_print("Metadata Areas %d",
@@ -855,7 +765,7 @@ void display_name_error(name_error_t name_error)
case NAME_INVALID_EMPTY:
log_error("Name is zero length.");
break;
- case NAME_INVALID_HYPEN:
+ case NAME_INVALID_HYPHEN:
log_error("Name cannot start with hyphen.");
break;
case NAME_INVALID_DOTS:
diff --git a/lib/display/display.h b/lib/display/display.h
index cc5654b61..f4e766c09 100644
--- a/lib/display/display.h
+++ b/lib/display/display.h
@@ -64,6 +64,9 @@ const char *get_alloc_string(alloc_policy_t alloc);
char alloc_policy_char(alloc_policy_t alloc);
alloc_policy_t get_alloc_from_string(const char *str);
+const char *get_lock_type_string(lock_type_t lock_type);
+lock_type_t get_lock_type_from_string(const char *str);
+
const char *get_percent_string(percent_type_t def);
char yes_no_prompt(const char *prompt, ...) __attribute__ ((format(printf, 1, 2)));
diff --git a/lib/filters/filter-composite.c b/lib/filters/filter-composite.c
index a6ba71aef..1a2e17281 100644
--- a/lib/filters/filter-composite.c
+++ b/lib/filters/filter-composite.c
@@ -27,6 +27,17 @@ static int _and_p(struct dev_filter *f, struct device *dev)
return 1;
}
+static int _and_p_with_dev_ext_info(struct dev_filter *f, struct device *dev)
+{
+ int r;
+
+ dev_ext_enable(dev, external_device_info_source());
+ r = _and_p(f, dev);
+ dev_ext_disable(dev);
+
+ return r;
+}
+
static void _composite_destroy(struct dev_filter *f)
{
struct dev_filter **filters;
@@ -62,7 +73,7 @@ static void _wipe(struct dev_filter *f)
(*filters)->wipe(*filters);
}
-struct dev_filter *composite_filter_create(int n, struct dev_filter **filters)
+struct dev_filter *composite_filter_create(int n, int use_dev_ext_info, struct dev_filter **filters)
{
struct dev_filter **filters_copy, *cft;
@@ -83,7 +94,7 @@ struct dev_filter *composite_filter_create(int n, struct dev_filter **filters)
return NULL;
}
- cft->passes_filter = _and_p;
+ cft->passes_filter = use_dev_ext_info ? _and_p_with_dev_ext_info : _and_p;
cft->destroy = _composite_destroy;
cft->dump = _dump;
cft->wipe = _wipe;
diff --git a/lib/filters/filter-fwraid.c b/lib/filters/filter-fwraid.c
new file mode 100644
index 000000000..f16833e66
--- /dev/null
+++ b/lib/filters/filter-fwraid.c
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2014 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "lib.h"
+#include "filter.h"
+
+#ifdef UDEV_SYNC_SUPPORT
+#include <libudev.h>
+#include "dev-ext-udev-constants.h"
+#endif
+
+#ifdef __linux__
+
+#ifdef UDEV_SYNC_SUPPORT
+static int _udev_dev_is_fwraid(struct device *dev)
+{
+ const char *value;
+
+ value = udev_device_get_property_value((struct udev_device *)dev->ext.handle, DEV_EXT_UDEV_BLKID_TYPE);
+ if (value && strcmp(value, DEV_EXT_UDEV_BLKID_TYPE_SW_RAID) && strstr(value, DEV_EXT_UDEV_BLKID_TYPE_RAID_SUFFIX))
+ return 1;
+
+ return 0;
+}
+#else
+static int _udev_dev_is_fwraid(struct device *dev)
+{
+ return 0;
+}
+#endif
+
+static int _native_dev_is_fwraid(struct device *dev)
+{
+ log_verbose("%s: Firmware RAID detection is not supported by LVM natively. "
+ "Skipping firmware raid detection. ", dev_name(dev));
+ return 0;
+}
+
+static int _dev_is_fwraid(struct device *dev)
+{
+ if (dev->ext.src == DEV_EXT_NONE)
+ return _native_dev_is_fwraid(dev);
+
+ if (dev->ext.src == DEV_EXT_UDEV)
+ return _udev_dev_is_fwraid(dev);
+
+ log_error(INTERNAL_ERROR "Missing hook for firmware RAID recognition "
+ "using external device info source %s", dev_ext_name(dev));
+
+ return 0;
+}
+
+static int _ignore_fwraid(struct dev_filter *f __attribute__((unused)),
+ struct device *dev)
+{
+ int ret;
+
+ if (!fwraid_filtering())
+ return 1;
+
+ ret = _dev_is_fwraid(dev);
+
+ if (ret == 1) {
+ log_debug_devs("%s: Skipping firmware RAID component device [%s:%p]",
+ dev_name(dev), dev_ext_name(dev), dev->ext.handle);
+ return 0;
+ }
+
+ if (ret < 0) {
+ log_debug_devs("%s: Skipping: error in firmware RAID component detection",
+ dev_name(dev));
+ return 0;
+ }
+
+ return 1;
+}
+
+static void _destroy(struct dev_filter *f)
+{
+ if (f->use_count)
+ log_error(INTERNAL_ERROR "Destroying firmware RAID filter while in use %u times.", f->use_count);
+
+ dm_free(f);
+}
+
+struct dev_filter *fwraid_filter_create(struct dev_types *dt __attribute__((unused)))
+{
+ struct dev_filter *f;
+
+ if (!(f = dm_zalloc(sizeof(*f)))) {
+ log_error("Firmware RAID filter allocation failed");
+ return NULL;
+ }
+
+ f->passes_filter = _ignore_fwraid;
+ f->destroy = _destroy;
+ f->use_count = 0;
+ f->private = NULL;
+
+ log_debug_devs("Firmware RAID filter initialised.");
+
+ return f;
+}
+
+#else
+
+struct dev_filter *fwraid_filter_create(struct dev_types *dt __attribute__((unused)))
+{
+ return NULL;
+}
+
+#endif
diff --git a/lib/filters/filter-md.c b/lib/filters/filter-md.c
index fe79f3da5..ad9f5409a 100644
--- a/lib/filters/filter-md.c
+++ b/lib/filters/filter-md.c
@@ -29,7 +29,8 @@ static int _ignore_md(struct dev_filter *f __attribute__((unused)),
ret = dev_is_md(dev, NULL);
if (ret == 1) {
- log_debug_devs("%s: Skipping md component device", dev_name(dev));
+ log_debug_devs("%s: Skipping md component device [%s:%p]",
+ dev_name(dev), dev_ext_name(dev), dev->ext.handle);
return 0;
}
diff --git a/lib/filters/filter-mpath.c b/lib/filters/filter-mpath.c
index 0f12c730e..0016a515e 100644
--- a/lib/filters/filter-mpath.c
+++ b/lib/filters/filter-mpath.c
@@ -15,6 +15,10 @@
#include "lib.h"
#include "filter.h"
#include "activate.h"
+#ifdef UDEV_SYNC_SUPPORT
+#include <libudev.h>
+#include "dev-ext-udev-constants.h"
+#endif
#ifdef __linux__
@@ -141,7 +145,33 @@ static int _get_parent_mpath(const char *dir, char *name, int max_size)
return r;
}
-static int _dev_is_mpath(struct dev_filter *f, struct device *dev)
+#ifdef UDEV_SYNC_SUPPORT
+static int _udev_dev_is_mpath(struct device *dev)
+{
+ const char *value;
+ struct dev_ext *ext;
+
+ if (!(ext = dev_ext_get(dev)))
+ return_0;
+
+ value = udev_device_get_property_value((struct udev_device *)ext->handle, DEV_EXT_UDEV_BLKID_TYPE);
+ if (value && !strcmp(value, DEV_EXT_UDEV_BLKID_TYPE_MPATH))
+ return 1;
+
+ value = udev_device_get_property_value((struct udev_device *)ext->handle, DEV_EXT_UDEV_MPATH_DEVICE_PATH);
+ if (value && !strcmp(value, "1"))
+ return 1;
+
+ return 0;
+}
+#else
+static int _udev_dev_is_mpath(struct device *dev)
+{
+ return 0;
+}
+#endif
+
+static int _native_dev_is_mpath(struct dev_filter *f, struct device *dev)
{
struct dev_types *dt = (struct dev_types *) f->private;
const char *part_name, *name;
@@ -200,10 +230,25 @@ static int _dev_is_mpath(struct dev_filter *f, struct device *dev)
return lvm_dm_prefix_check(major, minor, MPATH_PREFIX);
}
+static int _dev_is_mpath(struct dev_filter *f, struct device *dev)
+{
+ if (dev->ext.src == DEV_EXT_NONE)
+ return _native_dev_is_mpath(f, dev);
+
+ if (dev->ext.src == DEV_EXT_UDEV)
+ return _udev_dev_is_mpath(dev);
+
+ log_error(INTERNAL_ERROR "Missing hook for mpath recognition "
+ "using external device info source %s", dev_ext_name(dev));
+
+ return 0;
+}
+
static int _ignore_mpath(struct dev_filter *f, struct device *dev)
{
if (_dev_is_mpath(f, dev) == 1) {
- log_debug_devs("%s: Skipping mpath component device", dev_name(dev));
+ log_debug_devs("%s: Skipping mpath component device [%s:%p]",
+ dev_name(dev), dev_ext_name(dev), dev->ext.handle);
return 0;
}
diff --git a/lib/filters/filter-partitioned.c b/lib/filters/filter-partitioned.c
index e3caa0e83..9ca8f6fd6 100644
--- a/lib/filters/filter-partitioned.c
+++ b/lib/filters/filter-partitioned.c
@@ -19,40 +19,14 @@
static int _passes_partitioned_filter(struct dev_filter *f, struct device *dev)
{
struct dev_types *dt = (struct dev_types *) f->private;
- const char *name = dev_name(dev);
- int ret = 0;
- uint64_t size;
-
- /* Check it's accessible */
- if (!dev_open_readonly_quiet(dev)) {
- log_debug_devs("%s: Skipping: open failed", name);
- return 0;
- }
-
- /* Check it's not too small */
- if (!dev_get_size(dev, &size)) {
- log_debug_devs("%s: Skipping: dev_get_size failed", name);
- goto out;
- }
-
- if (size < pv_min_size()) {
- log_debug_devs("%s: Skipping: Too small to hold a PV", name);
- goto out;
- }
if (dev_is_partitioned(dt, dev)) {
- log_debug_devs("%s: Skipping: Partition table signature found",
- name);
- goto out;
+ log_debug_devs("%s: Skipping: Partition table signature found [%s:%p]",
+ dev_name(dev), dev_ext_name(dev), dev->ext.handle);
+ return 0;
}
- ret = 1;
-
- out:
- if (!dev_close(dev))
- stack;
-
- return ret;
+ return 1;
}
static void _partitioned_filter_destroy(struct dev_filter *f)
diff --git a/lib/filters/filter-persistent.c b/lib/filters/filter-persistent.c
index b4a768367..8c7b79fd6 100644
--- a/lib/filters/filter-persistent.c
+++ b/lib/filters/filter-persistent.c
@@ -22,7 +22,7 @@ struct pfilter {
char *file;
struct dm_hash_table *devices;
struct dev_filter *real;
- time_t ctime;
+ struct timespec ctime;
struct dev_types *dt;
};
@@ -106,7 +106,7 @@ int persistent_filter_load(struct dev_filter *f, struct dm_config_tree **cft_out
}
if (!stat(pf->file, &info))
- pf->ctime = info.st_ctime;
+ lvm_stat_ctim(&pf->ctime, &info);
else {
log_very_verbose("%s: stat failed: %s", pf->file,
strerror(errno));
@@ -177,6 +177,7 @@ static int _persistent_filter_dump(struct dev_filter *f, int merge_existing)
struct pfilter *pf;
char *tmp_file;
struct stat info, info2;
+ struct timespec ts;
struct dm_config_tree *cft = NULL;
FILE *fp;
int lockfd;
@@ -227,7 +228,8 @@ static int _persistent_filter_dump(struct dev_filter *f, int merge_existing)
/*
* If file contents changed since we loaded it, merge new contents
*/
- if (merge_existing && info.st_ctime != pf->ctime)
+ lvm_stat_ctim(&ts, &info);
+ if (merge_existing && timespeccmp(&ts, &pf->ctime, !=))
/* Keep cft open to avoid losing lock */
persistent_filter_load(f, &cft);
@@ -352,7 +354,7 @@ struct dev_filter *persistent_filter_create(struct dev_types *dt,
/* Only merge cache file before dumping it if it changed externally. */
if (!stat(pf->file, &info))
- pf->ctime = info.st_ctime;
+ lvm_stat_ctim(&pf->ctime, &info);
f->passes_filter = _lookup_p;
f->destroy = _persistent_destroy;
diff --git a/lib/filters/filter-usable.c b/lib/filters/filter-usable.c
index f34a325fc..a4cfc69ea 100644
--- a/lib/filters/filter-usable.c
+++ b/lib/filters/filter-usable.c
@@ -15,6 +15,101 @@
#include "lib.h"
#include "filter.h"
#include "activate.h" /* device_is_usable */
+#ifdef UDEV_SYNC_SUPPORT
+#include <libudev.h>
+#include "dev-ext-udev-constants.h"
+#endif
+
+static const char *_too_small_to_hold_pv_msg = "Too small to hold a PV";
+
+static int _native_check_pv_min_size(struct device *dev)
+{
+ uint64_t size;
+ int ret = 0;
+
+ /* Check it's accessible */
+ if (!dev_open_readonly_quiet(dev)) {
+ log_debug_devs("%s: Skipping: open failed [%s:%p]",
+ dev_name(dev), dev_ext_name(dev), dev->ext.handle);
+ return 0;
+ }
+
+ /* Check it's not too small */
+ if (!dev_get_size(dev, &size)) {
+ log_debug_devs("%s: Skipping: dev_get_size failed [%s:%p]",
+ dev_name(dev), dev_ext_name(dev), dev->ext.handle);
+ goto out;
+ }
+
+ if (size < pv_min_size()) {
+ log_debug_devs("%s: Skipping: %s [%s:%p]", dev_name(dev),
+ _too_small_to_hold_pv_msg,
+ dev_ext_name(dev), dev->ext.handle);
+ goto out;
+ }
+
+ ret = 1;
+out:
+ if (!dev_close(dev))
+ stack;
+
+ return ret;
+}
+
+#ifdef UDEV_SYNC_SUPPORT
+static int _udev_check_pv_min_size(struct device *dev)
+{
+ struct dev_ext *ext;
+ const char *size_str;
+ char *endp;
+ uint64_t size;
+
+ if (!(ext = dev_ext_get(dev)))
+ return_0;
+
+ if (!(size_str = udev_device_get_sysattr_value((struct udev_device *)ext->handle, DEV_EXT_UDEV_SYSFS_ATTR_SIZE))) {
+ log_debug_devs("%s: Skipping: failed to get size from sysfs [%s:%p]",
+ dev_name(dev), dev_ext_name(dev), dev->ext.handle);
+ return 0;
+ }
+
+ errno = 0;
+ size = strtoull(size_str, &endp, 10);
+ if (errno || !endp || *endp) {
+ log_debug_devs("%s: Skipping: failed to parse size from sysfs [%s:%p]",
+ dev_name(dev), dev_ext_name(dev), dev->ext.handle);
+ return 0;
+ }
+
+ if (size < pv_min_size()) {
+ log_debug_devs("%s: Skipping: %s [%s:%p]", dev_name(dev),
+ _too_small_to_hold_pv_msg,
+ dev_ext_name(dev), dev->ext.handle);
+ return 0;
+ }
+
+ return 1;
+}
+#else
+static int _udev_check_pv_min_size(struct device *dev)
+{
+ return 1;
+}
+#endif
+
+static int _check_pv_min_size(struct device *dev)
+{
+ if (dev->ext.src == DEV_EXT_NONE)
+ return _native_check_pv_min_size(dev);
+
+ if (dev->ext.src == DEV_EXT_UDEV)
+ return _udev_check_pv_min_size(dev);
+
+ log_error(INTERNAL_ERROR "Missing hook for PV min size check "
+ "using external device info source %s", dev_ext_name(dev));
+
+ return 0;
+}
static int _passes_usable_filter(struct dev_filter *f, struct device *dev)
{
@@ -22,7 +117,20 @@ static int _passes_usable_filter(struct dev_filter *f, struct device *dev)
struct dev_usable_check_params ucp = {0};
int r;
- /* filter only device-mapper devices */
+ /* check if the device is not too small to hold a PV */
+ switch (mode) {
+ case FILTER_MODE_NO_LVMETAD:
+ /* fall through */
+ case FILTER_MODE_PRE_LVMETAD:
+ if (!_check_pv_min_size(dev))
+ return 0;
+ break;
+ case FILTER_MODE_POST_LVMETAD:
+ /* nothing to do here */
+ break;
+ }
+
+ /* further checks are done on dm devices only */
if (!dm_is_dm_major(MAJOR(dev->dev)))
return 1;
diff --git a/lib/filters/filter.h b/lib/filters/filter.h
index 0519439e9..c03d8e233 100644
--- a/lib/filters/filter.h
+++ b/lib/filters/filter.h
@@ -20,9 +20,11 @@
#include "dev-cache.h"
#include "dev-type.h"
-struct dev_filter *composite_filter_create(int n, struct dev_filter **filters);
+struct dev_filter *composite_filter_create(int n, int use_dev_ext_info, struct dev_filter **filters);
+
struct dev_filter *lvm_type_filter_create(struct dev_types *dt);
struct dev_filter *md_filter_create(struct dev_types *dt);
+struct dev_filter *fwraid_filter_create(struct dev_types *dt);
struct dev_filter *mpath_filter_create(struct dev_types *dt);
struct dev_filter *partitioned_filter_create(struct dev_types *dt);
struct dev_filter *persistent_filter_create(struct dev_types *dt,
diff --git a/lib/format1/disk-rep.h b/lib/format1/disk-rep.h
index 729601ee8..9d1c8a5cc 100644
--- a/lib/format1/disk-rep.h
+++ b/lib/format1/disk-rep.h
@@ -245,4 +245,6 @@ int get_free_vg_number(struct format_instance *fid, struct dev_filter *filter,
int export_vg_number(struct format_instance *fid, struct dm_list *pvds,
const char *vg_name, struct dev_filter *filter);
+int generate_lvm1_system_id(struct cmd_context *cmd, char *s, const char *prefix);
+
#endif
diff --git a/lib/format1/format1.c b/lib/format1/format1.c
index b0943b165..1b86ef5d6 100644
--- a/lib/format1/format1.c
+++ b/lib/format1/format1.c
@@ -180,6 +180,8 @@ out:
static struct volume_group *_format1_vg_read(struct format_instance *fid,
const char *vg_name,
struct metadata_area *mda __attribute__((unused)),
+ struct cached_vg_fmtdata **vg_fmtdata __attribute__((unused)),
+ unsigned *use_previous_vg __attribute__((unused)),
int single_device __attribute__((unused)))
{
struct volume_group *vg;
@@ -496,6 +498,11 @@ static int _format1_vg_setup(struct format_instance *fid, struct volume_group *v
if (!vg_check_new_extent_size(vg->fid->fmt, vg->extent_size))
return_0;
+ /* Generate lvm1_system_id if not yet set */
+ if (!*vg->lvm1_system_id &&
+ !generate_lvm1_system_id(vg->cmd, vg->lvm1_system_id, ""))
+ return_0;
+
return 1;
}
@@ -590,7 +597,8 @@ struct format_type *init_format(struct cmd_context *cmd)
fmt->alias = NULL;
fmt->orphan_vg_name = FMT_LVM1_ORPHAN_VG_NAME;
fmt->features = FMT_RESTRICTED_LVIDS | FMT_ORPHAN_ALLOCATABLE |
- FMT_RESTRICTED_READAHEAD | FMT_OBSOLETE;
+ FMT_RESTRICTED_READAHEAD | FMT_OBSOLETE |
+ FMT_SYSTEMID_ON_PVS;
fmt->private = NULL;
dm_list_init(&fmt->mda_ops);
diff --git a/lib/format1/import-export.c b/lib/format1/import-export.c
index 8aa4d3323..9e49a969c 100644
--- a/lib/format1/import-export.c
+++ b/lib/format1/import-export.c
@@ -69,14 +69,14 @@ int import_pv(const struct format_type *fmt, struct dm_pool *mem,
memcpy(&pv->vgid, vgd->vg_uuid, sizeof(vg->id));
/* Store system_id from first PV if PV belongs to a VG */
- if (vg && !*vg->system_id)
- strncpy(vg->system_id, (char *)pvd->system_id, NAME_LEN);
+ if (vg && !*vg->lvm1_system_id)
+ strncpy(vg->lvm1_system_id, (char *)pvd->system_id, NAME_LEN);
if (vg &&
- strncmp(vg->system_id, (char *)pvd->system_id, sizeof(pvd->system_id)))
+ strncmp(vg->lvm1_system_id, (char *)pvd->system_id, sizeof(pvd->system_id)))
log_very_verbose("System ID %s on %s differs from %s for "
"volume group", pvd->system_id,
- pv_dev_name(pv), vg->system_id);
+ pv_dev_name(pv), vg->lvm1_system_id);
/*
* If exported, we still need to flag in pv->status too because
@@ -125,12 +125,12 @@ int import_pv(const struct format_type *fmt, struct dm_pool *mem,
return 1;
}
-static int _system_id(struct cmd_context *cmd, char *s, const char *prefix)
+int generate_lvm1_system_id(struct cmd_context *cmd, char *s, const char *prefix)
{
if (dm_snprintf(s, NAME_LEN, "%s%s%lu",
prefix, cmd->hostname, time(NULL)) < 0) {
- log_error("Generated system_id too long");
+ log_error("Generated LVM1 format system_id too long");
return 0;
}
@@ -156,16 +156,18 @@ int export_pv(struct cmd_context *cmd, struct dm_pool *mem __attribute__((unused
}
/* Preserve existing system_id if it exists */
- if (vg && *vg->system_id)
+ if (vg && vg->lvm1_system_id && *vg->lvm1_system_id)
+ strncpy((char *)pvd->system_id, vg->lvm1_system_id, sizeof(pvd->system_id));
+ else if (vg && vg->system_id && *vg->system_id)
strncpy((char *)pvd->system_id, vg->system_id, sizeof(pvd->system_id));
/* Is VG already exported or being exported? */
if (vg && vg_is_exported(vg)) {
/* Does system_id need setting? */
- if (!*vg->system_id ||
- strncmp(vg->system_id, EXPORTED_TAG,
+ if (!vg->lvm1_system_id || !*vg->lvm1_system_id ||
+ strncmp(vg->lvm1_system_id, EXPORTED_TAG,
sizeof(EXPORTED_TAG) - 1)) {
- if (!_system_id(cmd, (char *)pvd->system_id, EXPORTED_TAG))
+ if (!generate_lvm1_system_id(cmd, (char *)pvd->system_id, EXPORTED_TAG))
return_0;
}
if (strlen((char *)pvd->vg_name) + sizeof(EXPORTED_TAG) >
@@ -178,22 +180,22 @@ int export_pv(struct cmd_context *cmd, struct dm_pool *mem __attribute__((unused
}
/* Is VG being imported? */
- if (vg && !vg_is_exported(vg) && *vg->system_id &&
- !strncmp(vg->system_id, EXPORTED_TAG, sizeof(EXPORTED_TAG) - 1)) {
- if (!_system_id(cmd, (char *)pvd->system_id, IMPORTED_TAG))
+ if (vg && !vg_is_exported(vg) && vg->lvm1_system_id && *vg->lvm1_system_id &&
+ !strncmp(vg->lvm1_system_id, EXPORTED_TAG, sizeof(EXPORTED_TAG) - 1)) {
+ if (!generate_lvm1_system_id(cmd, (char *)pvd->system_id, IMPORTED_TAG))
return_0;
}
/* Generate system_id if PV is in VG */
if (!pvd->system_id[0])
- if (!_system_id(cmd, (char *)pvd->system_id, ""))
+ if (!generate_lvm1_system_id(cmd, (char *)pvd->system_id, ""))
return_0;
/* Update internal system_id if we changed it */
- if (vg &&
- (!*vg->system_id ||
- strncmp(vg->system_id, (char *)pvd->system_id, sizeof(pvd->system_id))))
- strncpy(vg->system_id, (char *)pvd->system_id, NAME_LEN);
+ if (vg && vg->lvm1_system_id &&
+ (!*vg->lvm1_system_id ||
+ strncmp(vg->lvm1_system_id, (char *)pvd->system_id, sizeof(pvd->system_id))))
+ strncpy(vg->lvm1_system_id, (char *)pvd->system_id, NAME_LEN);
//pvd->pv_major = MAJOR(pv->dev);
@@ -225,11 +227,9 @@ int import_vg(struct dm_pool *mem,
if (!(vg->name = dm_pool_strdup(mem, (char *)dl->pvd.vg_name)))
return_0;
- if (!(vg->system_id = dm_pool_zalloc(mem, NAME_LEN + 1)))
+ if (!(vg->lvm1_system_id = dm_pool_zalloc(mem, NAME_LEN + 1)))
return_0;
- *vg->system_id = '\0';
-
if (vgd->vg_status & VG_EXPORTED)
vg->status |= EXPORTED_VG;
diff --git a/lib/format1/import-extents.c b/lib/format1/import-extents.c
index 3fd251a5f..2829c4197 100644
--- a/lib/format1/import-extents.c
+++ b/lib/format1/import-extents.c
@@ -226,7 +226,7 @@ static int _read_linear(struct cmd_context *cmd, struct lv_map *lvm)
len = _area_length(lvm, le);
if (!(seg = alloc_lv_segment(segtype, lvm->lv, le, len, 0, 0, 0,
- NULL, 1, len, 0, 0, 0, NULL))) {
+ NULL, 1, len, 1, 0, 0, 0, NULL))) {
log_error("Failed to allocate linear segment.");
return 0;
}
@@ -300,7 +300,7 @@ static int _read_stripes(struct cmd_context *cmd, struct lv_map *lvm)
lvm->stripes * area_len,
0, 0, lvm->stripe_size, NULL,
lvm->stripes,
- area_len, 0, 0, 0, NULL))) {
+ area_len, 1, 0, 0, 0, NULL))) {
log_error("Failed to allocate striped segment.");
return 0;
}
diff --git a/lib/format_pool/format_pool.c b/lib/format_pool/format_pool.c
index 503005a7e..2a8819d5d 100644
--- a/lib/format_pool/format_pool.c
+++ b/lib/format_pool/format_pool.c
@@ -101,6 +101,8 @@ static int _check_usp(const char *vgname, struct user_subpool *usp, int sp_count
static struct volume_group *_pool_vg_read(struct format_instance *fid,
const char *vg_name,
struct metadata_area *mda __attribute__((unused)),
+ struct cached_vg_fmtdata **vg_fmtdata __attribute__((unused)),
+ unsigned *use_previous_vg __attribute__((unused)),
int single_device __attribute__((unused)))
{
struct volume_group *vg;
diff --git a/lib/format_pool/import_export.c b/lib/format_pool/import_export.c
index 6c4e83d04..9dc26757b 100644
--- a/lib/format_pool/import_export.c
+++ b/lib/format_pool/import_export.c
@@ -195,7 +195,7 @@ static int _add_stripe_seg(struct dm_pool *mem,
if (!(seg = alloc_lv_segment(segtype, lv, *le_cur,
area_len * usp->num_devs, 0, 0,
usp->striping, NULL, usp->num_devs,
- area_len, 0, 0, 0, NULL))) {
+ area_len, 1, 0, 0, 0, NULL))) {
log_error("Unable to allocate striped lv_segment structure");
return 0;
}
@@ -234,7 +234,7 @@ static int _add_linear_seg(struct dm_pool *mem,
if (!(seg = alloc_lv_segment(segtype, lv, *le_cur,
area_len, 0, 0, usp->striping,
- NULL, 1, area_len,
+ NULL, 1, area_len, 1,
POOL_PE_SIZE, 0, 0, NULL))) {
log_error("Unable to allocate linear lv_segment "
"structure");
diff --git a/lib/format_text/archiver.c b/lib/format_text/archiver.c
index a2f40f2df..e3d3d570c 100644
--- a/lib/format_text/archiver.c
+++ b/lib/format_text/archiver.c
@@ -308,7 +308,7 @@ struct volume_group *backup_read_vg(struct cmd_context *cmd,
}
dm_list_iterate_items(mda, &tf->metadata_areas_in_use) {
- if (!(vg = mda->ops->vg_read(tf, vg_name, mda, 0)))
+ if (!(vg = mda->ops->vg_read(tf, vg_name, mda, NULL, NULL, 0)))
stack;
break;
}
diff --git a/lib/format_text/export.c b/lib/format_text/export.c
index 2e06ea9f8..20624d2c7 100644
--- a/lib/format_text/export.c
+++ b/lib/format_text/export.c
@@ -21,6 +21,7 @@
#include "segtype.h"
#include "text_export.h"
#include "lvm-version.h"
+#include "toolcontext.h"
#include <stdarg.h>
#include <time.h>
@@ -327,7 +328,7 @@ int out_config_node(struct formatter *f, const struct dm_config_node *cn)
return dm_config_write_node(cn, _out_line, f);
}
-static int _print_header(struct formatter *f,
+static int _print_header(struct cmd_context *cmd, struct formatter *f,
const char *desc)
{
char *buf;
@@ -350,6 +351,8 @@ static int _print_header(struct formatter *f,
outf(f, "creation_host = \"%s\"\t# %s %s %s %s %s", _utsname.nodename,
_utsname.sysname, _utsname.nodename, _utsname.release,
_utsname.version, _utsname.machine);
+ if (cmd->system_id && *cmd->system_id)
+ outf(f, "creation_host_system_id = \"%s\"", cmd->system_id);
outf(f, "creation_time = %lu\t# %s", t, ctime(&t));
return 1;
@@ -358,6 +361,7 @@ static int _print_header(struct formatter *f,
static int _print_flag_config(struct formatter *f, uint64_t status, int type)
{
char buffer[4096];
+
if (!print_flags(status, type | STATUS_FLAG, buffer, sizeof(buffer)))
return_0;
outf(f, "status = %s", buffer);
@@ -369,19 +373,61 @@ static int _print_flag_config(struct formatter *f, uint64_t status, int type)
return 1;
}
+static char *_alloc_printed_str_list(struct dm_list *list)
+{
+ struct dm_str_list *sl;
+ int first = 1;
+ size_t size = 0;
+ char *buffer, *buf;
+
+ dm_list_iterate_items(sl, list)
+ /* '"' + item + '"' + ',' + ' ' */
+ size += strlen(sl->str) + 4;
+ /* '[' + ']' + '\0' */
+ size += 3;
+
+ if (!(buffer = buf = dm_malloc(size))) {
+ log_error("Could not allocate memory for string list buffer.");
+ return NULL;
+ }
+
+ if (!emit_to_buffer(&buf, &size, "["))
+ goto_bad;
+
+ dm_list_iterate_items(sl, list) {
+ if (!first) {
+ if (!emit_to_buffer(&buf, &size, ", "))
+ goto_bad;
+ } else
+ first = 0;
+
+ if (!emit_to_buffer(&buf, &size, "\"%s\"", sl->str))
+ goto_bad;
+ }
+
+ if (!emit_to_buffer(&buf, &size, "]"))
+ goto_bad;
+
+ return buffer;
+
+bad:
+ dm_free(buffer);
+ return_NULL;
+}
-static int _out_tags(struct formatter *f, struct dm_list *tagsl)
+static int _out_list(struct formatter *f, struct dm_list *list,
+ const char *list_name)
{
- char *tag_buffer;
+ char *buffer;
- if (!dm_list_empty(tagsl)) {
- if (!(tag_buffer = alloc_printed_tags(tagsl)))
+ if (!dm_list_empty(list)) {
+ if (!(buffer = _alloc_printed_str_list(list)))
return_0;
- if (!out_text(f, "tags = %s", tag_buffer)) {
- dm_free(tag_buffer);
+ if (!out_text(f, "%s = %s", list_name, buffer)) {
+ dm_free(buffer);
return_0;
}
- dm_free(tag_buffer);
+ dm_free(buffer);
}
return 1;
@@ -390,6 +436,8 @@ static int _out_tags(struct formatter *f, struct dm_list *tagsl)
static int _print_vg(struct formatter *f, struct volume_group *vg)
{
char buffer[4096];
+ const struct format_type *fmt = NULL;
+ uint64_t status = vg->status;
if (!id_write_format(&vg->id, buffer, sizeof(buffer)))
return_0;
@@ -398,17 +446,38 @@ static int _print_vg(struct formatter *f, struct volume_group *vg)
outf(f, "seqno = %u", vg->seqno);
- if (vg->fid && vg->fid->fmt)
- outfc(f, "# informational", "format = \"%s\"", vg->fid->fmt->name);
+ if (vg->original_fmt)
+ fmt = vg->original_fmt;
+ else if (vg->fid)
+ fmt = vg->fid->fmt;
+ if (fmt)
+ outfc(f, "# informational", "format = \"%s\"", fmt->name);
- if (!_print_flag_config(f, vg->status, VG_FLAGS))
- return_0;
+ /*
+ * Removing WRITE and adding LVM_WRITE_LOCKED makes it read-only
+ * to old versions of lvm that only look for LVM_WRITE.
+ */
+ if ((status & LVM_WRITE) && vg_flag_write_locked(vg)) {
+ status &= ~LVM_WRITE;
+ status |= LVM_WRITE_LOCKED;
+ }
- if (!_out_tags(f, &vg->tags))
+ if (!_print_flag_config(f, status, VG_FLAGS))
return_0;
+ if (!_out_list(f, &vg->tags, "tags"))
+ return_0;
+
if (vg->system_id && *vg->system_id)
outf(f, "system_id = \"%s\"", vg->system_id);
+ else if (vg->lvm1_system_id && *vg->lvm1_system_id)
+ outf(f, "system_id = \"%s\"", vg->lvm1_system_id);
+
+ if (vg->lock_type) {
+ outf(f, "lock_type = \"%s\"", vg->lock_type);
+ if (vg->lock_args)
+ outf(f, "lock_args = \"%s\"", vg->lock_args);
+ }
outsize(f, (uint64_t) vg->extent_size, "extent_size = %u",
vg->extent_size);
@@ -486,7 +555,7 @@ static int _print_pvs(struct formatter *f, struct volume_group *vg)
if (!_print_flag_config(f, pv->status, PV_FLAGS))
return_0;
- if (!_out_tags(f, &pv->tags))
+ if (!_out_list(f, &pv->tags, "tags"))
return_0;
outsize(f, pv->size, "dev_size = %" PRIu64, pv->size);
@@ -525,7 +594,7 @@ static int _print_segment(struct formatter *f, struct volume_group *vg,
outnl(f);
outf(f, "type = \"%s\"", seg->segtype->name);
- if (!_out_tags(f, &seg->tags))
+ if (!_out_list(f, &seg->tags, "tags"))
return_0;
if (seg->segtype->ops->text_export &&
@@ -568,10 +637,12 @@ int out_areas(struct formatter *f, const struct lv_segment *seg,
continue;
}
- /* RAID devices are laid-out in metadata/data pairs */
+ /* RAID devices are laid-out in metadata/data pairs (unless raid0 which is w/o metadata) */
+PFLA("seg_lv(seg, %u)->name=%s", s, seg_lv(seg, s)->name);
if (!lv_is_raid_image(seg_lv(seg, s)) ||
(seg->meta_areas && seg_metalv(seg, s) && !lv_is_raid_metadata(seg_metalv(seg, s)))) {
- log_error("RAID segment has non-RAID areas");
+PFLA("image=%u, meta=%u", lv_is_raid_image(seg_lv(seg, s)), (seg->meta_areas && seg_metalv(seg, s) && lv_is_raid_metadata(seg_metalv(seg, s))) ? 1 : 0);
+ log_error("RAID segment of %s has non-RAID areas", display_lvname(seg->lv));
return 0;
}
@@ -597,6 +668,7 @@ static int _print_lv(struct formatter *f, struct logical_volume *lv)
int seg_count;
struct tm *local_tm;
time_t ts;
+ uint64_t status = lv->status;
#if 0
/* HM FIXME: workaround for empty metadata lvs with raid0 */
@@ -613,10 +685,19 @@ static int _print_lv(struct formatter *f, struct logical_volume *lv)
outf(f, "id = \"%s\"", buffer);
- if (!_print_flag_config(f, lv->status, LV_FLAGS))
+ /*
+ * Removing WRITE and adding LVM_WRITE_LOCKED makes it read-only
+ * to old versions of lvm that only look for LVM_WRITE.
+ */
+ if ((status & LVM_WRITE) && vg_flag_write_locked(lv->vg)) {
+ status &= ~LVM_WRITE;
+ status |= LVM_WRITE_LOCKED;
+ }
+
+ if (!_print_flag_config(f, status, LV_FLAGS))
return_0;
- if (!_out_tags(f, &lv->tags))
+ if (!_out_list(f, &lv->tags, "tags"))
return_0;
if (lv->timestamp) {
@@ -632,6 +713,9 @@ static int _print_lv(struct formatter *f, struct logical_volume *lv)
lv->timestamp);
}
+ if (lv->lock_args)
+ outf(f, "lock_args = \"%s\"", lv->lock_args);
+
if (lv->alloc != ALLOC_INHERIT)
outf(f, "allocation_policy = \"%s\"",
get_alloc_string(lv->alloc));
@@ -752,7 +836,7 @@ static int _text_vg_export(struct formatter *f,
if (!_build_pv_names(f, vg))
goto_out;
- if (f->header && !_print_header(f, desc))
+ if (f->header && !_print_header(vg->cmd, f, desc))
goto_out;
if (!out_text(f, "%s {", vg->name))
@@ -775,7 +859,7 @@ static int _text_vg_export(struct formatter *f,
if (!out_text(f, "}"))
goto_out;
- if (!f->header && !_print_header(f, desc))
+ if (!f->header && !_print_header(vg->cmd, f, desc))
goto_out;
r = 1;
diff --git a/lib/format_text/flags.c b/lib/format_text/flags.c
index 8294b27bf..90ca2bd86 100644
--- a/lib/format_text/flags.c
+++ b/lib/format_text/flags.c
@@ -34,6 +34,7 @@ static const struct flag _vg_flags[] = {
{PVMOVE, "PVMOVE", STATUS_FLAG},
{LVM_READ, "READ", STATUS_FLAG},
{LVM_WRITE, "WRITE", STATUS_FLAG},
+ {LVM_WRITE_LOCKED, "WRITE_LOCKED", COMPATIBLE_FLAG},
{CLUSTERED, "CLUSTERED", STATUS_FLAG},
{SHARED, "SHARED", STATUS_FLAG},
{PARTIAL_VG, NULL, 0},
@@ -53,6 +54,7 @@ static const struct flag _pv_flags[] = {
static const struct flag _lv_flags[] = {
{LVM_READ, "READ", STATUS_FLAG},
{LVM_WRITE, "WRITE", STATUS_FLAG},
+ {LVM_WRITE_LOCKED, "WRITE_LOCKED", COMPATIBLE_FLAG},
{FIXED_MINOR, "FIXED_MINOR", STATUS_FLAG},
{VISIBLE_LV, "VISIBLE", STATUS_FLAG},
{PVMOVE, "PVMOVE", STATUS_FLAG},
@@ -61,11 +63,15 @@ static const struct flag _lv_flags[] = {
{LV_REBUILD, "REBUILD", STATUS_FLAG},
{LV_RESHAPE_DELTA_DISKS_PLUS, "RESHAPE_DELTA_DISKS_PLUS", STATUS_FLAG},
{LV_RESHAPE_DELTA_DISKS_MINUS, "RESHAPE_DELTA_DISKS_MINUS", STATUS_FLAG},
+ {LV_RESHAPE_REMOVED, "RESHAPE_REMOVED", STATUS_FLAG},
+ {LV_DUPLICATED, "DUPLICATED", STATUS_FLAG},
{LV_WRITEMOSTLY, "WRITEMOSTLY", STATUS_FLAG},
{LV_ACTIVATION_SKIP, "ACTIVATION_SKIP", COMPATIBLE_FLAG},
+ {LV_ERROR_WHEN_FULL, "ERROR_WHEN_FULL", COMPATIBLE_FLAG},
{LV_NOSCAN, NULL, 0},
{LV_TEMPORARY, NULL, 0},
{POOL_METADATA_SPARE, NULL, 0},
+ {LOCKD_SANLOCK_LV, NULL, 0},
{RAID, NULL, 0},
{RAID_META, NULL, 0},
{RAID_IMAGE, NULL, 0},
@@ -91,6 +97,7 @@ static const struct flag _lv_flags[] = {
{CACHE_POOL_DATA, NULL, 0},
{CACHE_POOL_METADATA, NULL, 0},
{LV_PENDING_DELETE, NULL, 0}, /* FIXME Display like COMPATIBLE_FLAG */
+ {LV_REMOVED, NULL, 0},
{0, NULL, 0}
};
@@ -155,7 +162,7 @@ int print_flags(uint64_t status, int type, char *buffer, size_t size)
if (status)
log_warn(INTERNAL_ERROR "Metadata inconsistency: "
- "Not all flags successfully exported.");
+ "Not all flags successfully exported (0x%lx).", status);
return 1;
}
diff --git a/lib/format_text/format-text.c b/lib/format_text/format-text.c
index c142e1b35..4fafc8147 100644
--- a/lib/format_text/format-text.c
+++ b/lib/format_text/format-text.c
@@ -412,6 +412,11 @@ static struct raw_locn *_find_vg_rlocn(struct device_area *dev_area,
char vgnamebuf[NAME_LEN + 2] __attribute__((aligned(8)));
struct raw_locn *rlocn, *rlocn_precommitted;
struct lvmcache_info *info;
+ struct lvmcache_vgsummary vgsummary_orphan = {
+ .vgname = FMT_TEXT_ORPHAN_VG_NAME,
+ };
+
+ memcpy(&vgsummary_orphan.vgid, FMT_TEXT_ORPHAN_VG_NAME, sizeof(FMT_TEXT_ORPHAN_VG_NAME));
rlocn = mdah->raw_locns; /* Slot 0 */
rlocn_precommitted = rlocn + 1; /* Slot 1 */
@@ -448,9 +453,9 @@ static struct raw_locn *_find_vg_rlocn(struct device_area *dev_area,
"not match expected name %s.", vgname);
bad:
- if ((info = lvmcache_info_from_pvid(dev_area->dev->pvid, 0)))
- lvmcache_update_vgname_and_id(info, FMT_TEXT_ORPHAN_VG_NAME,
- FMT_TEXT_ORPHAN_VG_NAME, 0, NULL);
+ if ((info = lvmcache_info_from_pvid(dev_area->dev->pvid, 0)) &&
+ !lvmcache_update_vgname_and_id(info, &vgsummary_orphan))
+ stack;
return NULL;
}
@@ -498,6 +503,8 @@ static int _raw_holds_vgname(struct format_instance *fid,
static struct volume_group *_vg_read_raw_area(struct format_instance *fid,
const char *vgname,
struct device_area *area,
+ struct cached_vg_fmtdata **vg_fmtdata,
+ unsigned *use_previous_vg,
int precommitted,
int single_device)
{
@@ -526,19 +533,26 @@ static struct volume_group *_vg_read_raw_area(struct format_instance *fid,
}
/* FIXME 64-bit */
- if (!(vg = text_vg_import_fd(fid, NULL, single_device, area->dev,
+ if (!(vg = text_vg_import_fd(fid, NULL, vg_fmtdata, use_previous_vg, single_device, area->dev,
(off_t) (area->start + rlocn->offset),
(uint32_t) (rlocn->size - wrap),
(off_t) (area->start + MDA_HEADER_SIZE),
wrap, calc_crc, rlocn->checksum, &when,
- &desc)))
+ &desc)) && (!use_previous_vg || !*use_previous_vg))
goto_out;
- log_debug_metadata("Read %s %smetadata (%u) from %s at %" PRIu64 " size %"
- PRIu64, vg->name, precommitted ? "pre-commit " : "",
- vg->seqno, dev_name(area->dev),
- area->start + rlocn->offset, rlocn->size);
- if (precommitted)
+ if (vg)
+ log_debug_metadata("Read %s %smetadata (%u) from %s at %" PRIu64 " size %"
+ PRIu64, vg->name, precommitted ? "pre-commit " : "",
+ vg->seqno, dev_name(area->dev),
+ area->start + rlocn->offset, rlocn->size);
+ else
+ log_debug_metadata("Skipped reading %smetadata from %s at %" PRIu64 " size %"
+ PRIu64 " with matching checksum.", precommitted ? "pre-commit " : "",
+ dev_name(area->dev),
+ area->start + rlocn->offset, rlocn->size);
+
+ if (vg && precommitted)
vg->status |= PRECOMMITTED;
out:
@@ -548,6 +562,8 @@ static struct volume_group *_vg_read_raw_area(struct format_instance *fid,
static struct volume_group *_vg_read_raw(struct format_instance *fid,
const char *vgname,
struct metadata_area *mda,
+ struct cached_vg_fmtdata **vg_fmtdata,
+ unsigned *use_previous_vg,
int single_device)
{
struct mda_context *mdac = (struct mda_context *) mda->metadata_locn;
@@ -556,7 +572,7 @@ static struct volume_group *_vg_read_raw(struct format_instance *fid,
if (!dev_open_readonly(mdac->area.dev))
return_NULL;
- vg = _vg_read_raw_area(fid, vgname, &mdac->area, 0, single_device);
+ vg = _vg_read_raw_area(fid, vgname, &mdac->area, vg_fmtdata, use_previous_vg, 0, single_device);
if (!dev_close(mdac->area.dev))
stack;
@@ -566,7 +582,9 @@ static struct volume_group *_vg_read_raw(struct format_instance *fid,
static struct volume_group *_vg_read_precommit_raw(struct format_instance *fid,
const char *vgname,
- struct metadata_area *mda)
+ struct metadata_area *mda,
+ struct cached_vg_fmtdata **vg_fmtdata,
+ unsigned *use_previous_vg)
{
struct mda_context *mdac = (struct mda_context *) mda->metadata_locn;
struct volume_group *vg;
@@ -574,7 +592,7 @@ static struct volume_group *_vg_read_precommit_raw(struct format_instance *fid,
if (!dev_open_readonly(mdac->area.dev))
return_NULL;
- vg = _vg_read_raw_area(fid, vgname, &mdac->area, 1, 0);
+ vg = _vg_read_raw_area(fid, vgname, &mdac->area, vg_fmtdata, use_previous_vg, 1, 0);
if (!dev_close(mdac->area.dev))
stack;
@@ -885,6 +903,8 @@ static struct volume_group *_vg_read_file_name(struct format_instance *fid,
static struct volume_group *_vg_read_file(struct format_instance *fid,
const char *vgname,
struct metadata_area *mda,
+ struct cached_vg_fmtdata **vg_fmtdata,
+ unsigned *use_previous_vg __attribute__((unused)),
int single_device __attribute__((unused)))
{
struct text_context *tc = (struct text_context *) mda->metadata_locn;
@@ -894,7 +914,9 @@ static struct volume_group *_vg_read_file(struct format_instance *fid,
static struct volume_group *_vg_read_precommit_file(struct format_instance *fid,
const char *vgname,
- struct metadata_area *mda)
+ struct metadata_area *mda,
+ struct cached_vg_fmtdata **vg_fmtdata,
+ unsigned *use_previous_vg __attribute__((unused)))
{
struct text_context *tc = (struct text_context *) mda->metadata_locn;
struct volume_group *vg;
@@ -1123,26 +1145,24 @@ static int _scan_file(const struct format_type *fmt, const char *vgname)
return 1;
}
-const char *vgname_from_mda(const struct format_type *fmt,
- struct mda_header *mdah,
- struct device_area *dev_area, struct id *vgid,
- uint64_t *vgstatus, char **creation_host,
- uint64_t *mda_free_sectors)
+int vgname_from_mda(const struct format_type *fmt,
+ struct mda_header *mdah, struct device_area *dev_area,
+ struct lvmcache_vgsummary *vgsummary, uint64_t *mda_free_sectors)
{
struct raw_locn *rlocn;
uint32_t wrap = 0;
- const char *vgname = NULL;
unsigned int len = 0;
char buf[NAME_LEN + 1] __attribute__((aligned(8)));
char uuid[64] __attribute__((aligned(8)));
uint64_t buffer_size, current_usage;
+ unsigned used_cached_metadata = 0;
if (mda_free_sectors)
*mda_free_sectors = ((dev_area->size - MDA_HEADER_SIZE) / 2) >> SECTOR_SHIFT;
if (!mdah) {
log_error(INTERNAL_ERROR "vgname_from_mda called with NULL pointer for mda_header");
- goto_out;
+ return 0;
}
/* FIXME Cope with returning a list */
@@ -1151,13 +1171,16 @@ const char *vgname_from_mda(const struct format_type *fmt,
/*
* If no valid offset, do not try to search for vgname
*/
- if (!rlocn->offset)
- goto out;
+ if (!rlocn->offset) {
+ log_debug("%s: found metadata with offset 0.",
+ dev_name(dev_area->dev));
+ return 0;
+ }
/* Do quick check for a vgname */
if (!dev_read(dev_area->dev, dev_area->start + rlocn->offset,
NAME_LEN, buf))
- goto_out;
+ return_0;
while (buf[len] && !isspace(buf[len]) && buf[len] != '{' &&
len < (NAME_LEN - 1))
@@ -1167,7 +1190,7 @@ const char *vgname_from_mda(const struct format_type *fmt,
/* Ignore this entry if the characters aren't permissible */
if (!validate_name(buf))
- goto_out;
+ return_0;
/* We found a VG - now check the metadata */
if (rlocn->offset + rlocn->size > mdah->size)
@@ -1176,36 +1199,39 @@ const char *vgname_from_mda(const struct format_type *fmt,
if (wrap > rlocn->offset) {
log_error("%s: metadata too large for circular buffer",
dev_name(dev_area->dev));
- goto out;
+ return 0;
}
+ /* Did we see this metadata before? */
+ vgsummary->mda_checksum = rlocn->checksum;
+ vgsummary->mda_size = rlocn->size;
+
+ if (lvmcache_lookup_mda(vgsummary))
+ used_cached_metadata = 1;
+
/* FIXME 64-bit */
- if (!(vgname = text_vgname_import(fmt, dev_area->dev,
- (off_t) (dev_area->start +
- rlocn->offset),
- (uint32_t) (rlocn->size - wrap),
- (off_t) (dev_area->start +
- MDA_HEADER_SIZE),
- wrap, calc_crc, rlocn->checksum,
- vgid, vgstatus, creation_host)))
- goto_out;
+ if (!text_vgname_import(fmt, dev_area->dev,
+ (off_t) (dev_area->start + rlocn->offset),
+ (uint32_t) (rlocn->size - wrap),
+ (off_t) (dev_area->start + MDA_HEADER_SIZE),
+ wrap, calc_crc, vgsummary->vgname ? 1 : 0,
+ vgsummary))
+ return_0;
/* Ignore this entry if the characters aren't permissible */
- if (!validate_name(vgname)) {
- vgname = NULL;
- goto_out;
- }
+ if (!validate_name(vgsummary->vgname))
+ return_0;
- if (!id_write_format(vgid, uuid, sizeof(uuid))) {
- vgname = NULL;
- goto_out;
- }
+ if (!id_write_format((struct id *)&vgsummary->vgid, uuid, sizeof(uuid)))
+ return_0;
- log_debug_metadata("%s: Found metadata at %" PRIu64 " size %" PRIu64
+ log_debug_metadata("%s: %s metadata at %" PRIu64 " size %" PRIu64
" (in area at %" PRIu64 " size %" PRIu64
") for %s (%s)",
- dev_name(dev_area->dev), dev_area->start + rlocn->offset,
- rlocn->size, dev_area->start, dev_area->size, vgname, uuid);
+ dev_name(dev_area->dev),
+ used_cached_metadata ? "Using cached" : "Found",
+ dev_area->start + rlocn->offset,
+ rlocn->size, dev_area->start, dev_area->size, vgsummary->vgname, uuid);
if (mda_free_sectors) {
current_usage = (rlocn->size + SECTOR_SIZE - UINT64_C(1)) -
@@ -1218,19 +1244,16 @@ const char *vgname_from_mda(const struct format_type *fmt,
*mda_free_sectors = ((buffer_size - 2 * current_usage) / 2) >> SECTOR_SHIFT;
}
- out:
- return vgname;
+ return 1;
}
static int _scan_raw(const struct format_type *fmt, const char *vgname __attribute__((unused)))
{
struct raw_list *rl;
struct dm_list *raw_list;
- const char *scanned_vgname;
struct volume_group *vg;
struct format_instance fid;
- struct id vgid;
- uint64_t vgstatus;
+ struct lvmcache_vgsummary vgsummary = { 0 };
struct mda_header *mdah;
raw_list = &((struct mda_lists *) fmt->private)->raws;
@@ -1251,13 +1274,11 @@ static int _scan_raw(const struct format_type *fmt, const char *vgname __attribu
goto close_dev;
}
- if ((scanned_vgname = vgname_from_mda(fmt, mdah,
- &rl->dev_area, &vgid, &vgstatus,
- NULL, NULL))) {
- vg = _vg_read_raw_area(&fid, scanned_vgname, &rl->dev_area, 0, 0);
+ /* TODO: caching as in vgname_from_mda() (trigger this code?) */
+ if (vgname_from_mda(fmt, mdah, &rl->dev_area, &vgsummary, NULL)) {
+ vg = _vg_read_raw_area(&fid, vgsummary.vgname, &rl->dev_area, NULL, NULL, 0, 0);
if (vg)
lvmcache_update_vg(vg, 0);
-
}
close_dev:
if (!dev_close(rl->dev_area.dev))
@@ -1298,7 +1319,7 @@ static int _write_single_mda(struct metadata_area *mda, void *baton)
return 1;
}
-/* Only for orphans */
+/* Only for orphans - FIXME That's not true any more */
static int _text_pv_write(const struct format_type *fmt, struct physical_volume *pv)
{
struct format_instance *fid = pv->fid;
@@ -1310,9 +1331,11 @@ static int _text_pv_write(const struct format_type *fmt, struct physical_volume
struct _write_single_mda_baton baton;
unsigned mda_index;
+PFL();
/* Add a new cache entry with PV info or update existing one. */
if (!(info = lvmcache_add(fmt->labeller, (const char *) &pv->id,
- pv->dev, pv->vg_name, NULL, 0)))
+ pv->dev, pv->vg_name,
+ is_orphan_vg(pv->vg_name) ? pv->vg_name : pv->vg ? (const char *) &pv->vg->id : NULL, 0)))
return_0;
label = lvmcache_get_label(info);
@@ -1582,9 +1605,9 @@ static int _text_pv_initialise(const struct format_type *fmt,
if (rp->extent_count)
pv->pe_count = rp->extent_count;
- if ((pv->pe_start + pv->pe_count * pv->pe_size - 1) > (pv->size << SECTOR_SHIFT)) {
+ if ((pv->pe_start + pv->pe_count * (uint64_t)pv->pe_size - 1) > pv->size) {
log_error("Physical extents end beyond end of device %s.",
- pv_dev_name(pv));
+ pv_dev_name(pv));
return 0;
}
@@ -2151,7 +2174,7 @@ static int _text_pv_add_metadata_area(const struct format_type *fmt,
* LABEL_SCAN_SIZE.
*/
pe_end = pv->pe_count ? (pv->pe_start +
- pv->pe_count * pv->pe_size - 1) << SECTOR_SHIFT
+ pv->pe_count * (uint64_t)pv->pe_size - 1) << SECTOR_SHIFT
: 0;
if (pe_start || pe_start_locked) {
@@ -2216,7 +2239,7 @@ static int _text_pv_add_metadata_area(const struct format_type *fmt,
if (limit_applied)
log_very_verbose("Using limited metadata area size on %s "
"with value %" PRIu64 " (limited by %s of "
- "%" PRIu64 ").", pv_dev_name(pv),
+ FMTu64 ").", pv_dev_name(pv),
mda_size, limit_name, limit);
if (mda_size) {
@@ -2470,7 +2493,7 @@ struct format_type *create_text_format(struct cmd_context *cmd)
goto bad;
}
- if ((cn = find_config_tree_node(cmd, metadata_dirs_CFG, NULL))) {
+ if ((cn = find_config_tree_array(cmd, metadata_dirs_CFG, NULL))) {
for (cv = cn->v; cv; cv = cv->next) {
if (cv->type != DM_CFG_STRING) {
log_error("Invalid string in config file: "
diff --git a/lib/format_text/import-export.h b/lib/format_text/import-export.h
index d4e25f0e7..be889944e 100644
--- a/lib/format_text/import-export.h
+++ b/lib/format_text/import-export.h
@@ -18,6 +18,7 @@
#include "config.h"
#include "metadata.h"
+#include "lvmcache.h"
#include <stdio.h>
@@ -46,13 +47,13 @@ struct text_vg_version_ops {
int (*check_version) (const struct dm_config_tree * cf);
struct volume_group *(*read_vg) (struct format_instance * fid,
const struct dm_config_tree *cf,
- unsigned use_cached_pvs);
+ unsigned use_cached_pvs,
+ unsigned allow_lvmetad_extensions);
void (*read_desc) (struct dm_pool * mem, const struct dm_config_tree *cf,
time_t *when, char **desc);
- const char *(*read_vgname) (const struct format_type *fmt,
- const struct dm_config_tree *cft,
- struct id *vgid, uint64_t *vgstatus,
- char **creation_host);
+ int (*read_vgname) (const struct format_type *fmt,
+ const struct dm_config_tree *cft,
+ struct lvmcache_vgsummary *vgsummary);
};
struct text_vg_version_ops *text_vg_vsn1_init(void);
@@ -60,9 +61,6 @@ struct text_vg_version_ops *text_vg_vsn1_init(void);
int print_flags(uint64_t status, int type, char *buffer, size_t size);
int read_flags(uint64_t *status, int type, const struct dm_config_value *cv);
-char *alloc_printed_tags(struct dm_list *tags);
-int read_tags(struct dm_pool *mem, struct dm_list *tags, const struct dm_config_value *cv);
-
int text_vg_export_file(struct volume_group *vg, const char *desc, FILE *fp);
size_t text_vg_export_raw(struct volume_group *vg, const char *desc, char **buf);
struct volume_group *text_vg_import_file(struct format_instance *fid,
@@ -70,6 +68,8 @@ struct volume_group *text_vg_import_file(struct format_instance *fid,
time_t *when, char **desc);
struct volume_group *text_vg_import_fd(struct format_instance *fid,
const char *file,
+ struct cached_vg_fmtdata **vg_fmtdata,
+ unsigned *use_previous_vg,
int single_device,
struct device *dev,
off_t offset, uint32_t size,
@@ -77,12 +77,13 @@ struct volume_group *text_vg_import_fd(struct format_instance *fid,
checksum_fn_t checksum_fn,
uint32_t checksum,
time_t *when, char **desc);
-const char *text_vgname_import(const struct format_type *fmt,
- struct device *dev,
- off_t offset, uint32_t size,
- off_t offset2, uint32_t size2,
- checksum_fn_t checksum_fn, uint32_t checksum,
- struct id *vgid, uint64_t *vgstatus,
- char **creation_host);
+
+int text_vgname_import(const struct format_type *fmt,
+ struct device *dev,
+ off_t offset, uint32_t size,
+ off_t offset2, uint32_t size2,
+ checksum_fn_t checksum_fn,
+ int checksum_only,
+ struct lvmcache_vgsummary *vgsummary);
#endif
diff --git a/lib/format_text/import.c b/lib/format_text/import.c
index 4b0226448..16cdf9b82 100644
--- a/lib/format_text/import.c
+++ b/lib/format_text/import.c
@@ -32,30 +32,41 @@ static void _init_text_import(void)
_text_import_initialised = 1;
}
-const char *text_vgname_import(const struct format_type *fmt,
- struct device *dev,
- off_t offset, uint32_t size,
- off_t offset2, uint32_t size2,
- checksum_fn_t checksum_fn, uint32_t checksum,
- struct id *vgid, uint64_t *vgstatus,
- char **creation_host)
+/*
+ * Find out vgname on a given device.
+ */
+int text_vgname_import(const struct format_type *fmt,
+ struct device *dev,
+ off_t offset, uint32_t size,
+ off_t offset2, uint32_t size2,
+ checksum_fn_t checksum_fn,
+ int checksum_only,
+ struct lvmcache_vgsummary *vgsummary)
{
struct dm_config_tree *cft;
struct text_vg_version_ops **vsn;
- const char *vgname = NULL;
+ int r = 0;
_init_text_import();
if (!(cft = config_open(CONFIG_FILE_SPECIAL, NULL, 0)))
- return_NULL;
+ return_0;
if ((!dev && !config_file_read(cft)) ||
(dev && !config_file_read_fd(cft, dev, offset, size,
- offset2, size2, checksum_fn, checksum))) {
+ offset2, size2, checksum_fn,
+ vgsummary->mda_checksum,
+ checksum_only))) {
log_error("Couldn't read volume group metadata.");
goto out;
}
+ if (checksum_only) {
+ /* Checksum matches already-cached content - no need to reparse. */
+ r = 1;
+ goto out;
+ }
+
/*
* Find a set of version functions that can read this file
*/
@@ -63,20 +74,27 @@ const char *text_vgname_import(const struct format_type *fmt,
if (!(*vsn)->check_version(cft))
continue;
- if (!(vgname = (*vsn)->read_vgname(fmt, cft, vgid, vgstatus,
- creation_host)))
+ if (!(*vsn)->read_vgname(fmt, cft, vgsummary))
goto_out;
+ r = 1;
break;
}
out:
config_destroy(cft);
- return vgname;
+ return r;
}
+struct cached_vg_fmtdata {
+ uint32_t cached_mda_checksum;
+ size_t cached_mda_size;
+};
+
struct volume_group *text_vg_import_fd(struct format_instance *fid,
const char *file,
+ struct cached_vg_fmtdata **vg_fmtdata,
+ unsigned *use_previous_vg,
int single_device,
struct device *dev,
off_t offset, uint32_t size,
@@ -88,6 +106,13 @@ struct volume_group *text_vg_import_fd(struct format_instance *fid,
struct volume_group *vg = NULL;
struct dm_config_tree *cft;
struct text_vg_version_ops **vsn;
+ int skip_parse;
+
+ if (vg_fmtdata && !*vg_fmtdata &&
+ !(*vg_fmtdata = dm_pool_zalloc(fid->mem, sizeof(**vg_fmtdata)))) {
+ log_error("Failed to allocate VG fmtdata for text format.");
+ return NULL;
+ }
_init_text_import();
@@ -97,10 +122,22 @@ struct volume_group *text_vg_import_fd(struct format_instance *fid,
if (!(cft = config_open(CONFIG_FILE_SPECIAL, file, 0)))
return_NULL;
+ /* Does the metadata match the already-cached VG? */
+ skip_parse = vg_fmtdata &&
+ ((*vg_fmtdata)->cached_mda_checksum == checksum) &&
+ ((*vg_fmtdata)->cached_mda_size == (size + size2));
+
if ((!dev && !config_file_read(cft)) ||
(dev && !config_file_read_fd(cft, dev, offset, size,
- offset2, size2, checksum_fn, checksum)))
+ offset2, size2, checksum_fn, checksum,
+ skip_parse)))
+ goto_out;
+
+ if (skip_parse) {
+ if (use_previous_vg)
+ *use_previous_vg = 1;
goto out;
+ }
/*
* Find a set of version functions that can read this file
@@ -109,13 +146,21 @@ struct volume_group *text_vg_import_fd(struct format_instance *fid,
if (!(*vsn)->check_version(cft))
continue;
- if (!(vg = (*vsn)->read_vg(fid, cft, single_device)))
+ if (!(vg = (*vsn)->read_vg(fid, cft, single_device, 0)))
goto_out;
(*vsn)->read_desc(vg->vgmem, cft, when, desc);
break;
}
+ if (vg && vg_fmtdata && *vg_fmtdata) {
+ (*vg_fmtdata)->cached_mda_size = (size + size2);
+ (*vg_fmtdata)->cached_mda_checksum = checksum;
+ }
+
+ if (use_previous_vg)
+ *use_previous_vg = 0;
+
out:
config_destroy(cft);
return vg;
@@ -125,12 +170,13 @@ struct volume_group *text_vg_import_file(struct format_instance *fid,
const char *file,
time_t *when, char **desc)
{
- return text_vg_import_fd(fid, file, 0, NULL, (off_t)0, 0, (off_t)0, 0, NULL, 0,
+ return text_vg_import_fd(fid, file, NULL, NULL, 0, NULL, (off_t)0, 0, (off_t)0, 0, NULL, 0,
when, desc);
}
-struct volume_group *import_vg_from_config_tree(const struct dm_config_tree *cft,
- struct format_instance *fid)
+static struct volume_group *_import_vg_from_config_tree(const struct dm_config_tree *cft,
+ struct format_instance *fid,
+ unsigned allow_lvmetad_extensions)
{
struct volume_group *vg = NULL;
struct text_vg_version_ops **vsn;
@@ -145,7 +191,7 @@ struct volume_group *import_vg_from_config_tree(const struct dm_config_tree *cft
* The only path to this point uses cached vgmetadata,
* so it can use cached PV state too.
*/
- if (!(vg = (*vsn)->read_vg(fid, cft, 1)))
+ if (!(vg = (*vsn)->read_vg(fid, cft, 1, allow_lvmetad_extensions)))
stack;
else if ((vg_missing = vg_missing_pv_count(vg))) {
log_verbose("There are %d physical volumes missing.",
@@ -158,3 +204,15 @@ struct volume_group *import_vg_from_config_tree(const struct dm_config_tree *cft
return vg;
}
+
+struct volume_group *import_vg_from_config_tree(const struct dm_config_tree *cft,
+ struct format_instance *fid)
+{
+ return _import_vg_from_config_tree(cft, fid, 0);
+}
+
+struct volume_group *import_vg_from_lvmetad_config_tree(const struct dm_config_tree *cft,
+ struct format_instance *fid)
+{
+ return _import_vg_from_config_tree(cft, fid, 1);
+}
diff --git a/lib/format_text/import_vsn1.c b/lib/format_text/import_vsn1.c
index 0c94208cd..0c162ff99 100644
--- a/lib/format_text/import_vsn1.c
+++ b/lib/format_text/import_vsn1.c
@@ -20,11 +20,13 @@
#include "toolcontext.h"
#include "lvmcache.h"
#include "lvmetad.h"
+#include "lvmlockd.h"
#include "lv_alloc.h"
#include "pv_alloc.h"
#include "segtype.h"
#include "text_import.h"
#include "defaults.h"
+#include "str_list.h"
typedef int (*section_fn) (struct format_instance * fid,
struct volume_group * vg, const struct dm_config_node * pvn,
@@ -153,6 +155,26 @@ static int _read_flag_config(const struct dm_config_node *n, uint64_t *status, i
return 1;
}
+static int _read_str_list(struct dm_pool *mem, struct dm_list *list, const struct dm_config_value *cv)
+{
+ if (cv->type == DM_CFG_EMPTY_ARRAY)
+ return 1;
+
+ while (cv) {
+ if (cv->type != DM_CFG_STRING) {
+ log_error("Found an item that is not a string");
+ return 0;
+ }
+
+ if (!str_list_add(mem, list, dm_pool_strdup(mem, cv->v.str)))
+ return_0;
+
+ cv = cv->next;
+ }
+
+ return 1;
+}
+
static int _read_pv(struct format_instance *fid,
struct volume_group *vg, const struct dm_config_node *pvn,
const struct dm_config_node *vgn __attribute__((unused)),
@@ -167,6 +189,8 @@ static int _read_pv(struct format_instance *fid,
const struct dm_config_value *cv;
uint64_t size, ba_start;
+ int outdated = !strcmp(pvn->parent->key, "outdated_pvs");
+
if (!(pvl = dm_pool_zalloc(mem, sizeof(*pvl))) ||
!(pvl->pv = dm_pool_zalloc(mem, sizeof(*pvl->pv))))
return_0;
@@ -212,7 +236,7 @@ static int _read_pv(struct format_instance *fid,
memcpy(&pv->vgid, &vg->id, sizeof(vg->id));
- if (!_read_flag_config(pvn, &pv->status, PV_FLAGS)) {
+ if (!outdated && !_read_flag_config(pvn, &pv->status, PV_FLAGS)) {
log_error("Couldn't read status flags for physical volume.");
return 0;
}
@@ -234,13 +258,13 @@ static int _read_pv(struct format_instance *fid,
return 0;
}
- if (!_read_uint64(pvn, "pe_start", &pv->pe_start)) {
+ if (!outdated && !_read_uint64(pvn, "pe_start", &pv->pe_start)) {
log_error("Couldn't read extent start value (pe_start) "
"for physical volume.");
return 0;
}
- if (!_read_int32(pvn, "pe_count", &pv->pe_count)) {
+ if (!outdated && !_read_int32(pvn, "pe_count", &pv->pe_count)) {
log_error("Couldn't find extent count (pe_count) for "
"physical volume.");
return 0;
@@ -251,7 +275,7 @@ static int _read_pv(struct format_instance *fid,
_read_uint64(pvn, "ba_start", &ba_start);
_read_uint64(pvn, "ba_size", &size);
if (ba_start && size) {
- log_debug("Found bootloader area specification for PV %s "
+ log_debug_metadata("Found bootloader area specification for PV %s "
"in metadata: ba_start=%" PRIu64 ", ba_size=%" PRIu64 ".",
pv_dev_name(pv), ba_start, size);
pv->ba_start = ba_start;
@@ -267,7 +291,7 @@ static int _read_pv(struct format_instance *fid,
/* Optional tags */
if (dm_config_get_list(pvn, "tags", &cv) &&
- !(read_tags(mem, &pv->tags, cv))) {
+ !(_read_str_list(mem, &pv->tags, cv))) {
log_error("Couldn't read tags for physical volume %s in %s.",
pv_dev_name(pv), vg->name);
return 0;
@@ -299,7 +323,10 @@ static int _read_pv(struct format_instance *fid,
vg->extent_count += pv->pe_count;
vg->free_count += pv->pe_count;
- add_pvl_to_vgs(vg, pvl);
+ if (outdated)
+ dm_list_add(&vg->pvs_outdated, &pvl->list);
+ else
+ add_pvl_to_vgs(vg, pvl);
return 1;
}
@@ -327,7 +354,7 @@ static int _read_segment(struct logical_volume *lv, const struct dm_config_node
struct lv_segment *seg;
const struct dm_config_node *sn_child = sn->child;
const struct dm_config_value *cv;
- uint32_t start_extent, extent_count, reshape_count;
+ uint32_t area_extents, start_extent, extent_count, reshape_count, data_copies;
struct segment_type *segtype;
const char *segtype_str;
@@ -348,10 +375,12 @@ static int _read_segment(struct logical_volume *lv, const struct dm_config_node
return 0;
}
- /* HM FIXME: use reshape_count */
if (!_read_int32(sn_child, "reshape_count", &reshape_count))
reshape_count = 0;
+ if (!_read_int32(sn_child, "data_copies", &data_copies))
+ data_copies = 1;
+
segtype_str = "striped";
if (!dm_config_get_str(sn_child, "type", &segtype_str)) {
@@ -359,6 +388,7 @@ static int _read_segment(struct logical_volume *lv, const struct dm_config_node
return 0;
}
+PFLA("lv=%s segtype_str=%s", lv->name, segtype_str);
if (!(segtype = get_segtype_from_string(lv->vg->cmd, segtype_str)))
return_0;
@@ -366,20 +396,35 @@ static int _read_segment(struct logical_volume *lv, const struct dm_config_node
!segtype->ops->text_import_area_count(sn_child, &area_count))
return_0;
+ if (segtype_is_mirror(segtype) || segtype_is_raid1(segtype))
+ data_copies = area_count;
+
+#if 0
if (!(seg = alloc_lv_segment(segtype, lv, start_extent,
extent_count, reshape_count, 0, 0, NULL, area_count,
- extent_count, 0, 0, 0, NULL))) {
+ extent_count, data_copies, 0, 0, 0, NULL))) {
+#else
+PFLA("lv=%s data_copies=%u", lv->name, data_copies);
+ area_extents = segtype->parity_devs ?
+ raid_rimage_extents(segtype, extent_count, area_count - segtype->parity_devs, data_copies) : extent_count;
+PFLA("lv=%s area_extents=%u", lv->name, area_extents);
+ if (!(seg = alloc_lv_segment(segtype, lv, start_extent,
+ extent_count, reshape_count, 0, 0, NULL, area_count,
+ area_extents, data_copies, 0, 0, 0, NULL))) {
+#endif
log_error("Segment allocation failed");
return 0;
}
+PFLA("lv=%s seg->len=%u seg->area_len=%u", lv->name, seg->len, seg->area_len);
if (seg->segtype->ops->text_import &&
!seg->segtype->ops->text_import(seg, sn_child, pv_hash))
return_0;
+PFLA("lv=%s seg->len=%u seg->area_len=%u", lv->name, seg->len, seg->area_len);
/* Optional tags */
if (dm_config_get_list(sn_child, "tags", &cv) &&
- !(read_tags(mem, &seg->tags, cv))) {
+ !(_read_str_list(mem, &seg->tags, cv))) {
log_error("Couldn't read tags for a segment of %s/%s.",
lv->vg->name, lv->name);
return 0;
@@ -389,6 +434,7 @@ static int _read_segment(struct logical_volume *lv, const struct dm_config_node
* Insert into correct part of segment list.
*/
_insert_segment(lv, seg);
+PFLA("lv=%s seg->len=%u seg->area_len=%u", lv->name, seg->len, seg->area_len);
if (seg_is_mirror(seg))
lv->status |= MIRROR;
@@ -436,7 +482,8 @@ int text_import_areas(struct lv_segment *seg, const struct dm_config_node *sn,
}
if (cv->next->type != DM_CFG_INT) {
- log_error("Bad offset in areas array for segment %s.", seg_name);
+ log_error("Bad offset in areas array for segment %s, seg->lv %s.",
+ seg_name, display_lvname(seg->lv));
return 0;
}
@@ -535,7 +582,7 @@ static int _read_lvnames(struct format_instance *fid __attribute__((unused)),
const char *str;
const struct dm_config_value *cv;
const char *hostname;
- uint64_t timestamp = 0;
+ uint64_t timestamp = 0, lvstatus;
if (!(lv = alloc_lv(mem)))
return_0;
@@ -548,12 +595,18 @@ static int _read_lvnames(struct format_instance *fid __attribute__((unused)),
return 0;
}
- if (!_read_flag_config(lvn, &lv->status, LV_FLAGS)) {
+ if (!_read_flag_config(lvn, &lvstatus, LV_FLAGS)) {
log_error("Couldn't read status flags for logical volume %s.",
lv->name);
return 0;
}
+ if (lvstatus & LVM_WRITE_LOCKED) {
+ lvstatus |= LVM_WRITE;
+ lvstatus &= ~LVM_WRITE_LOCKED;
+ }
+ lv->status = lvstatus;
+
if (dm_config_has_node(lvn, "creation_time")) {
if (!_read_uint64(lvn, "creation_time", &timestamp)) {
log_error("Invalid creation_time for logical volume %s.",
@@ -571,6 +624,30 @@ static int _read_lvnames(struct format_instance *fid __attribute__((unused)),
return 0;
}
+ /*
+ * The LV lock_args string is generated in lvmlockd, and the content
+ * depends on the lock_type.
+ *
+ * lock_type dlm does not use LV lock_args, so the LV lock_args field
+ * is just set to "dlm".
+ *
+ * lock_type sanlock uses the LV lock_args field to save the
+ * location on disk of that LV's sanlock lock. The disk name is
+ * specified in the VG lock_args. The lock_args string begins
+ * with a version number, e.g. 1.0.0, followed by a colon, followed
+ * by a number. The number is the offset on disk where sanlock is
+ * told to find the LV's lock.
+ * e.g. lock_args = 1.0.0:70254592
+ * means that the lock is located at offset 70254592.
+ *
+ * The lvmlockd code for each specific lock manager also validates
+ * the lock_args before using it to access the lock manager.
+ */
+ if (dm_config_get_str(lvn, "lock_args", &str)) {
+ if (!(lv->lock_args = dm_pool_strdup(mem, str)))
+ return_0;
+ }
+
lv->alloc = ALLOC_INHERIT;
if (dm_config_get_str(lvn, "allocation_policy", &str)) {
lv->alloc = get_alloc_from_string(str);
@@ -609,7 +686,7 @@ static int _read_lvnames(struct format_instance *fid __attribute__((unused)),
/* Optional tags */
if (dm_config_get_list(lvn, "tags", &cv) &&
- !(read_tags(mem, &lv->tags, cv))) {
+ !(_read_str_list(mem, &lv->tags, cv))) {
log_error("Couldn't read tags for logical volume %s/%s.",
vg->name, lv->name);
return 0;
@@ -624,6 +701,9 @@ static int _read_lvnames(struct format_instance *fid __attribute__((unused)),
if (timestamp && !lv_set_creation(lv, hostname, timestamp))
return_0;
+ if (!lv_is_visible(lv) && strstr(lv->name, "_dup_"))
+ lv->status |= LV_DUPLICATED;
+
if (!lv_is_visible(lv) && strstr(lv->name, "_pmspare")) {
if (vg->pool_metadata_spare_lv) {
log_error("Couldn't use another pool metadata spare "
@@ -636,6 +716,12 @@ static int _read_lvnames(struct format_instance *fid __attribute__((unused)),
vg->pool_metadata_spare_lv = lv;
}
+ if (!lv_is_visible(lv) && !strcmp(lv->name, LOCKD_SANLOCK_LV_NAME)) {
+ log_debug_metadata("Logical volume %s is sanlock lv.", lv->name);
+ lv->status |= LOCKD_SANLOCK_LV;
+ vg->sanlock_lv = lv;
+ }
+
return 1;
}
@@ -733,14 +819,16 @@ static int _read_sections(struct format_instance *fid,
static struct volume_group *_read_vg(struct format_instance *fid,
const struct dm_config_tree *cft,
- unsigned use_cached_pvs)
+ unsigned use_cached_pvs,
+ unsigned allow_lvmetad_extensions)
{
const struct dm_config_node *vgn;
const struct dm_config_value *cv;
- const char *str;
+ const char *str, *format_str, *system_id;
struct volume_group *vg;
struct dm_hash_table *pv_hash = NULL, *lv_hash = NULL;
unsigned scan_done_once = use_cached_pvs;
+ uint64_t vgstatus;
/* skip any top-level values */
for (vgn = cft->root; (vgn && vgn->v); vgn = vgn->sib)
@@ -754,9 +842,6 @@ static struct volume_group *_read_vg(struct format_instance *fid,
if (!(vg = alloc_vg("read_vg", fid->fmt->cmd, vgn->key)))
return_NULL;
- if (!(vg->system_id = dm_pool_zalloc(vg->vgmem, NAME_LEN + 1)))
- goto_bad;
-
/*
* The pv hash memorises the pv section names -> pv
* structures.
@@ -777,8 +862,42 @@ static struct volume_group *_read_vg(struct format_instance *fid,
vgn = vgn->child;
- if (dm_config_get_str(vgn, "system_id", &str)) {
- strncpy(vg->system_id, str, NAME_LEN);
+ /* A backup file might be a backup of a different format */
+ if (dm_config_get_str(vgn, "format", &format_str) &&
+ !(vg->original_fmt = get_format_by_name(fid->fmt->cmd, format_str))) {
+ log_error("Unrecognised format %s for volume group %s.", format_str, vg->name);
+ goto bad;
+ }
+
+ if (dm_config_get_str(vgn, "lock_type", &str)) {
+ if (!(vg->lock_type = dm_pool_strdup(vg->vgmem, str)))
+ goto bad;
+ }
+
+ /*
+ * The VG lock_args string is generated in lvmlockd, and the content
+ * depends on the lock_type. lvmlockd begins the lock_args string
+ * with a version number, e.g. 1.0.0, followed by a colon, followed
+ * by a string that depends on the lock manager. The string after
+ * the colon is information needed to use the lock manager for the VG.
+ *
+ * For sanlock, the string is the name of the internal LV used to store
+ * sanlock locks. lvmlockd needs to know where the locks are located
+ * so it can pass that location to sanlock which needs to access the locks.
+ * e.g. lock_args = 1.0.0:lvmlock
+ * means that the locks are located on the LV "lvmlock".
+ *
+ * For dlm, the string is the dlm cluster name. lvmlockd needs to use
+ * a dlm lockspace in this cluster to use the VG.
+ * e.g. lock_args = 1.0.0:foo
+ * means that the host needs to be a member of the cluster "foo".
+ *
+ * The lvmlockd code for each specific lock manager also validates
+ * the lock_args before using it to access the lock manager.
+ */
+ if (dm_config_get_str(vgn, "lock_args", &str)) {
+ if (!(vg->lock_args = dm_pool_strdup(vg->vgmem, str)))
+ goto bad;
}
if (!_read_id(&vg->id, vgn, "id")) {
@@ -792,12 +911,32 @@ static struct volume_group *_read_vg(struct format_instance *fid,
goto bad;
}
- if (!_read_flag_config(vgn, &vg->status, VG_FLAGS)) {
+ if (!_read_flag_config(vgn, &vgstatus, VG_FLAGS)) {
log_error("Error reading flags of volume group %s.",
vg->name);
goto bad;
}
+ /*
+ * A system id without WRITE_LOCKED is an old lvm1 system id.
+ */
+ if (dm_config_get_str(vgn, "system_id", &system_id)) {
+ if (!(vgstatus & LVM_WRITE_LOCKED)) {
+ if (!(vg->lvm1_system_id = dm_pool_zalloc(vg->vgmem, NAME_LEN + 1)))
+ goto_bad;
+ strncpy(vg->lvm1_system_id, system_id, NAME_LEN);
+ } else if (!(vg->system_id = dm_pool_strdup(vg->vgmem, system_id))) {
+ log_error("Failed to allocate memory for system_id in _read_vg.");
+ goto bad;
+ }
+ }
+
+ if (vgstatus & LVM_WRITE_LOCKED) {
+ vgstatus |= LVM_WRITE;
+ vgstatus &= ~LVM_WRITE_LOCKED;
+ }
+ vg->status = vgstatus;
+
if (!_read_int32(vgn, "extent_size", &vg->extent_size)) {
log_error("Couldn't read extent size for volume group %s.",
vg->name);
@@ -849,9 +988,15 @@ static struct volume_group *_read_vg(struct format_instance *fid,
goto bad;
}
+ if (allow_lvmetad_extensions)
+ _read_sections(fid, "outdated_pvs", _read_pv, vg,
+ vgn, pv_hash, lv_hash, 1, &scan_done_once);
+ else if (dm_config_has_node(vgn, "outdated_pvs"))
+ log_error(INTERNAL_ERROR "Unexpected outdated_pvs section in metadata of VG %s.", vg->name);
+
/* Optional tags */
if (dm_config_get_list(vgn, "tags", &cv) &&
- !(read_tags(vg->vgmem, &vg->tags, cv))) {
+ !(_read_str_list(vg->vgmem, &vg->tags, cv))) {
log_error("Couldn't read tags for volume group %s.", vg->name);
goto bad;
}
@@ -879,8 +1024,6 @@ static struct volume_group *_read_vg(struct format_instance *fid,
dm_hash_destroy(pv_hash);
dm_hash_destroy(lv_hash);
- /* FIXME Determine format type from file contents */
- /* eg Set to instance of fmt1 here if reading a format1 backup? */
vg_set_fid(vg, fid);
/*
@@ -915,19 +1058,21 @@ static void _read_desc(struct dm_pool *mem,
*when = u;
}
-static const char *_read_vgname(const struct format_type *fmt,
- const struct dm_config_tree *cft, struct id *vgid,
- uint64_t *vgstatus, char **creation_host)
+/*
+ * It would be more accurate to call this _read_vgsummary().
+ * It is used to read vgsummary information about a VG
+ * before locking and reading the VG via vg_read().
+ */
+static int _read_vgname(const struct format_type *fmt, const struct dm_config_tree *cft,
+ struct lvmcache_vgsummary *vgsummary)
{
const struct dm_config_node *vgn;
struct dm_pool *mem = fmt->cmd->mem;
- char *vgname;
int old_suppress;
old_suppress = log_suppress(2);
- *creation_host = dm_pool_strdup(mem,
- dm_config_find_str_allow_empty(cft->root,
- "creation_host", ""));
+ vgsummary->creation_host =
+ dm_pool_strdup(mem, dm_config_find_str_allow_empty(cft->root, "creation_host", ""));
log_suppress(old_suppress);
/* skip any top-level values */
@@ -938,23 +1083,25 @@ static const char *_read_vgname(const struct format_type *fmt,
return 0;
}
- if (!(vgname = dm_pool_strdup(mem, vgn->key)))
+ if (!(vgsummary->vgname = dm_pool_strdup(mem, vgn->key)))
return_0;
vgn = vgn->child;
- if (!_read_id(vgid, vgn, "id")) {
- log_error("Couldn't read uuid for volume group %s.", vgname);
+ if (!_read_id(&vgsummary->vgid, vgn, "id")) {
+ log_error("Couldn't read uuid for volume group %s.", vgsummary->vgname);
return 0;
}
- if (!_read_flag_config(vgn, vgstatus, VG_FLAGS)) {
+ if (!_read_flag_config(vgn, &vgsummary->vgstatus, VG_FLAGS)) {
log_error("Couldn't find status flags for volume group %s.",
- vgname);
+ vgsummary->vgname);
return 0;
}
- return vgname;
+ dm_config_get_str(vgn, "lock_type", &vgsummary->lock_type);
+
+ return 1;
}
static struct text_vg_version_ops _vsn1_ops = {
diff --git a/lib/format_text/layout.h b/lib/format_text/layout.h
index 4753fde2b..64fc0e1ca 100644
--- a/lib/format_text/layout.h
+++ b/lib/format_text/layout.h
@@ -18,6 +18,7 @@
#include "config.h"
#include "metadata.h"
+#include "lvmcache.h"
#include "uuid.h"
/* disk_locn and data_area_list are defined in format-text.h */
@@ -97,11 +98,8 @@ struct mda_context {
#define LVM2_LABEL "LVM2 001"
#define MDA_SIZE_MIN (8 * (unsigned) lvm_getpagesize())
-
-const char *vgname_from_mda(const struct format_type *fmt,
- struct mda_header *mdah,
- struct device_area *dev_area, struct id *vgid,
- uint64_t *vgstatus, char **creation_host,
- uint64_t *mda_free_sectors);
+int vgname_from_mda(const struct format_type *fmt, struct mda_header *mdah,
+ struct device_area *dev_area, struct lvmcache_vgsummary *vgsummary,
+ uint64_t *mda_free_sectors);
#endif
diff --git a/lib/format_text/tags.c b/lib/format_text/tags.c
deleted file mode 100644
index dc138d140..000000000
--- a/lib/format_text/tags.c
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (C) 2003-2004 Sistina Software, Inc. All rights reserved.
- * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
- *
- * This file is part of LVM2.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU Lesser General Public License v.2.1.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include "lib.h"
-#include "metadata.h"
-#include "import-export.h"
-#include "str_list.h"
-#include "lvm-string.h"
-
-char *alloc_printed_tags(struct dm_list *tagsl)
-{
- struct dm_str_list *sl;
- int first = 1;
- size_t size = 0;
- char *buffer, *buf;
-
- dm_list_iterate_items(sl, tagsl)
- /* '"' + tag + '"' + ',' + ' ' */
- size += strlen(sl->str) + 4;
- /* '[' + ']' + '\0' */
- size += 3;
-
- if (!(buffer = buf = dm_malloc(size))) {
- log_error("Could not allocate memory for tag list buffer.");
- return NULL;
- }
-
- if (!emit_to_buffer(&buf, &size, "["))
- goto_bad;
-
- dm_list_iterate_items(sl, tagsl) {
- if (!first) {
- if (!emit_to_buffer(&buf, &size, ", "))
- goto_bad;
- } else
- first = 0;
-
- if (!emit_to_buffer(&buf, &size, "\"%s\"", sl->str))
- goto_bad;
- }
-
- if (!emit_to_buffer(&buf, &size, "]"))
- goto_bad;
-
- return buffer;
-
-bad:
- dm_free(buffer);
- return_NULL;
-}
-
-int read_tags(struct dm_pool *mem, struct dm_list *tagsl, const struct dm_config_value *cv)
-{
- if (cv->type == DM_CFG_EMPTY_ARRAY)
- return 1;
-
- while (cv) {
- if (cv->type != DM_CFG_STRING) {
- log_error("Found a tag that is not a string");
- return 0;
- }
-
- if (!str_list_add(mem, tagsl, dm_pool_strdup(mem, cv->v.str)))
- return_0;
-
- cv = cv->next;
- }
-
- return 1;
-}
diff --git a/lib/format_text/text_label.c b/lib/format_text/text_label.c
index 516d694fe..4ad2ccdb2 100644
--- a/lib/format_text/text_label.c
+++ b/lib/format_text/text_label.c
@@ -319,10 +319,14 @@ static int _update_mda(struct metadata_area *mda, void *baton)
const struct format_type *fmt = p->label->labeller->fmt;
struct mda_context *mdac = (struct mda_context *) mda->metadata_locn;
struct mda_header *mdah;
- const char *vgname = NULL;
- struct id vgid;
- uint64_t vgstatus;
- char *creation_host;
+ struct lvmcache_vgsummary vgsummary = { 0 };
+
+ /*
+ * Using the labeller struct to preserve info about
+ * the last parsed vgname, vgid, creation host
+ *
+ * TODO: make lvmcache smarter and move this cache logic there
+ */
if (!dev_open_readonly(mdac->area.dev)) {
mda_set_ignored(mda, 1);
@@ -346,17 +350,14 @@ static int _update_mda(struct metadata_area *mda, void *baton)
return 1;
}
- if ((vgname = vgname_from_mda(fmt, mdah,
- &mdac->area,
- &vgid, &vgstatus, &creation_host,
- &mdac->free_sectors)) &&
- !lvmcache_update_vgname_and_id(p->info, vgname,
- (char *) &vgid, vgstatus,
- creation_host)) {
+ if (vgname_from_mda(fmt, mdah, &mdac->area, &vgsummary,
+ &mdac->free_sectors) &&
+ !lvmcache_update_vgname_and_id(p->info, &vgsummary)) {
if (!dev_close(mdac->area.dev))
stack;
return_0;
}
+
close_dev:
if (!dev_close(mdac->area.dev))
stack;
@@ -417,8 +418,8 @@ static int _text_read(struct labeller *l, struct device *dev, void *buf,
if (!(ext_version = xlate32(pvhdr_ext->version)))
goto out;
- log_debug("%s: PV header extension version %" PRIu32 " found",
- dev_name(dev), ext_version);
+ log_debug_metadata("%s: PV header extension version %" PRIu32 " found",
+ dev_name(dev), ext_version);
/* Bootloader areas */
dlocn_xl = pvhdr_ext->bootloader_areas_xl;
@@ -465,7 +466,7 @@ struct labeller *text_labeller_create(const struct format_type *fmt)
{
struct labeller *l;
- if (!(l = dm_malloc(sizeof(*l)))) {
+ if (!(l = dm_zalloc(sizeof(*l)))) {
log_error("Couldn't allocate labeller object.");
return NULL;
}
diff --git a/lib/label/label.c b/lib/label/label.c
index ce59da8c5..bc3c65320 100644
--- a/lib/label/label.c
+++ b/lib/label/label.c
@@ -97,6 +97,18 @@ struct labeller *label_get_handler(const char *name)
return NULL;
}
+static void _update_lvmcache_orphan(struct lvmcache_info *info)
+{
+ struct lvmcache_vgsummary vgsummary_orphan = {
+ .vgname = lvmcache_fmt(info)->orphan_vg_name,
+ };
+
+ memcpy(&vgsummary_orphan.vgid, lvmcache_fmt(info)->orphan_vg_name, strlen(lvmcache_fmt(info)->orphan_vg_name));
+
+ if (!lvmcache_update_vgname_and_id(info, &vgsummary_orphan))
+ stack;
+}
+
static struct labeller *_find_labeller(struct device *dev, char *buf,
uint64_t *label_sector,
uint64_t scan_sector)
@@ -173,9 +185,7 @@ static struct labeller *_find_labeller(struct device *dev, char *buf,
out:
if (!found) {
if ((info = lvmcache_info_from_pvid(dev->pvid, 0)))
- lvmcache_update_vgname_and_id(info, lvmcache_fmt(info)->orphan_vg_name,
- lvmcache_fmt(info)->orphan_vg_name,
- 0, NULL);
+ _update_lvmcache_orphan(info);
log_very_verbose("%s: No label detected", dev_name(dev));
}
@@ -271,9 +281,7 @@ int label_read(struct device *dev, struct label **result,
stack;
if ((info = lvmcache_info_from_pvid(dev->pvid, 0)))
- lvmcache_update_vgname_and_id(info, lvmcache_fmt(info)->orphan_vg_name,
- lvmcache_fmt(info)->orphan_vg_name,
- 0, NULL);
+ _update_lvmcache_orphan(info);
return r;
}
@@ -348,10 +356,7 @@ int label_verify(struct device *dev)
if (!dev_open_readonly(dev)) {
if ((info = lvmcache_info_from_pvid(dev->pvid, 0)))
- lvmcache_update_vgname_and_id(info, lvmcache_fmt(info)->orphan_vg_name,
- lvmcache_fmt(info)->orphan_vg_name,
- 0, NULL);
-
+ _update_lvmcache_orphan(info);
return_0;
}
diff --git a/lib/locking/locking.c b/lib/locking/locking.c
index c88a85b82..22e83d2b1 100644
--- a/lib/locking/locking.c
+++ b/lib/locking/locking.c
@@ -119,8 +119,9 @@ int init_locking(int type, struct cmd_context *cmd, int suppress_messages)
switch (type) {
case 0:
init_no_locking(&_locking, cmd, suppress_messages);
- log_warn("WARNING: Locking disabled. Be careful! "
- "This could corrupt your metadata.");
+ log_warn_suppress(suppress_messages,
+ "WARNING: Locking disabled. Be careful! "
+ "This could corrupt your metadata.");
return 1;
case 1:
diff --git a/lib/locking/locking.h b/lib/locking/locking.h
index 644e07c9c..706b59e92 100644
--- a/lib/locking/locking.h
+++ b/lib/locking/locking.h
@@ -195,9 +195,10 @@ int check_lvm1_vg_inactive(struct cmd_context *cmd, const char *vgname);
#define unlock_vg(cmd, vol) \
do { \
- if (is_real_vg(vol)) \
- sync_dev_names(cmd); \
- (void) lock_vol(cmd, vol, LCK_VG_UNLOCK, NULL); \
+ if (is_real_vg(vol) && !sync_dev_names(cmd)) \
+ stack; \
+ if (!lock_vol(cmd, vol, LCK_VG_UNLOCK, NULL)) \
+ stack; \
} while (0)
#define unlock_and_release_vg(cmd, vg, vol) \
do { \
diff --git a/lib/locking/lvmlockd.c b/lib/locking/lvmlockd.c
new file mode 100644
index 000000000..5918de347
--- /dev/null
+++ b/lib/locking/lvmlockd.c
@@ -0,0 +1,2604 @@
+/*
+ * Copyright (C) 2014-2015 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ */
+
+#include "lib.h"
+#include "toolcontext.h"
+#include "metadata.h"
+#include "segtype.h"
+#include "activate.h"
+#include "lvmetad.h"
+#include "lvmlockd.h"
+#include "lvmcache.h"
+#include "lvmlockd-client.h"
+
+static daemon_handle _lvmlockd;
+static const char *_lvmlockd_socket = NULL;
+static struct cmd_context *_lvmlockd_cmd = NULL;
+static int _use_lvmlockd = 0; /* is 1 if command is configured to use lvmlockd */
+static int _lvmlockd_connected = 0; /* is 1 if command is connected to lvmlockd */
+static int _lvmlockd_init_failed = 0; /* used to suppress further warnings */
+
+void lvmlockd_set_socket(const char *sock)
+{
+ _lvmlockd_socket = sock;
+}
+
+/*
+ * Set directly from global/use_lvmlockd
+ */
+void lvmlockd_set_use(int use)
+{
+ _use_lvmlockd = use;
+}
+
+/*
+ * Returns the value of global/use_lvmlockd being used by the command.
+ */
+int lvmlockd_use(void)
+{
+ return _use_lvmlockd;
+}
+
+/*
+ * The command continues even if init and/or connect fail,
+ * because the command is allowed to use local VGs without lvmlockd,
+ * and is allowed to read lockd VGs without locks from lvmlockd.
+ */
+void lvmlockd_init(struct cmd_context *cmd)
+{
+ if (!_use_lvmlockd) {
+ /* Should never happen, don't call init when not using lvmlockd. */
+ log_error("Should not initialize lvmlockd with use_lvmlockd=0.");
+ }
+
+ if (!_lvmlockd_socket) {
+ log_warn("WARNING: lvmlockd socket location is not configured.");
+ _lvmlockd_init_failed = 1;
+ }
+
+ if (!!access(LVMLOCKD_PIDFILE, F_OK)) {
+ log_warn("WARNING: lvmlockd process is not running.");
+ _lvmlockd_init_failed = 1;
+ } else {
+ _lvmlockd_init_failed = 0;
+ }
+
+ _lvmlockd_cmd = cmd;
+}
+
+void lvmlockd_connect(void)
+{
+ if (!_use_lvmlockd) {
+ /* Should never happen, don't call connect when not using lvmlockd. */
+ log_error("Should not connect to lvmlockd with use_lvmlockd=0.");
+ }
+
+ if (_lvmlockd_connected) {
+ /* Should never happen, only call connect once. */
+ log_error("lvmlockd is already connected.");
+ }
+
+ if (_lvmlockd_init_failed)
+ return;
+
+ _lvmlockd = lvmlockd_open(_lvmlockd_socket);
+
+ if (_lvmlockd.socket_fd >= 0 && !_lvmlockd.error) {
+ log_debug("Successfully connected to lvmlockd on fd %d.", _lvmlockd.socket_fd);
+ _lvmlockd_connected = 1;
+ } else {
+ log_warn("WARNING: lvmlockd connect failed.");
+ }
+}
+
+void lvmlockd_disconnect(void)
+{
+ if (_lvmlockd_connected)
+ daemon_close(_lvmlockd);
+ _lvmlockd_connected = 0;
+ _lvmlockd_cmd = NULL;
+}
+
+/* Translate the result strings from lvmlockd to bit flags. */
+static void _flags_str_to_lockd_flags(const char *flags_str, uint32_t *lockd_flags)
+{
+ if (strstr(flags_str, "NO_LOCKSPACES"))
+ *lockd_flags |= LD_RF_NO_LOCKSPACES;
+
+ if (strstr(flags_str, "NO_GL_LS"))
+ *lockd_flags |= LD_RF_NO_GL_LS;
+
+ if (strstr(flags_str, "DUP_GL_LS"))
+ *lockd_flags |= LD_RF_DUP_GL_LS;
+
+ if (strstr(flags_str, "INACTIVE_LS"))
+ *lockd_flags |= LD_RF_INACTIVE_LS;
+
+ if (strstr(flags_str, "ADD_LS_ERROR"))
+ *lockd_flags |= LD_RF_ADD_LS_ERROR;
+
+ if (strstr(flags_str, "WARN_GL_REMOVED"))
+ *lockd_flags |= LD_RF_WARN_GL_REMOVED;
+}
+
+/*
+ * evaluate the reply from lvmlockd, check for errors, extract
+ * the result and lockd_flags returned by lvmlockd.
+ * 0 failure (no result/lockd_flags set)
+ * 1 success (result/lockd_flags set)
+ */
+
+/*
+ * This is an arbitrary number that we know lvmlockd
+ * will not return. daemon_reply_int reverts to this
+ * value if it finds no result value.
+ */
+#define NO_LOCKD_RESULT -1000
+
+static int _lockd_result(daemon_reply reply, int *result, uint32_t *lockd_flags)
+{
+ int reply_result;
+ const char *flags_str = NULL;
+ const char *lock_type = NULL;
+
+ if (reply.error) {
+ log_error("lockd_result reply error %d", reply.error);
+ return 0;
+ }
+
+ if (strcmp(daemon_reply_str(reply, "response", ""), "OK")) {
+ log_error("lockd_result bad response");
+ return 0;
+ }
+
+ reply_result = daemon_reply_int(reply, "op_result", NO_LOCKD_RESULT);
+ if (reply_result == NO_LOCKD_RESULT) {
+ log_error("lockd_result no op_result");
+ return 0;
+ }
+
+ /* The lock_type that lvmlockd used for locking. */
+ lock_type = daemon_reply_str(reply, "lock_type", "none");
+
+ *result = reply_result;
+
+ if (lockd_flags) {
+ if ((flags_str = daemon_reply_str(reply, "result_flags", NULL)))
+ _flags_str_to_lockd_flags(flags_str, lockd_flags);
+ }
+
+ log_debug("lockd_result %d flags %s lm %s", reply_result,
+ flags_str ? flags_str : "none", lock_type);
+ return 1;
+}
+
+static daemon_reply _lockd_send(const char *req_name, ...)
+{
+ va_list ap;
+ daemon_reply repl;
+ daemon_request req;
+
+ req = daemon_request_make(req_name);
+
+ va_start(ap, req_name);
+ daemon_request_extend_v(req, ap);
+ va_end(ap);
+
+ repl = daemon_send(_lvmlockd, req);
+
+ daemon_request_destroy(req);
+
+ return repl;
+}
+
+/*
+ * result/lockd_flags are values returned from lvmlockd.
+ *
+ * return 0 (failure)
+ * return 1 (result/lockd_flags indicate success/failure)
+ *
+ * return 1 result 0 (success)
+ * return 1 result < 0 (failure)
+ *
+ * caller may ignore result < 0 failure depending on
+ * lockd_flags and the specific command/mode.
+ *
+ * When this function returns 0 (failure), no result/lockd_flags
+ * were obtained from lvmlockd.
+ *
+ * When this function returns 1 (success), result/lockd_flags may
+ * have been obtained from lvmlockd. This lvmlockd result may
+ * indicate a locking failure.
+ */
+
+static int _lockd_request(struct cmd_context *cmd,
+ const char *req_name,
+ const char *vg_name,
+ const char *vg_lock_type,
+ const char *vg_lock_args,
+ const char *lv_name,
+ const char *lv_uuid,
+ const char *lv_lock_args,
+ const char *mode,
+ const char *opts,
+ int *result,
+ uint32_t *lockd_flags)
+{
+ const char *cmd_name = get_cmd_name();
+ daemon_reply reply;
+ int pid = getpid();
+
+ *result = 0;
+ *lockd_flags = 0;
+
+ if (!strcmp(mode, "na"))
+ return 1;
+
+ if (!_use_lvmlockd)
+ return 0;
+ if (!_lvmlockd_connected)
+ return 0;
+
+ /* cmd and pid are passed for informational and debugging purposes */
+
+ if (!cmd_name || !cmd_name[0])
+ cmd_name = "none";
+
+ if (vg_name && lv_name) {
+ reply = _lockd_send(req_name,
+ "cmd = %s", cmd_name,
+ "pid = %d", pid,
+ "mode = %s", mode,
+ "opts = %s", opts ?: "none",
+ "vg_name = %s", vg_name,
+ "lv_name = %s", lv_name,
+ "lv_uuid = %s", lv_uuid,
+ "vg_lock_type = %s", vg_lock_type ?: "none",
+ "vg_lock_args = %s", vg_lock_args ?: "none",
+ "lv_lock_args = %s", lv_lock_args ?: "none",
+ NULL);
+
+ if (!_lockd_result(reply, result, lockd_flags))
+ goto fail;
+
+ log_debug("lvmlockd %s %s vg %s lv %s result %d %x",
+ req_name, mode, vg_name, lv_name, *result, *lockd_flags);
+
+ } else if (vg_name) {
+ reply = _lockd_send(req_name,
+ "cmd = %s", cmd_name,
+ "pid = %d", pid,
+ "mode = %s", mode,
+ "opts = %s", opts ?: "none",
+ "vg_name = %s", vg_name,
+ "vg_lock_type = %s", vg_lock_type ?: "none",
+ "vg_lock_args = %s", vg_lock_args ?: "none",
+ NULL);
+
+ if (!_lockd_result(reply, result, lockd_flags))
+ goto fail;
+
+ log_debug("lvmlockd %s %s vg %s result %d %x",
+ req_name, mode, vg_name, *result, *lockd_flags);
+
+ } else {
+ reply = _lockd_send(req_name,
+ "cmd = %s", cmd_name,
+ "pid = %d", pid,
+ "mode = %s", mode,
+ "opts = %s", opts ?: "none",
+ "vg_lock_type = %s", vg_lock_type ?: "none",
+ NULL);
+
+ if (!_lockd_result(reply, result, lockd_flags))
+ goto fail;
+
+ log_debug("lvmlockd %s %s result %d %x",
+ req_name, mode, *result, *lockd_flags);
+ }
+
+ daemon_reply_destroy(reply);
+
+ /* result/lockd_flags have lvmlockd result */
+ return 1;
+
+ fail:
+ /* no result was obtained from lvmlockd */
+
+ log_error("lvmlockd %s %s failed no result", req_name, mode);
+
+ daemon_reply_destroy(reply);
+ return 0;
+}
+
+/*
+ * Eventually add an option to specify which pv the lvmlock lv should be placed on.
+ */
+
+static int _create_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg,
+ const char *lock_lv_name, int extend_mb)
+{
+ struct logical_volume *lv;
+ struct lvcreate_params lp = {
+ .activate = CHANGE_ALY,
+ .alloc = ALLOC_INHERIT,
+ .extents = (extend_mb * 1024 * 1024) / (vg->extent_size * SECTOR_SIZE),
+ .major = -1,
+ .minor = -1,
+ .permission = LVM_READ | LVM_WRITE,
+ .pvh = &vg->pvs,
+ .read_ahead = DM_READ_AHEAD_NONE,
+ .stripes = 1,
+ .vg_name = vg->name,
+ .lv_name = dm_pool_strdup(cmd->mem, lock_lv_name),
+ .zero = 1,
+ };
+
+ dm_list_init(&lp.tags);
+
+ if (!(lp.segtype = get_segtype_from_string(vg->cmd, "striped")))
+ return_0;
+
+ lv = lv_create_single(vg, &lp);
+ if (!lv) {
+ log_error("Failed to create sanlock lv %s in vg %s", lock_lv_name, vg->name);
+ return 0;
+ }
+
+ vg->sanlock_lv = lv;
+
+ return 1;
+}
+
+static int _remove_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg)
+{
+ if (!lv_remove(vg->sanlock_lv)) {
+ log_error("Failed to remove sanlock LV %s/%s", vg->name, vg->sanlock_lv->name);
+ return 0;
+ }
+
+ return 1;
+}
+
+static int _extend_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg, int extend_mb)
+{
+ struct logical_volume *lv = vg->sanlock_lv;
+ struct lvresize_params lp = {
+ .lv_name = vg->sanlock_lv->name,
+ .sign = SIGN_NONE,
+ .percent = PERCENT_NONE,
+ .resize = LV_EXTEND,
+ .ac_force = 1,
+ .sizeargs = 1,
+ };
+
+ lp.size = lv->size + ((extend_mb * 1024 * 1024) / SECTOR_SIZE);
+
+ if (!lv_resize_prepare(cmd, lv, &lp, &vg->pvs) ||
+ !lv_resize(cmd, lv, &lp, &vg->pvs)) {
+ log_error("Extend LV %s/%s to size %llu failed.",
+ vg->name, lv->name, (unsigned long long)lp.size);
+ return 0;
+ }
+
+ return 1;
+}
+
+/* When one host does _extend_sanlock_lv, the others need to refresh the size. */
+
+static int _refresh_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg)
+{
+ if (!lv_refresh_suspend_resume(cmd, vg->sanlock_lv)) {
+ log_error("Failed to refresh %s.", vg->sanlock_lv->name);
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * Called at the beginning of lvcreate in a sanlock VG to ensure
+ * that there is space in the sanlock LV for a new lock. If it's
+ * full, then this extends it.
+ */
+
+int handle_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg)
+{
+ daemon_reply reply;
+ int extend_mb;
+ int result;
+ int ret;
+
+ if (!_use_lvmlockd)
+ return 1;
+ if (!_lvmlockd_connected)
+ return 0;
+
+ extend_mb = find_config_tree_int(cmd, global_sanlock_lv_extend_CFG, NULL);
+
+ /*
+ * User can choose to not automatically extend the lvmlock LV
+ * so they can manually extend it.
+ */
+ if (!extend_mb)
+ return 1;
+
+ /*
+ * Another host may have extended the lvmlock LV already.
+ * Refresh so that we'll find the new space they added
+ * when we search for new space.
+ */
+ if (!_refresh_sanlock_lv(cmd, vg))
+ return 0;
+
+ /*
+ * Ask lvmlockd/sanlock to look for an unused lock.
+ */
+ reply = _lockd_send("find_free_lock",
+ "pid = %d", getpid(),
+ "vg_name = %s", vg->name,
+ NULL);
+
+ if (!_lockd_result(reply, &result, NULL)) {
+ ret = 0;
+ } else {
+ ret = (result < 0) ? 0 : 1;
+ }
+
+ /* No space on the lvmlock lv for a new lease. */
+ if (result == -EMSGSIZE)
+ ret = _extend_sanlock_lv(cmd, vg, extend_mb);
+
+ daemon_reply_destroy(reply);
+
+ return ret;
+}
+
+static int _activate_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg)
+{
+ if (!activate_lv(cmd, vg->sanlock_lv)) {
+ log_error("Failed to activate sanlock lv %s/%s", vg->name, vg->sanlock_lv->name);
+ return 0;
+ }
+
+ return 1;
+}
+
+static int _deactivate_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg)
+{
+ if (!deactivate_lv(cmd, vg->sanlock_lv)) {
+ log_error("Failed to deactivate sanlock lv %s/%s", vg->name, vg->sanlock_lv->name);
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * vgcreate with lock_type "dlm": ask lvmlockd to initialize locking for the
+ * new VG, then record the lock_args string lvmlockd returns in the VG
+ * metadata (written and committed here).  Returns 1 on success, 0 on error.
+ */
+static int _init_vg_dlm(struct cmd_context *cmd, struct volume_group *vg)
+{
+	daemon_reply reply;
+	const char *reply_str;
+	const char *vg_lock_args = NULL;
+	int result;
+	int ret;
+
+	if (!_use_lvmlockd)
+		return 0;
+	if (!_lvmlockd_connected)
+		return 0;
+
+	reply = _lockd_send("init_vg",
+				"pid = %d", getpid(),
+				"vg_name = %s", vg->name,
+				"vg_lock_type = %s", "dlm",
+				NULL);
+
+	/* -ELOCKD is the local code for "no usable reply from lvmlockd". */
+	if (!_lockd_result(reply, &result, NULL)) {
+		ret = 0;
+		result = -ELOCKD;
+	} else {
+		ret = (result < 0) ? 0 : 1;
+	}
+
+	/* Map known daemon error codes to specific messages. */
+	switch (result) {
+	case 0:
+		break;
+	case -ELOCKD:
+		log_error("VG %s init failed: lvmlockd not available", vg->name);
+		break;
+	case -EARGS:
+		log_error("VG %s init failed: invalid parameters for dlm", vg->name);
+		break;
+	case -EMANAGER:
+		log_error("VG %s init failed: lock manager dlm is not running", vg->name);
+		break;
+	case -EPROTONOSUPPORT:
+		log_error("VG %s init failed: lock manager dlm is not supported by lvmlockd", vg->name);
+		break;
+	default:
+		log_error("VG %s init failed: %d", vg->name, result);
+	}
+
+	if (!ret)
+		goto out;
+
+	if (!(reply_str = daemon_reply_str(reply, "vg_lock_args", NULL))) {
+		log_error("VG %s init failed: lock_args not returned", vg->name);
+		ret = 0;
+		goto out;
+	}
+
+	/* Copy out of the reply; the reply buffer is destroyed below. */
+	if (!(vg_lock_args = dm_pool_strdup(cmd->mem, reply_str))) {
+		log_error("VG %s init failed: lock_args alloc failed", vg->name);
+		ret = 0;
+		goto out;
+	}
+
+	vg->lock_type = "dlm";
+	vg->lock_args = vg_lock_args;
+
+	/* Persist lock_type/lock_args in the VG metadata on disk. */
+	if (!vg_write(vg) || !vg_commit(vg)) {
+		log_error("VG %s init failed: vg_write vg_commit", vg->name);
+		ret = 0;
+		goto out;
+	}
+
+	ret = 1;
+out:
+	daemon_reply_destroy(reply);
+	return ret;
+}
+
+/*
+ * vgcreate with lock_type "sanlock": create and activate the internal
+ * lvmlock LV, ask lvmlockd/sanlock to initialize leases on it, then record
+ * the returned lock_args in the VG metadata.  On any failure the lvmlock LV
+ * is deactivated and removed again.  Returns 1 on success, 0 on error.
+ *
+ * lv_lock_count: number of existing LVs needing locks when converting an
+ * existing VG to sanlock (0 for a fresh vgcreate).
+ */
+static int _init_vg_sanlock(struct cmd_context *cmd, struct volume_group *vg, int lv_lock_count)
+{
+	daemon_reply reply;
+	const char *reply_str;
+	const char *vg_lock_args = NULL;
+	const char *opts = NULL;
+	int extend_mb;
+	int result;
+	int ret;
+
+	if (!_use_lvmlockd)
+		return 0;
+	if (!_lvmlockd_connected)
+		return 0;
+
+	/*
+	 * Automatic extension of the sanlock lv is disabled by
+	 * setting sanlock_lv_extend to 0.  Zero won't work as
+	 * an initial size, so in this case, use the default as
+	 * the initial size.
+	 */
+	if (!(extend_mb = find_config_tree_int(cmd, global_sanlock_lv_extend_CFG, NULL)))
+		extend_mb = DEFAULT_SANLOCK_LV_EXTEND_MB;
+
+	/*
+	 * Creating the sanlock LV writes the VG containing the new lvmlock
+	 * LV, then activates the lvmlock LV.  The lvmlock LV must be active
+	 * before we ask lvmlockd to initialize the VG because sanlock needs
+	 * to initialize leases on the lvmlock LV.
+	 *
+	 * When converting an existing VG to sanlock, the sanlock lv needs to
+	 * be large enough to hold leases for all existing lvs needing locks.
+	 * One sanlock lease uses 1MB/8MB for 512/4K sector size devices, so
+	 * increase the initial size by 1MB/8MB for each existing lv.
+	 * FIXME: we don't know what sector size the pv will have, so we
+	 * multiply by 8 (MB) unnecessarily when the sector size is 512.
+	 */
+
+	if (lv_lock_count)
+		extend_mb += (lv_lock_count * 8);
+
+	if (!_create_sanlock_lv(cmd, vg, LOCKD_SANLOCK_LV_NAME, extend_mb)) {
+		log_error("Failed to create internal lv.");
+		return 0;
+	}
+
+	/*
+	 * N.B. this passes the sanlock lv name as vg_lock_args
+	 * even though it is only part of the final args string
+	 * which will be returned from lvmlockd.
+	 */
+
+	reply = _lockd_send("init_vg",
+				"pid = %d", getpid(),
+				"vg_name = %s", vg->name,
+				"vg_lock_type = %s", "sanlock",
+				"vg_lock_args = %s", vg->sanlock_lv->name,
+				"opts = %s", opts ?: "none",
+				NULL);
+
+	/* -ELOCKD is the local code for "no usable reply from lvmlockd". */
+	if (!_lockd_result(reply, &result, NULL)) {
+		ret = 0;
+		result = -ELOCKD;
+	} else {
+		ret = (result < 0) ? 0 : 1;
+	}
+
+	/* Map known daemon error codes to specific messages. */
+	switch (result) {
+	case 0:
+		break;
+	case -ELOCKD:
+		log_error("VG %s init failed: lvmlockd not available", vg->name);
+		break;
+	case -EARGS:
+		log_error("VG %s init failed: invalid parameters for sanlock", vg->name);
+		break;
+	case -EMANAGER:
+		log_error("VG %s init failed: lock manager sanlock is not running", vg->name);
+		break;
+	case -EPROTONOSUPPORT:
+		log_error("VG %s init failed: lock manager sanlock is not supported by lvmlockd", vg->name);
+		break;
+	case -EMSGSIZE:
+		log_error("VG %s init failed: no disk space for leases", vg->name);
+		break;
+	default:
+		log_error("VG %s init failed: %d", vg->name, result);
+	}
+
+	if (!ret)
+		goto out;
+
+	if (!(reply_str = daemon_reply_str(reply, "vg_lock_args", NULL))) {
+		log_error("VG %s init failed: lock_args not returned", vg->name);
+		ret = 0;
+		goto out;
+	}
+
+	/* Copy out of the reply; the reply buffer is destroyed below. */
+	if (!(vg_lock_args = dm_pool_strdup(cmd->mem, reply_str))) {
+		log_error("VG %s init failed: lock_args alloc failed", vg->name);
+		ret = 0;
+		goto out;
+	}
+
+	/* The lvmlock LV is internal; hide it from normal LV listings. */
+	lv_set_hidden(vg->sanlock_lv);
+	vg->sanlock_lv->status |= LOCKD_SANLOCK_LV;
+
+	vg->lock_type = "sanlock";
+	vg->lock_args = vg_lock_args;
+
+	/* Persist lock_type/lock_args in the VG metadata on disk. */
+	if (!vg_write(vg) || !vg_commit(vg)) {
+		log_error("VG %s init failed: vg_write vg_commit", vg->name);
+		ret = 0;
+		goto out;
+	}
+
+	ret = 1;
+out:
+	/* On failure, undo the lvmlock LV created above. */
+	if (!ret) {
+		/*
+		 * The usleep delay gives sanlock time to close the lock lv,
+		 * and usually avoids having an annoying error printed.
+		 */
+		usleep(1000000);
+		_deactivate_sanlock_lv(cmd, vg);
+		_remove_sanlock_lv(cmd, vg);
+		if (!vg_write(vg) || !vg_commit(vg))
+			stack;
+	}
+
+	daemon_reply_destroy(reply);
+	return ret;
+}
+
+/* called after vg_remove on disk */
+
+/* Release the VG lock and leave the dlm lockspace.  Returns 1 on success. */
+static int _free_vg_dlm(struct cmd_context *cmd, struct volume_group *vg)
+{
+	uint32_t lockd_flags;
+	int result;
+
+	if (!_use_lvmlockd || !_lvmlockd_connected)
+		return 0;
+
+	/*
+	 * Unlocking the vg lock here preempts the lvmlockd unlock in
+	 * toollib.c which happens too late since the lockspace is
+	 * left here.
+	 */
+
+	/* Equivalent to a standard unlock. */
+	if (!_lockd_request(cmd, "lock_vg",
+			    vg->name, NULL, NULL, NULL, NULL, NULL, "un", NULL,
+			    &result, &lockd_flags) || result < 0) {
+		log_error("_free_vg_dlm lvmlockd result %d", result);
+		return 0;
+	}
+
+	/* Leave the dlm lockspace. */
+	lockd_stop_vg(cmd, vg);
+
+	return 1;
+}
+
+/* called before vg_remove on disk */
+
+static int _free_vg_sanlock(struct cmd_context *cmd, struct volume_group *vg)
+{
+	daemon_reply reply;
+	uint32_t lockd_flags = 0;
+	int result;
+	int ret;
+
+	if (!_use_lvmlockd)
+		return 0;
+	if (!_lvmlockd_connected)
+		return 0;
+
+	/*
+	 * vgremove originally held the global lock, but lost it because the
+	 * vgremove command is removing multiple VGs, and removed the VG
+	 * holding the global lock before attempting to remove this VG.
+	 * To avoid this situation, the user should remove the VG holding
+	 * the global lock in a command by itself, or as the last arg in a
+	 * vgremove command that removes multiple VGs.
+	 */
+	if (cmd->lockd_gl_removed) {
+		log_error("Global lock failed: global lock was lost by removing a previous VG.");
+		return 0;
+	}
+
+	if (!vg->lock_args || !strlen(vg->lock_args)) {
+		/* Shouldn't happen in general, but maybe in some error cases? */
+		log_debug("_free_vg_sanlock %s no lock_args", vg->name);
+		return 1;
+	}
+
+	reply = _lockd_send("free_vg",
+				"pid = %d", getpid(),
+				"vg_name = %s", vg->name,
+				"vg_lock_type = %s", vg->lock_type,
+				"vg_lock_args = %s", vg->lock_args,
+				NULL);
+
+	if (!_lockd_result(reply, &result, &lockd_flags)) {
+		/*
+		 * No usable reply from lvmlockd.  Set result explicitly so
+		 * the -EBUSY check below does not read an indeterminate
+		 * value (matches the error convention used by _init_vg_dlm).
+		 */
+		ret = 0;
+		result = -ELOCKD;
+	} else {
+		ret = (result < 0) ? 0 : 1;
+	}
+
+	/*
+	 * Other hosts could still be joined to the lockspace, which means they
+	 * are using the internal sanlock LV, which means we cannot remove the
+	 * VG.  Once other hosts stop using the VG it can be removed.
+	 */
+	if (result == -EBUSY) {
+		log_error("Lockspace for \"%s\" not stopped on other hosts", vg->name);
+		goto out;
+	}
+
+	if (!ret) {
+		log_error("_free_vg_sanlock lvmlockd result %d", result);
+		goto out;
+	}
+
+	/*
+	 * If the global lock has been removed by removing this VG, then:
+	 *
+	 * Print a warning indicating that the global lock should be enabled
+	 * in another remaining sanlock VG.
+	 *
+	 * Do not allow any more VGs to be removed by this command, e.g.
+	 * if a command removes two sanlock VGs, like vgremove foo bar,
+	 * and the global lock existed in foo, do not continue to remove
+	 * VG bar without the global lock.  See the corresponding check above.
+	 */
+	if (lockd_flags & LD_RF_WARN_GL_REMOVED) {
+		log_warn("VG %s held the sanlock global lock, enable global lock in another VG.", vg->name);
+		cmd->lockd_gl_removed = 1;
+	}
+
+	/*
+	 * The usleep delay gives sanlock time to close the lock lv,
+	 * and usually avoids having an annoying error printed.
+	 */
+	usleep(1000000);
+
+	_deactivate_sanlock_lv(cmd, vg);
+	_remove_sanlock_lv(cmd, vg);
+ out:
+	daemon_reply_destroy(reply);
+
+	return ret;
+}
+
+/*
+ * Tell lvmlockd to forget about an old VG name.
+ * lvmlockd remembers previous lockd VGs so that it can provide more
+ * informative error messages (see INACTIVE_LS, ADD_LS_ERROR).
+ *
+ * If a new local VG is created with the same name as a previous lockd VG,
+ * lvmlockd's memory of the previous lockd VG interferes (causes incorrect
+ * lockd_vg failures).
+ *
+ * We could also remove the list of inactive (old) VG names from lvmlockd,
+ * and then this function would not be needed, but this would also reduce
+ * the ability to have helpful error messages.
+ */
+
+/* Best-effort notification; the reply carries nothing we act on. */
+static void _forget_vg_name(struct cmd_context *cmd, struct volume_group *vg)
+{
+	daemon_reply reply;
+
+	if (!_use_lvmlockd || !_lvmlockd_connected)
+		return;
+
+	reply = _lockd_send("forget_vg_name",
+				"pid = %d", getpid(),
+				"vg_name = %s", vg->name,
+				NULL);
+
+	daemon_reply_destroy(reply);
+}
+
+/* vgcreate */
+
+int lockd_init_vg(struct cmd_context *cmd, struct volume_group *vg,
+		  const char *lock_type, int lv_lock_count)
+{
+	const int lock_type_num = get_lock_type_from_string(lock_type);
+
+	if (lock_type_num == LOCK_TYPE_NONE) {
+		/* A local VG: just clear any stale memory of a previous
+		 * lockd VG with the same name. */
+		_forget_vg_name(cmd, vg);
+		return 1;
+	}
+
+	if (lock_type_num == LOCK_TYPE_CLVM)
+		return 1;
+
+	if (lock_type_num == LOCK_TYPE_DLM)
+		return _init_vg_dlm(cmd, vg);
+
+	if (lock_type_num == LOCK_TYPE_SANLOCK)
+		return _init_vg_sanlock(cmd, vg, lv_lock_count);
+
+	log_error("Unknown lock_type.");
+	return 0;
+}
+
+/* vgremove before the vg is removed */
+
+int lockd_free_vg_before(struct cmd_context *cmd, struct volume_group *vg)
+{
+	const int lock_type_num = get_lock_type_from_string(vg->lock_type);
+
+	/* returning an error will prevent vg_remove() */
+	if (lock_type_num == LOCK_TYPE_SANLOCK)
+		return _free_vg_sanlock(cmd, vg);
+
+	if (lock_type_num == LOCK_TYPE_NONE ||
+	    lock_type_num == LOCK_TYPE_CLVM ||
+	    lock_type_num == LOCK_TYPE_DLM)
+		return 1;
+
+	log_error("Unknown lock_type.");
+	return 0;
+}
+
+/* vgremove after the vg is removed */
+
+void lockd_free_vg_final(struct cmd_context *cmd, struct volume_group *vg)
+{
+	const int lock_type_num = get_lock_type_from_string(vg->lock_type);
+
+	if (lock_type_num == LOCK_TYPE_DLM) {
+		/* dlm cleanup happens after the VG is gone from disk. */
+		_free_vg_dlm(cmd, vg);
+		return;
+	}
+
+	if (lock_type_num != LOCK_TYPE_NONE &&
+	    lock_type_num != LOCK_TYPE_CLVM &&
+	    lock_type_num != LOCK_TYPE_SANLOCK)
+		log_error("Unknown lock_type.");
+}
+
+/*
+ * Starting a vg involves:
+ * 1. reading the vg without a lock
+ * 2. getting the lock_type/lock_args from the vg metadata
+ * 3. doing start_vg in lvmlockd for the lock_type;
+ * this means joining the lockspace
+ *
+ * The vg read in step 1 should not be used for anything
+ * other than getting the lock_type/lock_args/uuid necessary
+ * for starting the lockspace. To use the vg after starting
+ * the lockspace, follow the standard method which is:
+ * lock the vg, read/use/write the vg, unlock the vg.
+ */
+
+int lockd_start_vg(struct cmd_context *cmd, struct volume_group *vg)
+{
+	char uuid[64] __attribute__((aligned(8)));
+	daemon_reply reply;
+	int host_id = 0;
+	int result;
+	int ret;
+
+	memset(uuid, 0, sizeof(uuid));
+
+	/* Local and clvm VGs have no lockspace to start. */
+	if (!is_lockd_type(vg->lock_type))
+		return 1;
+
+	if (!_use_lvmlockd) {
+		log_error("VG %s start failed: lvmlockd is not enabled", vg->name);
+		return 0;
+	}
+	if (!_lvmlockd_connected) {
+		log_error("VG %s start failed: lvmlockd is not running", vg->name);
+		return 0;
+	}
+
+	log_debug("lockd start VG %s lock_type %s",
+		  vg->name, vg->lock_type ? vg->lock_type : "empty");
+
+	if (!id_write_format(&vg->id, uuid, sizeof(uuid)))
+		return_0;
+
+	if (vg->lock_type && !strcmp(vg->lock_type, "sanlock")) {
+		/*
+		 * This is the big difference between starting
+		 * sanlock vgs vs starting dlm vgs: the internal
+		 * sanlock lv needs to be activated before lvmlockd
+		 * does the start because sanlock needs to use the lv
+		 * to access locks.
+		 */
+		if (!_activate_sanlock_lv(cmd, vg))
+			return 0;
+
+		/* sanlock identifies this host in the lockspace by host_id. */
+		host_id = find_config_tree_int(cmd, local_host_id_CFG, NULL);
+	}
+
+	reply = _lockd_send("start_vg",
+				"pid = %d", getpid(),
+				"vg_name = %s", vg->name,
+				"vg_lock_type = %s", vg->lock_type,
+				"vg_lock_args = %s", vg->lock_args ?: "none",
+				"vg_uuid = %s", uuid[0] ? uuid : "none",
+				"version = %d", (int64_t)vg->seqno,
+				"host_id = %d", host_id,
+				NULL);
+
+	/* -ELOCKD is the local code for "no usable reply from lvmlockd". */
+	if (!_lockd_result(reply, &result, NULL)) {
+		ret = 0;
+		result = -ELOCKD;
+	} else {
+		ret = (result < 0) ? 0 : 1;
+	}
+
+	/* Map known daemon error codes to specific messages. */
+	switch (result) {
+	case 0:
+		log_print_unless_silent("VG %s starting %s lockspace", vg->name, vg->lock_type);
+		break;
+	case -ELOCKD:
+		log_error("VG %s start failed: lvmlockd not available", vg->name);
+		break;
+	case -EEXIST:
+		/* An already-started lockspace is not a failure. */
+		log_debug("VG %s start error: already started", vg->name);
+		ret = 1;
+		break;
+	case -EARGS:
+		log_error("VG %s start failed: invalid parameters for %s", vg->name, vg->lock_type);
+		break;
+	case -EHOSTID:
+		log_error("VG %s start failed: invalid sanlock host_id, set in lvmlocal.conf", vg->name);
+		break;
+	case -EMANAGER:
+		log_error("VG %s start failed: lock manager %s is not running", vg->name, vg->lock_type);
+		break;
+	case -EPROTONOSUPPORT:
+		log_error("VG %s start failed: lock manager %s is not supported by lvmlockd", vg->name, vg->lock_type);
+		break;
+	default:
+		log_error("VG %s start failed: %d", vg->name, result);
+	}
+
+	daemon_reply_destroy(reply);
+
+	return ret;
+}
+
+int lockd_stop_vg(struct cmd_context *cmd, struct volume_group *vg)
+{
+	daemon_reply reply;
+	int result;
+	int ret;
+
+	if (!is_lockd_type(vg->lock_type))
+		return 1;
+	if (!_use_lvmlockd)
+		return 0;
+	if (!_lvmlockd_connected)
+		return 0;
+
+	log_debug("lockd stop VG %s lock_type %s",
+		  vg->name, vg->lock_type ? vg->lock_type : "empty");
+
+	reply = _lockd_send("stop_vg",
+			"pid = %d", getpid(),
+			"vg_name = %s", vg->name,
+			NULL);
+
+	if (!_lockd_result(reply, &result, NULL)) {
+		/*
+		 * No usable reply from lvmlockd.  Set result explicitly so
+		 * the -ENOLS/-EBUSY checks below do not read an indeterminate
+		 * value (a stray -ENOLS match would wrongly report success).
+		 */
+		ret = 0;
+		result = -ELOCKD;
+	} else {
+		ret = (result < 0) ? 0 : 1;
+	}
+
+	/* The lockspace was not running, so there is nothing to stop. */
+	if (result == -ENOLS) {
+		ret = 1;
+		goto out;
+	}
+
+	if (result == -EBUSY) {
+		log_error("VG %s stop failed: LVs must first be deactivated", vg->name);
+		goto out;
+	}
+
+	if (!ret) {
+		log_error("VG %s stop failed: %d", vg->name, result);
+		goto out;
+	}
+
+	/* The internal lock lv is unused once the lockspace is stopped. */
+	if (!strcmp(vg->lock_type, "sanlock")) {
+		log_debug("lockd_stop_vg deactivate sanlock lv");
+		_deactivate_sanlock_lv(cmd, vg);
+	}
+out:
+	daemon_reply_destroy(reply);
+
+	return ret;
+}
+
+/* Wait in lvmlockd for all pending lockspace starts to complete. */
+int lockd_start_wait(struct cmd_context *cmd)
+{
+	daemon_reply reply;
+	int result;
+	int ret = 0;
+
+	if (!_use_lvmlockd || !_lvmlockd_connected)
+		return 0;
+
+	reply = _lockd_send("start_wait",
+			"pid = %d", getpid(),
+			NULL);
+
+	if (_lockd_result(reply, &result, NULL))
+		ret = (result >= 0);
+
+	if (!ret)
+		log_error("Lock start failed");
+
+	/*
+	 * FIXME: get a list of vgs that started so we can
+	 * better report what worked and what didn't?
+	 */
+
+	daemon_reply_destroy(reply);
+
+	return ret;
+}
+
+/*
+ * lockd_gl_create() is a variation of lockd_gl() used only by vgcreate.
+ * It handles the case that when using sanlock, the global lock does
+ * not exist until after the first vgcreate is complete, since the global
+ * lock exists on storage within an actual VG. So, the first vgcreate
+ * needs special logic to detect this bootstrap case.
+ *
+ * When the vgcreate is not creating the first VG, then lockd_gl_create()
+ * behaves the same as lockd_gl().
+ *
+ * vgcreate will have a lock_type for the new VG which lockd_gl_create()
+ * can provide in the lock-gl call.
+ *
+ * lockd_gl() and lockd_gl_create() differ in the specific cases where
+ * ENOLS (no lockspace found) is overridden.  In the vgcreate case, the
+ * override cases are related to sanlock bootstrap, and the lock_type of
+ * the vg being created is needed.
+ *
+ * 1. vgcreate of the first lockd-type vg calls lockd_gl_create()
+ * to acquire the global lock.
+ *
+ * 2. vgcreate/lockd_gl_create passes gl lock request to lvmlockd,
+ * along with lock_type of the new vg.
+ *
+ * 3. lvmlockd finds no global lockspace/lock.
+ *
+ * 4. dlm:
+ * If the lock_type from vgcreate is dlm, lvmlockd creates the
+ * dlm global lockspace, and queues the global lock request
+ * for vgcreate. lockd_gl_create returns success with the gl held.
+ *
+ * sanlock:
+ * If the lock_type from vgcreate is sanlock, lvmlockd returns -ENOLS
+ * with the NO_GL_LS flag. lvmlockd cannot create or acquire a sanlock
+ * global lock until the VG exists on disk (the locks live within the VG).
+ *
+ * lockd_gl_create sees sanlock/ENOLS/NO_GL_LS (and optionally the
+ * "enable" lock-gl arg), determines that this is the sanlock
+ * bootstrap special case, and returns success without the global lock.
+ *
+ * vgcreate creates the VG on disk, and calls lockd_init_vg() which
+ * initializes/enables a global lock on the new VG's internal sanlock lv.
+ * Future lockd_gl/lockd_gl_create calls will acquire the existing gl.
+ */
+
+int lockd_gl_create(struct cmd_context *cmd, const char *def_mode, const char *vg_lock_type)
+{
+	const char *mode = NULL;
+	uint32_t lockd_flags;
+	int retries = 0;
+	int result;
+
+	/*
+	 * There are four variations of creating a local/lockd VG
+	 * with/without use_lvmlockd set.
+	 *
+	 * use_lvmlockd=1, lockd VG:
+	 * This function should acquire or create the global lock.
+	 *
+	 * use_lvmlockd=0, local VG:
+	 * This function is a no-op, just returns 1.
+	 *
+	 * use_lvmlockd=0, lockd VG
+	 * An error is returned in vgcreate_params_set_from_args (before this is called).
+	 *
+	 * use_lvmlockd=1, local VG
+	 * This function should acquire the global lock.
+	 */
+	if (!_use_lvmlockd) {
+		if (!is_lockd_type(vg_lock_type))
+			return 1;
+		log_error("Cannot create VG with lock_type %s without lvmlockd.", vg_lock_type);
+		return 0;
+	}
+
+	log_debug("lockd global lock_type %s", vg_lock_type);
+
+	if (!mode)
+		mode = def_mode;
+	if (!mode) {
+		log_error("Unknown lock-gl mode");
+		return 0;
+	}
+
+ req:
+	if (!_lockd_request(cmd, "lock_gl",
+			      NULL, vg_lock_type, NULL, NULL, NULL, NULL, mode, NULL,
+			      &result, &lockd_flags)) {
+		/* No result from lvmlockd, it is probably not running. */
+		log_error("Global lock failed: check that lvmlockd is running.");
+		return 0;
+	}
+
+	/* Another host holds the lock; retry a configurable number of times. */
+	if (result == -EAGAIN) {
+		if (retries < find_config_tree_int(cmd, global_lvmlockd_lock_retries_CFG, NULL)) {
+			log_warn("Retrying %s global lock", mode);
+			sleep(1);
+			retries++;
+			goto req;
+		}
+	}
+
+	/*
+	 * ENOLS: no lockspace was found with a global lock.
+	 * It may not exist (perhaps this command is creating the first),
+	 * or it may not be visible or started on the system yet.
+	 */
+
+	if (result == -ENOLS) {
+		/* An unlock of a lock we don't hold is not a failure. */
+		if (!strcmp(mode, "un"))
+			return 1;
+
+		/*
+		 * This is the sanlock bootstrap condition for proceeding
+		 * without the global lock: a chicken/egg case for the first
+		 * sanlock VG that is created.  When creating the first
+		 * sanlock VG, there is no global lock to acquire because
+		 * the gl will exist in the VG being created.  So, we
+		 * skip acquiring the global lock when creating this initial
+		 * VG, and enable the global lock in this VG.
+		 *
+		 * This initial bootstrap condition is identified based on
+		 * two things:
+		 *
+		 * 1. No sanlock VGs have been started in lvmlockd, causing
+		 *    lvmlockd to return NO_GL_LS/NO_LOCKSPACES.
+		 *
+		 * 2. No sanlock VGs are seen in lvmcache after the disk
+		 *    scan performed in lvmetad_validate_global_cache().
+		 *
+		 * If both of those are true, we go ahead and create this new
+		 * VG which will have the global lock enabled.  However, this
+		 * has a shortcoming: another sanlock VG may exist that hasn't
+		 * appeared to the system yet.  If that VG has its global lock
+		 * enabled, then when it appears later, duplicate global locks
+		 * will be seen, and a warning will indicate that one of them
+		 * should be disabled.
+		 *
+		 * The two bootstrap conditions have another shortcoming to the
+		 * opposite effect: other sanlock VGs may be visible to the
+		 * system, but none of them have a global lock enabled.
+		 * In that case, it would make sense to create this new VG with
+		 * an enabled global lock.  (FIXME: we could detect that none
+		 * of the existing sanlock VGs have a gl enabled and allow this
+		 * vgcreate to go ahead.)  Enabling the global lock in one of
+		 * the existing sanlock VGs is currently the simplest solution.
+		 */
+
+		if ((lockd_flags & LD_RF_NO_GL_LS) &&
+		    (lockd_flags & LD_RF_NO_LOCKSPACES) &&
+		    !strcmp(vg_lock_type, "sanlock")) {
+			lvmetad_validate_global_cache(cmd, 1);
+			/*
+			 * lvmcache holds provisional VG lock_type info because
+			 * lvmetad_validate_global_cache did a disk scan.
+			 */
+			if (lvmcache_contains_lock_type_sanlock(cmd)) {
+				/* FIXME: we could check that all are started, and then check that none have gl enabled. */
+				log_error("Global lock failed: start existing sanlock VGs to access global lock.");
+				log_error("(If all sanlock VGs are started, enable global lock with lvmlockctl.)");
+				return 0;
+			}
+			log_print_unless_silent("Enabling sanlock global lock");
+			return 1;
+		}
+
+		if (!strcmp(vg_lock_type, "sanlock"))
+			log_error("Global lock failed: check that VG holding global lock exists and is started.");
+		else
+			log_error("Global lock failed: check that global lockspace is started.");
+		return 0;
+	}
+
+	/*
+	 * Check for each specific error that can be returned so a helpful
+	 * message can be printed for it.
+	 */
+	if (result < 0) {
+		if (result == -ESTARTING)
+			log_error("Global lock failed: lockspace is starting.");
+		else if (result == -EAGAIN)
+			log_error("Global lock failed: held by other host.");
+		else if (result == -EPROTONOSUPPORT)
+			log_error("VG create failed: lock manager %s is not supported by lvmlockd.", vg_lock_type);
+		else
+			log_error("Global lock failed: error %d", result);
+		return 0;
+	}
+
+	/* Holding the gl: make sure the local lvmetad cache is current. */
+	lvmetad_validate_global_cache(cmd, 1);
+
+	return 1;
+}
+
+/*
+ * The global lock protects:
+ *
+ * - The global VG namespace. Two VGs cannot have the same name.
+ * Used by any command that creates or removes a VG name,
+ * e.g. vgcreate, vgremove, vgrename, vgsplit, vgmerge.
+ *
+ * - The set of orphan PVs.
+ * Used by any command that changes a non-PV device into an orphan PV,
+ * an orphan PV into a device, a non-orphan PV (in a VG) into an orphan PV
+ * (not in a VG), or an orphan PV into a non-orphan PV,
+ * e.g. pvcreate, pvremove, vgcreate, vgremove, vgextend, vgreduce.
+ *
+ * - The properties of orphan PVs. It is possible to make changes to the
+ * properties of an orphan PV, e.g. pvresize, pvchange.
+ *
+ * These are things that cannot be protected by a VG lock alone, since
+ * orphan PVs do not belong to a real VG (an artificial VG does not
+ * apply since a sanlock lock only exists on real storage.)
+ *
+ * If a command will change any of the things above, it must first acquire
+ * the global lock in exclusive mode.
+ *
+ * If a command is reading any of the things above, it must acquire the global
+ * lock in shared mode. A number of commands read the things above, including:
+ *
+ * - Reporting/display commands which show all VGs. Any command that
+ * will iterate through the entire VG namespace must first acquire the
+ * global lock shared so that it has an accurate view of the namespace.
+ *
+ * - A command where a tag name is used to identify what to process.
+ * A tag requires reading all VGs to check if they match the tag.
+ *
+ * In these cases, the global lock must be acquired before the list of
+ * all VGs is created.
+ *
+ * The global lock is not generally unlocked explicitly in the code.
+ * When the command disconnects from lvmlockd, lvmlockd automatically
+ * releases the locks held by the command. The exception is if a command
+ * will continue running for a long time while not needing the global lock,
+ * e.g. commands that poll to report progress.
+ *
+ * Acquiring the global lock also updates the local lvmetad cache if
+ * necessary. lockd_gl() first acquires the lock via lvmlockd, then
+ * before returning to the caller, it checks that the global information
+ * (e.g. VG namespace, set of orphans) is up to date in lvmetad. If
+ * not, it scans disks and updates the lvmetad cache before returning
+ * to the caller. It does this checking using a version number associated
+ * with the global lock. The version number is incremented each time
+ * a change is made to the state associated with the global lock, and
+ * if the local version number is lower than the version number in the
+ * lock, then the local lvmetad state must be updated.
+ *
+ * There are two cases where the global lock can be taken in shared mode,
+ * and then later converted to ex. pvchange and pvresize use process_each_pv
+ * which does lockd_gl("sh") to get the list of VGs. Later, in the "_single"
+ * function called within process_each_pv, the PV may be an orphan, in which
+ * case the ex global lock is needed, so it's converted to ex at that point.
+ *
+ * Effects of misconfiguring use_lvmlockd.
+ *
+ * - Setting use_lvmlockd=1 tells lvm commands to use the global lock.
+ * This should not be set unless a lock manager and lockd VGs will
+ * be used. Setting use_lvmlockd=1 without setting up a lock manager
+ * or using lockd VGs will cause lvm commands to fail when they attempt
+ * to change any global state (requiring the ex global lock), and will
+ * cause warnings when the commands read global state (requiring the sh
+ * global lock). In this condition, lvm is nominally useful, and existing
+ * local VGs can continue to be used mostly as usual. But, the
+ * warnings/errors should lead a user to either set up a lock manager
+ * and lockd VGs, or set use_lvmlockd to 0.
+ *
+ * - Setting use_lvmlockd=0 tells lvm commands to not use the global lock.
+ * If use_lvmlockd=0 when lockd VGs exist which require lvmlockd, the
+ * lockd_gl() calls become no-ops, but the lockd_vg() calls for the lockd
+ * VGs will fail. The warnings/errors from accessing the lockd VGs
+ * should lead the user to set use_lvmlockd to 1 and run the necessary
+ * lock manager. In this condition, lvm reverts to the behavior of
+ * the following case, in which system ID largely protects shared
+ * devices, but has limitations.
+ *
+ * - Setting use_lvmlockd=0 with shared devices, no lockd VGs and
+ * no lock manager is a recognized mode of operation that is
+ * described in the lvmsystemid man page. Using lvm on shared
+ * devices this way is made safe by using system IDs to assign
+ * ownership of VGs to single hosts. The main limitation of this
+ * mode (among others outlined in the man page), is that orphan PVs
+ * are unprotected.
+ */
+
+int lockd_gl(struct cmd_context *cmd, const char *def_mode, uint32_t flags)
+{
+	const char *mode = NULL;
+	const char *opts = NULL;
+	uint32_t lockd_flags;
+	int force_cache_update = 0;
+	int retries = 0;
+	int result;
+
+	if (!_use_lvmlockd)
+		return 1;
+
+	/*
+	 * Verify that when --readonly is used, no ex locks should be used.
+	 */
+	if (cmd->metadata_read_only && def_mode && !strcmp(def_mode, "ex")) {
+		log_error("Exclusive locks are not allowed with readonly option.");
+		return 0;
+	}
+
+	if (cmd->lockd_gl_disable)
+		return 1;
+
+	/* An unlock bypasses the mode selection below. */
+	if (def_mode && !strcmp(def_mode, "un")) {
+		mode = "un";
+		goto req;
+	}
+
+	if (!mode)
+		mode = def_mode;
+	if (!mode) {
+		log_error("Unknown lock-gl mode");
+		return 0;
+	}
+
+ req:
+	log_debug("lockd global mode %s", mode);
+
+	if (!_lockd_request(cmd, "lock_gl",
+			      NULL, NULL, NULL, NULL, NULL, NULL, mode, opts,
+			      &result, &lockd_flags)) {
+		/* No result from lvmlockd, it is probably not running. */
+
+		/* We don't care if an unlock fails. */
+		if (!strcmp(mode, "un"))
+			return 1;
+
+		/* We can continue reading if a shared lock fails. */
+		if (!strcmp(mode, "sh")) {
+			log_warn("Reading without shared global lock.");
+			force_cache_update = 1;
+			goto allow;
+		}
+
+		log_error("Global lock failed: check that lvmlockd is running.");
+		return 0;
+	}
+
+	/* Another host holds the lock; retry a configurable number of times. */
+	if (result == -EAGAIN) {
+		if (retries < find_config_tree_int(cmd, global_lvmlockd_lock_retries_CFG, NULL)) {
+			log_warn("Retrying %s global lock", mode);
+			sleep(1);
+			retries++;
+			goto req;
+		}
+	}
+
+	/*
+	 * ENOLS: no lockspace was found with a global lock.
+	 * The VG with the global lock may not be visible or started yet,
+	 * this should be a temporary condition.
+	 *
+	 * ESTARTING: the lockspace with the gl is starting.
+	 * The VG with the global lock is starting and should finish shortly.
+	 *
+	 * ELOCKIO: sanlock gets i/o errors when trying to read/write leases
+	 * (This can progress to EVGKILLED.)
+	 *
+	 * EVGKILLED: the sanlock lockspace is being killed after losing
+	 * access to lease storage.
+	 */
+
+	if (result == -ENOLS ||
+	    result == -ESTARTING ||
+	    result == -EVGKILLED ||
+	    result == -ELOCKIO) {
+
+		/* An unlock of a lock we don't hold is not a failure. */
+		if (!strcmp(mode, "un"))
+			return 1;
+
+		/*
+		 * If an ex global lock fails, then the command fails.
+		 */
+		if (strcmp(mode, "sh")) {
+			if (result == -ESTARTING)
+				log_error("Global lock failed: lockspace is starting");
+			else if (result == -ENOLS)
+				log_error("Global lock failed: check that global lockspace is started");
+			else if (result == -ELOCKIO)
+				log_error("Global lock failed: storage errors for sanlock leases");
+			else if (result == -EVGKILLED)
+				log_error("Global lock failed: storage failed for sanlock leases");
+			else
+				log_error("Global lock failed: error %d", result);
+			return 0;
+		}
+
+		/*
+		 * If a sh global lock fails, then the command can continue
+		 * reading without it, but force a global cache validation,
+		 * and print a warning.
+		 */
+
+		if (result == -ESTARTING) {
+			log_warn("Skipping global lock: lockspace is starting");
+			force_cache_update = 1;
+			goto allow;
+		}
+
+		if (result == -ELOCKIO || result == -EVGKILLED) {
+			log_warn("Skipping global lock: storage %s for sanlock leases",
+				 result == -ELOCKIO ? "errors" : "failed");
+			force_cache_update = 1;
+			goto allow;
+		}
+
+		if ((lockd_flags & LD_RF_NO_GL_LS) || (lockd_flags & LD_RF_NO_LOCKSPACES)) {
+			log_warn("Skipping global lock: lockspace not found or started");
+			force_cache_update = 1;
+			goto allow;
+		}
+
+		/*
+		 * This is for completeness.  If we reach here, then
+		 * a specific check for the error should be added above
+		 * with a more helpful message.
+		 */
+		log_error("Global lock failed: error %d", result);
+		return 0;
+	}
+
+	/* Two VGs have the gl enabled; one of them should be disabled. */
+	if ((lockd_flags & LD_RF_DUP_GL_LS) && strcmp(mode, "un"))
+		log_warn("Duplicate sanlock global locks should be corrected");
+
+	if (result < 0) {
+		if (result == -EAGAIN) {
+			/*
+			 * Most of the time, retries should avoid this case.
+			 */
+			log_error("Global lock failed: held by other host.");
+			return 0;
+		} else {
+			/*
+			 * We don't intend to reach this.  We should check
+			 * any known/possible error specifically and print
+			 * a more helpful message.  This is for completeness.
+			 */
+			log_error("Global lock failed: error %d.", result);
+			return 0;
+		}
+	}
+
+ allow:
+	/* Holding (or skipping) the gl: revalidate the lvmetad cache. */
+	lvmetad_validate_global_cache(cmd, force_cache_update);
+	return 1;
+}
+
+/*
+ * VG lock
+ *
+ * Return 1: continue, lockd_state may still indicate an error
+ * Return 0: failure, do not continue
+ *
+ * lvmlockd could also return the lock_type that it used for the VG,
+ * and we could encode that in lockd_state, and verify later that it
+ * matches vg->lock_type.
+ *
+ * The result of the VG lock operation needs to be saved in lockd_state
+ * because the result needs to be passed into vg_read so it can be
+ * assessed in combination with vg->lock_type.
+ *
+ * The VG lock protects the VG metadata on disk from concurrent access
+ * among hosts. The VG lock also ensures that the local lvmetad cache
+ * contains the latest version of the VG metadata from disk. (Since
+ * another host may have changed the VG since it was last read.)
+ *
+ * The VG lock must be acquired before the VG is read, i.e. before vg_read().
+ * The result from lockd_vg() is saved in the "lockd_state" variable, and
+ * this result is passed into vg_read(). After vg_read() reads the VG,
+ * it checks if the VG lock_type (sanlock or dlm) requires a lock to be
+ * held, and if so, it verifies that the lock was correctly acquired by
+ * looking at lockd_state. If vg_read() sees that the VG is a local VG,
+ * i.e. lock_type is not sanlock or dlm, then no lock is required, and it
+ * ignores lockd_state (which would indicate no lock was found.)
+ *
+ * When acquiring the VG lock, lvmlockd checks if the local cached copy
+ * of the VG metadata in lvmetad is up to date. If not, it invalidates
+ * the VG cached in lvmetad. This would happen if another host changed
+ * the VG since it was last read. When lvm commands read the VG from
+ * lvmetad, they will check if the metadata is invalid, and if so they
+ * will reread it from disk, and update the copy in lvmetad.
+ */
+
+int lockd_vg(struct cmd_context *cmd, const char *vg_name, const char *def_mode,
+ uint32_t flags, uint32_t *lockd_state)
+{
+ const char *mode = NULL;
+ uint32_t lockd_flags;
+ /* Caller's lockd_state from the earlier sh/ex lock; consulted below
+ to decide whether an unlock request can be skipped. */
+ uint32_t prev_state = *lockd_state;
+ int retries = 0;
+ int result;
+ int ret;
+
+ *lockd_state = 0;
+
+ /* Names that are not real VG names need no locking. */
+ if (!is_real_vg(vg_name))
+ return 1;
+
+ /*
+ * Verify that when --readonly is used, no ex locks should be used.
+ */
+ if (cmd->metadata_read_only &&
+ ((def_mode && !strcmp(def_mode, "ex")) ||
+ (!def_mode && !cmd->lockd_vg_default_sh))) {
+ log_error("Exclusive locks are not allowed with readonly option.");
+ return 0;
+ }
+
+ /*
+ * Some special cases need to disable the vg lock.
+ */
+ if (cmd->lockd_vg_disable)
+ return 1;
+
+ /*
+ * An unlock is simply sent or skipped without any need
+ * for the mode checking for sh/ex.
+ *
+ * Look at lockd_state from the sh/ex lock, and if it failed,
+ * don't bother sending the unlock to lvmlockd. The main
+ * purpose of this is to avoid sending an unnecessary unlock
+ * for local VGs (the lockd_state from sh/ex on the local VG
+ * will be failed.) This implies that the lockd_state value
+ * should be preserved from the sh/ex lockd_vg() call and
+ * passed back to lockd_vg() for the corresponding unlock.
+ */
+ if (def_mode && !strcmp(def_mode, "un")) {
+ if (prev_state & LDST_FAIL) {
+ log_debug("VG %s unlock skipped: lockd_state is failed", vg_name);
+ return 1;
+ }
+
+ mode = "un";
+ goto req;
+ }
+
+ /*
+ * The default mode may not have been provided in the
+ * function args. This happens when lockd_vg is called
+ * from a process_each function that handles different
+ * commands. Commands that only read/check/report/display
+ * the vg have LOCKD_VG_SH set in commands.h, which is
+ * copied to lockd_vg_default_sh. Commands without this
+ * set modify the vg and need ex.
+ */
+ if (!mode)
+ mode = def_mode;
+ if (!mode)
+ mode = cmd->lockd_vg_default_sh ? "sh" : "ex";
+
+ if (!strcmp(mode, "ex"))
+ *lockd_state |= LDST_EX;
+
+ req:
+ /*
+ * This check is not at the top of the function so that
+ * we can first set LDST_EX which will be used later to
+ * decide whether a failure can be ignored or not.
+ *
+ * We do not know if this is a local VG or lockd VG yet,
+ * so we must return success, go ahead and read the VG,
+ * then check if the lock_type required lvmlockd or not.
+ */
+ if (!_use_lvmlockd) {
+ *lockd_state |= LDST_FAIL_REQUEST;
+ return 1;
+ }
+
+ log_debug("lockd VG %s mode %s", vg_name, mode);
+
+ if (!_lockd_request(cmd, "lock_vg",
+ vg_name, NULL, NULL, NULL, NULL, NULL, mode, NULL,
+ &result, &lockd_flags)) {
+ /*
+ * No result from lvmlockd, it is probably not running.
+ * Decide if it is ok to continue without a lock in
+ * access_vg_lock_type() after the VG has been read and
+ * the lock_type can be checked. We don't care about
+ * this error for local VGs, but we do care for lockd VGs.
+ */
+ *lockd_state |= LDST_FAIL_REQUEST;
+ return 1;
+ }
+
+ /* Retry a limited number of times if another host holds the lock. */
+ if (result == -EAGAIN) {
+ if (retries < find_config_tree_int(cmd, global_lvmlockd_lock_retries_CFG, NULL)) {
+ log_warn("Retrying %s lock on VG %s", mode, vg_name);
+ sleep(1);
+ retries++;
+ goto req;
+ }
+ }
+
+ /*
+ * Encode the failure class into lockd_state so that vg_read()
+ * can later decide, based on the VG's lock_type, whether the
+ * failure actually matters for this VG.
+ */
+ switch (result) {
+ case 0:
+ /* success */
+ break;
+ case -ENOLS:
+ *lockd_state |= LDST_FAIL_NOLS;
+ break;
+ case -ESTARTING:
+ *lockd_state |= LDST_FAIL_STARTING;
+ break;
+ default:
+ *lockd_state |= LDST_FAIL_OTHER;
+ }
+
+ /*
+ * Normal success.
+ */
+ if (!result) {
+ ret = 1;
+ goto out;
+ }
+
+ /*
+ * The lockspace for the VG is starting (the VG must not
+ * be local), and is not yet ready to do locking. Allow
+ * reading without a sh lock during this period.
+ */
+ if (result == -ESTARTING) {
+ if (!strcmp(mode, "un")) {
+ ret = 1;
+ goto out;
+ } else if (!strcmp(mode, "sh")) {
+ log_warn("VG %s lock skipped: lock start in progress", vg_name);
+ ret = 1;
+ goto out;
+ } else {
+ log_error("VG %s lock failed: lock start in progress", vg_name);
+ ret = 0;
+ goto out;
+ }
+ }
+
+ /*
+ * sanlock is getting i/o errors while reading/writing leases, or the
+ * lockspace/VG is being killed after failing to renew its lease for
+ * too long.
+ */
+ if (result == -EVGKILLED || result == -ELOCKIO) {
+ const char *problem = (result == -ELOCKIO ? "errors" : "failed");
+
+ if (!strcmp(mode, "un")) {
+ ret = 1;
+ goto out;
+ } else if (!strcmp(mode, "sh")) {
+ log_warn("VG %s lock skipped: storage %s for sanlock leases", vg_name, problem);
+ ret = 1;
+ goto out;
+ } else {
+ log_error("VG %s lock failed: storage %s for sanlock leases", vg_name, problem);
+ ret = 0;
+ goto out;
+ }
+ }
+
+ /*
+ * An unused/previous lockspace for the VG was found.
+ * This means it must be a lockd VG, not local. The
+ * lockspace needs to be started to be used.
+ */
+ if ((result == -ENOLS) && (lockd_flags & LD_RF_INACTIVE_LS)) {
+ if (!strcmp(mode, "un")) {
+ ret = 1;
+ goto out;
+ } else if (!strcmp(mode, "sh")) {
+ log_warn("VG %s lock skipped: lockspace is inactive", vg_name);
+ ret = 1;
+ goto out;
+ } else {
+ log_error("VG %s lock failed: lockspace is inactive", vg_name);
+ ret = 0;
+ goto out;
+ }
+ }
+
+ /*
+ * An unused lockspace for the VG was found. The previous
+ * start of the lockspace failed, so we can print a more useful
+ * error message.
+ */
+ if ((result == -ENOLS) && (lockd_flags & LD_RF_ADD_LS_ERROR)) {
+ if (!strcmp(mode, "un")) {
+ ret = 1;
+ goto out;
+ } else if (!strcmp(mode, "sh")) {
+ log_warn("VG %s lock skipped: lockspace start error", vg_name);
+ ret = 1;
+ goto out;
+ } else {
+ log_error("VG %s lock failed: lockspace start error", vg_name);
+ ret = 0;
+ goto out;
+ }
+ }
+
+ /*
+ * No lockspace for the VG was found. It may be a local
+ * VG that lvmlockd doesn't keep track of, or it may be
+ * a lockd VG that lvmlockd doesn't yet know about (it hasn't
+ * been started yet.) Decide what to do after the VG is
+ * read and we can see the lock_type.
+ */
+ if (result == -ENOLS) {
+ ret = 1;
+ goto out;
+ }
+
+ /*
+ * Another error. We don't intend to reach here, but
+ * want to check for each specific error above so that
+ * a helpful message can be printed.
+ */
+ if (result) {
+ if (!strcmp(mode, "un")) {
+ ret = 1;
+ goto out;
+ } else if (!strcmp(mode, "sh")) {
+ log_warn("VG %s lock skipped: error %d", vg_name, result);
+ ret = 1;
+ goto out;
+ } else {
+ log_error("VG %s lock failed: error %d", vg_name, result);
+ ret = 0;
+ goto out;
+ }
+ }
+
+out:
+ /*
+ * A notice from lvmlockd that duplicate gl locks have been found.
+ * It would be good for the user to disable one of them.
+ */
+ if ((lockd_flags & LD_RF_DUP_GL_LS) && strcmp(mode, "un"))
+ log_warn("Duplicate sanlock global lock in VG %s", vg_name);
+
+ return ret;
+}
+
+/*
+ * This must be called before a new version of the VG metadata is
+ * written to disk. For local VGs, this is a no-op, but for lockd
+ * VGs, this notifies lvmlockd of the new VG seqno. lvmlockd must
+ * know the latest VG seqno so that it can save it within the lock's
+ * LVB. The VG seqno in the VG lock's LVB is used by other hosts to
+ * detect when their cached copy of the VG metadata is stale, i.e.
+ * the cached VG metadata has a lower seqno than the seqno seen in
+ * the VG lock.
+ */
+
+int lockd_vg_update(struct volume_group *vg)
+{
+ daemon_reply reply;
+ int result;
+ int ret;
+
+ /* No-op for local (non-lockd) VGs. */
+ if (!is_lockd_type(vg->lock_type))
+ return 1;
+ if (!_use_lvmlockd)
+ return 0;
+ if (!_lvmlockd_connected)
+ return 0;
+
+ /* NOTE(review): seqno is cast to int64_t while the format is "%d";
+ presumably the daemon config protocol reads "%d" as int64 — confirm
+ against _lockd_send/daemon request formatting conventions. */
+ reply = _lockd_send("vg_update",
+ "pid = %d", getpid(),
+ "vg_name = %s", vg->name,
+ "version = %d", (int64_t)vg->seqno,
+ NULL);
+
+ if (!_lockd_result(reply, &result, NULL)) {
+ ret = 0;
+ } else {
+ ret = (result < 0) ? 0 : 1;
+ }
+
+ daemon_reply_destroy(reply);
+ return ret;
+}
+
+/*
+ * When this is called directly (as opposed to being called from
+ * lockd_lv), the caller knows that the LV has a lock.
+ */
+
+int lockd_lv_name(struct cmd_context *cmd, struct volume_group *vg,
+ const char *lv_name, struct id *lv_id,
+ const char *lock_args, const char *def_mode, uint32_t flags)
+{
+ char lv_uuid[64] __attribute__((aligned(8)));
+ const char *mode = NULL;
+ const char *opts = NULL;
+ uint32_t lockd_flags;
+ /* Allows a single -EMSGSIZE retry after refreshing the sanlock LV. */
+ int refreshed = 0;
+ int result;
+
+ /*
+ * Verify that when --readonly is used, no LVs should be activated or used.
+ */
+ if (cmd->metadata_read_only) {
+ log_error("LV locks are not allowed with readonly option.");
+ return 0;
+ }
+
+ if (cmd->lockd_lv_disable)
+ return 1;
+
+ if (!_use_lvmlockd)
+ return 0;
+ if (!_lvmlockd_connected)
+ return 0;
+
+ if (!id_write_format(lv_id, lv_uuid, sizeof(lv_uuid)))
+ return_0;
+
+ /*
+ * For lvchange/vgchange activation, def_mode is "sh" or "ex"
+ * according to the specific -a{e,s}y mode designation.
+ * No e,s designation gives NULL def_mode.
+ */
+
+ if (def_mode)
+ mode = def_mode;
+
+ if (mode && !strcmp(mode, "sh") && (flags & LDLV_MODE_NO_SH)) {
+ log_error("Shared activation not compatible with LV type: %s/%s",
+ vg->name, lv_name);
+ return 0;
+ }
+
+ /* With no explicit mode, LV locks default to exclusive. */
+ if (!mode)
+ mode = "ex";
+
+ if (flags & LDLV_PERSISTENT)
+ opts = "persistent";
+
+ retry:
+ log_debug("lockd LV %s/%s mode %s uuid %s", vg->name, lv_name, mode, lv_uuid);
+
+ if (!_lockd_request(cmd, "lock_lv",
+ vg->name, vg->lock_type, vg->lock_args,
+ lv_name, lv_uuid, lock_args, mode, opts,
+ &result, &lockd_flags)) {
+ /* No result from lvmlockd, it is probably not running. */
+ log_error("Locking failed for LV %s/%s", vg->name, lv_name);
+ return 0;
+ }
+
+ /* The lv was not active/locked. */
+ if (result == -ENOENT && !strcmp(mode, "un"))
+ return 1;
+
+ /* Already held in the requested mode; nothing to do. */
+ if (result == -EALREADY)
+ return 1;
+
+ if (result == -EAGAIN) {
+ log_error("LV locked by other host: %s/%s", vg->name, lv_name);
+ return 0;
+ }
+
+ if (result == -EMSGSIZE) {
+ /* Another host probably extended lvmlock. */
+ if (!refreshed++) {
+ log_debug("Refresh lvmlock");
+ _refresh_sanlock_lv(cmd, vg);
+ goto retry;
+ }
+ }
+
+ if (result == -ENOLS) {
+ log_error("LV %s/%s lock failed: lockspace is inactive", vg->name, lv_name);
+ return 0;
+ }
+
+ if (result == -EVGKILLED || result == -ELOCKIO) {
+ const char *problem = (result == -ELOCKIO ? "errors" : "failed");
+ log_error("LV %s/%s lock failed: storage %s for sanlock leases", vg->name, lv_name, problem);
+ return 0;
+ }
+
+ if (result < 0) {
+ log_error("LV %s/%s lock failed: error %d", vg->name, lv_name, result);
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * Direct the lock request to the pool LV.
+ * For a thin pool and all its thin volumes, one ex lock is used.
+ * It is the one specified in metadata of the pool data lv.
+ */
+
+static int _lockd_lv_thin(struct cmd_context *cmd, struct logical_volume *lv,
+ const char *def_mode, uint32_t flags)
+{
+ struct logical_volume *pool_lv;
+
+ /* Resolve the pool LV that actually carries the lock. */
+ if (lv_is_thin_volume(lv)) {
+ struct lv_segment *pool_seg = first_seg(lv);
+ pool_lv = pool_seg ? pool_seg->pool_lv : NULL;
+
+ } else if (lv_is_thin_pool(lv)) {
+ pool_lv = lv;
+
+ } else {
+ /* This should not happen AFAIK. */
+ log_error("Lock on incorrect thin lv type %s/%s",
+ lv->vg->name, lv->name);
+ return 0;
+ }
+
+ if (!pool_lv) {
+ /* This should not happen. */
+ log_error("Cannot find thin pool for %s/%s",
+ lv->vg->name, lv->name);
+ return 0;
+ }
+
+ /*
+ * Locking a locked lv (pool in this case) is a no-op.
+ * Unlock when the pool is no longer active.
+ */
+
+ if (def_mode && !strcmp(def_mode, "un") && pool_is_active(pool_lv))
+ return 1;
+
+ /* The single pool lock is exclusive; shared mode is not allowed. */
+ flags |= LDLV_MODE_NO_SH;
+
+ return lockd_lv_name(cmd, pool_lv->vg, pool_lv->name, &pool_lv->lvid.id[1],
+ pool_lv->lock_args, def_mode, flags);
+}
+
+/*
+ * If the VG has no lock_type, then this function can return immediately.
+ * The LV itself may have no lock (NULL lv->lock_args), but the lock request
+ * may be directed to another lock, e.g. the pool LV lock in _lockd_lv_thin.
+ * If the lock request is not directed to another LV, and the LV has no
+ * lock_type set, it means that the LV has no lock, and no locking is done
+ * for it.
+ *
+ * An LV lock is acquired before the LV is activated, and released
+ * after the LV is deactivated. If the LV lock cannot be acquired,
+ * it means that the LV is active on another host and the activation
+ * fails. Commands that modify an inactive LV also acquire the LV lock.
+ *
+ * In non-lockd VGs, this is a no-op.
+ *
+ * In lockd VGs, normal LVs each have their own lock, but other
+ * LVs do not have their own lock, e.g. the lock for a thin LV is
+ * acquired on the thin pool LV, and a thin LV does not have a lock
+ * of its own. A cache pool LV does not have a lock of its own.
+ * When the cache pool LV is linked to an origin LV, the lock of
+ * the origin LV protects the combined origin + cache pool.
+ */
+
+int lockd_lv(struct cmd_context *cmd, struct logical_volume *lv,
+ const char *def_mode, uint32_t flags)
+{
+ if (!is_lockd_type(lv->vg->lock_type))
+ return 1;
+
+ if (!_use_lvmlockd) {
+ log_error("LV in VG %s with lock_type %s requires lvmlockd.",
+ lv->vg->name, lv->vg->lock_type);
+ return 0;
+ }
+
+ if (!_lvmlockd_connected)
+ return 0;
+
+ /* Thin LVs are locked via their pool LV; dispatch there. */
+ if (lv_is_thin_type(lv))
+ return _lockd_lv_thin(cmd, lv, def_mode, flags);
+
+ /*
+ * An LV with NULL lock_args does not have a lock of its own.
+ */
+ if (!lv->lock_args)
+ return 1;
+
+ /*
+ * LV type cannot be active concurrently on multiple hosts,
+ * so shared mode activation is not allowed.
+ *
+ * (lv_is_thin_type in this list is unreachable here since thin
+ * LVs were dispatched to _lockd_lv_thin above; it is harmless.)
+ */
+ if (lv_is_external_origin(lv) ||
+ lv_is_thin_type(lv) ||
+ lv_is_mirror_type(lv) ||
+ lv_is_raid_type(lv) ||
+ lv_is_cache_type(lv)) {
+ flags |= LDLV_MODE_NO_SH;
+ }
+
+ return lockd_lv_name(cmd, lv->vg, lv->name, &lv->lvid.id[1],
+ lv->lock_args, def_mode, flags);
+}
+
+static int _init_lv_sanlock(struct cmd_context *cmd, struct volume_group *vg,
+ const char *lv_name, struct id *lv_id,
+ const char **lock_args_ret)
+{
+ char lv_uuid[64] __attribute__((aligned(8)));
+ daemon_reply reply;
+ const char *reply_str;
+ const char *lv_lock_args = NULL;
+ int result;
+ int ret;
+
+ if (!_use_lvmlockd)
+ return 0;
+ if (!_lvmlockd_connected)
+ return 0;
+
+ if (!id_write_format(lv_id, lv_uuid, sizeof(lv_uuid)))
+ return_0;
+
+ /* Ask lvmlockd to allocate a sanlock lease for the new LV. */
+ reply = _lockd_send("init_lv",
+ "pid = %d", getpid(),
+ "vg_name = %s", vg->name,
+ "lv_name = %s", lv_name,
+ "lv_uuid = %s", lv_uuid,
+ "vg_lock_type = %s", "sanlock",
+ "vg_lock_args = %s", vg->lock_args,
+ NULL);
+
+ /* NOTE(review): if _lockd_result() fails, 'result' may be left unset
+ before the -EEXIST/-EMSGSIZE checks below — confirm _lockd_result
+ always initializes it on failure. */
+ if (!_lockd_result(reply, &result, NULL)) {
+ ret = 0;
+ } else {
+ ret = (result < 0) ? 0 : 1;
+ }
+
+ if (result == -EEXIST) {
+ log_error("Lock already exists for LV %s/%s", vg->name, lv_name);
+ goto out;
+ }
+
+ if (result == -EMSGSIZE) {
+ /*
+ * No space on the lvmlock lv for a new lease, this should be
+ * detected by handle_sanlock_lv() called before.
+ */
+ log_error("No sanlock space for lock for LV %s/%s", vg->name, lv_name);
+ goto out;
+ }
+
+ if (!ret) {
+ log_error("_init_lv_sanlock lvmlockd result %d", result);
+ goto out;
+ }
+
+ if (!(reply_str = daemon_reply_str(reply, "lv_lock_args", NULL))) {
+ log_error("lv_lock_args not returned");
+ ret = 0;
+ goto out;
+ }
+
+ /* Copy lock_args into command memory; reply is destroyed below. */
+ if (!(lv_lock_args = dm_pool_strdup(cmd->mem, reply_str))) {
+ log_error("lv_lock_args allocation failed");
+ ret = 0;
+ }
+out:
+ daemon_reply_destroy(reply);
+
+ *lock_args_ret = lv_lock_args;
+ return ret;
+}
+
+static int _free_lv(struct cmd_context *cmd, struct volume_group *vg,
+ const char *lv_name, struct id *lv_id, const char *lock_args)
+{
+ char lv_uuid[64] __attribute__((aligned(8)));
+ daemon_reply reply;
+ int result;
+ int ret;
+
+ if (!_use_lvmlockd)
+ return 0;
+ if (!_lvmlockd_connected)
+ return 0;
+
+ if (!id_write_format(lv_id, lv_uuid, sizeof(lv_uuid)))
+ return_0;
+
+ /* Ask lvmlockd to release/free the LV's lock resource.
+ "none" stands in for a NULL lock_args in the request. */
+ reply = _lockd_send("free_lv",
+ "pid = %d", getpid(),
+ "vg_name = %s", vg->name,
+ "lv_name = %s", lv_name,
+ "lv_uuid = %s", lv_uuid,
+ "vg_lock_type = %s", vg->lock_type,
+ "vg_lock_args = %s", vg->lock_args,
+ "lv_lock_args = %s", lock_args ?: "none",
+ NULL);
+
+ if (!_lockd_result(reply, &result, NULL)) {
+ ret = 0;
+ } else {
+ ret = (result < 0) ? 0 : 1;
+ }
+
+ if (!ret)
+ log_error("_free_lv lvmlockd result %d", result);
+
+ daemon_reply_destroy(reply);
+
+ return ret;
+}
+
+int lockd_init_lv_args(struct cmd_context *cmd, struct volume_group *vg,
+ struct logical_volume *lv,
+ const char *lock_type, const char **lock_args)
+{
+ /* sanlock is the only lock type that sets per-LV lock_args. */
+ if (!strcmp(lock_type, "sanlock"))
+ return _init_lv_sanlock(cmd, vg, lv->name, &lv->lvid.id[1], lock_args);
+ /* dlm and others need no on-disk lease allocation per LV. */
+ return 1;
+}
+
+/*
+ * lvcreate
+ *
+ * An LV created in a lockd VG inherits the lock_type of the VG. In some
+ * cases, e.g. thin LVs, this function may decide that the LV should not be
+ * given a lock, in which case it sets lp lock_args to NULL, which will cause
+ * the LV to not have lock_args set in its metadata. A lockd_lv() request on
+ * an LV with no lock_args will do nothing (unless the LV type causes the lock
+ * request to be directed to another LV with a lock, e.g. to the thin pool LV
+ * for thin LVs.)
+ *
+ * Current limitations:
+ * - cache-type LV's in a lockd VG must be created with lvconvert.
+ * - creating a thin pool and thin lv in one command is not allowed.
+ */
+
+int lockd_init_lv(struct cmd_context *cmd, struct volume_group *vg, struct logical_volume *lv,
+ struct lvcreate_params *lp)
+{
+ int lock_type_num = get_lock_type_from_string(vg->lock_type);
+
+ switch (lock_type_num) {
+ case LOCK_TYPE_NONE:
+ case LOCK_TYPE_CLVM:
+ return 1;
+ case LOCK_TYPE_SANLOCK:
+ case LOCK_TYPE_DLM:
+ break;
+ default:
+ log_error("lockd_init_lv: unknown lock_type.");
+ return 0;
+ }
+
+ if (!_use_lvmlockd)
+ return 0;
+ if (!_lvmlockd_connected)
+ return 0;
+
+ if (!lp->needs_lockd_init) {
+ /* needs_lockd_init is set for LVs that need a lockd lock. */
+ return 1;
+
+ } else if (seg_is_cache(lp) || seg_is_cache_pool(lp)) {
+ log_error("Use lvconvert for cache with lock type %s", vg->lock_type);
+ return 0;
+
+ } else if (!seg_is_thin_volume(lp) && lp->snapshot) {
+ struct logical_volume *origin_lv;
+
+ /*
+ * COW snapshots are associated with their origin LV,
+ * and only the origin LV needs its own lock, which
+ * represents itself and all associated cow snapshots.
+ */
+
+ if (!(origin_lv = find_lv(vg, lp->origin_name))) {
+ log_error("Failed to find origin LV %s/%s", vg->name, lp->origin_name);
+ return 0;
+ }
+ if (!lockd_lv(cmd, origin_lv, "ex", LDLV_PERSISTENT)) {
+ log_error("Failed to lock origin LV %s/%s", vg->name, lp->origin_name);
+ return 0;
+ }
+ /* NULL lock_args: the snapshot has no lock of its own. */
+ lv->lock_args = NULL;
+ return 1;
+
+ } else if (seg_is_thin(lp)) {
+ if ((seg_is_thin_volume(lp) && !lp->create_pool) ||
+ (!seg_is_thin_volume(lp) && lp->snapshot)) {
+ struct lv_list *lvl;
+
+ /*
+ * Creating a new thin lv or snapshot. These lvs do not get
+ * their own lock but use the pool lock. If an lv does not
+ * use its own lock, its lock_args is set to NULL.
+ */
+
+ if (!(lvl = find_lv_in_vg(vg, lp->pool_name))) {
+ log_error("Failed to find thin pool %s/%s", vg->name, lp->pool_name);
+ return 0;
+ }
+ if (!lockd_lv(cmd, lvl->lv, "ex", LDLV_PERSISTENT)) {
+ log_error("Failed to lock thin pool %s/%s", vg->name, lp->pool_name);
+ return 0;
+ }
+ lv->lock_args = NULL;
+ return 1;
+
+ } else if (seg_is_thin_volume(lp) && lp->create_pool) {
+ /*
+ * Creating a thin pool and a thin lv in it. We could
+ * probably make this work.
+ */
+ log_error("Create thin pool and thin LV separately with lock type %s",
+ vg->lock_type);
+ return 0;
+
+ } else if (!seg_is_thin_volume(lp) && lp->create_pool) {
+ /* Creating a thin pool only. */
+ /* lv_name_lock = lp->pool_name; */
+
+ } else {
+ log_error("Unknown thin options for lock init.");
+ return 0;
+ }
+
+ } else {
+ /* Creating a normal lv. */
+ /* lv_name_lock = lv_name; */
+ }
+
+ /*
+ * The LV gets its own lock, so set lock_args to non-NULL.
+ *
+ * lockd_init_lv_args() will be called during vg_write()
+ * to complete the sanlock LV lock initialization, where
+ * actual space on disk is allocated. Waiting to do this
+ * last step until vg_write() avoids the need to revert
+ * the sanlock allocation if the lvcreate function isn't
+ * completed.
+ *
+ * This works, but would leave the sanlock lease allocated
+ * unless the lease was freed on each early exit path from
+ * lvcreate:
+ *
+ * return lockd_init_lv_args(cmd, vg, lv_name_lock, lv_id,
+ * vg->lock_type, &lv->lock_args);
+ */
+
+ /* "pending" is replaced by the real sanlock lock_args in vg_write(). */
+ if (!strcmp(vg->lock_type, "sanlock"))
+ lv->lock_args = "pending";
+ else if (!strcmp(vg->lock_type, "dlm"))
+ lv->lock_args = "dlm";
+
+ return 1;
+}
+
+/* lvremove */
+
+int lockd_free_lv(struct cmd_context *cmd, struct volume_group *vg,
+ const char *lv_name, struct id *lv_id, const char *lock_args)
+{
+ switch (get_lock_type_from_string(vg->lock_type)) {
+ case LOCK_TYPE_NONE:
+ case LOCK_TYPE_CLVM:
+ return 1;
+ case LOCK_TYPE_DLM:
+ case LOCK_TYPE_SANLOCK:
+ /* No lock_args means the LV had no lock of its own. */
+ if (!lock_args)
+ return 1;
+ return _free_lv(cmd, vg, lv_name, lv_id, lock_args);
+ default:
+ log_error("lockd_free_lv: unknown lock_type.");
+ return 0;
+ }
+}
+
+int lockd_rename_vg_before(struct cmd_context *cmd, struct volume_group *vg)
+{
+ struct lv_list *lvl;
+ daemon_reply reply;
+ int result;
+ int ret;
+
+ if (!is_lockd_type(vg->lock_type))
+ return 1;
+ if (!_use_lvmlockd)
+ return 0;
+ if (!_lvmlockd_connected)
+ return 0;
+
+ if (lvs_in_vg_activated(vg)) {
+ log_error("LVs must be inactive before vgrename.");
+ return 0;
+ }
+
+ /* Check that no LVs are active on other hosts. */
+
+ /* Acquiring ex then releasing each LV lock proves no other host
+ holds the LV lock (i.e. has the LV active). */
+ dm_list_iterate_items(lvl, &vg->lvs) {
+ if (!lockd_lv(cmd, lvl->lv, "ex", 0)) {
+ log_error("LV %s/%s must be inactive on all hosts before vgrename.",
+ vg->name, lvl->lv->name);
+ return 0;
+ }
+
+ if (!lockd_lv(cmd, lvl->lv, "un", 0)) {
+ log_error("Failed to unlock LV %s/%s.", vg->name, lvl->lv->name);
+ return 0;
+ }
+ }
+
+ /*
+ * lvmlockd:
+ * checks for other hosts in lockspace
+ * leaves the lockspace
+ */
+
+ reply = _lockd_send("rename_vg_before",
+ "pid = %d", getpid(),
+ "vg_name = %s", vg->name,
+ "vg_lock_type = %s", vg->lock_type,
+ "vg_lock_args = %s", vg->lock_args,
+ NULL);
+
+ if (!_lockd_result(reply, &result, NULL)) {
+ ret = 0;
+ } else {
+ ret = (result < 0) ? 0 : 1;
+ }
+
+ daemon_reply_destroy(reply);
+
+ if (!ret) {
+ log_error("lockd_rename_vg_before lvmlockd result %d", result);
+ return 0;
+ }
+
+ /* The sanlock leases LV must be inactive while leases are rewritten. */
+ if (!strcmp(vg->lock_type, "sanlock")) {
+ log_debug("lockd_rename_vg_before deactivate sanlock lv");
+ _deactivate_sanlock_lv(cmd, vg);
+ }
+
+ return 1;
+}
+
+int lockd_rename_vg_final(struct cmd_context *cmd, struct volume_group *vg, int success)
+{
+ daemon_reply reply;
+ int result;
+ int ret;
+
+ if (!is_lockd_type(vg->lock_type))
+ return 1;
+ if (!_use_lvmlockd)
+ return 0;
+ if (!_lvmlockd_connected)
+ return 0;
+
+ if (!success) {
+ /*
+ * Depending on the problem that caused the rename to
+ * fail, it may make sense to not restart the VG here.
+ */
+ if (!lockd_start_vg(cmd, vg))
+ log_error("Failed to restart VG %s lockspace.", vg->name);
+ return 1;
+ }
+
+ if (!strcmp(vg->lock_type, "sanlock")) {
+ /* The leases LV must be active for lvmlockd to rewrite leases. */
+ if (!_activate_sanlock_lv(cmd, vg))
+ return 0;
+
+ /*
+ * lvmlockd needs to rewrite the leases on disk
+ * with the new VG (lockspace) name.
+ */
+ reply = _lockd_send("rename_vg_final",
+ "pid = %d", getpid(),
+ "vg_name = %s", vg->name,
+ "vg_lock_type = %s", vg->lock_type,
+ "vg_lock_args = %s", vg->lock_args,
+ NULL);
+
+ if (!_lockd_result(reply, &result, NULL)) {
+ ret = 0;
+ } else {
+ ret = (result < 0) ? 0 : 1;
+ }
+
+ daemon_reply_destroy(reply);
+
+ if (!ret) {
+ /*
+ * The VG has been renamed on disk, but renaming the
+ * sanlock leases failed. Cleaning this up can
+ * probably be done by converting the VG to lock_type
+ * none, then converting back to sanlock.
+ */
+ log_error("lockd_rename_vg_final lvmlockd result %d", result);
+ return 0;
+ }
+ }
+
+ if (!lockd_start_vg(cmd, vg))
+ log_error("Failed to start VG %s lockspace.", vg->name);
+
+ return 1;
+}
+
+const char *lockd_running_lock_type(struct cmd_context *cmd)
+{
+ daemon_reply reply;
+ const char *lock_type = NULL;
+ int result;
+
+ if (!_use_lvmlockd)
+ return NULL;
+ if (!_lvmlockd_connected)
+ return NULL;
+
+ /* Ask lvmlockd which lock manager (if any) it found running. */
+ reply = _lockd_send("running_lm",
+ "pid = %d", getpid(),
+ NULL);
+
+ if (!_lockd_result(reply, &result, NULL)) {
+ log_error("Failed to get result from lvmlockd");
+ goto out;
+ }
+
+ /* result is either a negative errno or a LOCK_TYPE_* value. */
+ switch (result) {
+ case -EXFULL:
+ log_error("lvmlockd found multiple lock managers, use --lock-type to select one.");
+ break;
+ case -ENOLCK:
+ log_error("lvmlockd found no lock manager running.");
+ break;
+ case LOCK_TYPE_SANLOCK:
+ log_debug("lvmlockd found sanlock");
+ lock_type = "sanlock";
+ break;
+ case LOCK_TYPE_DLM:
+ log_debug("lvmlockd found dlm");
+ lock_type = "dlm";
+ break;
+ default:
+ log_error("Failed to find a running lock manager.");
+ break;
+ }
+out:
+ daemon_reply_destroy(reply);
+
+ return lock_type;
+}
+
+/* Some LV types have no lock. */
+
+int lockd_lv_uses_lock(struct logical_volume *lv)
+{
+ /* Hidden/sub LVs and LVs locked via another LV return 0. */
+ if (lv_is_thin_volume(lv))
+ return 0;
+
+ if (lv_is_thin_pool_data(lv))
+ return 0;
+
+ if (lv_is_thin_pool_metadata(lv))
+ return 0;
+
+ if (lv_is_pool_metadata_spare(lv))
+ return 0;
+
+ if (lv_is_cache_pool(lv))
+ return 0;
+
+ if (lv_is_cache_pool_data(lv))
+ return 0;
+
+ if (lv_is_cache_pool_metadata(lv))
+ return 0;
+
+ if (lv_is_cow(lv))
+ return 0;
+
+ if (lv->status & SNAPSHOT)
+ return 0;
+
+ /* FIXME: lv_is_virtual_origin ? */
+
+ /* The internal sanlock leases LV is never locked itself. */
+ if (lv_is_lockd_sanlock_lv(lv))
+ return 0;
+
+ if (lv_is_mirror_image(lv))
+ return 0;
+
+ if (lv_is_mirror_log(lv))
+ return 0;
+
+ if (lv_is_raid_image(lv))
+ return 0;
+
+ if (lv_is_raid_metadata(lv))
+ return 0;
+
+ if (!lv_is_visible(lv))
+ return 0;
+
+ return 1;
+}
diff --git a/lib/locking/lvmlockd.h b/lib/locking/lvmlockd.h
new file mode 100644
index 000000000..64b3ce9aa
--- /dev/null
+++ b/lib/locking/lvmlockd.h
@@ -0,0 +1,246 @@
+/*
+ * Copyright (C) 2014-2015 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ */
+
+#ifndef _LVMLOCKD_H
+#define _LVMLOCKD_H
+
+#include "config-util.h"
+#include "daemon-client.h"
+
+#define LOCKD_SANLOCK_LV_NAME "lvmlock"
+
+/* lockd_gl flags */
+#define LDGL_UPDATE_NAMES 0x00000001
+
+/* lockd_lv flags */
+#define LDLV_MODE_NO_SH 0x00000001
+#define LDLV_PERSISTENT 0x00000002
+
+/* lvmlockd result flags */
+#define LD_RF_NO_LOCKSPACES 0x00000001
+#define LD_RF_NO_GL_LS 0x00000002
+#define LD_RF_WARN_GL_REMOVED 0x00000004
+#define LD_RF_DUP_GL_LS 0x00000008
+#define LD_RF_INACTIVE_LS 0x00000010
+#define LD_RF_ADD_LS_ERROR 0x00000020
+
+/* lockd_state flags */
+#define LDST_EX 0x00000001
+#define LDST_SH 0x00000002
+#define LDST_FAIL_REQUEST 0x00000004
+#define LDST_FAIL_NOLS 0x00000008
+#define LDST_FAIL_STARTING 0x00000010
+#define LDST_FAIL_OTHER 0x00000020
+/* LDST_FAIL: any failure class; used to skip unlock after a failed lock. */
+#define LDST_FAIL (LDST_FAIL_REQUEST | LDST_FAIL_NOLS | LDST_FAIL_STARTING | LDST_FAIL_OTHER)
+
+#ifdef LVMLOCKD_SUPPORT
+
+/* lvmlockd connection and communication */
+
+void lvmlockd_set_socket(const char *sock);
+void lvmlockd_set_use(int use);
+int lvmlockd_use(void);
+void lvmlockd_init(struct cmd_context *cmd);
+void lvmlockd_connect(void);
+void lvmlockd_disconnect(void);
+
+/* vgcreate/vgremove use init/free */
+
+int lockd_init_vg(struct cmd_context *cmd, struct volume_group *vg, const char *lock_type, int lv_lock_count);
+int lockd_free_vg_before(struct cmd_context *cmd, struct volume_group *vg);
+void lockd_free_vg_final(struct cmd_context *cmd, struct volume_group *vg);
+
+/* vgrename */
+
+int lockd_rename_vg_before(struct cmd_context *cmd, struct volume_group *vg);
+int lockd_rename_vg_final(struct cmd_context *cmd, struct volume_group *vg, int success);
+
+/* start and stop the lockspace for a vg */
+
+int lockd_start_vg(struct cmd_context *cmd, struct volume_group *vg);
+int lockd_stop_vg(struct cmd_context *cmd, struct volume_group *vg);
+int lockd_start_wait(struct cmd_context *cmd);
+
+/* locking */
+
+int lockd_gl_create(struct cmd_context *cmd, const char *def_mode, const char *vg_lock_type);
+int lockd_gl(struct cmd_context *cmd, const char *def_mode, uint32_t flags);
+int lockd_vg(struct cmd_context *cmd, const char *vg_name, const char *def_mode,
+ uint32_t flags, uint32_t *lockd_state);
+int lockd_vg_update(struct volume_group *vg);
+
+int lockd_lv_name(struct cmd_context *cmd, struct volume_group *vg,
+ const char *lv_name, struct id *lv_id,
+ const char *lock_args, const char *def_mode, uint32_t flags);
+int lockd_lv(struct cmd_context *cmd, struct logical_volume *lv,
+ const char *def_mode, uint32_t flags);
+
+/* lvcreate/lvremove use init/free */
+
+int lockd_init_lv(struct cmd_context *cmd, struct volume_group *vg, struct logical_volume *lv,
+ struct lvcreate_params *lp);
+int lockd_init_lv_args(struct cmd_context *cmd, struct volume_group *vg,
+ struct logical_volume *lv, const char *lock_type, const char **lock_args);
+int lockd_free_lv(struct cmd_context *cmd, struct volume_group *vg,
+ const char *lv_name, struct id *lv_id, const char *lock_args);
+
+const char *lockd_running_lock_type(struct cmd_context *cmd);
+
+int handle_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg);
+
+int lockd_lv_uses_lock(struct logical_volume *lv);
+
+#else /* LVMLOCKD_SUPPORT */
+
+/*
+ * Stubs used when built without lvmlockd: functions whose failure would
+ * block normal local operation return 1 (success), while operations that
+ * require a running lvmlockd return 0 or NULL.
+ */
+
+static inline void lvmlockd_set_socket(const char *sock)
+{
+}
+
+static inline void lvmlockd_set_use(int use)
+{
+}
+
+static inline void lvmlockd_init(struct cmd_context *cmd)
+{
+}
+
+static inline void lvmlockd_disconnect(void)
+{
+}
+
+static inline void lvmlockd_connect(void)
+{
+}
+
+static inline int lvmlockd_use(void)
+{
+ return 0;
+}
+
+static inline int lockd_init_vg(struct cmd_context *cmd, struct volume_group *vg, const char *lock_type, int lv_lock_count)
+{
+ return 1;
+}
+
+static inline int lockd_free_vg_before(struct cmd_context *cmd, struct volume_group *vg)
+{
+ return 1;
+}
+
+static inline void lockd_free_vg_final(struct cmd_context *cmd, struct volume_group *vg)
+{
+ return;
+}
+
+static inline int lockd_rename_vg_before(struct cmd_context *cmd, struct volume_group *vg)
+{
+ return 1;
+}
+
+static inline int lockd_rename_vg_final(struct cmd_context *cmd, struct volume_group *vg, int success)
+{
+ return 1;
+}
+
+static inline int lockd_start_vg(struct cmd_context *cmd, struct volume_group *vg)
+{
+ return 0;
+}
+
+static inline int lockd_stop_vg(struct cmd_context *cmd, struct volume_group *vg)
+{
+ return 0;
+}
+
+static inline int lockd_start_wait(struct cmd_context *cmd)
+{
+ return 0;
+}
+
+static inline int lockd_gl_create(struct cmd_context *cmd, const char *def_mode, const char *vg_lock_type)
+{
+ /*
+ * When lvm is built without lvmlockd support, creating a VG with
+ * a shared lock type should fail.
+ */
+ if (is_lockd_type(vg_lock_type)) {
+ log_error("Using a shared lock type requires lvmlockd.");
+ return 0;
+ }
+ return 1;
+}
+
+static inline int lockd_gl(struct cmd_context *cmd, const char *def_mode, uint32_t flags)
+{
+ return 1;
+}
+
+static inline int lockd_vg(struct cmd_context *cmd, const char *vg_name, const char *def_mode,
+ uint32_t flags, uint32_t *lockd_state)
+{
+ *lockd_state = 0;
+ return 1;
+}
+
+static inline int lockd_vg_update(struct volume_group *vg)
+{
+ return 1;
+}
+
+static inline int lockd_lv_name(struct cmd_context *cmd, struct volume_group *vg,
+ const char *lv_name, struct id *lv_id,
+ const char *lock_args, const char *def_mode, uint32_t flags)
+{
+ return 1;
+}
+
+static inline int lockd_lv(struct cmd_context *cmd, struct logical_volume *lv,
+ const char *def_mode, uint32_t flags)
+{
+ return 1;
+}
+
+static inline int lockd_init_lv(struct cmd_context *cmd, struct volume_group *vg,
+ struct logical_volume *lv, struct lvcreate_params *lp)
+{
+ return 1;
+}
+
+static inline int lockd_init_lv_args(struct cmd_context *cmd, struct volume_group *vg,
+ struct logical_volume *lv, const char *lock_type, const char **lock_args)
+{
+ return 1;
+}
+
+static inline int lockd_free_lv(struct cmd_context *cmd, struct volume_group *vg,
+ const char *lv_name, struct id *lv_id, const char *lock_args)
+{
+ return 1;
+}
+
+static inline const char *lockd_running_lock_type(struct cmd_context *cmd)
+{
+ log_error("Using a shared lock type requires lvmlockd.");
+ return NULL;
+}
+
+static inline int handle_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg)
+{
+ return 0;
+}
+
+static inline int lockd_lv_uses_lock(struct logical_volume *lv)
+{
+ return 0;
+}
+
+#endif /* LVMLOCKD_SUPPORT */
+
+#endif /* _LVMLOCKD_H */
diff --git a/lib/log/log.c b/lib/log/log.c
index 83e5be42c..bd1136393 100644
--- a/lib/log/log.c
+++ b/lib/log/log.c
@@ -18,10 +18,13 @@
#include "memlock.h"
#include "defaults.h"
+#include <stdio.h>
#include <stdarg.h>
#include <syslog.h>
+#include <ctype.h>
static FILE *_log_file;
+static char _log_file_path[PATH_MAX];
static struct device _log_dev;
static struct dm_str_list _log_dev_alias;
@@ -33,7 +36,7 @@ static int _indent = 1;
static int _log_suppress = 0;
static char _msg_prefix[30] = " ";
static int _already_logging = 0;
-static int _abort_on_internal_errors = 0;
+static int _abort_on_internal_errors_config = 0;
static lvm2_log_fn_t _lvm2_log_fn = NULL;
@@ -52,11 +55,59 @@ void init_log_fn(lvm2_log_fn_t log_fn)
_lvm2_log_fn = NULL;
}
+/*
+ * Support the LVM_LOG_FILE_EPOCH environment variable and allow attaching
+ * an extra keyword (consisting of up to 32 alphabetic chars) to the
+ * opened log file. After this 'epoch' word, the pid and start time
+ * (in kernel units, read from /proc/self/stat)
+ * are automatically appended.
+ * If a command/daemon forks multiple times, it can create multiple
+ * log files, ensuring there are no overwrites.
+ */
void init_log_file(const char *log_file, int append)
{
- const char *open_mode = append ? "a" : "w";
+ static const char statfile[] = "/proc/self/stat";
+ const char *env;
+ int pid;
+ unsigned long long starttime;
+ FILE *st;
+ int i = 0;
+
+ _log_file_path[0] = '\0';
+ if ((env = getenv("LVM_LOG_FILE_EPOCH"))) {
+ while (isalpha(env[i]) && i < 32) /* Up to 32 alphas */
+ i++;
+ if (env[i]) {
+ if (i)
+ log_warn("WARNING: Ignoring invalid LVM_LOG_FILE_EPOCH envvar \"%s\".", env);
+ goto no_epoch;
+ }
- if (!(_log_file = fopen(log_file, open_mode))) {
+ if (!(st = fopen(statfile, "r")))
+ log_sys_error("fopen", statfile);
+ else if (fscanf(st, "%d %*s %*c %*d %*d %*d %*d " /* tty_nr */
+ "%*d %*u %*u %*u %*u " /* mjflt */
+ "%*u %*u %*u %*d %*d " /* cstim */
+ "%*d %*d %*d %*d " /* itrealvalue */
+ "%llu", &pid, &starttime) != 2) {
+ log_warn("WARNING: Cannot parse content of %s.", statfile);
+ } else {
+ if (dm_snprintf(_log_file_path, sizeof(_log_file_path),
+ "%s_%s_%d_%lld", log_file, env, pid, starttime) < 0) {
+ log_warn("WARNING: Debug log file path is too long for epoch.");
+ _log_file_path[0] = '\0';
+ } else {
+ log_file = _log_file_path;
+ append = 1; /* force */
+ }
+ }
+
+ if (st && fclose(st))
+ log_sys_debug("fclose", statfile);
+ }
+
+no_epoch:
+ if (!(_log_file = fopen(log_file, append ? "a" : "w"))) {
log_sys_error("fopen", log_file);
return;
}
@@ -64,6 +115,31 @@ void init_log_file(const char *log_file, int append)
_log_to_file = 1;
}
+/*
+ * Unlink the log file depending on the command's return value.
+ *
+ * When the LVM_EXPECTED_EXIT_STATUS environment variable is set, compare
+ * the resulting status with this string.
+ *
+ * Two variants can be specified - the status being equal to
+ * a single number, or being greater than a single number.
+ *
+ * i.e. LVM_EXPECTED_EXIT_STATUS=">1" # delete when ret > 1.
+ */
+void unlink_log_file(int ret)
+{
+ const char *env;
+
+ if (_log_file_path[0] &&
+ (env = getenv("LVM_EXPECTED_EXIT_STATUS")) &&
+ ((env[0] == '>' && ret > atoi(env + 1)) ||
+ (atoi(env) == ret))) {
+ if (unlink(_log_file_path))
+ log_sys_error("unlink", _log_file_path);
+ _log_file_path[0] = '\0';
+ }
+}
+
void init_log_direct(const char *log_file, int append)
{
int open_flags = append ? 0 : O_TRUNC;
@@ -142,9 +218,10 @@ void init_indent(int indent)
_indent = indent;
}
+/* If present, environment setting will override this. */
void init_abort_on_internal_errors(int fatal)
{
- _abort_on_internal_errors = fatal;
+ _abort_on_internal_errors_config = fatal;
}
void reset_lvm_errno(int store_errmsg)
@@ -201,10 +278,24 @@ void print_log(int level, const char *file, int line, int dm_errno_or_class,
size_t msglen;
const char *indent_spaces = "";
FILE *stream;
+ static int _abort_on_internal_errors_env_present = -1;
+ static int _abort_on_internal_errors_env = 0;
+ char *env_str;
level &= ~(_LOG_STDERR|_LOG_ONCE);
- if (_abort_on_internal_errors &&
+ if (_abort_on_internal_errors_env_present < 0) {
+ if ((env_str = getenv("DM_ABORT_ON_INTERNAL_ERRORS"))) {
+ _abort_on_internal_errors_env_present = 1;
+ /* Set when env DM_ABORT_ON_INTERNAL_ERRORS is not "0" */
+ _abort_on_internal_errors_env = strcmp(env_str, "0");
+ } else
+ _abort_on_internal_errors_env_present = 0;
+ }
+
+ /* Use value from environment if present, otherwise use value from config. */
+ if (((_abort_on_internal_errors_env_present && _abort_on_internal_errors_env) ||
+ (!_abort_on_internal_errors_env_present && _abort_on_internal_errors_config)) &&
!strncmp(format, INTERNAL_ERROR, sizeof(INTERNAL_ERROR) - 1)) {
fatal_internal_error = 1;
/* Internal errors triggering abort cannot be suppressed. */
@@ -299,17 +390,19 @@ void print_log(int level, const char *file, int line, int dm_errno_or_class,
va_start(ap, format);
switch (level) {
case _LOG_DEBUG:
- if ((verbose_level() == level) &&
- (strcmp("<backtrace>", format) == 0))
- break;
if (verbose_level() < _LOG_DEBUG)
break;
if (!debug_class_is_logged(dm_errno_or_class))
break;
+ if ((verbose_level() == level) &&
+ (strcmp("<backtrace>", format) == 0))
+ break;
/* fall through */
default:
/* Typically only log_warn goes to stdout */
stream = (use_stderr || (level != _LOG_WARN)) ? stderr : stdout;
+ if (stream == stderr)
+ fflush(stdout);
fprintf(stream, "%s%s%s%s", buf, log_command_name(),
_msg_prefix, indent_spaces);
vfprintf(stream, trformat, ap);
@@ -333,7 +426,7 @@ void print_log(int level, const char *file, int line, int dm_errno_or_class,
vfprintf(_log_file, trformat, ap);
va_end(ap);
- fprintf(_log_file, "\n");
+ fputc('\n', _log_file);
fflush(_log_file);
}
diff --git a/lib/log/log.h b/lib/log/log.h
index 99e7623dd..cb6b0bca6 100644
--- a/lib/log/log.h
+++ b/lib/log/log.h
@@ -37,8 +37,6 @@
*
*/
-#include <stdio.h> /* FILE */
-#include <string.h> /* strerror() */
#include <errno.h>
#define EUNCLASSIFIED -1 /* Generic error code */
@@ -67,6 +65,7 @@
#define LOG_CLASS_METADATA 0x0020 /* "metadata" */
#define LOG_CLASS_CACHE 0x0040 /* "cache" */
#define LOG_CLASS_LOCKING 0x0080 /* "locking" */
+#define LOG_CLASS_LVMPOLLD 0x0100 /* "lvmpolld" */
#define log_debug(x...) LOG_LINE(_LOG_DEBUG, x)
#define log_debug_mem(x...) LOG_LINE_WITH_CLASS(_LOG_DEBUG, LOG_CLASS_MEM, x)
@@ -77,6 +76,7 @@
#define log_debug_metadata(x...) LOG_LINE_WITH_CLASS(_LOG_DEBUG, LOG_CLASS_METADATA, x)
#define log_debug_cache(x...) LOG_LINE_WITH_CLASS(_LOG_DEBUG, LOG_CLASS_CACHE, x)
#define log_debug_locking(x...) LOG_LINE_WITH_CLASS(_LOG_DEBUG, LOG_CLASS_LOCKING, x)
+#define log_debug_lvmpolld(x...) LOG_LINE_WITH_CLASS(_LOG_DEBUG, LOG_CLASS_LVMPOLLD, x)
#define log_info(x...) LOG_LINE(_LOG_INFO, x)
#define log_notice(x...) LOG_LINE(_LOG_NOTICE, x)
@@ -109,6 +109,8 @@
#define return_0 do { stack; return 0; } while (0)
#define return_NULL do { stack; return NULL; } while (0)
+#define return_EINVALID_CMD_LINE \
+ do { stack; return EINVALID_CMD_LINE; } while (0)
#define return_ECMD_FAILED do { stack; return ECMD_FAILED; } while (0)
#define goto_out do { stack; goto out; } while (0)
#define goto_bad do { stack; goto bad; } while (0)
diff --git a/lib/log/lvm-logging.h b/lib/log/lvm-logging.h
index 145e2a1c9..983da5b38 100644
--- a/lib/log/lvm-logging.h
+++ b/lib/log/lvm-logging.h
@@ -40,6 +40,7 @@ void init_indent(int indent);
void init_msg_prefix(const char *prefix);
void init_log_file(const char *log_file, int append);
+void unlink_log_file(int ret);
void init_log_direct(const char *log_file, int append);
void init_log_while_suspended(int log_while_suspended);
void init_abort_on_internal_errors(int fatal);
diff --git a/lib/lvmpolld/lvmpolld-client.c b/lib/lvmpolld/lvmpolld-client.c
new file mode 100644
index 000000000..f7faa8b1e
--- /dev/null
+++ b/lib/lvmpolld/lvmpolld-client.c
@@ -0,0 +1,356 @@
+/*
+ * Copyright (C) 2015 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "lib.h"
+
+#include "daemon-io.h"
+#include "lvmpolld-client.h"
+#include "lvmpolld-protocol.h"
+#include "metadata-exported.h"
+#include "polldaemon.h"
+#include "toolcontext.h"
+#include "lvm2cmd.h"
+
+struct progress_info {
+ unsigned error:1;
+ unsigned finished:1;
+ int cmd_signal;
+ int cmd_retcode;
+};
+
+static int _lvmpolld_use;
+static int _lvmpolld_connected;
+static const char* _lvmpolld_socket;
+
+static daemon_handle _lvmpolld = { .error = 0 };
+
+static daemon_handle _lvmpolld_open(const char *socket)
+{
+ daemon_info lvmpolld_info = {
+ .path = "lvmpolld",
+ .socket = socket ?: LVMPOLLD_SOCKET,
+ .protocol = LVMPOLLD_PROTOCOL,
+ .protocol_version = LVMPOLLD_PROTOCOL_VERSION
+ };
+
+ return daemon_open(lvmpolld_info);
+}
+
+void lvmpolld_set_active(int active)
+{
+ _lvmpolld_use = active;
+}
+
+void lvmpolld_set_socket(const char *socket)
+{
+ _lvmpolld_socket = socket;
+}
+
+static void _lvmpolld_connect_or_warn(void)
+{
+ if (!_lvmpolld_connected && !_lvmpolld.error) {
+ _lvmpolld = _lvmpolld_open(_lvmpolld_socket);
+ if ( _lvmpolld.socket_fd >= 0 && !_lvmpolld.error) {
+ log_debug_lvmpolld("Sucessfully connected to lvmpolld on fd %d.", _lvmpolld.socket_fd);
+ _lvmpolld_connected = 1;
+ } else {
+ log_warn("WARNING: Failed to connect to lvmpolld. Proceeding with polling without using lvmpolld.");
+ log_warn("WARNING: Check global/use_lvmpolld in lvm.conf or the lvmpolld daemon state.");
+ }
+ }
+}
+
+int lvmpolld_use(void)
+{
+ if (!_lvmpolld_use || !_lvmpolld_socket)
+ return 0;
+
+ _lvmpolld_connect_or_warn();
+
+ return _lvmpolld_connected;
+}
+
+void lvmpolld_disconnect(void)
+{
+ if (_lvmpolld_connected) {
+ daemon_close(_lvmpolld);
+ _lvmpolld_connected = 0;
+ }
+}
+
+static void _explain_error_codes(int retcode)
+{
+ switch (retcode) {
+ /* LVM2 return codes */
+ case LVM2_NO_SUCH_COMMAND:
+ log_error("LVM command run by lvmpolld responded with: 'No such command.'");
+ break;
+ case LVM2_INVALID_PARAMETERS:
+ log_error("LVM command run by lvmpolld failed due to invalid parameters.");
+ break;
+ case LVM2_PROCESSING_FAILED:
+ log_error("LVM command executed by lvmpolld failed.");
+ break;
+
+ /* lvmpolld specific return codes */
+ case LVMPD_RET_DUP_FAILED:
+ log_error("lvmpolld failed to duplicate file descriptors.");
+ /* fall through */
+ case LVMPD_RET_EXC_FAILED:
+ log_error("lvmpolld failed to exec() lvm binary.");
+ break;
+ default:
+ log_error("lvmpolld responded with unexpected return code.");
+ }
+
+ log_print_unless_silent("For more information see lvmpolld messages in syslog or lvmpolld log file.");
+}
+
+static void _process_error_response(daemon_reply rep)
+{
+ if (!strcmp(daemon_reply_str(rep, "response", ""), LVMPD_RESP_FAILED))
+ log_error("lvmpolld failed to process a request. The reason was: %s.",
+ daemon_reply_str(rep, "reason", "<empty>"));
+ else if (!strcmp(daemon_reply_str(rep, "response", ""), LVMPD_RESP_EINVAL))
+ log_error("lvmpolld couldn't handle a request. "
+ "It might be due to daemon internal state. The reason was: %s.",
+ daemon_reply_str(rep, "reason", "<empty>"));
+ else
+ log_error("Unexpected response %s. The reason: %s.",
+ daemon_reply_str(rep, "response", "<empty>"),
+ daemon_reply_str(rep, "reason", "<empty>"));
+
+ log_print_unless_silent("For more information see lvmpolld messages in syslog or lvmpolld log file.");
+}
+
+static struct progress_info _request_progress_info(const char *uuid, unsigned abort_polling)
+{
+ daemon_reply rep;
+ const char *e = getenv("LVM_SYSTEM_DIR");
+ struct progress_info ret = { .error = 1, .finished = 1 };
+ daemon_request req = daemon_request_make(LVMPD_REQ_PROGRESS);
+
+ if (!daemon_request_extend(req, LVMPD_PARM_LVID " = %s", uuid, NULL)) {
+ log_error("Failed to create " LVMPD_REQ_PROGRESS " request.");
+ goto out_req;
+ }
+
+ if (abort_polling &&
+ !daemon_request_extend(req, LVMPD_PARM_ABORT " = %d", (int64_t)abort_polling, NULL)) {
+ log_error("Failed to create " LVMPD_REQ_PROGRESS " request.");
+ goto out_req;
+ }
+
+ if (e &&
+ !(daemon_request_extend(req, LVMPD_PARM_SYSDIR " = %s",
+ e, NULL))) {
+ log_error("Failed to create " LVMPD_REQ_PROGRESS " request.");
+ goto out_req;
+ }
+
+ rep = daemon_send(_lvmpolld, req);
+ if (rep.error) {
+ log_error("Failed to process request with error %s (errno: %d).",
+ strerror(rep.error), rep.error);
+ goto out_rep;
+ }
+
+ if (!strcmp(daemon_reply_str(rep, "response", ""), LVMPD_RESP_IN_PROGRESS)) {
+ ret.finished = 0;
+ ret.error = 0;
+ } else if (!strcmp(daemon_reply_str(rep, "response", ""), LVMPD_RESP_FINISHED)) {
+ if (!strcmp(daemon_reply_str(rep, "reason", ""), LVMPD_REAS_SIGNAL))
+ ret.cmd_signal = daemon_reply_int(rep, LVMPD_PARM_VALUE, 0);
+ else
+ ret.cmd_retcode = daemon_reply_int(rep, LVMPD_PARM_VALUE, -1);
+ ret.error = 0;
+ } else if (!strcmp(daemon_reply_str(rep, "response", ""), LVMPD_RESP_NOT_FOUND)) {
+ log_verbose("No polling operation in progress regarding LV %s.", uuid);
+ ret.error = 0;
+ } else {
+ _process_error_response(rep);
+ stack;
+ }
+
+out_rep:
+ daemon_reply_destroy(rep);
+out_req:
+ daemon_request_destroy(req);
+
+ return ret;
+}
+
+/*
+ * Buffer size for the interval string:
+ * 10 decimal digits hold more than
+ * a year's worth of seconds.
+ */
+#define INTERV_SIZE 10
+
+static int _process_poll_init(const struct cmd_context *cmd, const char *poll_type,
+ const struct poll_operation_id *id, const struct daemon_parms *parms)
+{
+ char *str;
+ daemon_reply rep;
+ daemon_request req;
+ const char *e = getenv("LVM_SYSTEM_DIR");
+ int r = 0;
+
+ str = dm_malloc(INTERV_SIZE * sizeof(char));
+ if (!str)
+ return r;
+
+ if (snprintf(str, INTERV_SIZE, "%u", parms->interval) >= INTERV_SIZE) {
+ log_warn("Interval string conversion got truncated.");
+ str[INTERV_SIZE - 1] = '\0';
+ }
+
+ req = daemon_request_make(poll_type);
+ if (!daemon_request_extend(req, LVMPD_PARM_LVID " = %s", id->uuid,
+ LVMPD_PARM_VGNAME " = %s", id->vg_name,
+ LVMPD_PARM_LVNAME " = %s", id->lv_name,
+ LVMPD_PARM_INTERVAL " = %s", str,
+ "cmdline = %s", cmd->cmd_line, /* FIXME: debug param only */
+ NULL)) {
+ log_error("Failed to create %s request.", poll_type);
+ goto out_req;
+ }
+
+ if (parms->aborting &&
+ !(daemon_request_extend(req, LVMPD_PARM_ABORT " = %d", (int64_t)(parms->aborting), NULL))) {
+ log_error("Failed to create %s request." , poll_type);
+ goto out_req;
+ }
+
+ if (cmd->handles_missing_pvs &&
+ !(daemon_request_extend(req, LVMPD_PARM_HANDLE_MISSING_PVS " = %d",
+ (int64_t)(cmd->handles_missing_pvs), NULL))) {
+ log_error("Failed to create %s request." , poll_type);
+ goto out_req;
+ }
+
+ if (e &&
+ !(daemon_request_extend(req, LVMPD_PARM_SYSDIR " = %s",
+ e, NULL))) {
+ log_error("Failed to create %s request." , poll_type);
+ goto out_req;
+ }
+
+ rep = daemon_send(_lvmpolld, req);
+
+ if (rep.error) {
+ log_error("Failed to process request with error %s (errno: %d).",
+ strerror(rep.error), rep.error);
+ goto out_rep;
+ }
+
+ if (!strcmp(daemon_reply_str(rep, "response", ""), LVMPD_RESP_OK))
+ r = 1;
+ else {
+ _process_error_response(rep);
+ stack;
+ }
+
+out_rep:
+ daemon_reply_destroy(rep);
+out_req:
+ daemon_request_destroy(req);
+ dm_free(str);
+
+ return r;
+}
+
+int lvmpolld_poll_init(const struct cmd_context *cmd, const struct poll_operation_id *id,
+ const struct daemon_parms *parms)
+{
+ int r = 0;
+
+ if (!id->uuid) {
+ log_error(INTERNAL_ERROR "Use of lvmpolld requires uuid set");
+ return 0;
+ }
+
+ if (!id->vg_name) {
+ log_error(INTERNAL_ERROR "Use of lvmpolld requires vgname set");
+ return 0;
+ }
+
+ if (!id->lv_name) {
+ log_error(INTERNAL_ERROR "Use of lvmpolld requires lvname set");
+ return 0;
+ }
+
+ if (parms->lv_type & PVMOVE) {
+ log_debug_lvmpolld("Asking lvmpolld for pvmove%s on %s/%s.",
+ parms->aborting ? " abort" : "", id->vg_name, id->lv_name);
+ r = _process_poll_init(cmd, LVMPD_REQ_PVMOVE, id, parms);
+ } else if (parms->lv_type & CONVERTING) {
+ log_debug_lvmpolld("Asking lvmpolld for mirror conversion on %s/%s.",
+ id->vg_name, id->lv_name);
+ r = _process_poll_init(cmd, LVMPD_REQ_CONVERT, id, parms);
+ } else if (parms->lv_type & MERGING) {
+ if (parms->lv_type & SNAPSHOT) {
+ log_debug_lvmpolld("Asking lvmpolld for snapshot merge on %s/%s.",
+ id->vg_name, id->lv_name);
+ r = _process_poll_init(cmd, LVMPD_REQ_MERGE, id, parms);
+ }
+ else if (parms->lv_type & THIN_VOLUME) {
+ log_debug_lvmpolld("Asking lvmpolld for thin snapshot merge on %s/%s.",
+ id->vg_name, id->lv_name);
+ r = _process_poll_init(cmd, LVMPD_REQ_MERGE_THIN, id, parms);
+ }
+ else {
+ log_error(INTERNAL_ERROR "Unsupported poll operation.");
+ }
+ } else
+ log_error(INTERNAL_ERROR "Unsupported poll operation");
+
+ return r;
+}
+
+int lvmpolld_request_info(const struct poll_operation_id *id, const struct daemon_parms *parms, unsigned *finished)
+{
+ struct progress_info info;
+ int ret = 0;
+
+ *finished = 1;
+
+ if (!id->uuid) {
+ log_error(INTERNAL_ERROR "use of lvmpolld requires uuid being set");
+ return 0;
+ }
+
+ log_debug_lvmpolld("Asking lvmpolld for progress status of an operation on %s/%s.",
+ id->vg_name, id->lv_name);
+ info = _request_progress_info(id->uuid, parms->aborting);
+ *finished = info.finished;
+
+ if (info.error)
+ return_0;
+
+ if (info.finished) {
+ if (info.cmd_signal)
+ log_error("Command executed by lvmpolld got terminated by signal (%d).",
+ info.cmd_signal);
+ else if (info.cmd_retcode)
+ _explain_error_codes(info.cmd_retcode);
+ else {
+ log_verbose("Polling finished successfully.");
+ ret = 1;
+ }
+ } else
+ ret = 1;
+
+ return ret;
+}
diff --git a/lib/lvmpolld/lvmpolld-client.h b/lib/lvmpolld/lvmpolld-client.h
new file mode 100644
index 000000000..09dd95967
--- /dev/null
+++ b/lib/lvmpolld/lvmpolld-client.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2014-2015 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _LVM_LVMPOLLD_CLIENT_H
+#define _LVM_LVMPOLLD_CLIENT_H
+# ifdef LVMPOLLD_SUPPORT
+
+# include "daemon-client.h"
+
+# define LVMPOLLD_SOCKET DEFAULT_RUN_DIR "/lvmpolld.socket"
+
+struct cmd_context;
+struct poll_operation_id;
+struct daemon_parms;
+
+void lvmpolld_disconnect(void);
+
+int lvmpolld_poll_init(const struct cmd_context *cmd, const struct poll_operation_id *id,
+ const struct daemon_parms *parms);
+
+int lvmpolld_request_info(const struct poll_operation_id *id, const struct daemon_parms *parms,
+ unsigned *finished);
+
+int lvmpolld_use(void);
+
+void lvmpolld_set_active(int active);
+
+void lvmpolld_set_socket(const char *socket);
+
+# else
+
+# define lvmpolld_disconnect() do {} while (0)
+# define lvmpolld_poll_init(cmd, id, parms) (0)
+# define lvmpolld_request_info(id, parms, finished) (0)
+# define lvmpolld_use() (0)
+# define lvmpolld_set_active(active) do {} while (0)
+# define lvmpolld_set_socket(socket) do {} while (0)
+
+# endif /* LVMPOLLD_SUPPORT */
+
+#endif /* _LVM_LVMPOLLD_CLIENT_H */
diff --git a/tools/polldaemon.h b/lib/lvmpolld/polldaemon.h
index 89c3aaee2..36b654fa3 100644
--- a/tools/polldaemon.h
+++ b/lib/lvmpolld/polldaemon.h
@@ -1,6 +1,6 @@
/*
- * Copyright (C) 2003-2004 Sistina Software, Inc. All rights reserved.
- * Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2003-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2015 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
@@ -29,14 +29,6 @@ struct daemon_parms;
struct poll_functions {
const char *(*get_copy_name_from_lv) (const struct logical_volume *lv);
- struct volume_group *(*get_copy_vg) (struct cmd_context *cmd,
- const char *name,
- const char *uuid);
- struct logical_volume *(*get_copy_lv) (struct cmd_context *cmd,
- struct volume_group *vg,
- const char *name,
- const char *uuid,
- uint64_t lv_type);
progress_t (*poll_progress)(struct cmd_context *cmd,
struct logical_volume *lv,
const char *name,
@@ -51,6 +43,13 @@ struct poll_functions {
struct dm_list *lvs_changed);
};
+struct poll_operation_id {
+ const char *vg_name;
+ const char *lv_name;
+ const char *display_name;
+ const char *uuid;
+};
+
struct daemon_parms {
unsigned interval;
unsigned wait_before_testing;
@@ -63,13 +62,15 @@ struct daemon_parms {
struct poll_functions *poll_fns;
};
-int poll_daemon(struct cmd_context *cmd, const char *name, const char *uuid,
- unsigned background,
+int poll_daemon(struct cmd_context *cmd, unsigned background,
uint64_t lv_type, struct poll_functions *poll_fns,
- const char *progress_title);
+ const char *progress_title, struct poll_operation_id *id);
progress_t poll_mirror_progress(struct cmd_context *cmd,
struct logical_volume *lv, const char *name,
struct daemon_parms *parms);
+int wait_for_single_lv(struct cmd_context *cmd, struct poll_operation_id *id,
+ struct daemon_parms *parms);
+
#endif
diff --git a/lib/metadata/cache_manip.c b/lib/metadata/cache_manip.c
index 6a13ec1c1..30d890c44 100644
--- a/lib/metadata/cache_manip.c
+++ b/lib/metadata/cache_manip.c
@@ -29,7 +29,17 @@
#define DM_HINT_OVERHEAD_PER_BLOCK 8 /* bytes */
#define DM_MAX_HINT_WIDTH (4+16) /* bytes. FIXME Configurable? */
-const char *get_cache_pool_cachemode_name(const struct lv_segment *seg)
+int cache_mode_is_set(const struct lv_segment *seg)
+{
+ if (seg_is_cache(seg))
+ seg = first_seg(seg->pool_lv);
+
+ return (seg->feature_flags & (DM_CACHE_FEATURE_WRITEBACK |
+ DM_CACHE_FEATURE_WRITETHROUGH |
+ DM_CACHE_FEATURE_PASSTHROUGH)) ? 1 : 0;
+}
+
+const char *get_cache_mode_name(const struct lv_segment *seg)
{
if (seg->feature_flags & DM_CACHE_FEATURE_WRITEBACK)
return "writeback";
@@ -46,19 +56,48 @@ const char *get_cache_pool_cachemode_name(const struct lv_segment *seg)
return NULL;
}
-int set_cache_pool_feature(uint64_t *feature_flags, const char *str)
+int cache_set_mode(struct lv_segment *seg, const char *str)
{
+ struct cmd_context *cmd = seg->lv->vg->cmd;
+ int id;
+ uint64_t mode;
+
+ if (!str && !seg_is_cache(seg))
+ return 1; /* Defaults only for cache */
+
+ if (seg_is_cache(seg))
+ seg = first_seg(seg->pool_lv);
+
+ if (!str) {
+ if (cache_mode_is_set(seg))
+ return 1; /* Default already set in cache pool */
+
+ id = allocation_cache_mode_CFG;
+
+ /* If present, check backward compatible settings */
+ if (!find_config_node(cmd, cmd->cft, id) &&
+ find_config_node(cmd, cmd->cft, allocation_cache_pool_cachemode_CFG))
+ id = allocation_cache_pool_cachemode_CFG;
+
+ str = find_config_tree_str(cmd, id, NULL);
+ }
+
if (!strcmp(str, "writeback"))
- *feature_flags |= DM_CACHE_FEATURE_WRITEBACK;
+ mode = DM_CACHE_FEATURE_WRITEBACK;
else if (!strcmp(str, "writethrough"))
- *feature_flags |= DM_CACHE_FEATURE_WRITETHROUGH;
- else if (!strcmp(str, "passhrough"))
- *feature_flags |= DM_CACHE_FEATURE_PASSTHROUGH;
+ mode = DM_CACHE_FEATURE_WRITETHROUGH;
+ else if (!strcmp(str, "passthrough"))
+ mode = DM_CACHE_FEATURE_PASSTHROUGH;
else {
- log_error("Cache pool feature \"%s\" is unknown.", str);
+ log_error("Cannot set unknown cache mode \"%s\".", str);
return 0;
}
+ seg->feature_flags &= ~(DM_CACHE_FEATURE_WRITEBACK |
+ DM_CACHE_FEATURE_WRITETHROUGH |
+ DM_CACHE_FEATURE_PASSTHROUGH);
+ seg->feature_flags |= mode;
+
return 1;
}
@@ -322,7 +361,7 @@ int lv_cache_remove(struct logical_volume *cache_lv)
dirty_blocks = status->cache->dirty_blocks;
dm_pool_destroy(status->mem);
if (dirty_blocks) {
- log_print_unless_silent("%" PRIu64 " blocks must still be flushed.",
+ log_print_unless_silent(FMTu64 " blocks must still be flushed.",
dirty_blocks);
sleep(1);
}
@@ -395,38 +434,107 @@ int lv_is_cache_origin(const struct logical_volume *lv)
return seg && lv_is_cache(seg->lv) && !lv_is_pending_delete(seg->lv) && (seg_lv(seg, 0) == lv);
}
-int lv_cache_setpolicy(struct logical_volume *lv, struct dm_config_tree *policy)
+static const char *_get_default_cache_policy(struct cmd_context *cmd)
+{
+ const struct segment_type *segtype = get_segtype_from_string(cmd, "cache");
+ unsigned attr = ~0;
+ const char *def = NULL;
+
+ if (!segtype ||
+ !segtype->ops->target_present ||
+ !segtype->ops->target_present(cmd, NULL, &attr)) {
+ log_warn("WARNING: Cannot detect default cache policy, using \""
+ DEFAULT_CACHE_POLICY "\".");
+ return DEFAULT_CACHE_POLICY;
+ }
+
+ if (attr & CACHE_FEATURE_POLICY_SMQ)
+ def = "smq";
+ else if (attr & CACHE_FEATURE_POLICY_MQ)
+ def = "mq";
+ else {
+ log_error("Default cache policy is not available.");
+ return NULL;
+ }
+
+ log_debug_metadata("Detected default cache_policy \"%s\".", def);
+
+ return def;
+}
+
+int cache_set_policy(struct lv_segment *seg, const char *name,
+ const struct dm_config_tree *settings)
{
- struct lv_segment *seg = first_seg(lv);
- const char *name;
struct dm_config_node *cn;
+ const struct dm_config_node *cns;
struct dm_config_tree *old = NULL, *new = NULL, *tmp = NULL;
int r = 0;
+ const int passed_seg_is_cache = seg_is_cache(seg);
- if (lv_is_cache(lv))
+ if (passed_seg_is_cache)
seg = first_seg(seg->pool_lv);
- if (seg->policy_settings) {
- if (!(old = dm_config_create()))
- goto_out;
- if (!(new = dm_config_create()))
- goto_out;
- new->root = policy->root;
- old->root = seg->policy_settings;
- new->cascade = old;
- if (!(tmp = policy = dm_config_flatten(new)))
- goto_out;
+ if (name) {
+ if (!(seg->policy_name = dm_pool_strdup(seg->lv->vg->vgmem, name))) {
+ log_error("Failed to duplicate policy name.");
+ return 0;
+ }
+ } else if (!seg->policy_name && passed_seg_is_cache) {
+ if (!(seg->policy_name = find_config_tree_str(seg->lv->vg->cmd, allocation_cache_policy_CFG, NULL)) &&
+ !(seg->policy_name = _get_default_cache_policy(seg->lv->vg->cmd)))
+ return_0;
}
- if (!(seg->policy_settings = dm_config_clone_node_with_mem(lv->vg->vgmem, policy->root, 0)))
- goto_out;
+ if (settings) {
+ if (!seg->policy_name) {
+ log_error(INTERNAL_ERROR "Can't set policy settings without policy name.");
+ return 0;
+ }
+
+ if (seg->policy_settings) {
+ if (!(old = dm_config_create()))
+ goto_out;
+ if (!(new = dm_config_create()))
+ goto_out;
+ new->root = settings->root;
+ old->root = seg->policy_settings;
+ new->cascade = old;
+ if (!(tmp = dm_config_flatten(new)))
+ goto_out;
+ }
+
+ if ((cn = dm_config_find_node((tmp) ? tmp->root : settings->root, "policy_settings")) &&
+ !(seg->policy_settings = dm_config_clone_node_with_mem(seg->lv->vg->vgmem, cn, 0)))
+ goto_out;
+ } else if (passed_seg_is_cache && /* Look for command's profile cache_policies */
+ (cns = find_config_tree_node(seg->lv->vg->cmd, allocation_cache_settings_CFG_SECTION, NULL))) {
+ /* Try to find our section for given policy */
+ for (cn = cns->child; cn; cn = cn->sib) {
+ /* Only matching section names */
+ if (cn->v || strcmp(cn->key, seg->policy_name) != 0)
+ continue;
+
+ if (!cn->child)
+ break;
+
+ if (!(new = dm_config_create()))
+ goto_out;
+
+ if (!(new->root = dm_config_clone_node_with_mem(new->mem,
+ cn->child, 1)))
+ goto_out;
- if ((name = dm_config_find_str(policy->root, "policy", NULL)) &&
- !(seg->policy_name = dm_pool_strdup(lv->vg->vgmem, name)))
- goto_out;
+ if (!(seg->policy_settings = dm_config_create_node(new, "policy_settings")))
+ goto_out;
+
+ seg->policy_settings->child = new->root;
+
+ break; /* Only first match counts */
+ }
+ }
restart: /* remove any 'default" nodes */
- cn = seg->policy_settings->child;
+ cn = seg->policy_settings ? seg->policy_settings->child : NULL;
while (cn) {
if (cn->v->type == DM_CFG_STRING && !strcmp(cn->v->v.str, "default")) {
dm_config_remove_node(seg->policy_settings, cn);
@@ -438,12 +546,13 @@ restart: /* remove any 'default" nodes */
r = 1;
out:
- if (old)
- dm_config_destroy(old);
- if (new)
- dm_config_destroy(new);
if (tmp)
dm_config_destroy(tmp);
+ if (new)
+ dm_config_destroy(new);
+ if (old)
+ dm_config_destroy(old);
+
return r;
}
diff --git a/lib/metadata/lv.c b/lib/metadata/lv.c
index 531699d4c..8b4fcfe23 100644
--- a/lib/metadata/lv.c
+++ b/lib/metadata/lv.c
@@ -20,6 +20,7 @@
#include "toolcontext.h"
#include "segtype.h"
#include "str_list.h"
+#include "lvmlockd.h"
#include <time.h>
#include <sys/utsname.h>
@@ -78,7 +79,7 @@ static char *_format_pvsegs(struct dm_pool *mem, const struct lv_segment *seg,
if (range_format) {
if (dm_snprintf(extent_str, sizeof(extent_str),
- "%" PRIu32, extent + seg->area_len - 1) < 0) {
+ FMTu32, extent + seg->area_len - 1) < 0) {
log_error("Extent number dm_snprintf failed");
return NULL;
}
@@ -130,7 +131,7 @@ char *lvseg_discards_dup(struct dm_pool *mem, const struct lv_segment *seg)
char *lvseg_cachemode_dup(struct dm_pool *mem, const struct lv_segment *seg)
{
- const char *name = get_cache_pool_cachemode_name(seg);
+ const char *name = get_cache_mode_name(seg);
if (!name)
return_NULL;
@@ -152,11 +153,10 @@ char *lvseg_monitor_dup(struct dm_pool *mem, const struct lv_segment *seg)
if (lv_is_cow(seg->lv) && !lv_is_merging_cow(seg->lv))
segm = first_seg(seg->lv->snapshot->lv);
- else if (seg->log_lv)
- segm = first_seg(seg->log_lv);
// log_debug("Query LV:%s mon:%s segm:%s tgtm:%p segmon:%d statusm:%d", seg->lv->name, segm->lv->name, segm->segtype->name, segm->segtype->ops->target_monitored, seg_monitored(segm), (int)(segm->status & PVMOVE));
if ((dmeventd_monitor_mode() != 1) ||
+ !segm->segtype->ops ||
!segm->segtype->ops->target_monitored)
/* Nothing to do, monitoring not supported */;
else if (lv_is_cow_covering_origin(seg->lv))
@@ -640,10 +640,10 @@ int lv_raid_healthy(const struct logical_volume *lv)
return 1;
}
-char *lv_attr_dup(struct dm_pool *mem, const struct logical_volume *lv)
+char *lv_attr_dup_with_info_and_seg_status(struct dm_pool *mem, const struct lv_with_info_and_seg_status *lvdm)
{
dm_percent_t snap_percent;
- struct lvinfo info;
+ const struct logical_volume *lv = lvdm->lv;
struct lv_segment *seg;
char *repstr;
@@ -717,30 +717,30 @@ char *lv_attr_dup(struct dm_pool *mem, const struct logical_volume *lv)
repstr[3] = (lv->status & FIXED_MINOR) ? 'm' : '-';
- if (!activation() || !lv_info(lv->vg->cmd, lv, 0, &info, 1, 0)) {
+ if (!activation() || !lvdm->info_ok) {
repstr[4] = 'X'; /* Unknown */
repstr[5] = 'X'; /* Unknown */
- } else if (info.exists) {
- if (info.suspended)
+ } else if (lvdm->info.exists) {
+ if (lvdm->info.suspended)
repstr[4] = 's'; /* Suspended */
- else if (info.live_table)
+ else if (lvdm->info.live_table)
repstr[4] = 'a'; /* Active */
- else if (info.inactive_table)
+ else if (lvdm->info.inactive_table)
repstr[4] = 'i'; /* Inactive with table */
else
repstr[4] = 'd'; /* Inactive without table */
/* Snapshot dropped? */
- if (info.live_table && lv_is_cow(lv)) {
+ if (lvdm->info.live_table && lv_is_cow(lv)) {
if (!lv_snapshot_percent(lv, &snap_percent) ||
snap_percent == DM_PERCENT_INVALID) {
- if (info.suspended)
+ if (lvdm->info.suspended)
repstr[4] = 'S'; /* Susp Inv snapshot */
else
repstr[4] = 'I'; /* Invalid snapshot */
}
else if (snap_percent == LVM_PERCENT_MERGE_FAILED) {
- if (info.suspended)
+ if (lvdm->info.suspended)
repstr[4] = 'M'; /* Susp snapshot merge failed */
else
repstr[4] = 'm'; /* snapshot merge failed */
@@ -751,10 +751,10 @@ char *lv_attr_dup(struct dm_pool *mem, const struct logical_volume *lv)
* 'R' indicates read-only activation of a device that
* does not have metadata flagging it as read-only.
*/
- if (repstr[1] != 'r' && info.read_only)
+ if (repstr[1] != 'r' && lvdm->info.read_only)
repstr[1] = 'R';
- repstr[5] = (info.open_count) ? 'o' : '-';
+ repstr[5] = (lvdm->info.open_count) ? 'o' : '-';
} else {
repstr[4] = '-';
repstr[5] = '-';
@@ -798,6 +798,18 @@ char *lv_attr_dup(struct dm_pool *mem, const struct logical_volume *lv)
repstr[8] = 'm'; /* RAID has 'm'ismatches */
} else if (lv->status & LV_WRITEMOSTLY)
repstr[8] = 'w'; /* sub-LV has 'w'ritemostly */
+ else if (lv->status & LV_RESHAPE_REMOVED)
+ repstr[8] = 'R'; /* sub-LV got 'R'emoved from raid set by reshaping */
+ } else if (lv_is_thin_pool(lv) &&
+ (lvdm->seg_status.type != SEG_STATUS_NONE)) {
+ if (lvdm->seg_status.type == SEG_STATUS_UNKNOWN)
+ repstr[8] = 'X'; /* Unknown */
+ else if (lvdm->seg_status.thin_pool->fail)
+ repstr[8] = 'F';
+ else if (lvdm->seg_status.thin_pool->out_of_data_space)
+ repstr[8] = 'D';
+ else if (lvdm->seg_status.thin_pool->read_only)
+ repstr[8] = 'M';
}
if (lv->status & LV_ACTIVATION_SKIP)
@@ -809,6 +821,28 @@ out:
return repstr;
}
+/* backward compatible internal API for lvm2api, TODO improve it */
+char *lv_attr_dup(struct dm_pool *mem, const struct logical_volume *lv)
+{
+ char *ret = NULL;
+ struct lv_with_info_and_seg_status status = {
+ .seg_status.type = SEG_STATUS_NONE,
+ .lv = lv
+ };
+
+ if (!(status.seg_status.mem = dm_pool_create("reporter_pool", 1024)))
+ return_0;
+
+ if (!(status.info_ok = lv_info_with_seg_status(lv->vg->cmd, lv, first_seg(lv), 1, &status, 1, 1)))
+ goto_bad;
+
+ ret = lv_attr_dup_with_info_and_seg_status(mem, &status);
+bad:
+ dm_pool_destroy(status.seg_status.mem);
+
+ return ret;
+}
+
int lv_set_creation(struct logical_volume *lv,
const char *hostname, uint64_t timestamp)
{
@@ -843,17 +877,16 @@ int lv_set_creation(struct logical_volume *lv,
return 1;
}
-char *lv_time_dup(struct dm_pool *mem, const struct logical_volume *lv)
+char *lv_time_dup(struct dm_pool *mem, const struct logical_volume *lv, int iso_mode)
{
- char buffer[50];
+ char buffer[4096];
struct tm *local_tm;
time_t ts = (time_t)lv->timestamp;
+ const char *format = iso_mode ? DEFAULT_TIME_FORMAT : lv->vg->cmd->time_format;
if (!ts ||
!(local_tm = localtime(&ts)) ||
- /* FIXME: make this lvm.conf configurable */
- !strftime(buffer, sizeof(buffer),
- "%Y-%m-%d %T %z", local_tm))
+ !strftime(buffer, sizeof(buffer), format, local_tm))
buffer[0] = 0;
return dm_pool_strdup(mem, buffer);
@@ -881,14 +914,30 @@ static int _lv_is_exclusive(struct logical_volume *lv)
int lv_active_change(struct cmd_context *cmd, struct logical_volume *lv,
enum activation_change activate, int needs_exclusive)
{
+ const char *ay_with_mode = NULL;
+
+ if (activate == CHANGE_ASY)
+ ay_with_mode = "sh";
+ if (activate == CHANGE_AEY)
+ ay_with_mode = "ex";
+
+ if (is_change_activating(activate) &&
+ !lockd_lv(cmd, lv, ay_with_mode, LDLV_PERSISTENT)) {
+ log_error("Failed to lock logical volume %s/%s", lv->vg->name, lv->name);
+ return 0;
+ }
+
switch (activate) {
case CHANGE_AN:
+PFLA("activate=%x", activate);
deactivate:
+PFL();
log_verbose("Deactivating logical volume \"%s\"", lv->name);
if (!deactivate_lv(cmd, lv))
return_0;
break;
case CHANGE_ALN:
+PFL();
if (vg_is_clustered(lv->vg) && (needs_exclusive || _lv_is_exclusive(lv))) {
if (!lv_is_active_locally(lv)) {
log_error("Cannot deactivate remotely exclusive device locally.");
@@ -904,6 +953,7 @@ deactivate:
break;
case CHANGE_ALY:
case CHANGE_AAY:
+PFL();
if (needs_exclusive || _lv_is_exclusive(lv)) {
log_verbose("Activating logical volume \"%s\" exclusively locally.",
lv->name);
@@ -918,12 +968,15 @@ deactivate:
break;
case CHANGE_AEY:
exclusive:
+PFL();
log_verbose("Activating logical volume \"%s\" exclusively.",
lv->name);
if (!activate_lv_excl(cmd, lv))
return_0;
break;
- default: /* CHANGE_AY */
+ case CHANGE_ASY:
+ case CHANGE_AY:
+ default:
if (needs_exclusive || _lv_is_exclusive(lv))
goto exclusive;
log_verbose("Activating logical volume \"%s\".", lv->name);
@@ -931,6 +984,10 @@ exclusive:
return_0;
}
+ if (!is_change_activating(activate) &&
+ !lockd_lv(cmd, lv, "un", LDLV_PERSISTENT))
+ log_error("Failed to unlock logical volume %s/%s", lv->vg->name, lv->name);
+
return 1;
}
@@ -970,6 +1027,12 @@ char *lv_profile_dup(struct dm_pool *mem, const struct logical_volume *lv)
return dm_pool_strdup(mem, profile_name);
}
+char *lv_lock_args_dup(struct dm_pool *mem, const struct logical_volume *lv)
+{
+ const char *lock_args = lv->lock_args ? lv->lock_args : "";
+ return dm_pool_strdup(mem, lock_args);
+}
+
/* For given LV find recursively the LV which holds lock for it */
const struct logical_volume *lv_lock_holder(const struct logical_volume *lv)
{
@@ -986,6 +1049,10 @@ const struct logical_volume *lv_lock_holder(const struct logical_volume *lv)
return sl->seg->lv;
}
+ /* RAID changes visibility of split LVs but still references them as leg/meta */
+ if ((lv_is_raid_image(lv) || lv_is_raid_metadata(lv)) && lv_is_visible(lv))
+ return lv;
+
/* For other types, by default look for the first user */
dm_list_iterate_items(sl, &lv->segs_using_this_lv) {
/* FIXME: complete this exception list */
diff --git a/lib/metadata/lv.h b/lib/metadata/lv.h
index 3564acd16..af7db7f67 100644
--- a/lib/metadata/lv.h
+++ b/lib/metadata/lv.h
@@ -51,11 +51,16 @@ struct logical_volume {
struct dm_list segs_using_this_lv;
uint64_t timestamp;
+ unsigned new_lock_args:1;
const char *hostname;
+ const char *lock_args;
};
+struct lv_with_info_and_seg_status;
+
uint64_t lv_size(const struct logical_volume *lv);
uint64_t lv_metadata_size(const struct logical_volume *lv);
+char *lv_attr_dup_with_info_and_seg_status(struct dm_pool *mem, const struct lv_with_info_and_seg_status *lvdm);
char *lv_attr_dup(struct dm_pool *mem, const struct logical_volume *lv);
char *lv_uuid_dup(const struct logical_volume *lv);
char *lv_tags_dup(const struct logical_volume *lv);
@@ -88,7 +93,7 @@ char *lvseg_monitor_dup(struct dm_pool *mem, const struct lv_segment *seg);
char *lvseg_tags_dup(const struct lv_segment *seg);
char *lvseg_devices(struct dm_pool *mem, const struct lv_segment *seg);
char *lvseg_seg_pe_ranges(struct dm_pool *mem, const struct lv_segment *seg);
-char *lv_time_dup(struct dm_pool *mem, const struct logical_volume *lv);
+char *lv_time_dup(struct dm_pool *mem, const struct logical_volume *lv, int iso_mode);
char *lv_host_dup(struct dm_pool *mem, const struct logical_volume *lv);
int lv_set_creation(struct logical_volume *lv,
const char *hostname, uint64_t timestamp);
@@ -100,6 +105,7 @@ const struct logical_volume *lv_lock_holder(const struct logical_volume *lv);
const struct logical_volume *lv_ondisk(const struct logical_volume *lv);
struct profile *lv_config_profile(const struct logical_volume *lv);
char *lv_profile_dup(struct dm_pool *mem, const struct logical_volume *lv);
+char *lv_lock_args_dup(struct dm_pool *mem, const struct logical_volume *lv);
int lv_mirror_image_in_sync(const struct logical_volume *lv);
int lv_raid_image_in_sync(const struct logical_volume *lv);
int lv_raid_healthy(const struct logical_volume *lv);
diff --git a/lib/metadata/lv_alloc.h b/lib/metadata/lv_alloc.h
index 241e0e4d6..94fce2a2d 100644
--- a/lib/metadata/lv_alloc.h
+++ b/lib/metadata/lv_alloc.h
@@ -25,8 +25,8 @@ struct lv_segment *alloc_lv_segment(const struct segment_type *segtype,
uint64_t status,
uint32_t stripe_size,
struct logical_volume *log_lv,
- uint32_t area_count,
- uint32_t area_len,
+ uint32_t area_count, uint32_t area_len,
+ uint32_t data_copies,
uint32_t chunk_size,
uint32_t region_size,
uint32_t extents_copied,
@@ -59,6 +59,7 @@ struct alloc_handle *allocate_extents(struct volume_group *vg,
int lv_add_segment(struct alloc_handle *ah,
uint32_t first_area, uint32_t num_areas,
+ uint32_t data_copies,
struct logical_volume *lv,
const struct segment_type *segtype,
uint32_t stripe_size,
diff --git a/lib/metadata/lv_manip.c b/lib/metadata/lv_manip.c
index 728f7ee09..5df10180c 100644
--- a/lib/metadata/lv_manip.c
+++ b/lib/metadata/lv_manip.c
@@ -1,6 +1,6 @@
/*
* Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
- * Copyright (C) 2004-2014 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2004-2015 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
@@ -30,15 +30,7 @@
#include "lvm-exec.h"
#include "lvm-signal.h"
#include "memlock.h"
-
-/* HM FIXME: REMOVEME: devel output */
-#ifdef USE_PFL
-#define PFL() printf("%s %u\n", __func__, __LINE__);
-#define PFLA(format, arg...) printf("%s %u " format "\n", __func__, __LINE__, arg);
-#else
-#define PFL()
-#define PFLA(format, arg...)
-#endif
+#include "lvmlockd.h"
typedef enum {
PREFERRED,
@@ -58,6 +50,7 @@ typedef enum {
#define A_AREA_COUNT_MATCHES 0x20 /* Existing lvseg has same number of areas as new segment */
#define A_POSITIONAL_FILL 0x40 /* Slots are positional and filled using PREFERRED */
+#define A_PARTITION_BY_TAGS 0x80 /* No allocated area may share any tag with any other */
/*
* Constant parameters during a single allocation attempt.
@@ -78,6 +71,7 @@ struct alloc_state {
uint32_t areas_size;
uint32_t log_area_count_still_needed; /* Number of areas still needing to be allocated for the log */
uint32_t allocated; /* Total number of extents allocated so far */
+ uint32_t num_positional_areas; /* Number of parallel allocations that must be contiguous/cling */
};
struct lv_names {
@@ -127,7 +121,11 @@ enum _lv_type_name_enum {
LV_TYPE_RAID0,
LV_TYPE_RAID0_META,
LV_TYPE_RAID1,
+ LV_TYPE_RAID01,
LV_TYPE_RAID10,
+ LV_TYPE_RAID10_NEAR,
+ LV_TYPE_RAID10_FAR,
+ LV_TYPE_RAID10_OFFSET,
LV_TYPE_RAID4,
LV_TYPE_RAID5,
LV_TYPE_RAID5_N,
@@ -152,7 +150,7 @@ static const char *_lv_type_names[] = {
[LV_TYPE_PRIVATE] = "private",
[LV_TYPE_LINEAR] = "linear",
[LV_TYPE_STRIPED] = "striped",
- [LV_TYPE_MIRROR] = "mirror",
+ [LV_TYPE_MIRROR] = SEG_TYPE_NAME_MIRROR,
[LV_TYPE_RAID] = "raid",
[LV_TYPE_THIN] = "thin",
[LV_TYPE_CACHE] = "cache",
@@ -176,10 +174,15 @@ static const char *_lv_type_names[] = {
[LV_TYPE_DATA] = "data",
[LV_TYPE_SPARE] = "spare",
[LV_TYPE_VIRTUAL] = "virtual",
+
[LV_TYPE_RAID0] = SEG_TYPE_NAME_RAID0,
[LV_TYPE_RAID0_META] = SEG_TYPE_NAME_RAID0_META,
[LV_TYPE_RAID1] = SEG_TYPE_NAME_RAID1,
+ [LV_TYPE_RAID01] = SEG_TYPE_NAME_RAID01,
[LV_TYPE_RAID10] = SEG_TYPE_NAME_RAID10,
+ [LV_TYPE_RAID10_NEAR] = SEG_TYPE_NAME_RAID10_NEAR,
+ [LV_TYPE_RAID10_FAR] = SEG_TYPE_NAME_RAID10_FAR,
+ [LV_TYPE_RAID10_OFFSET] = SEG_TYPE_NAME_RAID10_OFFSET,
[LV_TYPE_RAID4] = SEG_TYPE_NAME_RAID4,
[LV_TYPE_RAID5] = SEG_TYPE_NAME_RAID5,
[LV_TYPE_RAID5_N] = SEG_TYPE_NAME_RAID5_N,
@@ -455,7 +458,7 @@ bad:
int lv_layout_and_role(struct dm_pool *mem, const struct logical_volume *lv,
struct dm_list **layout, struct dm_list **role) {
- int linear, striped, unknown;
+ int linear, striped;
struct lv_segment *seg;
int public_lv = 1;
@@ -463,7 +466,7 @@ int lv_layout_and_role(struct dm_pool *mem, const struct logical_volume *lv,
if (!(*layout = str_list_create(mem))) {
log_error("LV layout list allocation failed");
- goto bad;
+ return 0;
}
if (!(*role = str_list_create(mem))) {
@@ -508,7 +511,7 @@ int lv_layout_and_role(struct dm_pool *mem, const struct logical_volume *lv,
* linear or striped or mixture of these two.
*/
if (dm_list_empty(*layout)) {
- linear = striped = unknown = 0;
+ linear = striped = 0;
dm_list_iterate_items(seg, &lv->segments) {
if (seg_is_linear(seg))
linear = 1;
@@ -522,10 +525,8 @@ int lv_layout_and_role(struct dm_pool *mem, const struct logical_volume *lv,
* the role above and we need add proper
* detection for such role!
*/
- unknown = 1;
- log_error(INTERNAL_ERROR "Failed to properly detect "
- "layout and role for LV %s/%s",
- lv->vg->name, lv->name);
+ log_warn(INTERNAL_ERROR "WARNING: Failed to properly detect "
+ "layout and role for LV %s.", display_lvname(lv));
}
}
@@ -553,10 +554,8 @@ int lv_layout_and_role(struct dm_pool *mem, const struct logical_volume *lv,
return 1;
bad:
- if (*role)
- dm_pool_free(mem, *role);
- if (*layout)
- dm_pool_free(mem, *layout);
+ dm_pool_free(mem, *layout);
+
return 0;
}
@@ -779,9 +778,6 @@ int get_default_region_size(struct cmd_context *cmd)
{
int region_size = _get_default_region_size(cmd);
- if (region_size > INT32_MAX)
- region_size = INT32_MAX;
-
if (region_size & (region_size - 1)) {
region_size = _round_down_pow2(region_size);
log_verbose("Reducing region size to %u kiB (power of 2).",
@@ -859,7 +855,7 @@ struct lv_segment *get_only_segment_using_this_lv(const struct logical_volume *l
}
dm_list_iterate_items(sl, &lv->segs_using_this_lv) {
- /* Needs to be he only item in list */
+ /* Needs to be the only item in list */
if (!dm_list_end(&lv->segs_using_this_lv, &sl->list))
break;
@@ -931,7 +927,7 @@ dm_percent_t copy_percent(const struct logical_volume *lv)
uint32_t numerator = 0u, denominator = 0u;
struct lv_segment *seg;
- if (seg_is_raid0(first_seg(lv)) || seg_is_raid0_meta(first_seg(lv)))
+ if (seg_is_any_raid0(first_seg(lv)))
return DM_PERCENT_INVALID;
dm_list_iterate_items(seg, &lv->segments) {
@@ -960,6 +956,7 @@ struct lv_segment *alloc_lv_segment(const struct segment_type *segtype,
struct logical_volume *log_lv,
uint32_t area_count,
uint32_t area_len,
+ uint32_t data_copies,
uint32_t chunk_size,
uint32_t region_size,
uint32_t extents_copied,
@@ -982,24 +979,26 @@ struct lv_segment *alloc_lv_segment(const struct segment_type *segtype,
return_NULL;
}
- /* HM FIXME raid0 optionally w/o rmeta */
if (segtype_is_raid(segtype) &&
!segtype_is_raid0(segtype) &&
!(seg->meta_areas = dm_pool_zalloc(mem, areas_sz))) {
dm_pool_free(mem, seg); /* frees everything alloced since seg */
return_NULL;
}
-PFLA("lv=%s seg->meta_areas=%p", lv->name, seg->meta_areas);
+PFLA("lv=%s segtype=%s le=%u len=%u area_len=%u area_count=%u seg->meta_areas=%p",
+ lv->name, segtype->name, le, len, area_len, area_count, seg->meta_areas);
seg->segtype = segtype;
seg->lv = lv;
seg->le = le;
seg->len = len;
+ seg->reshape_len = reshape_len;
seg->status = status;
seg->stripe_size = stripe_size;
+PFLA("seg->stripe_size=%u", seg->stripe_size);
seg->area_count = area_count;
seg->area_len = area_len;
- seg->reshape_len = reshape_len;
+ seg->data_copies = data_copies ?: 1;
seg->chunk_size = chunk_size;
seg->region_size = region_size;
seg->extents_copied = extents_copied;
@@ -1015,28 +1014,38 @@ PFLA("lv=%s seg->meta_areas=%p", lv->name, seg->meta_areas);
if (segtype_is_mirrored(segtype))
lv->status |= MIRRORED;
-
+PFL();
return seg;
}
/* Round up @extents to next stripe boundary for number of @stripes */
-static uint32_t _round_to_stripe_boundary(struct logical_volume *lv, uint32_t extents, uint32_t stripes, int extend)
+static uint32_t _round_to_stripe_boundary(struct logical_volume *lv, uint32_t extents,
+ uint32_t stripes, int extend)
{
- uint32_t rest;
+ uint32_t r = extents;
-PFLA("lv=%s extents=%u stripes=%u", lv->name, extents, stripes);
+PFLA("extents=%u stripes=%u", extents, stripes);
+ /* Caller should ensure... */
if (!stripes)
- return extents;
+ stripes = 1;
- /* Round up extents to stripe divisable amount */
- if ((rest = extents % stripes)) {
- extents += extend ? stripes - rest : -rest;
- log_print_unless_silent("Rounding up size to full stripe size %s",
- display_size(lv->vg->cmd, extents * lv->vg->extent_size));
+ if (stripes > 1) {
+ uint32_t mod;
+
+ if ((mod = r % stripes)) {
+ if (extend ||
+ r < stripes)
+ r += stripes - mod;
+ else
+ r -= mod;
+ }
}
-PFLA("extents=%u stripes=%u rest=%u", extents, stripes, rest);
- return extents;
+ if (r != extents)
+ log_print_unless_silent("Rounding up size to full stripe size %s",
+ display_size(lv->vg->cmd, r * lv->vg->extent_size));
+PFLA("r=%u stripes=%u", r, stripes);
+ return r;
}
struct lv_segment *alloc_snapshot_seg(struct logical_volume *lv,
@@ -1053,7 +1062,7 @@ struct lv_segment *alloc_snapshot_seg(struct logical_volume *lv,
if (!(seg = alloc_lv_segment(segtype, lv, old_le_count,
lv->le_count - old_le_count, 0, status, 0,
- NULL, 0, lv->le_count - old_le_count,
+ NULL, 0, lv->le_count - old_le_count, 1,
0, 0, 0, NULL))) {
log_error("Couldn't allocate new snapshot segment.");
return NULL;
@@ -1072,7 +1081,7 @@ static int _release_and_discard_lv_segment_area(struct lv_segment *seg, uint32_t
struct lv_segment *cache_seg;
struct logical_volume *lv = seg_lv(seg, s);
-PFLA("seg_lv(seg, %u)=%s, area_reduction=%u, with_discard=%d", s, seg_type(seg, s) == AREA_LV ? seg_lv(seg,s)->name: NULL, area_reduction, with_discard);
+PFLA("lv=%s,s=%u area_reduction=%u, with_discard=%d", seg_type(seg, s) == AREA_LV ? display_lvname(lv): NULL, s, area_reduction, with_discard);
if (seg_type(seg, s) == AREA_UNASSIGNED)
return 1;
@@ -1097,16 +1106,26 @@ PFLA("seg_lv(seg, %u)=%s, area_reduction=%u, with_discard=%d", s, seg_type(seg,
return 1;
}
+ if (seg_is_cache_pool(seg) &&
+ !dm_list_empty(&seg->lv->segs_using_this_lv)) {
+ if (!(cache_seg = get_only_segment_using_this_lv(seg->lv)))
+ return_0;
+
+ if (!lv_cache_remove(cache_seg->lv))
+ return_0;
+ }
+
if (lv_is_raid_image(lv)) {
if (seg->meta_areas) {
uint32_t meta_area_reduction;
struct logical_volume *mlv;
struct volume_group *vg = lv->vg;
- if (!(mlv = seg_metalv(seg, s)))
+ if (seg_metatype(seg, s) != AREA_LV ||
+ !(mlv = seg_metalv(seg, s)))
return 0;
-PFLA("area_reduction=%u lv->le_count=%u mlv->le_count=%u" , area_reduction, lv->le_count, mlv->le_count);
+PFLA("mlv=%s area_reduction=%u lv->le_count=%u mlv->le_count=%u" , mlv->name, area_reduction, lv->le_count, mlv->le_count);
meta_area_reduction = raid_rmeta_extents_delta(vg->cmd, lv->le_count, lv->le_count - area_reduction,
seg->region_size, vg->extent_size);
PFLA("meta_area_reduction=%u" , meta_area_reduction);
@@ -1122,15 +1141,6 @@ PFLA("meta_area_reduction=%u" , meta_area_reduction);
return_0; /* FIXME: any upper level reporting */
return 1;
- }
-
- if (seg_is_cache_pool(seg) &&
- !dm_list_empty(&seg->lv->segs_using_this_lv)) {
- if (!(cache_seg = get_only_segment_using_this_lv(seg->lv)))
- return_0;
-
- if (!lv_cache_remove(cache_seg->lv))
- return_0;
} else if (area_reduction == seg->area_len) {
log_very_verbose("Remove %s:%" PRIu32 "[%" PRIu32 "] from "
@@ -1235,7 +1245,7 @@ int set_lv_segment_area_lv(struct lv_segment *seg, uint32_t area_num,
if (status & RAID_META) {
if (seg->meta_areas) {
-PFLA("metadata LV = %s", lv->name);
+PFLA("seg->lv=%s metadata LV=%s s=%u le=%u len=%u status=%lX", display_lvname(seg->lv), display_lvname(lv), area_num, le, lv->le_count, status);
seg->meta_areas[area_num].type = AREA_LV;
seg_metalv(seg, area_num) = lv;
seg_metale(seg, area_num) = le;
@@ -1244,6 +1254,7 @@ else {
PFLA("NO meta_areas!!! metadata LV = %s", lv->name);
}
} else {
+PFLA("seg->lv=%s data LV=%s s=%u le=%u len=%u status=%lX", display_lvname(seg->lv), display_lvname(lv), area_num, le, lv->le_count, status);
seg->areas[area_num].type = AREA_LV;
seg_lv(seg, area_num) = lv;
seg_le(seg, area_num) = le;
@@ -1278,58 +1289,44 @@ static int _lv_segment_add_areas(struct logical_volume *lv,
return 1;
}
-/* Return @area_len for @extents based on @seg's properties (e.g. striped, ...) */
-static uint32_t _area_len(struct lv_segment *seg, uint32_t extents, uint32_t *area_len)
+/* Return area_len for @extents based on @seg's properties (e.g. striped, ...) */
+static uint32_t _seg_area_len(struct lv_segment *seg, uint32_t extents)
{
- /* Caller must ensure exact divisibility */
- if (seg_is_striped(seg) || seg_is_striped_raid(seg)) {
- uint32_t data_devs = seg->area_count - seg->segtype->parity_devs;
-
- if (seg_is_raid10(seg) &&
- data_devs > 1) {
- if (data_devs % 2) {
- log_error("raid10 data devices not divisible by 2");
- return 0;
- }
-
- data_devs /= 2;
- }
+ /* Prevent parity_devs to be subtracted in case of 2 devs raid4/5 */
+ uint32_t stripes = seg->area_count - (seg->area_count > 2 ? seg->segtype->parity_devs : 0);
- if (extents % data_devs) {
- /* HM FIXME: message not right for raid10 */
- log_error("Extents %" PRIu32 " not divisible by #stripes %" PRIu32, extents, data_devs);
- return 0;
- }
+PFLA("lv=%s extents=%u", display_lvname(seg->lv), extents);
+PFLA("segtype=%s seg->reshape_len=%u stripes=%u data_copies=%u", lvseg_name(seg), seg->reshape_len, stripes, seg->data_copies);
- *area_len = extents / data_devs;
- } else
- *area_len = extents;
-
- return 1;
-}
+ return raid_rimage_extents(seg->segtype, extents, stripes, seg->data_copies ?: 1);
+}
/*
* Reduce the size of an lv_segment. New size can be zero.
*/
-static int _lv_segment_reduce(struct lv_segment *seg, uint32_t reduction)
+static int _lv_segment_reduce(struct lv_segment *seg, uint32_t reduction, int delete)
{
- uint32_t area_reduction, s;
+ uint32_t area_reduction = _seg_area_len(seg, reduction), s;
-PFLA("reduction=%u", reduction);
- if (!_area_len(seg, reduction, &area_reduction))
- return 0;
+PFLA("seg->lv=%s reduction=%u, area_reduction=%u delete=%u", display_lvname(seg->lv), reduction, area_reduction, delete);
/* Release extents from all segment areas */
- for (s = 0; s < seg->area_count; s++)
-{
-PFLA("seg_lv(seg, %u)=%s area_reduction=%u", s, seg_type(seg, s) == AREA_LV ? seg_lv(seg, s)->name : NULL, area_reduction);
+ for (s = 0; s < seg->area_count; s++) {
+ /*
+ * If called for a duplicating LV on remove, area_len is bogus
+ * for sub LVs which may differ in size because of stripe boundary
+ * rounding, so retrieve the different reduction from the sub LV
+ */
+ if (delete && seg_type(seg, s) == AREA_LV)
+ area_reduction = seg_lv(seg, s)->le_count;
+PFLA("seg_lv(seg, %u)=%s area_reduction=%u seg->area_len=%u", s, seg_type(seg, s) == AREA_LV ? seg_lv(seg, s)->name : NULL, area_reduction, seg->area_len);
if (!release_and_discard_lv_segment_area(seg, s, area_reduction))
return_0;
-}
+ }
seg->len -= reduction;
seg->lv->size -= reduction * seg->lv->vg->extent_size;
- seg->area_len -= seg_is_striped(seg) ? area_reduction : reduction;
+ seg->area_len -= area_reduction;
PFLA("seg->len=%u seg->lv->size=%llu, seg->area_len=%u", seg->len, (unsigned long long) seg->lv->size, seg->area_len);
return 1;
@@ -1342,6 +1339,10 @@ static uint32_t _calc_area_multiple(const struct segment_type *segtype,
if (!area_count)
return 1;
+ if (segtype_is_raid1(segtype) ||
+ segtype_is_raid01(segtype))
+ return 1;
+
/* Striped */
PFLA("area_count=%u stripes=%u", area_count, stripes);
if (segtype_is_striped(segtype))
@@ -1353,8 +1354,8 @@ PFL();
* the 'stripes' argument will always need to
* be given.
*/
- if (segtype_is_raid10(segtype))
- return stripes ?: area_count / 2;
+ if (segtype_is_any_raid10(segtype))
+ return stripes;
PFL();
/* RAID0 and parity RAID (e.g. RAID 4/5/6) */
if (segtype_is_striped_raid(segtype)) {
@@ -1368,7 +1369,7 @@ PFL();
if (area_count <= segtype->parity_devs)
return 1;
-PFLA("r=%u", area_count - segtype->parity_devs);
+PFLA("area_count=%u, parity_devs=%u, r=%u", area_count, segtype->parity_devs, area_count - segtype->parity_devs);
return area_count - segtype->parity_devs;
}
@@ -1380,21 +1381,116 @@ PFL();
return 1;
}
+/* Return 1 in case any last segment of @lv, area @s contains another layered LV */
+/* HM FIXME: correct? */
+static int _is_layered_lv(struct logical_volume *lv, uint32_t s)
+{
+ struct lv_segment *seg = last_seg(lv);
+
+ if (!seg)
+ return 0;
+
+ if (seg_is_raid01(seg))
+ return 1;
+
+ return seg_is_raid1(seg) &&
+ seg_type(seg, s) == AREA_LV &&
+ strstr(seg_lv(seg, s)->name, "_dup_");
+}
+
+/* Find smallest one of any sub lvs of @seg */
+static uint32_t _seg_smallest_sub_lv(struct lv_segment *seg)
+{
+ uint32_t r = ~0U, s, lvs = 0;
+
+ /* Find smallest LV and use that for length of top-level LV */
+ for (s = 0; s < seg->area_count; s++) {
+ if (seg_type(seg, s) == AREA_LV) {
+ lvs++;
+
+ if (seg_lv(seg, s)->le_count < r)
+ r = seg_lv(seg, s)->le_count;
+ }
+ }
+
+ return lvs ? r : 0;
+}
+
/*
* Entry point for all LV reductions in size.
*/
static int _lv_reduce(struct logical_volume *lv, uint32_t extents, int delete)
{
- struct lv_segment *seg = first_seg(lv);;
- uint32_t count;
- uint32_t reduction;
+ int dont_recurse = !delete && extents == lv->le_count;
+ int reduced = 0;
+ struct lv_segment *seg = last_seg(lv);
+ uint32_t count, extents_sav = extents, reduction, s, stripes;
struct logical_volume *pool_lv;
+ if (!lv->le_count)
+ return 1;
+
+ stripes = seg->area_count - seg->segtype->parity_devs;
+PFLA("lv=%s lv->le_count=%u seg=%p extents=%u stripes=%u data_copies=%u delete=%u", lv->name, lv->le_count, seg, extents, stripes, seg->data_copies, delete);
+#if 1
+ /* Check for multi-level stack (e.g. reduction of a duplicated LV stack) */
+ if (!dont_recurse) {
+ for (s = 0; s < seg->area_count; s++) {
+ if (_is_layered_lv(lv, s)) {
+ uint32_t seg_lv_extents = seg_lv(seg, s)->le_count;
+
+ if (!delete)
+ seg_lv_extents -= lv->le_count - extents;
+
+PFLA("recursive seg_lv(seg, %u)=%s", s, display_lvname(seg_lv(seg, s)));
+ if (!_lv_reduce(seg_lv(seg, s), seg_lv_extents, delete))
+ return_0;
+PFLA("end recursive seg_lv(seg, %u)=%s", s, display_lvname(seg_lv(seg, s)));
+ reduced++;
+ }
+ }
+
+ if (reduced) {
+ seg->len = _seg_smallest_sub_lv(seg);
+ seg->area_len = _seg_area_len(seg, seg->len);
+ lv->le_count = seg->len;
+ lv->size = (uint64_t) lv->le_count * lv->vg->extent_size;
+
+ if (delete &&
+ !lv->le_count &&
+ seg->meta_areas) {
+ for (s = 0; s < seg->area_count; s++)
+ if (seg_metatype(seg, s) == AREA_LV &&
+ !lv_remove(seg_metalv(seg, s)))
+ return_0;
+ goto out;
+ }
+
+ return 1;
+ }
+ }
+#endif
+
+ extents_sav = extents;
+#if 0
+ if (extents != lv->le_count &&
+ (seg_is_striped(seg) || seg_is_striped_raid(seg)))
+ extents = _round_to_stripe_boundary(lv, extents, stripes, 0 /* reduce */);
+#endif
+
+ if (extents > extents_sav) {
+ log_warn("LV %s would be %u extents smaller than requested due to stripe boundary rounding!",
+ display_lvname(lv), extents - extents_sav);
+ if (yes_no_prompt("Do you really want to reduce %s? [y/n]: ",
+ display_lvname(lv)) == 'n') {
+ log_error("Logical volume %s NOT reduced", display_lvname(lv));
+ return 0;
+ }
+ if (sigint_caught())
+ return_0;
+ }
+
PFLA("lv=%s lv->le_count=%u seg=%p extents=%u", lv->name, lv->le_count, seg, extents);
-if (!seg)
-return_0;
- if (seg_is_striped(seg) || seg_is_striped_raid(seg))
- extents = _round_to_stripe_boundary(lv, extents, _calc_area_multiple(seg->segtype, seg->area_count, 0), 0);
if (lv_is_merging_origin(lv)) {
log_debug_metadata("Dropping snapshot merge of %s to removed origin %s.",
@@ -1402,12 +1498,24 @@ return_0;
clear_snapshot_merge(lv);
}
-PFLA("lv=%s extents=%u", lv->name, extents);
+ /*
+ * For raid10_far on reduction (not deletion!), the given address
+ * space of the data image LVs have to be split in #data_copies
+ * (i.e. the raid10_far stripe zones being mirrored)
+ * and @extents/#data_copies extents at the end of the splits
+ * have to be moved to the end of the images segment list
+ * before the normal reduction flow is allowed to happen
+ */
+ if (seg_is_raid10_far(seg) &&
+ !lv_raid10_far_reorder_segments(lv, extents, 0 /* reduce */))
+ return 0;
+
count = extents;
dm_list_iterate_back_items(seg, &lv->segments) {
if (!count)
break;
+PFLA("seg->len=%u count=%u extents=%u", seg->len, count, extents);
if (seg->len <= count) {
if (seg->merge_lv) {
log_debug_metadata("Dropping snapshot merge of removed %s to origin %s.",
@@ -1451,8 +1559,8 @@ PFLA("lv=%s extents=%u", lv->name, extents);
} else
reduction = count;
-PFLA("reduction=%u", reduction);
- if (!_lv_segment_reduce(seg, reduction))
+PFLA("seg->lv=%s reduction=%u", display_lvname(seg->lv), reduction);
+ if (!_lv_segment_reduce(seg, reduction, delete))
return_0;
count -= reduction;
@@ -1463,15 +1571,18 @@ PFLA("reduction=%u", reduction);
if (!delete)
return 1;
-
-PFLA("deleting %s", lv->name);
+out:
+PFLA("deleting %s le_count=%u", lv->name, lv->le_count);
if (lv == lv->vg->pool_metadata_spare_lv) {
lv->status &= ~POOL_METADATA_SPARE;
lv->vg->pool_metadata_spare_lv = NULL;
}
+ if (lv->le_count)
+ return 1;
+
/* Remove the LV if it is now empty */
- if (!lv->le_count && !unlink_lv_from_vg(lv))
+ if (find_lv_in_vg(lv->vg, lv->name) && !unlink_lv_from_vg(lv))
return_0;
else if (lv->vg->fid->fmt->ops->lv_setup &&
!lv->vg->fid->fmt->ops->lv_setup(lv->vg->fid, lv))
@@ -1510,7 +1621,7 @@ PFLA("lv->le_count=%u", lv->le_count);
* that suggest it is anything other than "error".
*/
/* FIXME Check for other flags that need removing */
- lv->status &= ~(MIRROR|MIRRORED|RAID|RAID_IMAGE|RAID_META|PVMOVE|LOCKED);
+ lv->status &= ~(MIRROR|MIRRORED|RAID|RAID_IMAGE|RAID_META|PVMOVE|LOCKED|PARTIAL_LV);
/* FIXME Check for any attached LVs that will become orphans e.g. mirror logs */
@@ -1520,12 +1631,41 @@ PFLA("lv->le_count=%u", lv->le_count);
return 1;
}
+int lv_refresh_suspend_resume(struct cmd_context *cmd, struct logical_volume *lv)
+{
+ if (!cmd->partial_activation && (lv->status & PARTIAL_LV)) {
+ log_error("Refusing refresh of partial LV %s."
+ " Use '--activationmode partial' to override.",
+ display_lvname(lv));
+ return 0;
+ }
+
+ if (!suspend_lv(cmd, lv)) {
+ log_error("Failed to suspend %s.", display_lvname(lv));
+ return 0;
+ }
+
+ if (!resume_lv(cmd, lv)) {
+ log_error("Failed to reactivate %s.", display_lvname(lv));
+ return 0;
+ }
+
+ return 1;
+}
+
/*
* Remove given number of extents from LV.
*/
int lv_reduce(struct logical_volume *lv, uint32_t extents)
{
- return _lv_reduce(lv, extents, 1);
+ int delete = (extents == lv->le_count ? 1 : 0);
+
+ if (!delete && !lv_raid_in_sync(lv)) {
+ log_error("RAID LV %s has to be in-sync to extend its size!", display_lvname(lv));
+ return 0;
+ }
+
+ return _lv_reduce(lv, extents, delete);
}
/*
@@ -1560,7 +1700,7 @@ struct alloc_handle {
uint32_t area_count; /* Number of parallel areas */
uint32_t parity_count; /* Adds to area_count, but not area_multiple */
uint32_t area_multiple; /* seg->len = area_len * area_multiple */
- uint32_t area_multiple_check; /* Check area_multiple in _allocate(); needed for striped image additions */
+ uint32_t data_copies; /* # of data copies */
uint32_t log_area_count; /* Number of parallel logs */
uint32_t metadata_area_count; /* Number of parallel metadata areas */
uint32_t log_len; /* Length of log/metadata_area */
@@ -1584,7 +1724,9 @@ struct alloc_handle {
unsigned split_metadata_is_allocated; /* Metadata has been allocated */
const struct dm_config_node *cling_tag_list_cn;
-
+#if 1
+ const struct segment_type *segtype;
+#endif
struct dm_list *parallel_areas; /* PVs to avoid */
/*
@@ -1634,14 +1776,15 @@ static uint32_t mirror_log_extents(uint32_t region_size, uint32_t pe_size, uint3
* stripes and mirrors refer to the parallel areas used for data.
* If log_area_count > 1 it is always mirrored (not striped).
*/
+static int _validate_tag_list(const struct dm_config_node *cling_tag_list_cn);
static struct alloc_handle *_alloc_init(struct cmd_context *cmd,
struct dm_pool *mem,
const struct segment_type *segtype,
alloc_policy_t alloc, int approx_alloc,
- int extend,
uint32_t existing_extents,
uint32_t new_extents,
- uint32_t mirrors,
+ uint32_t areas,
+ uint32_t mirrors /* #data_copies */,
uint32_t stripes,
uint32_t metadata_area_count,
uint32_t extent_size,
@@ -1650,21 +1793,22 @@ static struct alloc_handle *_alloc_init(struct cmd_context *cmd,
{
struct alloc_handle *ah;
uint32_t s, area_count, alloc_count, parity_count, total_extents;
- size_t size = 0;
+ size_t size = sizeof(*ah);
- /* FIXME Caller should ensure this */
- if (mirrors && !stripes)
+ /* FIXME: caller should ensure both... */
+ if (!mirrors)
+ mirrors = 1;
+ if (!stripes)
stripes = 1;
if (segtype_is_virtual(segtype))
area_count = 0;
- else if (mirrors > 1)
- area_count = mirrors * stripes;
+ else if (segtype_is_raid01(segtype))
+ area_count = stripes * mirrors;
else
- area_count = stripes;
+ area_count = stripes < 2 ? mirrors : stripes;
- size = sizeof(*ah);
-PFLA("extend=%u existing_extents=%u, new_extents=%u, area_count=%u mirrors=%u stripes=%u metadata_area_count=%u", extend, existing_extents, new_extents, area_count, mirrors, stripes, metadata_area_count);
+PFLA("existing_extents=%u, new_extents=%u, area_count=%u mirrors=%u stripes=%u metadata_area_count=%u", existing_extents, new_extents, area_count, mirrors, stripes, metadata_area_count);
/*
* It is a requirement that RAID 4/5/6 have to have at least 2 stripes.
@@ -1680,23 +1824,17 @@ PFLA("extend=%u existing_extents=%u, new_extents=%u, area_count=%u mirrors=%u st
* account for the extra parity devices because the array already
* exists and they only want replacement drives.
*/
-
/* HM FIXME: avoid this overload to define the parity_count to allocate! */
-#if 0
- parity_count = (area_count <= segtype->parity_devs) ? 0 : segtype->parity_devs;
- parity_count = segtype->parity_devs;
-#else
- parity_count = extend ? segtype->parity_devs : 0;
-#endif
+ // parity_count = (area_count <= segtype->parity_devs) ? 0 : segtype->parity_devs;
+
+ parity_count = (areas && area_count < 2) ? 0 : segtype->parity_devs;
+
alloc_count = area_count + parity_count;
+
PFLA("alloc_count=%u parity_count=%u metadata_area_count=%u", alloc_count, parity_count, metadata_area_count);
if (segtype_is_raid(segtype) && metadata_area_count) {
- if (metadata_area_count != alloc_count) {
- log_error(INTERNAL_ERROR "Bad metadata_area_count");
- return 0;
- }
-
/* RAID has a meta area for each device */
+ metadata_area_count = alloc_count;
alloc_count *= 2;
} else
/* mirrors specify their exact log count */
@@ -1704,7 +1842,6 @@ PFLA("alloc_count=%u parity_count=%u metadata_area_count=%u", alloc_count, parit
PFLA("alloc_count=%u parity_count=%u metadata_area_count=%u", alloc_count, parity_count, metadata_area_count);
size += sizeof(ah->alloced_areas[0]) * alloc_count;
-
if (!(ah = dm_pool_zalloc(mem, size))) {
log_error("allocation handle allocation failed");
return NULL;
@@ -1727,6 +1864,7 @@ PFLA("alloc_count=%u parity_count=%u metadata_area_count=%u", alloc_count, parit
ah->area_count = area_count;
ah->parity_count = parity_count;
+ ah->data_copies = mirrors;
ah->region_size = region_size;
ah->alloc = alloc;
@@ -1739,20 +1877,22 @@ PFLA("alloc_count=%u parity_count=%u metadata_area_count=%u", alloc_count, parit
*/
ah->area_multiple = _calc_area_multiple(segtype, area_count + segtype->parity_devs, stripes);
- ah->area_multiple_check = extend ? 1 : 0;
-
//FIXME: s/mirror_logs_separate/metadata_separate/ so it can be used by others?
ah->mirror_logs_separate = find_config_tree_bool(cmd, allocation_mirror_logs_require_separate_pvs_CFG, NULL);
total_extents = new_extents;
-PFLA("ah->area_multiple=%u area_count=%u new_extents=%u total_extents=%u", ah->area_multiple, area_count, new_extents, total_extents);
-
+PFLA("ah->area_multiple=%u area_count=%u new_extents=%u total_extents=%u stripes=%u mirrors=%u", ah->area_multiple, area_count, new_extents, total_extents, stripes, mirrors);
if (segtype_is_raid(segtype)) {
+ total_extents = raid_total_extents(segtype, total_extents, stripes, mirrors);
+
+PFLA("total_extents=%u stripes=%u mirrors=%u", total_extents, stripes, mirrors);
if (metadata_area_count) {
+ uint32_t cur_rimage_extents, new_rimage_extents;
+
PFLA("area_count=%u metadata_area_count=%u total_extents=%u", area_count, metadata_area_count, total_extents);
- ah->log_len = raid_rmeta_extents_delta(cmd,
- existing_extents / ah->area_multiple,
- (existing_extents + new_extents) / ah->area_multiple,
+ cur_rimage_extents = raid_rimage_extents(segtype, existing_extents, stripes, mirrors);
+ new_rimage_extents = raid_rimage_extents(segtype, existing_extents + new_extents, stripes, mirrors),
+ ah->log_len = raid_rmeta_extents_delta(cmd, cur_rimage_extents, new_rimage_extents,
region_size, extent_size);
ah->metadata_area_count = metadata_area_count;
ah->alloc_and_split_meta = !!ah->log_len;
@@ -1760,8 +1900,7 @@ PFLA("area_count=%u metadata_area_count=%u total_extents=%u", area_count, metada
* We need 'log_len' extents for each
* RAID device's metadata_area
*/
- total_extents += ah->log_len * (ah->area_multiple > 1 ?
- area_count / (segtype_is_raid10(segtype) ? mirrors : 1) : 1);
+ total_extents += ah->log_len * (segtype_is_raid1(segtype) ? 1 : area_count);
PFLA("existing_extents=%u new_extents=%u ah->log_len=%u total_extents=%u", existing_extents, new_extents, ah->log_len, total_extents);
} else {
ah->log_area_count = 0;
@@ -1794,18 +1933,24 @@ PFLA("existing_extents=%u new_extents=%u ah->log_len=%u total_extents=%u", exist
}
} else {
ah->log_area_count = metadata_area_count;
- ah->log_len = !metadata_area_count ? 0 :
- mirror_log_extents(ah->region_size, extent_size,
- (existing_extents + total_extents) / ah->area_multiple);
+ ah->log_len = metadata_area_count ?
+ mirror_log_extents(ah->region_size, extent_size, existing_extents + total_extents) : 0;
+PFLA("ah->log_area_count=%u ah->log_len=%u existing_extents=%u total_extents=%u", ah->log_area_count, ah->log_len, existing_extents, total_extents);
}
log_debug("Adjusted allocation request to %" PRIu32 " logical extents. Existing size %" PRIu32 ". New size %" PRIu32 ".",
total_extents, existing_extents, total_extents + existing_extents);
- if (extend && (mirrors || stripes))
+ // if (extend && (mirrors || stripes))
+#if 1
+ if (mirrors || stripes)
total_extents += existing_extents;
-
+#endif
+#if 1
PFLA("total_extents=%u", total_extents);
+ if (total_extents < stripes)
+ return NULL;
+#endif
ah->new_extents = total_extents;
for (s = 0; s < alloc_count; s++)
@@ -1813,12 +1958,15 @@ PFLA("total_extents=%u", total_extents);
ah->parallel_areas = parallel_areas;
- ah->cling_tag_list_cn = find_config_tree_node(cmd, allocation_cling_tag_list_CFG, NULL);
+ if ((ah->cling_tag_list_cn = find_config_tree_node(cmd, allocation_cling_tag_list_CFG, NULL)))
+ (void) _validate_tag_list(ah->cling_tag_list_cn);
ah->maximise_cling = find_config_tree_bool(cmd, allocation_maximise_cling_CFG, NULL);
ah->approx_alloc = approx_alloc;
-
+#if 1
+ ah->segtype = segtype;
+#endif
return ah;
}
@@ -1832,12 +1980,21 @@ void alloc_destroy(struct alloc_handle *ah)
static int _sufficient_pes_free(struct alloc_handle *ah, struct dm_list *pvms,
uint32_t allocated, uint32_t extents_still_needed)
{
+#if 1
+ uint32_t area_extents_needed =
+ raid_rimage_extents(ah->segtype, extents_still_needed - allocated, ah->area_count, ah->data_copies);
+ uint32_t parity_extents_needed = ah->parity_count ?
+ raid_rimage_extents(ah->segtype, extents_still_needed - allocated, ah->parity_count, ah->data_copies) : 0;
+#else
uint32_t area_extents_needed = (extents_still_needed - allocated) * ah->area_count / ah->area_multiple;
uint32_t parity_extents_needed = (extents_still_needed - allocated) * ah->parity_count / ah->area_multiple;
- uint32_t metadata_extents_needed = ah->metadata_area_count * ah->log_len;
+#endif
+ uint32_t metadata_extents_needed = ah->alloc_and_split_meta ? 0 : ah->metadata_area_count * ah->log_len;
uint32_t total_extents_needed = area_extents_needed + parity_extents_needed + metadata_extents_needed;
uint32_t free_pes = pv_maps_size(pvms);
+PFLA("extents_still_needed=%u allocated=%u ah->area_count=%u ah->data_copies=%u ah->parity_count=%u ah->area_multiple=%u", extents_still_needed, allocated, ah->area_count, ah->data_copies, ah->parity_count, ah->area_multiple);
+PFLA("free_pes=%u area_extents_needed=%u parity_extents_needed=%u metadata_extents_needed=%u total_extents_needed=%u", free_pes, area_extents_needed, parity_extents_needed, metadata_extents_needed, total_extents_needed);
if (total_extents_needed > free_pes) {
log_error("Insufficient free space: %" PRIu32 " extents needed,"
" but only %" PRIu32 " available",
@@ -1904,6 +2061,11 @@ static void _init_alloc_parms(struct alloc_handle *ah,
if (alloc_parms->alloc == ALLOC_CLING_BY_TAGS)
alloc_parms->flags |= A_CLING_BY_TAGS;
+ if (!(alloc_parms->alloc & A_POSITIONAL_FILL) &&
+ (alloc_parms->alloc == ALLOC_CONTIGUOUS) &&
+ ah->cling_tag_list_cn)
+ alloc_parms->flags |= A_PARTITION_BY_TAGS;
+
/*
* For normal allocations, if any extents have already been found
* for allocation, prefer to place further extents on the same disks as
@@ -1964,24 +2126,39 @@ static int _log_parallel_areas(struct dm_pool *mem, struct dm_list *parallel_are
static int _setup_alloced_segment(struct logical_volume *lv, uint64_t status,
uint32_t area_count,
uint32_t stripe_size,
+ uint32_t data_copies,
const struct segment_type *segtype,
struct alloced_area *aa,
uint32_t region_size)
{
- uint32_t s, extents, area_multiple;
+ uint32_t s, extents, stripes = area_count - segtype->parity_devs;
struct lv_segment *seg;
- area_multiple = _calc_area_multiple(segtype, area_count, 0);
-PFLA("area_multiple=%u", area_multiple);
- extents = aa[0].len * area_multiple;
+PFLA("area_count=%u data_copies=%u segtype=%s", area_count, data_copies, segtype->name);
+ if (segtype_is_raid(segtype))
+ extents = raid_rimage_extents(segtype, aa[0].len * stripes, stripes, data_copies);
+ else {
+#if 1
+ extents = raid_total_extents(segtype, aa[0].len * stripes, stripes, data_copies);
+#else
+ area_multiple = _calc_area_multiple(segtype, area_count, stripes);
+PFLA("aa[0]=%p area_multiple=%u", (void *) aa, area_multiple);
+ extents = aa[0].len * area_multiple;
+#endif
+ }
+PFLA("aa[0].len=%u stripes=%u, data_copies=%u", aa[0].len, stripes, data_copies);
if (!(seg = alloc_lv_segment(segtype, lv, lv->le_count, extents, 0,
status, stripe_size, NULL, area_count,
- aa[0].len, 0u, region_size, 0u, NULL))) {
+ aa[0].len, data_copies, 0u, region_size, 0u, NULL))) {
log_error("Couldn't allocate new LV segment.");
return 0;
}
+#if 1
+ seg->data_copies = data_copies;
+#endif
+
for (s = 0; s < area_count; s++)
if (!set_lv_segment_area_pv(seg, s, aa[s].pv, aa[s].pe))
return_0;
@@ -1997,6 +2174,7 @@ PFLA("area_multiple=%u", area_multiple);
static int _setup_alloced_segments(struct logical_volume *lv,
struct dm_list *alloced_areas,
uint32_t area_count,
+ uint32_t data_copies,
uint64_t status,
uint32_t stripe_size,
const struct segment_type *segtype,
@@ -2005,8 +2183,8 @@ static int _setup_alloced_segments(struct logical_volume *lv,
struct alloced_area *aa;
dm_list_iterate_items(aa, &alloced_areas[0]) {
- if (!_setup_alloced_segment(lv, status, area_count,
- stripe_size, segtype, aa,
+ if (!_setup_alloced_segment(lv, status, area_count, stripe_size,
+ data_copies, segtype, aa,
region_size))
return_0;
}
@@ -2035,7 +2213,12 @@ static int _alloc_parallel_area(struct alloc_handle *ah, uint32_t max_to_allocat
return 1;
}
+#if 1
+ area_len = raid_rimage_extents(ah->segtype, max_to_allocate, ah->area_count, 1 /* data_copies */);
+#else
area_len = max_to_allocate / ah->area_multiple;
+#endif
+PFLA("area_len=%u raid_rimage_extents=%u", area_len, raid_rimage_extents(ah->segtype, max_to_allocate, ah->area_count, ah->data_copies));
/* Reduce area_len to the smallest of the areas */
for (s = 0; s < ah->area_count + ah->parity_count; s++)
@@ -2119,7 +2302,15 @@ PFLA("aa[s=%u].len=%u", s, aa[s].len);
ah->total_area_len += area_len;
+#if 1
+ if (segtype_is_striped(ah->segtype) ||
+ segtype_is_striped_raid(ah->segtype))
+ area_len *= ah->area_count;
+
+ alloc_state->allocated += area_len;
+#else
alloc_state->allocated += area_len * ah->area_multiple;
+#endif
PFLA("alloc_state->allocated=%u", alloc_state->allocated);
return 1;
@@ -2145,13 +2336,15 @@ static int _for_each_pv(struct cmd_context *cmd, struct logical_volume *lv,
void *data)
{
uint32_t s;
- uint32_t remaining_seg_len, area_len, area_multiple;
+ uint32_t remaining_seg_len, area_len;
uint32_t stripes_per_mimage = 1;
int r = 1;
if (!seg && !(seg = find_seg_by_le(lv, le))) {
+#if 0
log_error("Failed to find segment for %s extent %" PRIu32,
lv->name, le);
+#endif
return 0;
}
@@ -2164,9 +2357,15 @@ static int _for_each_pv(struct cmd_context *cmd, struct logical_volume *lv,
if (max_seg_len && *max_seg_len > remaining_seg_len)
*max_seg_len = remaining_seg_len;
+ /* HM FIXME: get rid of area_multiple, because it does not cut raid10* */
+#if 0
area_multiple = _calc_area_multiple(seg->segtype, seg->area_count, 0);
-PFLA("area_multiple=%u", area_multiple);
- area_len = remaining_seg_len / (area_multiple ? : 1);
+PFLA("area_multiple=%u remaining_seg_len=%u", area_multiple, remaining_seg_len);
+
+ area_len = remaining_seg_len / (area_multiple ?: 1);
+#else
+ area_len = _seg_area_len(seg, remaining_seg_len);
+#endif
/* For striped mirrors, all the areas are counted, through the mirror layer */
if (top_level_area_index == -1)
@@ -2176,9 +2375,17 @@ PFLA("area_multiple=%u", area_multiple);
s < seg->area_count && (!max_areas || s <= max_areas);
s++) {
if (seg_type(seg, s) == AREA_LV) {
+ uint32_t image_le = raid_rimage_extents(seg->segtype, le - seg->le,
+ seg->area_count - seg->segtype->parity_devs,
+ 1 /* seg->data_copies */);
+
if (!(r = _for_each_pv(cmd, seg_lv(seg, s),
+#if 1
+ seg_le(seg, s) + image_le,
+#else
seg_le(seg, s) +
(le - seg->le) / area_multiple,
+#endif
area_len, NULL, max_seg_len, 0,
(stripes_per_mimage == 1) && only_single_area_segments ? 1U : 0U,
(top_level_area_index != -1) ? top_level_area_index : (int) (s * stripes_per_mimage),
@@ -2240,6 +2447,7 @@ static int _comp_area(const void *l, const void *r)
struct pv_match {
int (*condition)(struct pv_match *pvmatch, struct pv_segment *pvseg, struct pv_area *pva);
+ struct alloc_handle *ah;
struct alloc_state *alloc_state;
struct pv_area *pva;
const struct dm_config_node *cling_tag_list_cn;
@@ -2259,65 +2467,170 @@ static int _is_same_pv(struct pv_match *pvmatch __attribute((unused)), struct pv
/*
* Does PV area have a tag listed in allocation/cling_tag_list that
- * matches a tag of the PV of the existing segment?
+ * matches EITHER a tag of the PV of the existing segment OR a tag in pv_tags?
+ * If tags_list_str is set, then instead we generate a list of matching tags for printing.
*/
-static int _pvs_have_matching_tag(const struct dm_config_node *cling_tag_list_cn, struct physical_volume *pv1, struct physical_volume *pv2)
+static int _match_pv_tags(const struct dm_config_node *cling_tag_list_cn,
+ struct physical_volume *pv1, uint32_t pv1_start_pe, uint32_t area_num,
+ struct physical_volume *pv2, struct dm_list *pv_tags, unsigned validate_only,
+ struct dm_pool *mem, const char **tags_list_str)
{
const struct dm_config_value *cv;
const char *str;
const char *tag_matched;
+ struct dm_list *tags_to_match = tags_list_str ? NULL : pv_tags ? : &pv2->tags;
+ struct dm_str_list *sl;
+ unsigned first_tag = 1;
+
+ if (tags_list_str && !dm_pool_begin_object(mem, 256)) {
+ log_error("PV tags string allocation failed");
+ return 0;
+ }
for (cv = cling_tag_list_cn->v; cv; cv = cv->next) {
if (cv->type != DM_CFG_STRING) {
- log_error("Ignoring invalid string in config file entry "
- "allocation/cling_tag_list");
+ if (validate_only)
+ log_warn("WARNING: Ignoring invalid string in config file entry "
+ "allocation/cling_tag_list");
continue;
}
str = cv->v.str;
if (!*str) {
- log_error("Ignoring empty string in config file entry "
- "allocation/cling_tag_list");
+ if (validate_only)
+ log_warn("WARNING: Ignoring empty string in config file entry "
+ "allocation/cling_tag_list");
continue;
}
if (*str != '@') {
- log_error("Ignoring string not starting with @ in config file entry "
- "allocation/cling_tag_list: %s", str);
+ if (validate_only)
+ log_warn("WARNING: Ignoring string not starting with @ in config file entry "
+ "allocation/cling_tag_list: %s", str);
continue;
}
str++;
if (!*str) {
- log_error("Ignoring empty tag in config file entry "
- "allocation/cling_tag_list");
+ if (validate_only)
+ log_warn("WARNING: Ignoring empty tag in config file entry "
+ "allocation/cling_tag_list");
continue;
}
+ if (validate_only)
+ continue;
+
/* Wildcard matches any tag against any tag. */
if (!strcmp(str, "*")) {
- if (!str_list_match_list(&pv1->tags, &pv2->tags, &tag_matched))
+ if (tags_list_str) {
+ dm_list_iterate_items(sl, &pv1->tags) {
+ if (!first_tag && !dm_pool_grow_object(mem, ",", 0)) {
+ dm_pool_abandon_object(mem);
+ log_error("PV tags string extension failed.");
+ return 0;
+ }
+ first_tag = 0;
+ if (!dm_pool_grow_object(mem, sl->str, 0)) {
+ dm_pool_abandon_object(mem);
+ log_error("PV tags string extension failed.");
+ return 0;
+ }
+ }
+ continue;
+ }
+ if (!str_list_match_list(&pv1->tags, tags_to_match, &tag_matched))
continue;
else {
- log_debug_alloc("Matched allocation PV tag %s on existing %s with free space on %s.",
- tag_matched, pv_dev_name(pv1), pv_dev_name(pv2));
+ if (!pv_tags)
+ log_debug_alloc("Matched allocation PV tag %s on existing %s with free space on %s.",
+ tag_matched, pv_dev_name(pv1), pv_dev_name(pv2));
+ else
+ log_debug_alloc("Eliminating allocation area %" PRIu32 " at PV %s start PE %" PRIu32
+ " from consideration: PV tag %s already used.",
+ area_num, pv_dev_name(pv1), pv1_start_pe, tag_matched);
return 1;
}
}
if (!str_list_match_item(&pv1->tags, str) ||
- !str_list_match_item(&pv2->tags, str))
+ (tags_to_match && !str_list_match_item(tags_to_match, str)))
continue;
else {
- log_debug_alloc("Matched allocation PV tag %s on existing %s with free space on %s.",
- str, pv_dev_name(pv1), pv_dev_name(pv2));
+ if (tags_list_str) {
+ if (!first_tag && !dm_pool_grow_object(mem, ",", 0)) {
+ dm_pool_abandon_object(mem);
+ log_error("PV tags string extension failed.");
+ return 0;
+ }
+ first_tag = 0;
+ if (!dm_pool_grow_object(mem, str, 0)) {
+ dm_pool_abandon_object(mem);
+ log_error("PV tags string extension failed.");
+ return 0;
+ }
+ continue;
+ }
+ if (!pv_tags)
+ log_debug_alloc("Matched allocation PV tag %s on existing %s with free space on %s.",
+ str, pv_dev_name(pv1), pv_dev_name(pv2));
+ else
+ log_debug_alloc("Eliminating allocation area %" PRIu32 " at PV %s start PE %" PRIu32
+ " from consideration: PV tag %s already used.",
+ area_num, pv_dev_name(pv1), pv1_start_pe, str);
return 1;
}
}
+ if (tags_list_str) {
+ if (!dm_pool_grow_object(mem, "\0", 1)) {
+ dm_pool_abandon_object(mem);
+ log_error("PV tags string extension failed.");
+ return 0;
+ }
+ *tags_list_str = dm_pool_end_object(mem);
+ return 1;
+ }
+
return 0;
}
+static int _validate_tag_list(const struct dm_config_node *cling_tag_list_cn)
+{
+ return _match_pv_tags(cling_tag_list_cn, NULL, 0, 0, NULL, NULL, 1, NULL, NULL);
+}
+
+static const char *_tags_list_str(struct alloc_handle *ah, struct physical_volume *pv1)
+{
+ const char *tags_list_str;
+
+ if (!_match_pv_tags(ah->cling_tag_list_cn, pv1, 0, 0, NULL, NULL, 0, ah->mem, &tags_list_str))
+ return_NULL;
+
+ return tags_list_str;
+}
+
+/*
+ * Does PV area have a tag listed in allocation/cling_tag_list that
+ * matches a tag in the pv_tags list?
+ */
+static int _pv_has_matching_tag(const struct dm_config_node *cling_tag_list_cn,
+ struct physical_volume *pv1, uint32_t pv1_start_pe, uint32_t area_num,
+ struct dm_list *pv_tags)
+{
+ return _match_pv_tags(cling_tag_list_cn, pv1, pv1_start_pe, area_num, NULL, pv_tags, 0, NULL, NULL);
+}
+
+/*
+ * Does PV area have a tag listed in allocation/cling_tag_list that
+ * matches a tag of the PV of the existing segment?
+ */
+static int _pvs_have_matching_tag(const struct dm_config_node *cling_tag_list_cn,
+ struct physical_volume *pv1, struct physical_volume *pv2)
+{
+ return _match_pv_tags(cling_tag_list_cn, pv1, 0, 0, pv2, NULL, 0, NULL, NULL);
+}
+
static int _has_matching_pv_tag(struct pv_match *pvmatch, struct pv_segment *pvseg, struct pv_area *pva)
{
return _pvs_have_matching_tag(pvmatch->cling_tag_list_cn, pvseg->pv, pva->map->pv);
@@ -2337,23 +2650,32 @@ static int _is_contiguous(struct pv_match *pvmatch __attribute((unused)), struct
return 1;
}
-static void _reserve_area(struct alloc_state *alloc_state, struct pv_area *pva, uint32_t required,
- uint32_t ix_pva, uint32_t unreserved)
+static void _reserve_area(struct alloc_handle *ah, struct alloc_state *alloc_state, struct pv_area *pva,
+ uint32_t required, uint32_t ix_pva, uint32_t unreserved)
{
struct pv_area_used *area_used = &alloc_state->areas[ix_pva];
+ const char *pv_tag_list = NULL;
+
+ if (ah->cling_tag_list_cn)
+ pv_tag_list = _tags_list_str(ah, pva->map->pv);
log_debug_alloc("%s allocation area %" PRIu32 " %s %s start PE %" PRIu32
- " length %" PRIu32 " leaving %" PRIu32 ".",
+ " length %" PRIu32 " leaving %" PRIu32 "%s%s.",
area_used->pva ? "Changing " : "Considering",
ix_pva, area_used->pva ? "to" : "as",
- dev_name(pva->map->pv->dev), pva->start, required, unreserved);
+ dev_name(pva->map->pv->dev), pva->start, required, unreserved,
+ pv_tag_list ? " with PV tags: " : "",
+ pv_tag_list ? : "");
+
+ if (pv_tag_list)
+ dm_pool_free(ah->mem, (void *)pv_tag_list);
area_used->pva = pva;
area_used->used = required;
}
-static int _reserve_required_area(struct alloc_state *alloc_state, struct pv_area *pva, uint32_t required,
- uint32_t ix_pva, uint32_t unreserved)
+static int _reserve_required_area(struct alloc_handle *ah, struct alloc_state *alloc_state, struct pv_area *pva,
+ uint32_t required, uint32_t ix_pva, uint32_t unreserved)
{
uint32_t s;
@@ -2368,7 +2690,7 @@ static int _reserve_required_area(struct alloc_state *alloc_state, struct pv_are
alloc_state->areas[s].pva = NULL;
}
- _reserve_area(alloc_state, pva, required, ix_pva, unreserved);
+ _reserve_area(ah, alloc_state, pva, required, ix_pva, unreserved);
return 1;
}
@@ -2386,6 +2708,10 @@ static int _is_condition(struct cmd_context *cmd __attribute__((unused)),
if (!pvmatch->condition(pvmatch, pvseg, pvmatch->pva))
return 1; /* Continue */
+ if (positional && (s >= pvmatch->alloc_state->num_positional_areas))
+ return 1;
+
+ /* FIXME The previous test should make this one redundant. */
if (positional && (s >= pvmatch->alloc_state->areas_size))
return 1;
@@ -2394,7 +2720,7 @@ static int _is_condition(struct cmd_context *cmd __attribute__((unused)),
* so it's safe to say all the available space is used.
*/
if (positional)
- _reserve_required_area(pvmatch->alloc_state, pvmatch->pva, pvmatch->pva->count, s, 0);
+ _reserve_required_area(pvmatch->ah, pvmatch->alloc_state, pvmatch->pva, pvmatch->pva->count, s, 0);
return 2; /* Finished */
}
@@ -2411,6 +2737,7 @@ static int _check_cling(struct alloc_handle *ah,
int r;
uint32_t le, len;
+ pvmatch.ah = ah;
pvmatch.condition = cling_tag_list_cn ? _has_matching_pv_tag : _is_same_pv;
pvmatch.alloc_state = alloc_state;
pvmatch.pva = pva;
@@ -2441,20 +2768,21 @@ static int _check_cling(struct alloc_handle *ah,
/*
* Is pva contiguous to any existing areas or on the same PV?
*/
-static int _check_contiguous(struct cmd_context *cmd,
+static int _check_contiguous(struct alloc_handle *ah,
struct lv_segment *prev_lvseg, struct pv_area *pva,
struct alloc_state *alloc_state)
{
struct pv_match pvmatch;
int r;
+ pvmatch.ah = ah;
pvmatch.condition = _is_contiguous;
pvmatch.alloc_state = alloc_state;
pvmatch.pva = pva;
pvmatch.cling_tag_list_cn = NULL;
/* FIXME Cope with stacks by flattening */
- if (!(r = _for_each_pv(cmd, prev_lvseg->lv,
+ if (!(r = _for_each_pv(ah->cmd, prev_lvseg->lv,
prev_lvseg->le + prev_lvseg->len - 1, 1, NULL, NULL,
0, 0, -1, 1,
_is_condition, &pvmatch)))
@@ -2490,7 +2818,7 @@ static int _check_cling_to_alloced(struct alloc_handle *ah, const struct dm_conf
if ((!cling_tag_list_cn && (pva->map->pv == aa[0].pv)) ||
(cling_tag_list_cn && _pvs_have_matching_tag(cling_tag_list_cn, pva->map->pv, aa[0].pv))) {
if (positional)
- _reserve_required_area(alloc_state, pva, pva->count, s, 0);
+ _reserve_required_area(ah, alloc_state, pva, pva->count, s, 0);
return 1;
}
}
@@ -2539,7 +2867,7 @@ static area_use_t _check_pva(struct alloc_handle *ah, struct pv_area *pva, uint3
/* Contiguous? */
if (((alloc_parms->flags & A_CONTIGUOUS_TO_LVSEG) ||
(ah->maximise_cling && (alloc_parms->flags & A_AREA_COUNT_MATCHES))) &&
- _check_contiguous(ah->cmd, alloc_parms->prev_lvseg, pva, alloc_state))
+ _check_contiguous(ah, alloc_parms->prev_lvseg, pva, alloc_state))
goto found;
/* Try next area on same PV if looking for contiguous space */
@@ -2593,7 +2921,12 @@ found:
*/
static uint32_t _calc_required_extents(struct alloc_handle *ah, struct pv_area *pva, unsigned ix_pva, uint32_t max_to_allocate, alloc_policy_t alloc)
{
+#if 1
+ uint32_t required = raid_rimage_extents(ah->segtype, max_to_allocate, ah->area_count, ah->data_copies);
+#else
uint32_t required = max_to_allocate / ah->area_multiple;
+#endif
+PFLA("segtype=%s max_to_allocate=%u area_count=%u data_copies=%u reuired=%u required=%u", ah->segtype->name, max_to_allocate, ah->area_count, ah->data_copies, required, max_to_allocate / ah->area_multiple);
/*
* Update amount unreserved - effectively splitting an area
@@ -2621,6 +2954,8 @@ static void _clear_areas(struct alloc_state *alloc_state)
{
uint32_t s;
+ alloc_state->num_positional_areas = 0;
+
for (s = 0; s < alloc_state->areas_size; s++)
alloc_state->areas[s].pva = NULL;
}
@@ -2646,8 +2981,13 @@ static void _report_needed_allocation_space(struct alloc_handle *ah,
uint32_t parallel_areas_count, parallel_area_size;
uint32_t metadata_count, metadata_size;
+#if 1
+ parallel_area_size = raid_rimage_extents(ah->segtype, ah->new_extents - alloc_state->allocated,
+ ah->area_count, ah->data_copies);
+#else
parallel_area_size = ah->new_extents - alloc_state->allocated;
parallel_area_size /= ah->area_multiple;
+#endif
parallel_area_size -= (ah->alloc_and_split_meta && !ah->split_metadata_is_allocated) ? ah->log_len : 0;
parallel_areas_count = ah->area_count + ah->parity_count;
@@ -2663,9 +3003,10 @@ static void _report_needed_allocation_space(struct alloc_handle *ah,
metadata_count = alloc_state->log_area_count_still_needed;
}
- log_debug_alloc("Still need %s%" PRIu32 " total extents from %" PRIu32 " remaining:",
+ log_debug_alloc("Still need %s%" PRIu32 " total extents from %" PRIu32 " remaining (%" PRIu32 " positional slots):",
ah->approx_alloc ? "up to " : "",
- parallel_area_size * parallel_areas_count + metadata_size * metadata_count, pv_maps_size(pvms));
+ parallel_area_size * parallel_areas_count + metadata_size * metadata_count, pv_maps_size(pvms),
+ alloc_state->num_positional_areas);
log_debug_alloc(" %" PRIu32 " (%" PRIu32 " data/%" PRIu32
" parity) parallel areas of %" PRIu32 " extents each",
parallel_areas_count, ah->area_count, ah->parity_count, parallel_area_size);
@@ -2674,6 +3015,38 @@ static void _report_needed_allocation_space(struct alloc_handle *ah,
(metadata_count == 1) ? "" : "s",
metadata_size);
}
+
+/* Work through the array, removing any entries with tags already used by previous areas. */
+static int _limit_to_one_area_per_tag(struct alloc_handle *ah, struct alloc_state *alloc_state,
+ uint32_t ix_log_offset, unsigned *ix)
+{
+ uint32_t s = 0, u = 0;
+ DM_LIST_INIT(pv_tags);
+
+ while (s < alloc_state->areas_size && alloc_state->areas[s].pva) {
+ /* Start again with an empty tag list when we reach the log devices */
+ if (u == ix_log_offset)
+ dm_list_init(&pv_tags);
+ if (!_pv_has_matching_tag(ah->cling_tag_list_cn, alloc_state->areas[s].pva->map->pv, alloc_state->areas[s].pva->start, s, &pv_tags)) {
+ /* The comparison fn will ignore any non-cling tags so just add everything */
+ if (!str_list_add_list(ah->mem, &pv_tags, &alloc_state->areas[s].pva->map->pv->tags))
+ return_0;
+
+ if (s != u)
+ alloc_state->areas[u] = alloc_state->areas[s];
+
+ u++;
+ } else
+ (*ix)--; /* One area removed */
+
+ s++;
+ }
+
+ alloc_state->areas[u].pva = NULL;
+
+ return 1;
+}
+
/*
* Returns 1 regardless of whether any space was found, except on error.
*/
@@ -2688,7 +3061,6 @@ static int _find_some_parallel_space(struct alloc_handle *ah,
struct pv_area *pva;
unsigned preferred_count = 0;
unsigned already_found_one;
- unsigned ix_offset = 0; /* Offset for non-preferred allocations */
unsigned ix_log_offset; /* Offset to start of areas to use for log */
unsigned too_small_for_log_count; /* How many too small for log? */
unsigned iteration_count = 0; /* cling_to_alloced may need 2 iterations */
@@ -2698,27 +3070,28 @@ static int _find_some_parallel_space(struct alloc_handle *ah,
uint32_t devices_needed = ah->area_count + ah->parity_count;
uint32_t required;
- /* ix_offset holds the number of parallel allocations that must be contiguous/cling */
+ _clear_areas(alloc_state);
+ _reset_unreserved(pvms);
+
+ /* num_positional_areas holds the number of parallel allocations that must be contiguous/cling */
+ /* These appear first in the array, so it is also the offset to the non-preferred allocations */
/* At most one of A_CONTIGUOUS_TO_LVSEG, A_CLING_TO_LVSEG or A_CLING_TO_ALLOCED may be set */
if (!(alloc_parms->flags & A_POSITIONAL_FILL))
- ix_offset = 0;
+ alloc_state->num_positional_areas = 0;
else if (alloc_parms->flags & (A_CONTIGUOUS_TO_LVSEG | A_CLING_TO_LVSEG))
- ix_offset = _stripes_per_mimage(alloc_parms->prev_lvseg) * alloc_parms->prev_lvseg->area_count;
+ alloc_state->num_positional_areas = _stripes_per_mimage(alloc_parms->prev_lvseg) * alloc_parms->prev_lvseg->area_count;
else if (alloc_parms->flags & A_CLING_TO_ALLOCED)
- ix_offset = ah->area_count;
+ alloc_state->num_positional_areas = ah->area_count;
if (alloc_parms->alloc == ALLOC_NORMAL || (alloc_parms->flags & A_CLING_TO_ALLOCED))
log_debug_alloc("Cling_to_allocated is %sset",
alloc_parms->flags & A_CLING_TO_ALLOCED ? "" : "not ");
if (alloc_parms->flags & A_POSITIONAL_FILL)
- log_debug_alloc("%u preferred area(s) to be filled positionally.", ix_offset);
+ log_debug_alloc("%u preferred area(s) to be filled positionally.", alloc_state->num_positional_areas);
else
log_debug_alloc("Areas to be sorted and filled sequentially.");
- _clear_areas(alloc_state);
- _reset_unreserved(pvms);
-
_report_needed_allocation_space(ah, alloc_state, pvms);
/* ix holds the number of areas found on other PVs */
@@ -2726,7 +3099,7 @@ static int _find_some_parallel_space(struct alloc_handle *ah,
if (log_iteration_count) {
log_debug_alloc("Found %u areas for %" PRIu32 " parallel areas and %" PRIu32 " log areas so far.", ix, devices_needed, alloc_state->log_area_count_still_needed);
} else if (iteration_count)
- log_debug_alloc("Filled %u out of %u preferred areas so far.", preferred_count, ix_offset);
+ log_debug_alloc("Filled %u out of %u preferred areas so far.", preferred_count, alloc_state->num_positional_areas);
/*
* Provide for escape from the loop if no progress is made.
@@ -2767,7 +3140,7 @@ static int _find_some_parallel_space(struct alloc_handle *ah,
* not enough for the logs.
*/
if (log_iteration_count) {
- for (s = devices_needed; s < ix + ix_offset; s++)
+ for (s = devices_needed; s < ix + alloc_state->num_positional_areas; s++)
if (alloc_state->areas[s].pva && alloc_state->areas[s].pva->map->pv == pvm->pv)
goto next_pv;
/* On a second pass, avoid PVs already used in an uncommitted area */
@@ -2815,8 +3188,8 @@ static int _find_some_parallel_space(struct alloc_handle *ah,
}
/* Reserve required amount of pva */
- required = _calc_required_extents(ah, pva, ix + ix_offset - 1, max_to_allocate, alloc_parms->alloc);
- if (!_reserve_required_area(alloc_state, pva, required, ix + ix_offset - 1, pva->unreserved))
+ required = _calc_required_extents(ah, pva, ix + alloc_state->num_positional_areas - 1, max_to_allocate, alloc_parms->alloc);
+ if (!_reserve_required_area(ah, alloc_state, pva, required, ix + alloc_state->num_positional_areas - 1, pva->unreserved))
return_0;
}
@@ -2827,23 +3200,23 @@ static int _find_some_parallel_space(struct alloc_handle *ah,
/* With cling and contiguous we stop if we found a match for *all* the areas */
/* FIXME Rename these variables! */
if ((alloc_parms->alloc == ALLOC_ANYWHERE &&
- ix + ix_offset >= devices_needed + alloc_state->log_area_count_still_needed) ||
- (preferred_count == ix_offset &&
- (ix_offset == devices_needed + alloc_state->log_area_count_still_needed)))
+ ix + alloc_state->num_positional_areas >= devices_needed + alloc_state->log_area_count_still_needed) ||
+ (preferred_count == alloc_state->num_positional_areas &&
+ (alloc_state->num_positional_areas == devices_needed + alloc_state->log_area_count_still_needed)))
break;
}
} while ((alloc_parms->alloc == ALLOC_ANYWHERE && last_ix != ix && ix < devices_needed + alloc_state->log_area_count_still_needed) ||
/* With cling_to_alloced and normal, if there were gaps in the preferred areas, have a second iteration */
(alloc_parms->alloc == ALLOC_NORMAL && preferred_count &&
- (preferred_count < ix_offset || alloc_state->log_area_count_still_needed) &&
+ (preferred_count < alloc_state->num_positional_areas || alloc_state->log_area_count_still_needed) &&
(alloc_parms->flags & A_CLING_TO_ALLOCED) && !iteration_count++) ||
/* Extra iteration needed to fill log areas on PVs already used? */
- (alloc_parms->alloc == ALLOC_NORMAL && preferred_count == ix_offset && !ah->mirror_logs_separate &&
+ (alloc_parms->alloc == ALLOC_NORMAL && preferred_count == alloc_state->num_positional_areas && !ah->mirror_logs_separate &&
(ix + preferred_count >= devices_needed) &&
(ix + preferred_count < devices_needed + alloc_state->log_area_count_still_needed) && !log_iteration_count++));
/* Non-zero ix means at least one USE_AREA was returned */
- if (preferred_count < ix_offset && !(alloc_parms->flags & A_CLING_TO_ALLOCED) && !ix)
+ if (preferred_count < alloc_state->num_positional_areas && !(alloc_parms->flags & A_CLING_TO_ALLOCED) && !ix)
return 1;
if (ix + preferred_count < devices_needed + alloc_state->log_area_count_still_needed)
@@ -2858,17 +3231,17 @@ static int _find_some_parallel_space(struct alloc_handle *ah,
}
} else if (ix > 1) {
log_debug_alloc("Sorting %u areas", ix);
- qsort(alloc_state->areas + ix_offset, ix, sizeof(*alloc_state->areas),
+ qsort(alloc_state->areas + alloc_state->num_positional_areas, ix, sizeof(*alloc_state->areas),
_comp_area);
}
- /* If there are gaps in our preferred areas, fill then from the sorted part of the array */
- if (preferred_count && preferred_count != ix_offset) {
+ /* If there are gaps in our preferred areas, fill them from the sorted part of the array */
+ if (preferred_count && preferred_count != alloc_state->num_positional_areas) {
for (s = 0; s < devices_needed; s++)
if (!alloc_state->areas[s].pva) {
- alloc_state->areas[s].pva = alloc_state->areas[ix_offset].pva;
- alloc_state->areas[s].used = alloc_state->areas[ix_offset].used;
- alloc_state->areas[ix_offset++].pva = NULL;
+ alloc_state->areas[s].pva = alloc_state->areas[alloc_state->num_positional_areas].pva;
+ alloc_state->areas[s].used = alloc_state->areas[alloc_state->num_positional_areas].used;
+ alloc_state->areas[alloc_state->num_positional_areas++].pva = NULL;
}
}
@@ -2882,19 +3255,56 @@ static int _find_some_parallel_space(struct alloc_handle *ah,
/* FIXME This logic is due to its heritage and can be simplified! */
if (alloc_state->log_area_count_still_needed) {
/* How many areas are too small for the log? */
- while (too_small_for_log_count < ix_offset + ix &&
- (*(alloc_state->areas + ix_offset + ix - 1 -
+ while (too_small_for_log_count < alloc_state->num_positional_areas + ix &&
+ (*(alloc_state->areas + alloc_state->num_positional_areas + ix - 1 -
too_small_for_log_count)).used < ah->log_len)
too_small_for_log_count++;
- ix_log_offset = ix_offset + ix - too_small_for_log_count - ah->log_area_count;
+ ix_log_offset = alloc_state->num_positional_areas + ix - too_small_for_log_count - ah->log_area_count;
}
- if (ix + ix_offset < devices_needed +
+ if (ix + alloc_state->num_positional_areas < devices_needed +
(alloc_state->log_area_count_still_needed ? alloc_state->log_area_count_still_needed +
too_small_for_log_count : 0))
return 1;
/*
+ * FIXME We should change the code to do separate calls for the log allocation
+ * and the data allocation so that _limit_to_one_area_per_tag doesn't have to guess
+ * where the split is going to occur.
+ */
+
+ /*
+ * This code covers the initial allocation - after that there is something to 'cling' to
+ * and we shouldn't get this far.
+ * alloc_state->num_positional_areas is assumed to be 0 with A_PARTITION_BY_TAGS.
+ *
+ * FIXME Consider a second attempt with A_PARTITION_BY_TAGS if, for example, the largest area
+ * had all the tags set, but other areas don't.
+ */
+ if ((alloc_parms->flags & A_PARTITION_BY_TAGS) && !alloc_state->num_positional_areas) {
+ if (!_limit_to_one_area_per_tag(ah, alloc_state, ix_log_offset, &ix))
+ return_0;
+
+ /* Recalculate log position because we might have removed some areas from consideration */
+ if (alloc_state->log_area_count_still_needed) {
+ /* How many areas are too small for the log? */
+ too_small_for_log_count = 0;
+ while (too_small_for_log_count < ix &&
+ (*(alloc_state->areas + ix - 1 - too_small_for_log_count)).pva &&
+ (*(alloc_state->areas + ix - 1 - too_small_for_log_count)).used < ah->log_len)
+ too_small_for_log_count++;
+ if (ix < too_small_for_log_count + ah->log_area_count)
+ return 1;
+ ix_log_offset = ix - too_small_for_log_count - ah->log_area_count;
+ }
+
+ if (ix < devices_needed +
+ (alloc_state->log_area_count_still_needed ? alloc_state->log_area_count_still_needed +
+ too_small_for_log_count : 0))
+ return 1;
+ }
+
+ /*
* Finally add the space identified to the list of areas to be used.
*/
if (!_alloc_parallel_area(ah, max_to_allocate, alloc_state, ix_log_offset))
@@ -2936,7 +3346,12 @@ PFLA("max_to_allocate=%u alloc_parms->extents_still_needed=%u alloc_state->alloc
* the maximum we can allocate in one go accordingly.
*/
if (ah->parallel_areas) {
+#if 0
+ next_le = (alloc_parms->prev_lvseg ? alloc_parms->prev_lvseg->le + alloc_parms->prev_lvseg->len : 0) +
+ raid_rimage_extents(ah->segtype, alloc_state->allocated, ah->area_count, ah->data_copies);
+#else
next_le = (alloc_parms->prev_lvseg ? alloc_parms->prev_lvseg->le + alloc_parms->prev_lvseg->len : 0) + alloc_state->allocated / ah->area_multiple;
+#endif
dm_list_iterate_items(spvs, ah->parallel_areas) {
if (next_le >= spvs->le + spvs->len)
continue;
@@ -3005,7 +3420,7 @@ static int _allocate(struct alloc_handle *ah,
unsigned can_split,
struct dm_list *allocatable_pvs)
{
- uint32_t old_allocated;
+ uint32_t old_allocated, extents_still_needed, rimage_extents;
struct lv_segment *prev_lvseg = NULL;
int r = 0;
struct dm_list *pvms;
@@ -3020,14 +3435,29 @@ static int _allocate(struct alloc_handle *ah,
return 1;
}
- if (ah->area_multiple_check &&
- ah->area_multiple > 1 &&
+#if 1
+ extents_still_needed = ah->new_extents - alloc_state.allocated;
+ rimage_extents = raid_rimage_extents(ah->segtype, extents_still_needed, ah->area_count, 1);
+
+PFLA("ah->new_extents=%u extents_still_needed=%u rimage_extents=%u", ah->new_extents, ah->new_extents - alloc_state.allocated, rimage_extents);
+ if (extents_still_needed > rimage_extents &&
+ (extents_still_needed % rimage_extents)) {
+ log_error("Number of extents requested (%u) needs to be divisible by %d.",
+ ah->new_extents - alloc_state.allocated,
+ rimage_extents);
+ return 0;
+ }
+#else
+ if (ah->area_multiple > 1 &&
(ah->new_extents - alloc_state.allocated) % ah->area_multiple) {
log_error("Number of extents requested (%u) needs to be divisible by %d.",
ah->new_extents - alloc_state.allocated,
ah->area_multiple);
return 0;
}
+#endif
+
+PFLA("ah->new_extents=%u", ah->new_extents);
alloc_state.log_area_count_still_needed = ah->log_area_count;
@@ -3101,14 +3531,23 @@ PFLA("ah->new_extents=%u", ah->new_extents);
break;
}
+PFLA("alloc_stats.allocated=%u ah->new_extents=%u", alloc_state.allocated, ah->new_extents);
if (alloc_state.allocated != ah->new_extents) {
+#if 1
+ rimage_extents = raid_total_extents(ah->segtype, ah->new_extents - alloc_state.allocated,
+ ah->area_count, ah->data_copies);
+#endif
if (!ah->approx_alloc) {
log_error("Insufficient suitable %sallocatable extents "
"for logical volume %s: %u more required",
can_split ? "" : "contiguous ",
lv ? lv->name : "",
+#if 1
+ rimage_extents);
+#else
(ah->new_extents - alloc_state.allocated) *
ah->area_count / ah->area_multiple);
+#endif
goto out;
}
if (!alloc_state.allocated) {
@@ -3123,7 +3562,11 @@ PFLA("ah->new_extents=%u", ah->new_extents);
can_split ? "" : "contiguous ",
lv ? lv->name : "",
alloc_state.allocated,
+#if 1
+ rimage_extents);
+#else
(ah->new_extents - alloc_state.allocated) * ah->area_count / ah->area_multiple);
+#endif
ah->new_extents = alloc_state.allocated;
}
@@ -3153,7 +3596,7 @@ int lv_add_virtual_segment(struct logical_volume *lv, uint64_t status,
} else {
if (!(seg = alloc_lv_segment(segtype, lv, lv->le_count, extents, 0,
status, 0, NULL, 0,
- extents, 0, 0, 0, NULL))) {
+ extents, 1, 0, 0, 0, NULL))) {
log_error("Couldn't allocate new %s segment.", segtype->name);
return 0;
}
@@ -3167,6 +3610,7 @@ int lv_add_virtual_segment(struct logical_volume *lv, uint64_t status,
return 1;
}
+
/*
* Entry point for all extent allocations.
*/
@@ -3180,8 +3624,17 @@ struct alloc_handle *allocate_extents(struct volume_group *vg,
alloc_policy_t alloc, int approx_alloc,
struct dm_list *parallel_areas)
{
- int extend = lv ? 1 : 0;
struct alloc_handle *ah;
+ uint32_t areas;
+
+ if (lv &&
+ first_seg(lv))
+ areas = first_seg(lv)->area_count;
+ else
+ areas = 0;
+
+PFLA("segtype=%s stripes=%u mirrors=%u, log_count=%u, region_size=%u extents=%u",
+ segtype->name, stripes, mirrors, log_count, region_size, extents);
if (segtype_is_virtual(segtype)) {
log_error("allocate_extents does not handle virtual segments");
@@ -3206,8 +3659,9 @@ struct alloc_handle *allocate_extents(struct volume_group *vg,
if (alloc >= ALLOC_INHERIT)
alloc = vg->alloc;
- if (!(ah = _alloc_init(vg->cmd, vg->vgmem, segtype, alloc, approx_alloc, extend,
- lv ? lv->le_count : 0, extents, mirrors, stripes, log_count,
+ if (!(ah = _alloc_init(vg->cmd, vg->vgmem, segtype, alloc, approx_alloc,
+ lv ? lv->le_count : 0, extents, areas,
+ mirrors, stripes, log_count,
vg->extent_size, region_size,
parallel_areas)))
return_NULL;
@@ -3224,7 +3678,9 @@ struct alloc_handle *allocate_extents(struct volume_group *vg,
* Add new segments to an LV from supplied list of areas.
*/
int lv_add_segment(struct alloc_handle *ah,
- uint32_t first_area, uint32_t num_areas,
+ uint32_t first_area,
+ uint32_t num_areas,
+ uint32_t data_copies,
struct logical_volume *lv,
const struct segment_type *segtype,
uint32_t stripe_size,
@@ -3247,7 +3703,7 @@ int lv_add_segment(struct alloc_handle *ah,
}
if (!_setup_alloced_segments(lv, &ah->alloced_areas[first_area],
- num_areas, status,
+ num_areas, data_copies, status,
stripe_size, segtype, region_size))
return_0;
@@ -3287,12 +3743,12 @@ static struct lv_segment *_convert_seg_to_mirror(struct lv_segment *seg,
return NULL;
}
- if (!(newseg = alloc_lv_segment(get_segtype_from_string(seg->lv->vg->cmd, "mirror"),
+ if (!(newseg = alloc_lv_segment(get_segtype_from_string(seg->lv->vg->cmd, SEG_TYPE_NAME_MIRROR),
seg->lv, seg->le, seg->len, 0,
seg->status, seg->stripe_size,
log_lv,
seg->area_count, seg->area_len,
- seg->chunk_size, region_size,
+ seg->data_copies, seg->chunk_size, region_size,
seg->extents_copied, NULL))) {
log_error("Couldn't allocate converted LV segment");
return NULL;
@@ -3392,7 +3848,7 @@ int lv_add_segmented_mirror_image(struct alloc_handle *ah,
if (!(new_seg = alloc_lv_segment(segtype, copy_lv,
seg->le, seg->len, 0, PVMOVE, 0,
- NULL, 1, seg->len,
+ NULL, 1, seg->len, 1,
0, 0, 0, NULL)))
return_0;
@@ -3494,7 +3950,7 @@ int lv_add_mirror_lvs(struct logical_volume *lv,
return 0;
}
- mirror_segtype = get_segtype_from_string(lv->vg->cmd, "mirror");
+ mirror_segtype = get_segtype_from_string(lv->vg->cmd, SEG_TYPE_NAME_MIRROR);
if (seg->segtype != mirror_segtype)
if (!(seg = _convert_seg_to_mirror(seg, region_size, NULL)))
return_0;
@@ -3552,16 +4008,16 @@ int lv_add_log_segment(struct alloc_handle *ah, uint32_t first_area,
struct logical_volume *log_lv, uint64_t status)
{
- return lv_add_segment(ah, ah->area_count + first_area, 1, log_lv,
- get_segtype_from_string(log_lv->vg->cmd,
- "striped"),
+ return lv_add_segment(ah, ah->area_count + first_area, 1, 1 /* data_copies */,
+ log_lv, get_segtype_from_string(log_lv->vg->cmd,
+ "striped"),
0, status, 0);
}
-static int _lv_insert_empty_sublvs(struct logical_volume *lv,
- const struct segment_type *segtype,
- uint32_t stripe_size, uint32_t region_size,
- uint32_t devices)
+static int _lv_insert_empty_data_sublvs(struct logical_volume *lv,
+ const struct segment_type *segtype,
+ uint32_t stripe_size, uint32_t region_size,
+ uint32_t devices)
{
struct logical_volume *sub_lv;
uint32_t i;
@@ -3593,7 +4049,7 @@ static int _lv_insert_empty_sublvs(struct logical_volume *lv,
*/
if (!(mapseg = alloc_lv_segment(segtype, lv, 0, 0, 0, lv->status,
stripe_size, NULL,
- devices, 0, 0, region_size, 0, NULL))) {
+ devices, 0, 1, 0, region_size, 0, NULL))) {
log_error("Failed to create mapping segment for %s", lv->name);
return 0;
}
@@ -3621,37 +4077,86 @@ static int _lv_insert_empty_sublvs(struct logical_volume *lv,
if (!set_lv_segment_area_lv(mapseg, i, sub_lv, 0, sub_lv_status))
return_0;
+ }
- /* Metadata LVs for raid */
- /* HM FIXME raid0 optionally w/o rmeta */
- if (segtype_is_raid(segtype) && !segtype_is_raid0(segtype)) {
- if (dm_snprintf(img_name, len, "%s_rmeta_%u", lv->name, i) < 0)
- return_0;
- } else
- continue;
+ dm_list_add(&lv->segments, &mapseg->list);
- /* FIXME Should use ALLOC_INHERIT here and inherit from parent LV */
- if (!(sub_lv = lv_create_empty(img_name, NULL,
- LVM_READ | LVM_WRITE,
- lv->alloc, lv->vg)))
- return_0;
+ return 1;
+}
- if (!set_lv_segment_area_lv(mapseg, i, sub_lv, 0, RAID_META))
- return_0;
+#if 1
+/* Wipe first sector of metadata LV @meta_lv */
+static int _clear_raid_meta_lv(struct logical_volume *meta_lv)
+{
+ uint64_t offset;
+ struct volume_group *vg = meta_lv->vg;
+ struct lv_segment *seg = first_seg(meta_lv);
+ struct physical_volume *pv;
+
+ if (seg->area_count != 1 ||
+ seg_type(seg, 0) != AREA_PV)
+ return 0;
+
+ pv = seg_pv(seg, 0);
+ offset = (pv->pe_start + seg_pe(seg, 0) * vg->extent_size) << 9;
+
+ /*
+ * Rather than wiping meta_lv->size, we can simply
+ * wipe '1' to remove the superblock of any previous
+ * RAID devices. It is much quicker.
+ */
+ log_verbose("Clearing metadata area of %s/%s", vg->name, meta_lv->name);
+ lv_set_hidden(meta_lv);
+ return dev_set(pv->dev, offset, 4096, 0);
+}
+#endif
+
+/* Wipe first sector of metadata LV @meta_lv */
+static int _clear_meta_lv(struct logical_volume *meta_lv)
+{
+ struct volume_group *vg = meta_lv->vg;
+#if 1
+ struct lv_segment *seg = first_seg(meta_lv);
+
+ /* If this is a segment with a single AREA_PV -> avoid wiping overhead */
+ if (seg->area_count == 1 && seg_type(seg, 0) == AREA_PV)
+ return _clear_raid_meta_lv(meta_lv);
+#endif
+
+ /* For clearing, simply activate locally */
+ if (!activate_lv_local(vg->cmd, meta_lv)) {
+ log_error("Failed to activate %s/%s for clearing", vg->name, meta_lv->name);
+ return 0;
}
- dm_list_add(&lv->segments, &mapseg->list);
+ /*
+ * Rather than wiping meta_lv->size, we can simply
+ * wipe '1' to remove the superblock of any previous
+ * RAID devices. It is much quicker.
+ */
+ log_verbose("Clearing metadata area of %s/%s", vg->name, meta_lv->name);
+ if (!wipe_lv(meta_lv, (struct wipe_params) { .do_zero = 1, .zero_sectors = 1 })) {
+ log_error("Failed to zero %s/%s", vg->name, meta_lv->name);
+ return 0;
+ }
+
+ if (!deactivate_lv(vg->cmd, meta_lv)) {
+ log_error("Failed to deactivate %s/%s", vg->name, meta_lv->name);
+ return 0;
+ }
+
+ lv_set_hidden(meta_lv);
return 1;
}
-/* Wipe first sector of all metadata LVs of @lv) */
+#if 0
+/* Wipe first sector of all metadata LVs of @lv */
static int _clear_metadata(struct logical_volume *lv)
{
unsigned s;
struct lv_segment *seg = first_seg(lv);
struct logical_volume *meta_lv;
- struct volume_group *vg = lv->vg;
/* Should be ensured by caller, but.. */
if (!seg->meta_areas)
@@ -3665,55 +4170,190 @@ static int _clear_metadata(struct logical_volume *lv)
continue;
}
- /* For clearing, simply activate locally */
- if (!activate_lv_local(vg->cmd, meta_lv)) {
- log_error("Failed to activate %s/%s for clearing", vg->name, meta_lv->name);
+ if (!_clear_meta_lv(meta_lv))
return 0;
- }
+ }
- log_verbose("Clearing metadata area of %s/%s", vg->name, meta_lv->name);
+ return 1;
+}
+#endif
- /*
- * Rather than wiping meta_lv->size, we can simply
- * wipe '1' to remove the superblock of any previous
- * RAID devices. It is much quicker.
- */
- if (!wipe_lv(meta_lv, (struct wipe_params) { .do_zero = 1, .zero_sectors = 1 })) {
- log_error("Failed to zero %s/%s", vg->name, meta_lv->name);
+/*
+ * Create @sub_lv_count for @stripes raid metadata images from allocation
+ * handle @ah, commit and clear them passing them back listed on @meta_lvs
+ */
+static int _lv_create_and_clear_metadata_lvs(struct logical_volume *lv,
+ uint32_t extents,
+ uint32_t sub_lv_count,
+ uint32_t stripes,
+ struct alloc_handle *ah,
+ struct dm_list *meta_lvs)
+{
+ uint32_t fa, s;
+ size_t len = strlen(lv->name) + 32;
+ char img_name[len];
+ struct logical_volume *meta_lv;
+ struct segment_type *striped_segtype;
+ struct lv_list *lv_list, *lvl;
+
+ /* Create and wipe metadata devices first if any (raid0 w/o) */
+ if (!(striped_segtype = get_segtype_from_string(lv->vg->cmd, "striped")))
+ return_0;
+
+ if (!(lv_list = dm_pool_alloc(lv->vg->vgmem, sub_lv_count * sizeof(*lv_list)))) {
+ log_error("Failed to allocate space metadata LV list");
+ return 0;
+ }
+
+ for (fa = s = 0; s < sub_lv_count; s++) {
+ if (dm_snprintf(img_name, len, "%s_rmeta_%u", lv->name, s) < 0)
+ return_0;
+
+ if (!(meta_lv = lv_create_empty(img_name, NULL, LVM_READ | LVM_WRITE,
+ lv->alloc, lv->vg)))
+ return_0;
+
+ lv_set_visible(meta_lv);
+
+ if (!lv_add_segment(ah, s + sub_lv_count,
+ 1, 1 /* data_copies */,
+ meta_lv, striped_segtype, 0,
+ meta_lv->status, 0)) {
+ log_error("Failed to extend %s", display_lvname(meta_lv));
return 0;
}
- if (!deactivate_lv(vg->cmd, meta_lv)) {
- log_error("Failed to deactivate %s/%s", vg->name, meta_lv->name);
+ lv_list[s].lv = meta_lv;
+ dm_list_add(meta_lvs, &lv_list[s].list);
+
+ fa += stripes;
+ }
+
+#if 1
+ if (first_seg(lv)) {
+ log_error(INTERNAL_ERROR "Called for %s with segment", display_lvname(lv));
+ return 0;
+ }
+
+ lv->status &= ~(MIRROR|MIRRORED|RAID|RAID_IMAGE|RAID_META|PVMOVE|LOCKED);
+ if (!lv_add_virtual_segment(lv, 0, extents, get_segtype_from_string(lv->vg->cmd, "error")))
+ return_0;
+#else
+ if (!replace_lv_with_error_segment(lv))
+ return_0;
+#endif
+
+#if 1
+ /*
+ * If we have a non-linear metadata LV we need to write+commit the vg metadata,
+ * because _clear_metadata() will activate the respective metadata LV(s).
+ * It can cope with mixtures of linear and non-linear metadata LVs,
+ * thus optimizing wiping of the former.
+ */
+ dm_list_iterate_items(lvl, meta_lvs) {
+ struct lv_segment *seg = first_seg(lvl->lv);
+
+ if (seg->area_count != 1 ||
+ seg_type(seg, 0) != AREA_PV) {
+ /* Write and commit vg with @lv with one error segment and the sub_lv_count metadata LVs */
+ if (!vg_write(lv->vg) || !vg_commit(lv->vg))
+ return_0;
+
+ break;
+ }
+ }
+#else
+ /* Write and commit vg with @lv with one error segment and the sub_lv_count metadata LVs */
+ if (!vg_write(lv->vg) || !vg_commit(lv->vg))
+ return_0;
+#endif
+
+ /*
+ * We must clear the metadata areas only upon creation.
+ */
+ dm_list_iterate_items(lvl, meta_lvs)
+ if (!_clear_meta_lv(lvl->lv))
return 0;
+
+ return lv_empty(lv);
+}
+
+/*
+ * The MD bitmap is limited to being able to track 2^21 regions.
+ * The region_size must be adjusted to meet that criteria.
+ *
+ * the "raid0" personality does not utilize a bitmap.
+ */
+static uint64_t max_raid_bitmap_entries = 1 << 21;
+static inline uint64_t _max_lv_size_for_region_size(uint32_t region_size)
+{
+ return max_raid_bitmap_entries * region_size;
+}
+
+static void _adjust_region_size(struct logical_volume *lv,
+ const struct segment_type *segtype,
+ uint64_t lv_size,
+ uint32_t *region_size)
+{
+ /*
+ * The MD bitmap is limited to being able to track 2^21 regions.
+ * The region_size must be adjusted to meet that criteria.
+ */
+ if (segtype_is_raid(segtype) && !segtype_is_any_raid0(segtype)) {
+ int adjusted = 0;
+ uint32_t prev_region_size;
+ uint64_t min_region_size = lv_size / max_raid_bitmap_entries;
+
+ if (!*region_size)
+ *region_size = get_default_region_size(lv->vg->cmd);
+
+ prev_region_size = *region_size;
+
+ /* HM FIXME: make it larger than just to suit the LV size? */
+ while (*region_size < min_region_size) {
+ *region_size *= 2;
+ adjusted = 1;
}
- lv_set_hidden(meta_lv);
+ if (adjusted) {
+ log_print_unless_silent("Adjusting RAID region_size from %s to %s"
+ " to support requested LV size of %s.",
+ display_size(lv->vg->cmd, prev_region_size),
+ display_size(lv->vg->cmd, *region_size),
+ display_size(lv->vg->cmd, lv_size));
+ if (!lv->size) {
+ log_print_unless_silent("If you want to grow your LV past the"
+ " possible maximum of %s later,",
+ display_size(lv->vg->cmd, _max_lv_size_for_region_size(*region_size)));
+ log_print_unless_silent("please request an even larger region size (lvcreate -R ...)");
+ }
+ }
}
-
- return 1;
}
static int _lv_extend_layered_lv(struct alloc_handle *ah,
struct logical_volume *lv,
uint32_t extents, uint32_t first_area,
- uint32_t stripes, uint32_t stripe_size)
+ uint32_t mirrors,
+ uint32_t stripes, uint32_t stripe_size,
+ struct dm_list *meta_lvs)
{
- const struct segment_type *segtype;
+ uint32_t fa, s;
+ uint32_t old_extents = lv->le_count;
+ const struct segment_type *striped_segtype;
struct logical_volume *sub_lv, *meta_lv;
struct lv_segment *seg = first_seg(lv);
- uint32_t fa, s;
- int clear_metadata = lv->le_count ? 0 : 1;
- if (!(segtype = get_segtype_from_string(lv->vg->cmd, "striped")))
+ if (!(striped_segtype = get_segtype_from_string(lv->vg->cmd, "striped")))
return_0;
-PFLA("extents=%u", extents);
+
+PFLA("lv=%s extents=%u mirrors=%u stripes=%u", display_lvname(lv), extents, mirrors, stripes);
/*
* The component devices of a "striped" LV all go in the same
* LV. However, RAID has an LV for each device - making the
- * 'stripes' and 'stripe_size' parameters meaningless.
+ * 'stripes' and 'stripe_size' parameters meaningless unless raid01.
*/
- if (seg_is_raid(seg)) {
+ if (seg_is_raid(seg) && !seg_is_raid01(seg)) {
stripes = 1;
stripe_size = 0;
}
@@ -3721,88 +4361,60 @@ PFLA("extents=%u", extents);
for (fa = first_area, s = 0; s < seg->area_count; s++) {
if (is_temporary_mirror_layer(seg_lv(seg, s))) {
if (!_lv_extend_layered_lv(ah, seg_lv(seg, s), extents,
- fa, stripes, stripe_size))
+ fa, mirrors, stripes, stripe_size, meta_lvs))
return_0;
fa += lv_mirror_count(seg_lv(seg, s));
continue;
}
sub_lv = seg_lv(seg, s);
-PFLA("extending %s in %s, stripes=%u", sub_lv->name, lv->name, stripes);
- if (!lv_add_segment(ah, fa, stripes, sub_lv, segtype,
+
+PFLA("extending %s in %s, stripes=%u", display_lvname(sub_lv), lv->name, stripes);
+
+ if (!lv_add_segment(ah, fa, stripes, 1 /* data_copies */,
+ sub_lv, striped_segtype,
stripe_size, sub_lv->status, 0)) {
log_error("Aborting. Failed to extend %s in %s.",
sub_lv->name, lv->name);
return 0;
}
- if (seg_is_raid(seg) &&
- seg->meta_areas &&
- ah->log_len &&
- (meta_lv = seg_metalv(seg, s))) {
- if (!lv_add_segment(ah, fa + seg->area_count, 1,
- meta_lv, segtype, 0,
- meta_lv->status, 0)) {
- log_error("Failed to extend %s in %s.",
- meta_lv->name, lv->name);
- return 0;
- }
+PFLA("last_seg(seg_lv(seg, s))->len=%u last_seg(seg_lv(seg, s))->area_len=%u", last_seg(seg_lv(seg, s))->len, last_seg(seg_lv(seg, s))->area_len);
- if (clear_metadata)
- lv_set_visible(meta_lv);
+ /* Add any pre-allocated and pre-wiped metadata LVs */
+ if (!dm_list_empty(meta_lvs)) {
+ struct lv_list *lvl = (struct lv_list *) dm_list_first(meta_lvs);
+
+ dm_list_del(&lvl->list);
+ meta_lv = lvl->lv;
+
+ if (!set_lv_segment_area_lv(seg, s, meta_lv, 0, RAID_META))
+ return_0;
}
fa += stripes;
}
-
- /* Top-level area_len is equal to len */
- seg = first_seg(lv);
- seg->area_len += extents;
+#if 1
seg->len += extents;
-PFLA("lv->le_count=%u", lv->le_count);
+ seg->area_len = _seg_area_len(seg, seg->len);
+#endif
+PFLA("segtye=%s lv->le_count=%u seg->len=%u seg->area_len=%u", lvseg_name(seg), lv->le_count, seg->len, seg->area_len);
lv->le_count += extents;
- lv->size += (uint64_t) extents * lv->vg->extent_size;
+ lv->size = (uint64_t) lv->le_count * lv->vg->extent_size;
PFLA("lv->le_count=%u", lv->le_count);
- if (!vg_write(lv->vg) || !vg_commit(lv->vg))
- return_0;
-
/*
- * We must clear the metadata areas only upon creation.
+ * The extended image LVs have to be split in #data_copies
+ * and newly allocated @extents/@data_copies extents have
+ * to be added to the splits to suit the raid10_far layout
+ * expecting growth of each stripe zone
*/
- if (clear_metadata &&
- seg->meta_areas &&
- !_clear_metadata(lv))
+ if (old_extents &&
+ seg_is_raid10_far(seg) &&
+ !lv_raid10_far_reorder_segments(lv, extents , 1 /* extend */))
return 0;
- /* Top-level area_len is equal to len */
- // seg->area_len += extents;
- // seg->len += extents;
- // lv->le_count += extents;
- // lv->size += (uint64_t) extents * lv->vg->extent_size;
-
- /*
- * The MD bitmap is limited to being able to track 2^21 regions.
- * The region_size must be adjusted to meet that criteria.
- *
- * the "raid0" personality does not utilize a bitmap.
- */
- if (seg_is_striped_raid(seg)) {
- int adjusted = 0;
-
- /* HM FIXME: make it larger than just to suit the LV size */
- while (seg->region_size < (lv->size / (1 << 21))) {
- seg->region_size *= 2;
- adjusted = 1;
- }
-
- if (adjusted)
- log_very_verbose("Adjusting RAID region_size from %uS to %uS"
- " to support large LV size",
- seg->region_size/2, seg->region_size);
- }
-
return 1;
}
@@ -3825,53 +4437,148 @@ int lv_extend(struct logical_volume *lv,
{
int r = 1;
int log_count = 0;
+ int alloc_mirrors = 1;
+ uint32_t area_count, s;
struct alloc_handle *ah;
uint32_t sub_lv_count;
uint32_t old_extents;
+ uint32_t prev_region_size;
+ uint64_t lv_size;
+ struct lv_segment *seg = last_seg(lv);
- log_very_verbose("Adding segment of type %s to LV %s.", segtype->name, lv->name);
+ if (!lv_raid_in_sync(lv)) {
+ log_error("RAID LV %s has to be in-sync to extend its size!", display_lvname(lv));
+ return 0;
+ }
+
+ if (segtype_is_mirrored(segtype) || segtype_is_raid1(segtype))
+ stripes = 1;
+
+ log_very_verbose("Adding segment of type %s to LV %s.", segtype->name, display_lvname(lv));
+PFLA("extents=%u", extents);
+#if 1
+ /* Check for multi-level stack (e.g. extension of a duplicated LV stack) */
+ if (seg) {
+ int extended = 0;
+
+ for (s = 0; s < seg->area_count; s++) {
+ if (_is_layered_lv(lv, s)) {
+ struct logical_volume *lv1 = seg_lv(seg, s);
+ struct lv_segment *seg1 = last_seg(lv1);
+
+PFLA("recursive seg_lv(seg, %u)=%s extents=%u", s, display_lvname(lv1), extents);
+ if (extents + lv->le_count > lv1->le_count &&
+ !lv_extend(lv1, seg1->segtype, seg1->area_count, seg1->stripe_size, seg1->data_copies, seg1->region_size, extents, allocatable_pvs, alloc, approx_alloc))
+ return_0;
+
+ extended++;
+ }
+ }
+
+ if (extended) {
+ seg->len = _seg_smallest_sub_lv(seg); // + seg->reshape_len;
+ seg->area_len = _seg_area_len(seg, seg->len); // + seg->reshape_len;
+ // seg->len += seg->reshape_len;
+ lv->le_count = seg->len;
+ lv->size = (uint64_t) lv->le_count * lv->vg->extent_size;
+ return 1;
+ }
+#endif
+ /* Caller should ensure... */
+ if (seg_is_striped(seg) ||
+ seg_is_striped_raid(seg)) {
+ area_count = seg->area_count;
+ stripes = area_count - seg->segtype->parity_devs;
+ mirrors = seg->data_copies;
+
+ } else {
+ area_count = seg->data_copies;
+ stripes = 1;
+ mirrors = seg->data_copies;
+ }
+
+ } else
+ area_count = max(stripes + segtype->parity_devs, mirrors);
+
+PFLA("area_count=%u mirrors=%u stripes=%u", area_count, mirrors, stripes);
if (segtype_is_virtual(segtype))
return lv_add_virtual_segment(lv, 0u, extents, segtype);
-PFLA("extents=%u", extents);
- if (segtype_is_pool(segtype) && !lv->le_count) {
+PFLA("extents=%u stripe_size=%u", extents, stripe_size);
+ if (!lv->le_count && segtype_is_pool(segtype)) {
/*
* Thinpool and cache_pool allocations treat the metadata
* device like a mirror log.
*/
/* FIXME Support striped metadata pool */
log_count = 1;
- // } else if (segtype_is_striped(segtype) || (segtype_is_raid(segtype) && !segtype_is_raid1(segtype))) {
- } else if (segtype_is_striped(segtype) || segtype_is_raid(segtype)) {
- extents = _round_to_stripe_boundary(lv, extents, stripes, 1);
-
-PFL();
- /* Make sure metadata LVs are being extended as well */
- if (!(segtype_is_striped(segtype) || segtype_is_raid0(segtype)))
- log_count = (mirrors ?: 1) * stripes + segtype->parity_devs;
+ } else if (segtype_is_striped_raid(segtype)) {
+ /* If only one extent is being requested on a striped raid set, it's reshape space being requested */
+ alloc_mirrors = extents > 1 ? 1 : 0;
+ extents = _round_to_stripe_boundary(lv, extents, stripes, 1 /* extend */);
}
-PFLA("extents=%u mirrors=%u stripes=%u log_count=%u", extents, mirrors, stripes, log_count);
+PFLA("extents=%u segtype=%s mirrors=%u stripes=%u log_count=%u", extents, segtype->name, mirrors, stripes, log_count);
+
+ /* Special creation case for raid01 (mirror(s) on top of raid0 stripes) */
+ if (!lv->le_count && segtype_is_raid01(segtype))
+ return lv_create_raid01(lv, segtype, mirrors, stripes, stripe_size,
+ region_size, extents, allocatable_pvs);
#if 1
/* FIXME log_count should be 1 for mirrors */
if (segtype_is_mirror(segtype))
log_count = 1;
+ else if (segtype_is_raid(segtype) && !segtype_is_raid0(segtype))
+ log_count = (segtype_is_striped_raid(segtype) ? stripes : mirrors) + segtype->parity_devs;
#endif
- if (!(ah = allocate_extents(lv->vg, lv, segtype, stripes, mirrors,
+PFLA("extents=%u mirrors=%u stripes=%u log_count=%u", extents, mirrors, stripes, log_count);
+
+ lv_size = lv->size + (uint64_t) extents * lv->vg->extent_size;
+
+ prev_region_size = region_size;
+ _adjust_region_size(lv, segtype, lv_size, &region_size);
+
+ if (lv->le_count && (region_size != prev_region_size)) {
+ log_error("Can't extend LV %s past maximum of %s; maximum"
+ " of raid bitmap entries exceeded for region size %s",
+ display_lvname(lv),
+ display_size(lv->vg->cmd, _max_lv_size_for_region_size(prev_region_size)),
+ display_size(lv->vg->cmd, region_size));
+ return 0;
+ }
+
+ if (!(ah = allocate_extents(lv->vg, lv, segtype, stripes, alloc_mirrors ? mirrors : 1,
log_count, region_size, extents,
allocatable_pvs, alloc, approx_alloc, NULL)))
return_0;
+ // extents = ah->new_extents - seg->len;
+
+{
+struct alloced_area *aa;
+
+for (s = 0; s < ah->area_count + log_count; s++) {
+dm_list_iterate_items(aa, ah->alloced_areas + s)
+{
+PFLA("%u aa->len=%u", s >= ah->log_area_count ? s - ah->log_area_count : s, aa->len);
+}
+}
+}
+
if (segtype_is_pool(segtype)) {
if (!(r = create_pool(lv, segtype, ah, stripes, stripe_size)))
stack;
} else if (!segtype_is_mirrored(segtype) && !segtype_is_raid(segtype)) {
- if (!(r = lv_add_segment(ah, 0, ah->area_count, lv, segtype,
- stripe_size, 0u, 0)))
+ if (!(r = lv_add_segment(ah, 0, ah->area_count, 1 /* data_copies */,
+ lv, segtype, stripe_size, 0u, 0)))
stack;
} else {
+ struct dm_list meta_lvs;
+
+ dm_list_init(&meta_lvs);
+
/*
* For RAID, all the devices are AREA_LV.
* However, for 'mirror on stripe' using non-RAID targets,
@@ -3879,31 +4586,47 @@ PFLA("extents=%u mirrors=%u stripes=%u log_count=%u", extents, mirrors, stripes,
* are AREA_PV.
*/
if (segtype_is_raid(segtype))
- sub_lv_count = mirrors * stripes + segtype->parity_devs;
+ sub_lv_count = (stripes < 2 ? mirrors : stripes) + segtype->parity_devs;
else
sub_lv_count = mirrors;
old_extents = lv->le_count;
- if (!lv->le_count &&
- !(r = _lv_insert_empty_sublvs(lv, segtype, stripe_size,
- region_size, sub_lv_count))) {
- log_error("Failed to insert layer for %s", lv->name);
- goto out;
- }
+ /*
+ * Create and wipe metadata devices first if any (raid0 w/o)
+ * in order to commit consistent VG metadata doing so
+ */
+ if (!old_extents) {
+ if (segtype_is_raid(segtype) &&
+ !segtype_is_raid0(segtype) &&
+ !_lv_create_and_clear_metadata_lvs(lv, extents, sub_lv_count, stripes, ah, &meta_lvs))
+ return 0;
+
+ if (!(r = _lv_insert_empty_data_sublvs(lv, segtype, stripe_size,
+ region_size, sub_lv_count))) {
+ log_error("Failed to insert layer for %s", lv->name);
+ goto out;
+ }
-PFLA("extents=%u ah->new_extents=%u lv->le_count=%u", extents, ah->new_extents, lv->le_count);
- if (!(r = _lv_extend_layered_lv(ah, lv, extents, 0,
- stripes, stripe_size)))
+ first_seg(lv)->data_copies = mirrors;
+ }
+PFLA("extents=%u ah->new_extents=%u lv->le_count=%u stripes=%u sub_lv_count=%u", extents, ah->new_extents, lv->le_count, stripes, sub_lv_count);
+ if (!(r = _lv_extend_layered_lv(ah, lv, extents, 0, mirrors, stripes,
+ stripe_size, &meta_lvs)))
goto_out;
/*
- * If we are expanding an existing mirror, we can skip the
+ * If we are expanding an existing mirror/raid1, we can skip the
* resync of the extension if the LV is currently in-sync
* and the LV has the LV_NOTSYNCED flag set.
*/
if (old_extents &&
+#if 1
+
+ (segtype_is_mirror(segtype) || segtype_is_raid1(segtype)) &&
+#else
segtype_is_mirrored(segtype) &&
+#endif
(lv->status & LV_NOTSYNCED)) {
dm_percent_t sync_percent = DM_PERCENT_INVALID;
@@ -3988,13 +4711,14 @@ static int _rename_sub_lv(struct logical_volume *lv,
* The suffix follows lv_name_old and includes '_'.
*/
len = strlen(lv_name_old);
+PFLA("lv=%s lv_name_old=%s lv_name_new=%s len=%u", display_lvname(lv), lv_name_old, lv_name_new, len);
if (strncmp(lv->name, lv_name_old, len) || lv->name[len] != '_') {
log_error("Cannot rename \"%s\": name format not recognized "
"for internal LV \"%s\"",
lv_name_old, lv->name);
return 0;
}
- suffix = (char *) lv->name + len;
+ suffix = lv->name + len;
/*
* Compose a new name for sub lv:
@@ -4035,9 +4759,9 @@ static int _rename_cb(struct logical_volume *lv, void *data)
* Loop down sub LVs and call fn for each.
* fn is responsible to log necessary information on failure.
*/
-int for_each_sub_lv(struct logical_volume *lv,
- int (*fn)(struct logical_volume *lv, void *data),
- void *data)
+static int _for_each_sub_lv(struct logical_volume *lv, int skip_pools,
+ int (*fn)(struct logical_volume *lv, void *data),
+ void *data)
{
struct logical_volume *org;
struct lv_segment *seg;
@@ -4052,20 +4776,33 @@ int for_each_sub_lv(struct logical_volume *lv,
dm_list_iterate_items(seg, &lv->segments) {
if (seg->log_lv) {
+PFL();
if (!fn(seg->log_lv, data))
return_0;
if (!for_each_sub_lv(seg->log_lv, fn, data))
return_0;
}
- if (seg->metadata_lv) {
- if (!fn(seg->metadata_lv, data))
- return_0;
- if (!for_each_sub_lv(seg->metadata_lv, fn, data))
- return_0;
+ if (!seg_is_thin(seg) && !seg_is_raid(seg)) {
+ if (seg->metadata_lv) {
+PFL();
+ if (!fn(seg->metadata_lv, data))
+ return_0;
+ if (!for_each_sub_lv(seg->metadata_lv, fn, data))
+ return_0;
+ }
+
+ if (seg->pool_lv && !skip_pools) {
+PFL();
+ if (!fn(seg->pool_lv, data))
+ return_0;
+ if (!for_each_sub_lv(seg->pool_lv, fn, data))
+ return_0;
+ }
}
for (s = 0; s < seg->area_count; s++) {
+PFL();
if (seg_type(seg, s) != AREA_LV)
continue;
if (!fn(seg_lv(seg, s), data))
@@ -4073,12 +4810,14 @@ int for_each_sub_lv(struct logical_volume *lv,
if (!for_each_sub_lv(seg_lv(seg, s), fn, data))
return_0;
}
-
+PFL();
if (!seg_is_raid(seg) || !seg->meta_areas)
continue;
+PFL();
/* RAID has meta_areas */
for (s = 0; s < seg->area_count; s++) {
+PFL();
if (seg_metatype(seg, s) != AREA_LV)
continue;
if (!fn(seg_metalv(seg, s), data))
@@ -4087,10 +4826,25 @@ int for_each_sub_lv(struct logical_volume *lv,
return_0;
}
}
+PFL();
return 1;
}
+int for_each_sub_lv(struct logical_volume *lv,
+ int (*fn)(struct logical_volume *lv, void *data),
+ void *data)
+{
+ return _for_each_sub_lv(lv, 0, fn, data);
+}
+
+int for_each_sub_lv_except_pools(struct logical_volume *lv,
+ int (*fn)(struct logical_volume *lv, void *data),
+ void *data)
+{
+ return _for_each_sub_lv(lv, 1, fn, data);
+}
+
/*
* Core of LV renaming routine.
* VG must be locked by caller.
@@ -4118,6 +4872,18 @@ int lv_rename_update(struct cmd_context *cmd, struct logical_volume *lv,
return 0;
}
+ /*
+ * The lvmlockd LV lock is only acquired here to ensure the LV is not
+ * active on another host. This requests a transient LV lock.
+ * If the LV is active, a persistent LV lock already exists in
+ * lvmlockd, and the transient lock request does nothing.
+ * If the LV is not active, then no LV lock exists and the transient
+ * lock request acquires the LV lock (or fails). The transient lock
+ * is automatically released when the command exits.
+ */
+ if (!lockd_lv(cmd, lv, "ex", 0))
+ return_0;
+
if (update_mda && !archive(vg))
return_0;
@@ -4127,7 +4893,7 @@ int lv_rename_update(struct cmd_context *cmd, struct logical_volume *lv,
}
/* rename sub LVs */
- if (!for_each_sub_lv(lv, _rename_cb, (void *) &lv_names))
+ if (!for_each_sub_lv_except_pools(lv, _rename_cb, (void *) &lv_names))
return_0;
/* rename main LV */
@@ -4277,7 +5043,7 @@ static int _fsadm_cmd(struct cmd_context *cmd,
argv[i++] = lv_path;
if (fcmd == FSADM_CMD_RESIZE) {
- if (dm_snprintf(size_buf, sizeof(size_buf), "%" PRIu64 "K",
+ if (dm_snprintf(size_buf, sizeof(size_buf), FMTu64 "K",
(uint64_t) lp->extents * (vg->extent_size / 2)) < 0) {
log_error("Couldn't generate new LV size string");
return 0;
@@ -4475,7 +5241,7 @@ static int _lvresize_check_lv(struct cmd_context *cmd, struct logical_volume *lv
return 0;
}
- if (lv_is_raid_image(lv) || lv_is_raid_metadata(lv)) {
+ if (lv_is_raid_metadata(lv) || lv_is_raid_image(lv)) {
log_error("Cannot resize a RAID %s directly",
(lv->status & RAID_IMAGE) ? "image" :
"metadata area");
@@ -4510,7 +5276,9 @@ static int _lvresize_check_lv(struct cmd_context *cmd, struct logical_volume *lv
return 0;
}
- if (!lv_is_visible(lv) && !lv_is_thin_pool_metadata(lv)) {
+ /* FIXME: use a status flag instead of the name "lvmlock". */
+
+ if (!lv_is_visible(lv) && !lv_is_thin_pool_metadata(lv) && strcmp(lv->name, "lvmlock")) {
log_error("Can't resize internal logical volume %s", lv->name);
return 0;
}
@@ -4666,14 +5434,14 @@ static int _lvresize_adjust_extents(struct cmd_context *cmd, struct logical_volu
uint32_t physical_extents_used = 0;
uint32_t seg_stripes = 0, seg_stripesize = 0;
uint32_t seg_mirrors = 0;
- struct lv_segment *seg, *mirr_seg;
+ struct lv_segment *seg = first_seg(lv), *mirr_seg;
uint32_t sz, str;
uint32_t seg_logical_extents;
uint32_t seg_physical_extents;
uint32_t area_multiple;
uint32_t stripesize_extents;
uint32_t size_rest;
- uint32_t existing_logical_extents = lv->le_count - first_seg(lv)->reshape_len;
+ uint32_t existing_logical_extents = lv->le_count; // - seg->reshape_len * (seg->area_count - seg->segtype->parity_devs);
uint32_t existing_physical_extents, saved_existing_physical_extents;
uint32_t seg_size = 0;
uint32_t new_extents;
@@ -4712,7 +5480,7 @@ static int _lvresize_adjust_extents(struct cmd_context *cmd, struct logical_volu
/* If extending, find properties of last segment */
if (!reducing) {
mirr_seg = last_seg(lv);
- seg_mirrors = seg_is_mirrored(mirr_seg) ? lv_mirror_count(mirr_seg->lv) : 0;
+ seg_mirrors = mirr_seg->data_copies;
if (!lp->ac_mirrors && seg_mirrors) {
log_print_unless_silent("Extending %" PRIu32 " mirror images.", seg_mirrors);
@@ -4730,21 +5498,26 @@ static int _lvresize_adjust_extents(struct cmd_context *cmd, struct logical_volu
lp->stripe_size = mirr_seg->stripe_size;
} else if (!strcmp(mirr_seg->segtype->name, _lv_type_names[LV_TYPE_RAID10])) {
/* FIXME Warn if command line values are being overridden? */
- lp->stripes = mirr_seg->area_count / seg_mirrors;
+ lp->stripes = mirr_seg->area_count;
lp->stripe_size = mirr_seg->stripe_size;
+ } else if (!strcmp(mirr_seg->segtype->name, _lv_type_names[LV_TYPE_RAID01])) {
+ /* FIXME Warn if command line values are being overridden? */
+ lp->stripes = first_seg(seg_lv(mirr_seg, 0))->area_count;
+ lp->stripe_size = first_seg(seg_lv(mirr_seg, 0))->stripe_size;
} else if (!(lp->stripes == 1 || (lp->stripes > 1 && lp->stripe_size))) {
/* If extending, find stripes, stripesize & size of last segment */
/* FIXME Don't assume mirror seg will always be AREA_LV */
/* FIXME We will need to support resize for metadata LV as well,
* and data LV could be any type (i.e. mirror)) */
- dm_list_iterate_items(seg, seg_mirrors ? &seg_lv(mirr_seg, 0)->segments : &lv->segments) {
+ dm_list_iterate_items(seg, seg_mirrors > 1 ? &seg_lv(mirr_seg, 0)->segments : &lv->segments) {
/* Allow through "striped" and RAID 0/10/4/5/6 */
+PFLA("seg->segtype=%s", seg->segtype->name);
if (!seg_is_striped(seg) &&
(!seg_is_raid(seg) || seg_is_mirrored(seg)) &&
strcmp(seg->segtype->name, _lv_type_names[LV_TYPE_RAID0]) &&
strcmp(seg->segtype->name, _lv_type_names[LV_TYPE_RAID10]))
continue;
-
+PFL();
sz = seg->stripe_size;
str = seg->area_count - lp->segtype->parity_devs;
@@ -4829,11 +5602,14 @@ static int _lvresize_adjust_extents(struct cmd_context *cmd, struct logical_volu
/* Check for underlying stripe sizes */
seg_stripes = lvseg_get_stripes(seg, &seg_stripesize);
+#if 0
if (seg_is_mirrored(seg))
seg_mirrors = lv_mirror_count(seg->lv);
else
seg_mirrors = 0;
-
+#else
+ seg_mirrors = seg->data_copies;
+#endif
/* Have we reached the final segment of the new LV? */
if (lp->extents_are_pes) {
if (new_extents <= physical_extents_used + seg_physical_extents) {
@@ -4911,8 +5687,20 @@ static int _lvresize_adjust_extents(struct cmd_context *cmd, struct logical_volu
lp->resize = LV_EXTEND; /* lets pretend zero size extension */
}
+#if 1
+ /* HM FIXME: sufficient for RAID? */
+ if (seg_is_striped_raid(seg)) {
+ unsigned stripes = seg->area_count - seg->segtype->parity_devs;
+
+ lp->extents = _round_to_stripe_boundary(lv, lp->extents, stripes,
+ lp->extents < existing_physical_extents);
+ lp->extents = raid_total_extents(seg->segtype, lp->extents, stripes,
+ seg->data_copies) / seg->data_copies;
+} else
+#endif
/* Perform any rounding to produce complete stripes. */
if (lp->stripes > 1) {
+PFLA("lp->stripes=%u", lp->stripes);
if (lp->stripe_size < STRIPE_SIZE_MIN) {
log_error("Invalid stripe size %s",
display_size(cmd, (uint64_t) lp->stripe_size));
@@ -5008,18 +5796,13 @@ static int _lvresize_check_type(struct cmd_context *cmd, const struct logical_vo
}
}
+ if ((lp->resize == LV_REDUCE) && lv_is_thin_pool_metadata(lv)) {
+ log_error("Thin pool metadata volumes cannot be reduced.");
+ return 0;
+ }
+
if (lv_is_thin_volume(lv) && first_seg(lv)->external_lv &&
(lp->resize == LV_EXTEND)) {
- /*
- * TODO: currently we do not support extension of already reduced thin volume.
- * But it might be possible to create combined mapping of some part of
- * the external origin followed by zero target.
- */
- if (first_seg(lv)->external_lv->size > lv->size) {
- log_error("Extension of reduced thin volume with external origin is unsupported.");
- return 0;
- }
-
/* Validate thin target supports bigger size of thin volume then external origin */
if (first_seg(lv)->external_lv->size <= lv->size &&
!thin_pool_feature_supported(first_seg(lv)->pool_lv, THIN_FEATURE_EXTERNAL_ORIGIN_EXTEND)) {
@@ -5090,16 +5873,19 @@ static struct logical_volume *_lvresize_volume(struct cmd_context *cmd,
display_lvname(lv), lp->approx_alloc ? "up to " : "",
display_size(cmd, (uint64_t) lp->extents * vg->extent_size));
+PFLA("lp->extents=%u, lv->le_count=%u", lp->extents, lv->le_count);
if (lp->resize == LV_REDUCE) {
- if (!lv_reduce(lv, lv->le_count - first_seg(lv)->reshape_len - lp->extents))
+ if (!lv_reduce(lv, lv->le_count - lp->extents))
return_NULL;
- } else if ((lp->extents > lv->le_count - first_seg(lv)->reshape_len) && /* Ensure we extend */
+ } else if (lp->extents > lv->le_count && /* Ensure we extend */
!lv_extend(lv, lp->segtype,
lp->stripes, lp->stripe_size,
lp->mirrors, first_seg(lv)->region_size,
- lp->extents - (lv->le_count - first_seg(lv)->reshape_len),
+ lp->extents - lv->le_count,
pvh, alloc, lp->approx_alloc))
return_NULL;
+ else if (!pool_check_overprovisioning(lv))
+ return_NULL;
if (old_extents == lv->le_count)
log_print_unless_silent("Size of logical volume %s unchanged from %s (%" PRIu32 " extents).",
@@ -5174,6 +5960,13 @@ int lv_resize(struct cmd_context *cmd, struct logical_volume *lv,
return 0;
}
+ /*
+ * If the LV is locked from activation, this lock call is a no-op.
+ * Otherwise, this acquires a transient lock on the lv (not PERSISTENT).
+ */
+ if (!lockd_lv(cmd, lv, "ex", 0))
+ return_0;
+
if (lp->sizeargs &&
!(lock_lv = _lvresize_volume(cmd, lv, lp, pvh)))
return_0;
@@ -5200,7 +5993,7 @@ int lv_resize(struct cmd_context *cmd, struct logical_volume *lv,
*/
inactive = 1;
if (!activate_lv_excl(cmd, lock_lv)) {
- log_error("Failed to activate %s.", lock_lv->name);
+ log_error("Failed to activate %s.", display_lvname(lock_lv));
return 0;
}
}
@@ -5221,12 +6014,12 @@ int lv_resize(struct cmd_context *cmd, struct logical_volume *lv,
backup(vg);
if (inactive && !deactivate_lv(cmd, lock_lv)) {
- log_error("Problem deactivating %s.", lock_lv->name);
+ log_error("Problem deactivating %s.", display_lvname(lock_lv));
return 0;
}
}
- log_print_unless_silent("Logical volume %s successfully resized", lp->lv_name);
+ log_print_unless_silent("Logical volume %s successfully resized.", lp->lv_name);
if (lp->resizefs && (lp->resize == LV_EXTEND) &&
!_fsadm_cmd(cmd, vg, lp, FSADM_CMD_RESIZE, NULL))
@@ -5436,6 +6229,7 @@ PFLA("current_le=%u seg->le=%u seg->len=%u seg->area_len=%u", current_le, seg->l
lv->name, current_le);
return 0;
}
+PFL();
/* Find next segment end */
/* FIXME Unnecessary nesting! */
@@ -5445,6 +6239,7 @@ PFLA("current_le=%u seg->le=%u seg->len=%u seg->area_len=%u", current_le, seg->l
use_pvmove_parent_lv ? seg->pvmove_source_seg : NULL,
&spvs->len,
0, 0, -1, 0, _add_pvs, (void *) spvs)) {
+PFL();
return_NULL;
}
@@ -5486,6 +6281,7 @@ int link_lv_to_vg(struct volume_group *vg, struct logical_volume *lv)
lvl->lv = lv;
lv->vg = vg;
dm_list_add(&vg->lvs, &lvl->list);
+ lv->status &= ~LV_REMOVED;
return 1;
}
@@ -5497,7 +6293,8 @@ int unlink_lv_from_vg(struct logical_volume *lv)
if (!(lvl = find_lv_in_vg(lv->vg, lv->name)))
return_0;
- dm_list_del(&lvl->list);
+ dm_list_move(&lv->vg->removed_lvs, &lvl->list);
+ lv->status |= LV_REMOVED;
return 1;
}
@@ -5530,6 +6327,7 @@ int lv_remove_single(struct cmd_context *cmd, struct logical_volume *lv,
int format1_reload_required = 0;
int visible;
struct logical_volume *pool_lv = NULL;
+ struct logical_volume *lock_lv = lv;
struct lv_segment *cache_seg = NULL;
int ask_discard;
struct lv_list *lvl;
@@ -5576,14 +6374,19 @@ int lv_remove_single(struct cmd_context *cmd, struct logical_volume *lv,
log_error("Can't remove logical volume %s used by a pool.",
lv->name);
return 0;
- } else if (lv_is_thin_volume(lv))
+ } else if (lv_is_thin_volume(lv)) {
pool_lv = first_seg(lv)->pool_lv;
+ lock_lv = pool_lv;
+ }
if (lv_is_locked(lv)) {
log_error("Can't remove locked LV %s", lv->name);
return 0;
}
+ if (!lockd_lv(cmd, lock_lv, "ex", LDLV_PERSISTENT))
+ return_0;
+
/* FIXME Ensure not referred to by another existing LVs */
ask_discard = find_config_tree_bool(cmd, devices_issue_discards_CFG, NULL);
@@ -5758,6 +6561,9 @@ int lv_remove_single(struct cmd_context *cmd, struct logical_volume *lv,
backup(vg);
+ lockd_lv(cmd, lock_lv, "un", LDLV_PERSISTENT);
+ lockd_free_lv(cmd, vg, lv->name, &lv->lvid.id[1], lv->lock_args);
+
if (!suppress_remove_message && visible)
log_print_unless_silent("Logical volume \"%s\" successfully removed", lv->name);
@@ -5967,14 +6773,10 @@ static int _split_parent_area(struct lv_segment *seg, uint32_t s,
struct dm_list *layer_seg_pvs)
{
uint32_t parent_area_len, parent_le, layer_le;
- uint32_t area_multiple;
+ uint32_t area_multiple = (seg_is_striped(seg) || seg_is_striped_raid(seg)) ?
+ seg->area_count - seg->segtype->parity_devs : 1;
struct seg_pvs *spvs;
- if (seg_is_striped(seg))
- area_multiple = seg->area_count;
- else
- area_multiple = 1;
-
parent_area_len = seg->area_len;
parent_le = seg->le;
layer_le = seg_le(seg, s);
@@ -6091,7 +6893,7 @@ int remove_layers_for_segments(struct cmd_context *cmd,
log_error("Layer boundary mismatch: "
"%s:%" PRIu32 "-%" PRIu32 " on "
"%s:%" PRIu32 " / "
- "%" PRIu32 "-%" PRIu32 " / ",
+ FMTu32 "-" FMTu32 " / ",
lv->name, seg->le, seg->area_len,
layer_lv->name, seg_le(seg, s),
lseg->le, lseg->area_len);
@@ -6187,7 +6989,7 @@ int move_lv_segments(struct logical_volume *lv_to,
int remove_layer_from_lv(struct logical_volume *lv,
struct logical_volume *layer_lv)
{
- static const char _suffixes[][8] = { "_tdata", "_cdata", "_corig" };
+ static const char _suffixes[][8] = { "_tdata", "_cdata", "_corig", "_dup_" };
struct logical_volume *parent_lv;
struct lv_segment *parent_seg;
struct segment_type *segtype;
@@ -6209,11 +7011,11 @@ PFL();
return 0;
}
PFL();
-
/*
* Before removal, the layer should be cleaned up,
* i.e. additional segments and areas should have been removed.
*/
+PFLA("lv=%s parent_lv=%s layer_lv=%s segments=%u area_count=%u layer_lv!=%u parent_lv->le_count=%u layer_lv->le_count=%u", display_lvname(lv), display_lvname(parent_lv), layer_lv ? display_lvname(layer_lv) : NULL, dm_list_size(&parent_lv->segments), parent_seg->area_count, layer_lv != seg_lv(parent_seg, 0), parent_lv->le_count, layer_lv->le_count)
if (dm_list_size(&parent_lv->segments) != 1 ||
parent_seg->area_count != 1 ||
seg_type(parent_seg, 0) != AREA_LV ||
@@ -6221,19 +7023,19 @@ PFL();
parent_lv->le_count != layer_lv->le_count)
return_0;
PFL();
-
if (!lv_empty(parent_lv))
return_0;
-
+PFL();
if (!move_lv_segments(parent_lv, layer_lv, 0, 0))
return_0;
-
+PFL();
/* Replace the empty layer with error segment */
if (!(segtype = get_segtype_from_string(lv->vg->cmd, "error")))
return_0;
+PFL();
if (!lv_add_virtual_segment(layer_lv, 0, parent_lv->le_count, segtype))
return_0;
-
+PFLA("parent_lv=%s layer_lv=%s", display_lvname(lv), display_lvname(parent_lv));
/*
* recuresively rename sub LVs
* currently supported only for thin data layer
@@ -6241,14 +7043,13 @@ PFL();
*/
if (!strstr(layer_lv->name, "_mimage"))
for (r = 0; r < DM_ARRAY_SIZE(_suffixes); ++r)
- if (strstr(layer_lv->name, _suffixes[r]) == 0) {
+ if (strstr(layer_lv->name, _suffixes[r])) {
lv_names.old = layer_lv->name;
lv_names.new = parent_lv->name;
if (!for_each_sub_lv(parent_lv, _rename_cb, (void *) &lv_names))
return_0;
break;
}
-
PFLA("%s", layer_lv->name);
return 1;
}
@@ -6264,8 +7065,9 @@ struct logical_volume *insert_layer_for_lv(struct cmd_context *cmd,
uint64_t status,
const char *layer_suffix)
{
- static const char _suffixes[][8] = { "_tdata", "_cdata", "_corig" };
+ static const char _suffixes[][8] = { "_tdata", "_cdata", "_corig", "_dup_" };
int r;
+ uint32_t le_count = lv_where->le_count;
char name[NAME_LEN];
struct dm_str_list *sl;
struct logical_volume *layer_lv;
@@ -6291,12 +7093,14 @@ struct logical_volume *insert_layer_for_lv(struct cmd_context *cmd,
if (lv_is_active_exclusive_locally(lv_where))
exclusive = 1;
- if (lv_is_active(lv_where) && strstr(name, "_mimagetmp")) {
+PFLA("lv_is_active()=%d", lv_is_active(lv_where));
+ if (lv_is_active(lv_where) &&
+ (strstr(name, "_mimagetmp"))) {
log_very_verbose("Creating transient LV %s for mirror conversion in VG %s.", name, lv_where->vg->name);
segtype = get_segtype_from_string(cmd, "error");
- if (!lv_add_virtual_segment(layer_lv, 0, lv_where->le_count, segtype)) {
+ if (!lv_add_virtual_segment(layer_lv, 0, le_count, segtype)) {
log_error("Creation of transient LV %s for mirror conversion in VG %s failed.", name, lv_where->vg->name);
return NULL;
}
@@ -6308,12 +7112,13 @@ struct logical_volume *insert_layer_for_lv(struct cmd_context *cmd,
" transient mirror layer.");
return NULL;
}
-
+PFLA("%s", "vg_write");
if (!vg_write(lv_where->vg)) {
log_error("Failed to write intermediate VG %s metadata for mirror conversion.", lv_where->vg->name);
return NULL;
}
+PFLA("%s", "vg_commit");
if (!vg_commit(lv_where->vg)) {
log_error("Failed to commit intermediate VG %s metadata for mirror conversion.", lv_where->vg->name);
return NULL;
@@ -6346,27 +7151,27 @@ struct logical_volume *insert_layer_for_lv(struct cmd_context *cmd,
return_NULL;
/* allocate a new linear segment */
- if (!(mapseg = alloc_lv_segment(segtype, lv_where, 0, layer_lv->le_count, 0,
- status, 0, NULL, 1, layer_lv->le_count,
+ if (!(mapseg = alloc_lv_segment(segtype, lv_where, 0, le_count, 0,
+ status, 0, NULL, 1, le_count, 1,
0, 0, 0, NULL)))
return_NULL;
- /* map the new segment to the original underlying are */
+ /* map the new segment to the original underlying area */
if (!set_lv_segment_area_lv(mapseg, 0, layer_lv, 0, 0))
return_NULL;
/* add the new segment to the layer LV */
dm_list_add(&lv_where->segments, &mapseg->list);
- lv_where->le_count = layer_lv->le_count;
+ lv_where->le_count = le_count;
lv_where->size = (uint64_t) lv_where->le_count * lv_where->vg->extent_size;
/*
* recuresively rename sub LVs
- * currently supported only for thin data layer
- * FIXME: without strcmp it breaks mirrors....
+ * currently supported for thin data layer and copy conversions
+ * FIXME: without strstr it breaks mirrors....
*/
for (r = 0; r < DM_ARRAY_SIZE(_suffixes); ++r)
- if (strcmp(layer_suffix, _suffixes[r]) == 0) {
+ if (strstr(layer_suffix, _suffixes[r])) {
lv_names.old = lv_where->name;
lv_names.new = layer_lv->name;
if (!for_each_sub_lv(layer_lv, _rename_cb, (void *) &lv_names))
@@ -6404,7 +7209,7 @@ static int _extend_layer_lv_for_segment(struct logical_volume *layer_lv,
/* allocate a new segment */
if (!(mapseg = alloc_lv_segment(segtype, layer_lv, layer_lv->le_count, seg->area_len,
0, status, 0,
- NULL, 1, seg->area_len, 0, 0, 0, seg)))
+ NULL, 1, seg->area_len, 1, 0, 0, 0, seg)))
return_0;
/* map the new segment to the original underlying are */
@@ -6576,7 +7381,7 @@ int insert_layer_for_segments_on_pv(struct cmd_context *cmd,
}
/*
- * Initialize the LV with 'value'.
+ * Initialize the LV with @wp.zero_value; wipe wp.zero_sectors sectors, or a 4KiB default if 0
*/
int wipe_lv(struct logical_volume *lv, struct wipe_params wp)
{
@@ -6588,7 +7393,12 @@ int wipe_lv(struct logical_volume *lv, struct wipe_params wp)
/* nothing to do */
return 1;
- sync_local_dev_names(lv->vg->cmd); /* Wait until devices are available */
+ /* Wait until devices are available */
+ if (!sync_local_dev_names(lv->vg->cmd)) {
+ log_error("Failed to sync local devices before wiping LV %s.",
+ display_lvname(lv));
+ return 0;
+ }
if (!lv_is_active_locally(lv)) {
log_error("Volume \"%s/%s\" is not active locally.",
@@ -6622,7 +7432,7 @@ int wipe_lv(struct logical_volume *lv, struct wipe_params wp)
lv->vg->name, lv->name);
if (!wipe_known_signatures(lv->vg->cmd, dev, name, 0,
TYPE_DM_SNAPSHOT_COW,
- wp.yes, wp.force))
+ wp.yes, wp.force, NULL))
stack;
}
@@ -6829,6 +7639,7 @@ static struct logical_volume *_lv_create_an_lv(struct volume_group *vg,
return NULL;
}
+PFLA("lp->region_size=%u", lp->region_size);
if (!_vg_check_features(vg, lp))
return_NULL;
@@ -6857,18 +7668,20 @@ static struct logical_volume *_lv_create_an_lv(struct volume_group *vg,
}
}
- if (seg_is_raid(lp) && (vg->extent_size < STRIPE_SIZE_MIN)) {
- /*
- * FIXME: RAID will simply fail to load the table if
- * this is the case, but we should probably
- * honor the stripe minimum for regular stripe
- * volumes as well. Avoiding doing that now
- * only to minimize the change.
- */
- log_error("The extent size in volume group %s is too "
- "small to support striped RAID volumes.",
- vg->name);
- return NULL;
+ if (lp->stripe_size > vg->extent_size) {
+ if (seg_is_raid(lp) && (vg->extent_size < STRIPE_SIZE_MIN)) {
+ /*
+ * FIXME: RAID will simply fail to load the table if
+ * this is the case, but we should probably
+ * honor the stripe minimum for regular stripe
+ * volumes as well. Avoiding doing that now
+ * only to minimize the change.
+ */
+ log_error("The extent size in volume group %s is too "
+ "small to support striped RAID volumes.",
+ vg->name);
+ return NULL;
+ }
}
if (lp->stripe_size > vg->extent_size) {
@@ -6879,7 +7692,7 @@ static struct logical_volume *_lv_create_an_lv(struct volume_group *vg,
lp->stripe_size = vg->extent_size;
}
- if ((size_rest = lp->extents % lp->stripes)) {
+ if (!seg_is_any_raid10(lp) && (size_rest = lp->extents % lp->stripes)) {
log_print_unless_silent("Rounding size (%d extents) up to stripe boundary "
"size (%d extents).", lp->extents,
lp->extents - size_rest + lp->stripes);
@@ -6907,7 +7720,7 @@ static struct logical_volume *_lv_create_an_lv(struct volume_group *vg,
if (seg_is_pool(lp))
status |= LVM_WRITE; /* Pool is always writable */
- else if (seg_is_cache(lp) || seg_is_thin_volume(lp)) {
+ else if (seg_is_cache(lp) || seg_is_thin_volume(lp)) {
/* Resolve pool volume */
if (!lp->pool_name) {
/* Should be already checked */
@@ -6963,6 +7776,7 @@ static struct logical_volume *_lv_create_an_lv(struct volume_group *vg,
return NULL;
}
+PFLA("lp->region_size=%u", lp->region_size);
if (origin_lv && seg_is_cache_pool(lp)) {
/* Converting exiting origin and creating cache pool */
if (!validate_lv_cache_create_origin(origin_lv))
@@ -6983,7 +7797,11 @@ static struct logical_volume *_lv_create_an_lv(struct volume_group *vg,
display_lvname(origin_lv));
return NULL;
}
- } else if (pool_lv && seg_is_cache(lp)) {
+ } else if (seg_is_cache(lp)) {
+ if (!pool_lv) {
+ log_error(INTERNAL_ERROR "Pool LV for cache is missing.");
+ return NULL;
+ }
if (!lv_is_cache_pool(pool_lv)) {
log_error("Logical volume %s is not a cache pool.",
display_lvname(pool_lv));
@@ -6994,7 +7812,7 @@ static struct logical_volume *_lv_create_an_lv(struct volume_group *vg,
if (!(create_segtype = get_segtype_from_string(vg->cmd, "striped")))
return_0;
} else if (seg_is_mirrored(lp) || seg_is_raid(lp)) {
- if (!(seg_is_raid0(lp) || seg_is_raid0_meta(lp))) {
+ if (!seg_is_any_raid0(lp)) {
/* FIXME: this will not pass cluster lock! */
init_mirror_in_sync(lp->nosync);
@@ -7004,14 +7822,24 @@ static struct logical_volume *_lv_create_an_lv(struct volume_group *vg,
lp->segtype->name);
status |= LV_NOTSYNCED;
}
+ }
-PFLA("region_size=%u", lp->region_size);
- lp->region_size = adjusted_mirror_region_size(vg->extent_size,
- lp->extents,
- lp->region_size, 0);
-PFLA("region_size=%u", lp->region_size);
+ if (is_change_activating(lp->activate) && (lp->activate != CHANGE_AEY) &&
+ vg_is_clustered(vg) && seg_is_mirrored(lp) && !seg_is_raid(lp) &&
+ !cluster_mirror_is_available(vg->cmd)) {
+ log_error("Shared cluster mirrors are not available.");
+ return NULL;
}
-PFLA("region_size=%u", lp->region_size);
+
+ /* FIXME This will not pass cluster lock! */
+ init_mirror_in_sync(lp->nosync);
+
+PFLA("lp->region_size=%u", lp->region_size);
+ lp->region_size = adjusted_mirror_region_size(vg->extent_size,
+ lp->extents,
+ lp->region_size, 0,
+ vg_is_clustered(vg));
+PFLA("lp->region_size=%u", lp->region_size);
} else if (pool_lv && seg_is_thin_volume(lp)) {
if (!lv_is_thin_pool(pool_lv)) {
log_error("Logical volume %s is not a thin pool.",
@@ -7124,6 +7952,7 @@ PFLA("region_size=%u", lp->region_size);
if (!archive(vg))
return_NULL;
+
if (pool_lv && seg_is_thin_volume(lp)) {
/* Ensure all stacked messages are submitted */
if ((pool_is_active(pool_lv) || is_change_activating(lp->activate)) &&
@@ -7148,8 +7977,17 @@ PFLA("region_size=%u", lp->region_size);
lv->major, lv->minor);
}
+ /*
+ * The specific LV may not use a lock. lockd_init_lv() sets
+ * lv->lock_args to NULL if this LV does not use its own lock.
+ */
+
+ if (!lockd_init_lv(vg->cmd, vg, lv, lp))
+ return_NULL;
+
dm_list_splice(&lv->tags, &lp->tags);
+PFLA("lp->stripes=%u lp->stripe_size=%u lp->mirrors=%u lp->region_size=%u", lp->stripes, lp->stripe_size, lp->mirrors, lp->region_size);
if (!lv_extend(lv, create_segtype,
lp->stripes, lp->stripe_size,
lp->mirrors,
@@ -7162,20 +8000,33 @@ PFLA("region_size=%u", lp->region_size);
memlock_unlock(vg->cmd);
if (seg_is_cache_pool(lp) || seg_is_cache(lp)) {
- pool_lv = pool_lv ? : lv;
- first_seg(pool_lv)->chunk_size = lp->chunk_size;
- first_seg(pool_lv)->feature_flags = lp->feature_flags;
- /* TODO: some calc_policy solution for cache ? */
- if (!recalculate_pool_chunk_size_with_dev_hints(pool_lv, lp->passed_args,
- THIN_CHUNK_SIZE_CALC_METHOD_GENERIC)) {
+ if (!cache_set_mode(first_seg(lv), lp->cache_mode)) {
+ stack;
+ goto revert_new_lv;
+ }
+
+ if (!cache_set_policy(first_seg(lv), lp->policy_name, lp->policy_settings)) {
stack;
goto revert_new_lv;
}
+
+ pool_lv = pool_lv ? : lv;
+ if (lp->chunk_size) {
+ first_seg(pool_lv)->chunk_size = lp->chunk_size;
+ /* TODO: some calc_policy solution for cache ? */
+ if (!recalculate_pool_chunk_size_with_dev_hints(pool_lv, lp->passed_args,
+ THIN_CHUNK_SIZE_CALC_METHOD_GENERIC)) {
+ stack;
+ goto revert_new_lv;
+ }
+ }
} else if (seg_is_raid(lp)) {
- if (!(seg_is_raid0(lp) || seg_is_raid0_meta(lp))) {
+ if (!seg_is_any_raid0(lp)) {
first_seg(lv)->min_recovery_rate = lp->min_recovery_rate;
first_seg(lv)->max_recovery_rate = lp->max_recovery_rate;
}
+
+ first_seg(lv)->data_copies = lp->mirrors;
} else if (seg_is_thin_pool(lp)) {
first_seg(lv)->chunk_size = lp->chunk_size;
first_seg(lv)->zero_new_blocks = lp->zero ? 1 : 0;
@@ -7187,6 +8038,8 @@ PFLA("region_size=%u", lp->region_size);
stack;
goto revert_new_lv;
}
+ if (lp->error_when_full)
+ lv->status |= LV_ERROR_WHEN_FULL;
} else if (pool_lv && seg_is_thin_volume(lp)) {
seg = first_seg(lv);
pool_seg = first_seg(pool_lv);
@@ -7213,6 +8066,9 @@ PFLA("region_size=%u", lp->region_size);
return_NULL;
}
+ if (!pool_check_overprovisioning(lv))
+ return_NULL;
+
/* FIXME Log allocation and attachment should have happened inside lv_extend. */
if (lp->log_count &&
!seg_is_raid(first_seg(lv)) && seg_is_mirrored(first_seg(lv))) {
@@ -7287,11 +8143,8 @@ PFLA("region_size=%u", lp->region_size);
stack;
goto revert_new_lv;
}
- /* When change is activating, don't duplicate backup call */
- if (!is_change_activating(lp->activate))
- backup(vg);
}
- if (is_change_activating(lp->activate)) {
+ if (!dm_list_empty(&first_seg(pool_lv)->thin_messages)) {
/* Send message so that table preload knows new thin */
if (!lv_is_active(pool_lv)) {
/* Avoid multiple thin-pool activations in this case */
@@ -7309,25 +8162,24 @@ PFLA("region_size=%u", lp->region_size);
}
}
/* Keep thin pool active until thin volume is activated */
- if (!update_pool_lv(pool_lv, (thin_pool_was_active < 0) ? 1 : 0)) {
+ if (!update_pool_lv(pool_lv, 1)) {
stack;
goto revert_new_lv;
}
+ }
+ backup(vg);
- backup(vg);
-
- if (!lv_active_change(cmd, lv, lp->activate, 0)) {
- log_error("Failed to activate thin %s.", lv->name);
- goto deactivate_and_revert_new_lv;
- }
+ if (!lv_active_change(cmd, lv, lp->activate, 0)) {
+ log_error("Failed to activate thin %s.", lv->name);
+ goto deactivate_and_revert_new_lv;
+ }
- /* Restore inactive state if needed */
- if (!thin_pool_was_active &&
- !deactivate_lv(cmd, pool_lv)) {
- log_error("Failed to deactivate thin pool %s.",
- display_lvname(pool_lv));
- return NULL;
- }
+ /* Restore inactive state if needed */
+ if (!thin_pool_was_active &&
+ !deactivate_lv(cmd, pool_lv)) {
+ log_error("Failed to deactivate thin pool %s.",
+ display_lvname(pool_lv));
+ return NULL;
}
} else if (lp->snapshot) {
lv->status |= LV_TEMPORARY;
@@ -7356,13 +8208,6 @@ PFLA("region_size=%u", lp->region_size);
}
}
- if (lv_is_cache_pool(lv) && !origin_lv) {
- if (lp->cache_policy && !lv_cache_setpolicy(lv, lp->cache_policy))
- return NULL; /* revert? */
- if (!lv_update_and_reload(lv))
- return NULL; /* FIXME: revert */
- }
-
if (seg_is_cache(lp) || (origin_lv && lv_is_cache_pool(lv))) {
/* Finish cache conversion magic */
if (origin_lv) {
@@ -7382,8 +8227,11 @@ PFLA("region_size=%u", lp->region_size);
}
lv = tmp_lv;
- if (lp->cache_policy && !lv_cache_setpolicy(lv, lp->cache_policy))
- return NULL; /* revert? */
+ if (!cache_set_mode(first_seg(lv), lp->cache_mode))
+ return_NULL; /* revert? */
+
+ if (!cache_set_policy(first_seg(lv), lp->policy_name, lp->policy_settings))
+ return_NULL; /* revert? */
if (!lv_update_and_reload(lv)) {
/* FIXME Do a better revert */
@@ -7398,6 +8246,13 @@ PFLA("region_size=%u", lp->region_size);
goto deactivate_and_revert_new_lv; /* Let's retry on error path */
}
+ /* Get in sync with deactivation, before reusing LV as snapshot */
+ if (!sync_local_dev_names(lv->vg->cmd)) {
+ log_error("Failed to sync local devices before creating snapshot using %s.",
+ display_lvname(lv));
+ goto revert_new_lv;
+ }
+
/* Create zero origin volume for spare snapshot */
if (lp->virtual_extents &&
!(origin_lv = _create_virtual_origin(cmd, vg, lv->name,
@@ -7455,6 +8310,8 @@ deactivate_and_revert_new_lv:
}
revert_new_lv:
+ lockd_free_lv(vg->cmd, vg, lp->lv_name, &lv->lvid.id[1], lp->lock_args);
+
/* FIXME Better to revert to backup of metadata? */
if (!lv_remove(lv) || !vg_write(vg) || !vg_commit(vg))
log_error("Manual intervention may be required to remove "
@@ -7512,6 +8369,7 @@ struct logical_volume *lv_create_single(struct volume_group *vg,
lp->segtype = segtype;
}
+PFLA("lp->stripe_size=%u", lp->stripe_size);
if (!(lv = _lv_create_an_lv(vg, lp, lp->lv_name)))
return_NULL;
diff --git a/lib/metadata/merge.c b/lib/metadata/merge.c
index bb533e9ee..40c7c4aa0 100644
--- a/lib/metadata/merge.c
+++ b/lib/metadata/merge.c
@@ -20,15 +20,6 @@
#include "str_list.h"
#include "segtype.h"
-/* HM FIXME: REMOVEME: devel output */
-#ifdef USE_PFL
-#define PFL() printf("%s %u\n", __func__, __LINE__);
-#define PFLA(format, arg...) printf("%s %u " format "\n", __func__, __LINE__, arg);
-#else
-#define PFL()
-#define PFLA(format, arg...)
-#endif
-
/*
* Attempt to merge two adjacent segments.
* Currently only supports striped segments on AREA_PV.
@@ -102,6 +93,7 @@ int check_lv_segments(struct logical_volume *lv, int complete_vg)
lv->name);
inc_error_count;
} else if (!seg_is_thin_volume(first_seg(lv))) {
+PFLA("segtype=%s", lvseg_name(first_seg(lv)));
log_error("LV %s is thin volume without first thin volume segment.",
lv->name);
inc_error_count;
@@ -139,6 +131,8 @@ int check_lv_segments(struct logical_volume *lv, int complete_vg)
}
dm_list_iterate_items(seg, &lv->segments) {
+ uint32_t data_copies, data_rimage_count;
+
seg_count++;
if (seg->le != le) {
log_error("LV %s invalid: segment %u should begin at "
@@ -147,13 +141,27 @@ int check_lv_segments(struct logical_volume *lv, int complete_vg)
inc_error_count;
}
- area_multiplier = seg_is_striped(seg) ? seg->area_count - seg->segtype->parity_devs : 1;
+ area_multiplier = (seg_is_striped_raid(seg) || seg_is_striped(seg)) ? seg->area_count - seg->segtype->parity_devs : 1;
+ data_rimage_count = seg->area_count - seg->segtype->parity_devs;
+
+PFLA("lv=%s segtype=%s seg->len=%u seg->area_len=%u seg->area_count=%u data_rimage_count=%u parity_devs=%u area_multiplier=%u seg->data_copies=%u rimageextents=%u seg->reshape_len=%u", lv->name, seg->segtype->name, seg->len, seg->area_len, seg->area_count, data_rimage_count, seg->segtype->parity_devs, area_multiplier, seg->data_copies, raid_rimage_extents(seg->segtype, seg->len - data_rimage_count * seg->reshape_len, data_rimage_count, seg->data_copies), seg->reshape_len);
+#if 1
+ data_copies = seg_is_any_raid10(seg) ? seg->data_copies : 1;
+ if (raid_rimage_extents(seg->segtype, seg->len - data_rimage_count * seg->reshape_len,
+ data_rimage_count, seg->data_copies) != seg->area_len - data_copies * seg->reshape_len) {
+#else
+ if (seg->area_len * area_multiplier != seg->len) {
+#endif
+ log_error("LV %s: segment %u with len=%u "
+ " has inconsistent area_len %u",
+ lv->name, seg_count, seg->len, seg->area_len);
+ inc_error_count;
+ }
-PFLA("segtype=%s seg->area_len=%u seg->area_count=%u parity_devs=%u area_multiplier=%u seg->len=%u", seg->segtype->name, seg->area_len, seg->area_count, seg->segtype->parity_devs, area_multiplier, seg->len);
- if (seg->area_len * area_multiplier != seg->len) { // - seg->reshape_len) {
- log_error("LV %s: segment %u has inconsistent "
- "area_len %u",
- lv->name, seg_count, seg->area_len);
+ if (lv_is_error_when_full(lv) &&
+ !seg_can_error_when_full(seg)) {
+ log_error("LV %s: segment %u (%s) does not support flag "
+ "ERROR_WHEN_FULL.", lv->name, seg_count, seg->segtype->name);
inc_error_count;
}
@@ -211,6 +219,26 @@ PFLA("segtype=%s seg->area_len=%u seg->area_count=%u parity_devs=%u area_multipl
}
}
+ if (seg_is_cache_pool(seg) &&
+ !dm_list_empty(&seg->lv->segs_using_this_lv)) {
+ switch (seg->feature_flags &
+ (DM_CACHE_FEATURE_PASSTHROUGH |
+ DM_CACHE_FEATURE_WRITETHROUGH |
+ DM_CACHE_FEATURE_WRITEBACK)) {
+ case DM_CACHE_FEATURE_PASSTHROUGH:
+ case DM_CACHE_FEATURE_WRITETHROUGH:
+ case DM_CACHE_FEATURE_WRITEBACK:
+ break;
+ default:
+ log_error("LV %s has invalid cache's feature flag.",
+ lv->name);
+ inc_error_count;
+ }
+ if (!seg->policy_name) {
+ log_error("LV %s is missing cache policy name.", lv->name);
+ inc_error_count;
+ }
+ }
if (seg_is_pool(seg)) {
if (seg->area_count != 1 ||
seg_type(seg, 0) != AREA_LV) {
@@ -234,8 +262,7 @@ PFLA("segtype=%s seg->area_len=%u seg->area_count=%u parity_devs=%u area_multipl
inc_error_count;
}
- if (seg_is_pool(seg) &&
- !validate_pool_chunk_size(lv->vg->cmd, seg->segtype, seg->chunk_size)) {
+ if (!validate_pool_chunk_size(lv->vg->cmd, seg->segtype, seg->chunk_size)) {
log_error("LV %s: %s segment %u has invalid chunk size %u.",
lv->name, seg->segtype->name, seg_count, seg->chunk_size);
inc_error_count;
@@ -413,7 +440,6 @@ PFLA("segtype=%s seg->area_len=%u seg->area_count=%u parity_devs=%u area_multipl
for (s = 0; s < seg->area_count; s++) {
if (seg_type(seg, s) != AREA_LV)
continue;
-PFLA("lv=%s s=%u seg->status=%lX seg_lv(seg, %u)=%s", lv->name, s, seg->status, s, seg_lv(seg, s)->name);
/* HM FIXME: TESTME */
if (seg_is_raid(seg) && seg->meta_areas && lv == seg_metalv(seg, s))
{
@@ -486,7 +512,7 @@ PFLA("lv=%s s=%u seg->status=%lX seg_lv(seg, %u)=%s", lv->name, s, seg->status,
inc_error_count;
}
}
-PFL();
+PFLA("error_count=%u", error_count);
if (le != lv->le_count) {
log_error("LV %s: inconsistent LE count %u != %u",
@@ -521,7 +547,7 @@ static int _lv_split_segment(struct logical_volume *lv, struct lv_segment *seg,
seg->lv, seg->le, seg->len, seg->reshape_len,
seg->status, seg->stripe_size,
seg->log_lv,
- seg->area_count, seg->area_len,
+ seg->area_count, seg->area_len, seg->data_copies,
seg->chunk_size, seg->region_size,
seg->extents_copied, seg->pvmove_source_seg))) {
log_error("Couldn't allocate cloned LV segment.");
@@ -533,9 +559,10 @@ static int _lv_split_segment(struct logical_volume *lv, struct lv_segment *seg,
return 0;
}
- /* In case of a striped segment, the offset has to be / stripes */
+ /* In case of a striped/raid10_far segment, the offset has to be / stripes */
area_offset = offset;
- if (seg_is_striped(seg))
+ if (seg_is_striped(seg) ||
+ seg_is_raid10_far(seg))
area_offset /= seg->area_count;
split_seg->area_len -= area_offset;
diff --git a/lib/metadata/metadata-exported.h b/lib/metadata/metadata-exported.h
index c82251d40..7f1c395ff 100644
--- a/lib/metadata/metadata-exported.h
+++ b/lib/metadata/metadata-exported.h
@@ -27,7 +27,7 @@
#include "lv.h"
#include "lvm-percent.h"
-#define MAX_STRIPES 251U
+#define MAX_AREAS 253U
#define SECTOR_SHIFT 9L
#define SECTOR_SIZE ( 1L << SECTOR_SHIFT )
#define STRIPE_SIZE_MIN ( (unsigned) lvm_getpagesize() >> SECTOR_SHIFT) /* PAGESIZE in sectors */
@@ -58,6 +58,7 @@
#define LVM_READ UINT64_C(0x0000000000000100) /* LV, VG */
#define LVM_WRITE UINT64_C(0x0000000000000200) /* LV, VG */
+#define LVM_WRITE_LOCKED UINT64_C(0x0020000000000000) /* LV, VG */
#define CLUSTERED UINT64_C(0x0000000000000400) /* VG */
//#define SHARED UINT64_C(0x0000000000000800) /* VG */
@@ -100,7 +101,6 @@
#define THIN_POOL_DATA UINT64_C(0x0000004000000000) /* LV - Internal use only */
#define THIN_POOL_METADATA UINT64_C(0x0000008000000000) /* LV - Internal use only */
#define POOL_METADATA_SPARE UINT64_C(0x0000010000000000) /* LV - Internal use only */
-
#define LV_WRITEMOSTLY UINT64_C(0x0000020000000000) /* LV (RAID1) */
#define LV_ACTIVATION_SKIP UINT64_C(0x0000040000000000) /* LV */
@@ -126,7 +126,20 @@
e.g. to prohibit allocation of a RAID image
on a PV already holding an image of the RAID set */
-/* Next unused flag: UINT64_C(0x0040000000000000) */
+#define LV_REMOVED UINT64_C(0x0040000000000000) /* LV - Internal use only
+ This flag is used to mark an LV once it has
+ been removed from the VG. It might still
+ be referenced on internal lists of LVs.
+ Any remaining references should check for
+ this flag and ignore the LV if it is set.
+ FIXME: Remove this flag once we have indexed
+ vg->removed_lvs for quick lookup.
+ */
+#define LV_ERROR_WHEN_FULL UINT64_C(0x0080000000000000) /* LV - error when full */
+#define LOCKD_SANLOCK_LV UINT64_C(0x0100000000000000) /* LV - Internal use only */
+#define LV_RESHAPE_REMOVED UINT64_C(0x0200000000000000) /* LV got removed from raid set by shrinking reshape */
+#define LV_DUPLICATED UINT64_C(0x0400000000000000) /* LV - duplicated, internal use only */
+/* Next unused flag: UINT64_C(0x0800000000000000) */
/* Format features flags */
#define FMT_SEGMENTS 0x00000001U /* Arbitrary segment params? */
@@ -143,6 +156,9 @@
#define FMT_CONFIG_PROFILE 0x000000800U /* Supports configuration profiles? */
#define FMT_OBSOLETE 0x000001000U /* Obsolete format? */
#define FMT_NON_POWER2_EXTENTS 0x000002000U /* Non-power-of-2 extent sizes? */
+#define FMT_SYSTEMID_ON_PVS 0x000004000U /* System ID is stored on PVs not VG */
+
+#define systemid_on_pvs(vg) ((vg)->fid->fmt->features & FMT_SYSTEMID_ON_PVS)
/* Mirror conversion type flags */
#define MIRROR_BY_SEG 0x00000001U /* segment-by-segment mirror */
@@ -170,6 +186,9 @@
#define FAILED_ALLOCATION 0x00000080U
#define FAILED_EXIST 0x00000100U
#define FAILED_RECOVERY 0x00000200U
+#define FAILED_SYSTEMID 0x00000400U
+#define FAILED_LOCK_TYPE 0x00000800U
+#define FAILED_LOCK_MODE 0x00001000U
#define SUCCESS 0x00000000U
#define VGMETADATACOPIES_ALL UINT32_MAX
@@ -199,6 +218,7 @@
#define lv_is_mirror_type(lv) (((lv)->status & (MIRROR | MIRROR_LOG | MIRROR_IMAGE)) ? 1 : 0)
#define lv_is_pending_delete(lv) (((lv)->status & LV_PENDING_DELETE) ? 1 : 0)
+#define lv_is_error_when_full(lv) (((lv)->status & LV_ERROR_WHEN_FULL) ? 1 : 0)
#define lv_is_pvmove(lv) (((lv)->status & PVMOVE) ? 1 : 0)
#define lv_is_raid(lv) (((lv)->status & RAID) ? 1 : 0)
@@ -216,9 +236,14 @@
#define lv_is_pool_data(lv) (((lv)->status & (CACHE_POOL_DATA | THIN_POOL_DATA)) ? 1 : 0)
#define lv_is_pool_metadata(lv) (((lv)->status & (CACHE_POOL_METADATA | THIN_POOL_METADATA)) ? 1 : 0)
#define lv_is_pool_metadata_spare(lv) (((lv)->status & POOL_METADATA_SPARE) ? 1 : 0)
+#define lv_is_lockd_sanlock_lv(lv) (((lv)->status & LOCKD_SANLOCK_LV) ? 1 : 0)
#define lv_is_rlog(lv) (((lv)->status & REPLICATOR_LOG) ? 1 : 0)
+#define lv_is_removed(lv) (((lv)->status & LV_REMOVED) ? 1 : 0)
+#define lv_is_removed(lv) (((lv)->status & LV_REMOVED) ? 1 : 0)
+#define lv_is_duplicated(lv) (((lv)->status & LV_DUPLICATED) ? 1 : 0)
+
int lv_layout_and_role(struct dm_pool *mem, const struct logical_volume *lv,
struct dm_list **layout, struct dm_list **role);
@@ -248,6 +273,14 @@ typedef enum {
THIN_DISCARDS_PASSDOWN,
} thin_discards_t;
+typedef enum {
+ LOCK_TYPE_INVALID = -1,
+ LOCK_TYPE_NONE = 0,
+ LOCK_TYPE_CLVM = 1,
+ LOCK_TYPE_DLM = 2,
+ LOCK_TYPE_SANLOCK = 3,
+} lock_type_t;
+
struct cmd_context;
struct format_handler;
struct labeller;
@@ -413,8 +446,7 @@ struct lv_segment {
uint32_t writebehind; /* For RAID (RAID1 only) */
uint32_t min_recovery_rate; /* For RAID */
uint32_t max_recovery_rate; /* For RAID */
- uint32_t data_offset; /* For RAID, data offset in sectors on each data component image
- overloaded by setting 1 to cause emmiting 0 offset */
+ uint32_t data_offset; /* For RAID, data offset in sectors on each data component image */
uint32_t area_count;
uint32_t area_len;
uint32_t chunk_size; /* For snapshots/thin_pool. In sectors. */
@@ -423,7 +455,8 @@ struct lv_segment {
struct logical_volume *merge_lv; /* thin, merge descendent lv into this ancestor */
struct logical_volume *cow;
struct dm_list origin_list;
- uint32_t region_size; /* For mirrors, replicators - in sectors */
+ uint32_t region_size; /* For mirrors, raid, replicators - in sectors */
+ uint32_t data_copies; /* For RAID10 */
uint32_t extents_copied;
struct logical_volume *log_lv;
struct lv_segment *pvmove_source_seg;
@@ -484,6 +517,12 @@ struct vg_list {
struct volume_group *vg;
};
+struct vgnameid_list {
+ struct dm_list list;
+ const char *vg_name;
+ const char *vgid;
+};
+
#define PV_PE_START_CALC ((uint64_t) -1) /* Calculate pe_start value */
struct pvcreate_restorable_params {
@@ -596,6 +635,8 @@ void lv_set_hidden(struct logical_volume *lv);
struct dm_list *get_vgnames(struct cmd_context *cmd, int include_internal);
struct dm_list *get_vgids(struct cmd_context *cmd, int include_internal);
+int get_vgnameids(struct cmd_context *cmd, struct dm_list *vgnameids,
+ const char *only_this_vgname, int include_internal);
int scan_vgs_for_pvs(struct cmd_context *cmd, uint32_t warn_flags);
int pv_write(struct cmd_context *cmd, struct physical_volume *pv, int allow_non_orphan);
@@ -622,9 +663,9 @@ int lv_resize(struct cmd_context *cmd, struct logical_volume *lv,
* Return a handle to VG metadata.
*/
struct volume_group *vg_read(struct cmd_context *cmd, const char *vg_name,
- const char *vgid, uint32_t flags);
+ const char *vgid, uint32_t flags, uint32_t lockd_state);
struct volume_group *vg_read_for_update(struct cmd_context *cmd, const char *vg_name,
- const char *vgid, uint32_t flags);
+ const char *vgid, uint32_t flags, uint32_t lockd_state);
/*
* Test validity of a VG handle.
@@ -646,7 +687,9 @@ struct physical_volume *pv_create(const struct cmd_context *cmd,
int pvremove_single(struct cmd_context *cmd, const char *pv_name,
void *handle __attribute__((unused)), unsigned force_count,
- unsigned prompt);
+ unsigned prompt, struct dm_list *pvslist);
+int pvremove_many(struct cmd_context *cmd, struct dm_list *pv_names,
+ unsigned force_count, unsigned prompt);
int pv_resize_single(struct cmd_context *cmd,
struct volume_group *vg,
@@ -665,6 +708,7 @@ struct volume_group *vg_create(struct cmd_context *cmd, const char *vg_name);
int vg_remove_mdas(struct volume_group *vg);
int vg_remove_check(struct volume_group *vg);
void vg_remove_pvs(struct volume_group *vg);
+int vg_remove_direct(struct volume_group *vg);
int vg_remove(struct volume_group *vg);
int vg_rename(struct cmd_context *cmd, struct volume_group *vg,
const char *new_name);
@@ -718,6 +762,8 @@ int lv_empty(struct logical_volume *lv);
/* Empty an LV and add error segment */
int replace_lv_with_error_segment(struct logical_volume *lv);
+int lv_refresh_suspend_resume(struct cmd_context *cmd, struct logical_volume *lv);
+
/* Entry point for all LV extent allocations */
int lv_extend(struct logical_volume *lv,
const struct segment_type *segtype,
@@ -808,7 +854,8 @@ typedef enum activation_change {
CHANGE_AEY = 2, /* activate exclusively */
CHANGE_ALY = 3, /* activate locally */
CHANGE_ALN = 4, /* deactivate locally */
- CHANGE_AAY = 5 /* automatic activation */
+ CHANGE_AAY = 5, /* automatic activation */
+ CHANGE_ASY = 6 /* activate shared */
} activation_change_t;
/* Returns true, when change activates device */
@@ -817,6 +864,20 @@ static inline int is_change_activating(activation_change_t change)
return ((change != CHANGE_AN) && (change != CHANGE_ALN));
}
+struct lv_raid_convert_params {
+ struct segment_type *segtype;
+ int yes;
+ int force;
+ int duplicate;
+ int unduplicate;
+ const int data_copies; /* to be able to detect -m0; -1 if no data copy change requested */
+ const unsigned region_size;
+ const unsigned stripes;
+ const unsigned stripe_size;
+ const char *lv_name; /* sub-lv name to unduplicate or pool LV name to create a duplicated thin LV */
+ struct dm_list *allocate_pvs;
+};
+
/* FIXME: refactor and reduce the size of this struct! */
struct lvcreate_params {
/* flags */
@@ -827,7 +888,7 @@ struct lvcreate_params {
int32_t major; /* all */
int32_t minor; /* all */
int log_count; /* mirror */
- int nosync; /* mirror */
+ int nosync; /* mirror, raid */
int pool_metadata_spare; /* pools */
int type; /* type arg is given */
int temporary; /* temporary LV */
@@ -840,12 +901,15 @@ struct lvcreate_params {
#define THIN_CHUNK_SIZE_CALC_METHOD_GENERIC 0x01
#define THIN_CHUNK_SIZE_CALC_METHOD_PERFORMANCE 0x02
int thin_chunk_size_calc_policy;
+ unsigned needs_lockd_init : 1;
const char *vg_name; /* only-used when VG is not yet opened (in /tools) */
const char *lv_name; /* all */
const char *origin_name; /* snap */
const char *pool_name; /* thin */
+ const char *lock_args;
+
/* Keep args given by the user on command line */
/* FIXME: create some more universal solution here */
#define PASS_ARG_CHUNK_SIZE 0x01
@@ -864,8 +928,9 @@ struct lvcreate_params {
uint32_t min_recovery_rate; /* RAID */
uint32_t max_recovery_rate; /* RAID */
- uint64_t feature_flags; /* cache */
- struct dm_config_tree *cache_policy; /* cache */
+ const char *cache_mode; /* cache */
+ const char *policy_name; /* cache */
+ struct dm_config_tree *policy_settings; /* cache */
const struct segment_type *segtype; /* all */
unsigned target_attr; /* all */
@@ -880,6 +945,7 @@ struct lvcreate_params {
struct dm_list *pvh; /* all */
uint64_t permission; /* all */
+ unsigned error_when_full; /* when segment supports it */
uint32_t read_ahead; /* all */
int approx_alloc; /* all */
alloc_policy_t alloc; /* all */
@@ -1028,11 +1094,16 @@ int lv_remove_mirrors(struct cmd_context *cmd, struct logical_volume *lv,
const char *get_mirror_log_name(int log_count);
int set_mirror_log_count(int *log_count, const char *mirrorlog);
+int cluster_mirror_is_available(struct cmd_context *cmd);
int is_temporary_mirror_layer(const struct logical_volume *lv);
struct logical_volume * find_temporary_mirror(const struct logical_volume *lv);
uint32_t lv_mirror_count(const struct logical_volume *lv);
+
+/* Remove CMIRROR_REGION_COUNT_LIMIT when http://bugzilla.redhat.com/682771 is fixed */
+#define CMIRROR_REGION_COUNT_LIMIT (256*1024 * 8)
uint32_t adjusted_mirror_region_size(uint32_t extent_size, uint32_t extents,
- uint32_t region_size, int internal);
+ uint32_t region_size, int internal, int clustered);
+
int remove_mirrors_from_segments(struct logical_volume *lv,
uint32_t new_mirrors, uint64_t status_mask);
int add_mirrors_to_segments(struct cmd_context *cmd, struct logical_volume *lv,
@@ -1086,26 +1157,32 @@ struct logical_volume *first_replicator_dev(const struct logical_volume *lv);
uint32_t raid_rmeta_extents_delta(struct cmd_context *cmd,
uint32_t rimage_extents_cur, uint32_t rimage_extents_new,
uint32_t region_size, uint32_t extent_size);
+int lv_raid_in_sync(const struct logical_volume *lv);
int lv_is_raid_with_tracking(const struct logical_volume *lv);
uint32_t lv_raid_image_count(const struct logical_volume *lv);
-int lv_raid_change_image_count(struct logical_volume *lv,
- uint32_t new_count, struct dm_list *pvs);
-int lv_raid_split(struct logical_volume *lv, const char *split_name,
+int lv_raid_split(struct logical_volume *lv, int yes, const char *split_name,
uint32_t new_count, struct dm_list *splittable_pvs);
int lv_raid_split_and_track(struct logical_volume *lv,
+ int yes,
+ const char *sub_lv_name,
struct dm_list *splittable_pvs);
int lv_raid_merge(struct logical_volume *lv);
int lv_raid_convert(struct logical_volume *lv,
- const struct segment_type *new_segtype,
- int yes, int force,
- const unsigned image_count,
- const unsigned stripes,
- const unsigned new_stripe_size,
- struct dm_list *allocate_pvs);
-int lv_raid_replace(struct logical_volume *lv, struct dm_list *remove_pvs,
+ struct lv_raid_convert_params p);
+int lv_raid_replace(struct logical_volume *lv, int yes,
+ struct dm_list *remove_pvs,
struct dm_list *allocate_pvs);
int lv_raid_remove_missing(struct logical_volume *lv);
int partial_raid_lv_supports_degraded_activation(const struct logical_volume *lv);
+int lv_raid10_far_reorder_segments(struct logical_volume *lv, uint32_t extents, int extend);
+uint32_t raid_rimage_extents(const struct segment_type *segtype, uint32_t extents, uint32_t stripes, uint32_t data_copies);
+uint32_t raid_total_extents(const struct segment_type *segtype, uint32_t extents, uint32_t stripes, uint32_t data_copies);
+int lv_create_raid01(struct logical_volume *lv,
+ const struct segment_type *segtype,
+ unsigned mirrors, unsigned stripes,
+ unsigned stripe_size, unsigned region_size,
+ unsigned extents,
+ struct dm_list *allocate_pvs);
/* -- metadata/raid_manip.c */
/* ++ metadata/cache_manip.c */
@@ -1117,8 +1194,11 @@ struct lv_status_cache {
dm_percent_t dirty_usage;
};
-const char *get_cache_pool_cachemode_name(const struct lv_segment *seg);
-int set_cache_pool_feature(uint64_t *feature_flags, const char *str);
+const char *get_cache_mode_name(const struct lv_segment *cache_seg);
+int cache_mode_is_set(const struct lv_segment *seg);
+int cache_set_mode(struct lv_segment *cache_seg, const char *str);
+int cache_set_policy(struct lv_segment *cache_seg, const char *name,
+ const struct dm_config_tree *settings);
int update_cache_pool_params(const struct segment_type *segtype,
struct volume_group *vg, unsigned attr,
int passed_args, uint32_t pool_data_extents,
@@ -1129,7 +1209,6 @@ int validate_lv_cache_create_origin(const struct logical_volume *origin_lv);
struct logical_volume *lv_cache_create(struct logical_volume *pool,
struct logical_volume *origin);
int lv_cache_remove(struct logical_volume *cache_lv);
-int lv_cache_setpolicy(struct logical_volume *cache_lv, struct dm_config_tree *pol);
int wipe_cache_pool(struct logical_volume *cache_pool_lv);
/* -- metadata/cache_manip.c */
@@ -1170,6 +1249,7 @@ char *generate_lv_name(struct volume_group *vg, const char *format,
int pv_change_metadataignore(struct physical_volume *pv, uint32_t mda_ignore);
+int vg_flag_write_locked(struct volume_group *vg);
int vg_check_write_mode(struct volume_group *vg);
#define vg_is_clustered(vg) (vg_status((vg)) & CLUSTERED)
#define vg_is_exported(vg) (vg_status((vg)) & EXPORTED_VG)
@@ -1188,6 +1268,9 @@ struct vgcreate_params {
alloc_policy_t alloc;
int clustered; /* FIXME: put this into a 'status' variable instead? */
uint32_t vgmetadatacopies;
+ const char *system_id;
+ const char *lock_type;
+ const char *lock_args;
};
int validate_major_minor(const struct cmd_context *cmd,
@@ -1199,4 +1282,7 @@ int vgcreate_params_validate(struct cmd_context *cmd,
int validate_vg_rename_params(struct cmd_context *cmd,
const char *vg_name_old,
const char *vg_name_new);
+
+int is_lockd_type(const char *lock_type);
+
#endif
diff --git a/lib/metadata/metadata.c b/lib/metadata/metadata.c
index 1152e5538..6bc95d50c 100644
--- a/lib/metadata/metadata.c
+++ b/lib/metadata/metadata.c
@@ -1,6 +1,6 @@
/*
* Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
- * Copyright (C) 2004-2012 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2004-2015 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
@@ -31,19 +31,11 @@
#include "locking.h"
#include "archiver.h"
#include "defaults.h"
+#include "lvmlockd.h"
#include <math.h>
#include <sys/param.h>
-/* HM FIXME: REMOVEME: devel output */
-#ifdef USE_PFL
-#define PFL() printf("%s %u\n", __func__, __LINE__);
-#define PFLA(format, arg...) printf("%s %u " format "\n", __func__, __LINE__, arg);
-#else
-#define PFL()
-#define PFLA(format, arg...)
-#endif
-
static struct physical_volume *_pv_read(struct cmd_context *cmd,
struct dm_pool *pvmem,
const char *pv_name,
@@ -566,20 +558,14 @@ void vg_remove_pvs(struct volume_group *vg)
}
}
-int vg_remove(struct volume_group *vg)
+int vg_remove_direct(struct volume_group *vg)
{
struct physical_volume *pv;
struct pv_list *pvl;
int ret = 1;
- if (!lock_vol(vg->cmd, VG_ORPHANS, LCK_VG_WRITE, NULL)) {
- log_error("Can't get lock for orphan PVs");
- return 0;
- }
-
if (!vg_remove_mdas(vg)) {
log_error("vg_remove_mdas %s failed", vg->name);
- unlock_vg(vg->cmd, VG_ORPHANS);
return 0;
}
@@ -613,6 +599,8 @@ int vg_remove(struct volume_group *vg)
if (!lvmetad_vg_remove(vg))
stack;
+ lockd_vg_update(vg);
+
if (!backup_remove(vg->cmd, vg->name))
stack;
@@ -621,6 +609,20 @@ int vg_remove(struct volume_group *vg)
else
log_error("Volume group \"%s\" not properly removed", vg->name);
+ return ret;
+}
+
+int vg_remove(struct volume_group *vg)
+{
+ int ret;
+
+ if (!lock_vol(vg->cmd, VG_ORPHANS, LCK_VG_WRITE, NULL)) {
+ log_error("Can't get lock for orphan PVs");
+ return 0;
+ }
+
+ ret = vg_remove_direct(vg);
+
unlock_vg(vg->cmd, VG_ORPHANS);
return ret;
}
@@ -1018,7 +1020,6 @@ struct volume_group *vg_create(struct cmd_context *cmd, const char *vg_name)
.context.vg_ref.vg_name = vg_name
};
struct format_instance *fid;
- int consistent = 0;
uint32_t rc;
if (!validate_name(vg_name)) {
@@ -1032,15 +1033,6 @@ struct volume_group *vg_create(struct cmd_context *cmd, const char *vg_name)
/* NOTE: let caller decide - this may be check for existence */
return _vg_make_handle(cmd, NULL, rc);
- /* FIXME: Is this vg_read_internal necessary? Move it inside
- vg_lock_newname? */
- /* is this vg name already in use ? */
- if ((vg = vg_read_internal(cmd, vg_name, NULL, WARN_PV_READ, &consistent))) {
- log_error("A volume group called '%s' already exists.", vg_name);
- unlock_and_release_vg(cmd, vg, vg_name);
- return _vg_make_handle(cmd, NULL, FAILED_EXIST);
- }
-
/* Strip dev_dir if present */
vg_name = strip_dir(vg_name, cmd->dev_dir);
@@ -1054,10 +1046,10 @@ struct volume_group *vg_create(struct cmd_context *cmd, const char *vg_name)
}
vg->status = (RESIZEABLE_VG | LVM_READ | LVM_WRITE);
- if (!(vg->system_id = dm_pool_zalloc(vg->vgmem, NAME_LEN + 1)))
+ vg->system_id = NULL;
+ if (!(vg->lvm1_system_id = dm_pool_zalloc(vg->vgmem, NAME_LEN + 1)))
goto_bad;
- *vg->system_id = '\0';
vg->extent_size = DEFAULT_EXTENT_SIZE * 2;
vg->max_lv = DEFAULT_MAX_LV;
vg->max_pv = DEFAULT_MAX_PV;
@@ -1425,7 +1417,7 @@ int vg_split_mdas(struct cmd_context *cmd __attribute__((unused)),
* 0 indicates we may not.
*/
static int _pvcreate_check(struct cmd_context *cmd, const char *name,
- struct pvcreate_params *pp)
+ struct pvcreate_params *pp, int *wiped)
{
struct physical_volume *pv;
struct device *dev;
@@ -1435,6 +1427,8 @@ static int _pvcreate_check(struct cmd_context *cmd, const char *name,
/* FIXME Check partition type is LVM unless --force is given */
+ *wiped = 0;
+
/* Is there a pv here already? */
pv = find_pv_by_name(cmd, name, 1, 1);
@@ -1459,6 +1453,33 @@ static int _pvcreate_check(struct cmd_context *cmd, const char *name,
dev = dev_cache_get(name, cmd->full_filter);
+ /*
+ * Refresh+rescan at the end is needed if:
+ * - we don't obtain device list from udev,
+ * hence persistent cache file is used
+ * and we need to trash it and reevaluate
+ * for any changes done outside - adding
+ * any new foreign signature which may affect
+ * filtering - before we do pvcreate, we
+ * need to be sure that we have up-to-date
+ * view for filters
+ *
+ * - we have wiped existing foreign signatures
+ * from dev as this may affect what's filtered
+ * as well
+ *
+ *
+ * Only rescan at the end is needed if:
+ * - we've just checked whether dev is filtered
+ * by MD filter. We do the refresh in-situ,
+ * so no need to require the refresh at the
+ * end of this fn. This is to allow for
+ * wiping MD signature during pvcreate for
+ * the dev - the dev would normally be
+ * filtered because of MD filter.
+ * This is an exception.
+ */
+
/* Is there an md superblock here? */
if (!dev && md_filtering()) {
if (!refresh_filters(cmd))
@@ -1469,7 +1490,8 @@ static int _pvcreate_check(struct cmd_context *cmd, const char *name,
init_md_filtering(1);
scan_needed = 1;
- }
+ } else if (!obtain_device_list_from_udev())
+ filter_refresh_needed = scan_needed = 1;
if (!dev) {
log_error("Device %s not found (or ignored by filtering).", name);
@@ -1488,12 +1510,13 @@ static int _pvcreate_check(struct cmd_context *cmd, const char *name,
if (!wipe_known_signatures(cmd, dev, name,
TYPE_LVM1_MEMBER | TYPE_LVM2_MEMBER,
- 0, pp->yes, pp->force)) {
+ 0, pp->yes, pp->force, wiped)) {
log_error("Aborting pvcreate on %s.", name);
goto out;
- } else
+ }
+
+ if (*wiped)
filter_refresh_needed = scan_needed = 1;
-
if (sigint_caught())
goto_out;
@@ -1514,11 +1537,12 @@ out:
r = 0;
}
- if (scan_needed)
+ if (scan_needed) {
if (!lvmcache_label_scan(cmd, 2)) {
stack;
r = 0;
}
+ }
free_pv_fid(pv);
return r;
@@ -1629,9 +1653,11 @@ struct physical_volume *pvcreate_vol(struct cmd_context *cmd, const char *pv_nam
{
struct physical_volume *pv = NULL;
struct device *dev;
+ int wiped = 0;
struct dm_list mdas;
struct pvcreate_params default_pp;
char buffer[64] __attribute__((aligned(8)));
+ dev_ext_t dev_ext_src;
pvcreate_params_set_defaults(&default_pp);
if (!pp)
@@ -1653,13 +1679,32 @@ struct physical_volume *pvcreate_vol(struct cmd_context *cmd, const char *pv_nam
}
}
- if (!_pvcreate_check(cmd, pv_name, pp))
+ if (!_pvcreate_check(cmd, pv_name, pp, &wiped))
goto_bad;
if (sigint_caught())
goto_bad;
- if (!(dev = dev_cache_get(pv_name, cmd->full_filter))) {
+ /*
+ * wipe_known_signatures called in _pvcreate_check fires
+ * WATCH event to update udev database. But at the moment,
+ * we have no way to synchronize with such event - we may
+ * end up still seeing the old info in udev db and pvcreate
+ * can fail to proceed because of the device still being
+ * filtered (because of the stale info in udev db).
+ * Disable udev dev-ext source temporarily here for
+ * this reason and rescan with DEV_EXT_NONE dev-ext
+ * source (so filters use DEV_EXT_NONE source).
+ */
+ dev_ext_src = external_device_info_source();
+ if (wiped && (dev_ext_src == DEV_EXT_UDEV))
+ init_external_device_info_source(DEV_EXT_NONE);
+
+ dev = dev_cache_get(pv_name, cmd->full_filter);
+
+ init_external_device_info_source(dev_ext_src);
+
+ if (!dev) {
log_error("%s: Couldn't find device. Check your filters?",
pv_name);
goto bad;
@@ -2394,6 +2439,7 @@ struct validate_hash {
struct dm_hash_table *lvname;
struct dm_hash_table *lvid;
struct dm_hash_table *pvid;
+ struct dm_hash_table *lv_lock_args;
};
/*
@@ -2438,6 +2484,75 @@ static int _lv_validate_references_single(struct logical_volume *lv, void *data)
return r;
}
+/*
+ * Format is <version>:<info>
+ */
+static int _validate_lock_args_chars(const char *lock_args)
+{
+ int i;
+ char c;
+ int found_colon = 0;
+ int r = 1;
+
+ for (i = 0; i < strlen(lock_args); i++) {
+ c = lock_args[i];
+
+ if (!isalnum(c) && c != '.' && c != '_' && c != '-' && c != '+' && c != ':') {
+ log_error(INTERNAL_ERROR "Invalid character at index %d of lock_args \"%s\"",
+ i, lock_args);
+ r = 0;
+ }
+
+ if (c == ':' && found_colon) {
+ log_error(INTERNAL_ERROR "Invalid colon at index %d of lock_args \"%s\"",
+ i, lock_args);
+ r = 0;
+ }
+
+ if (c == ':')
+ found_colon = 1;
+ }
+
+ return r;
+}
+
+static int _validate_vg_lock_args(struct volume_group *vg)
+{
+ if (!_validate_lock_args_chars(vg->lock_args)) {
+ log_error(INTERNAL_ERROR "VG %s has invalid lock_args chars", vg->name);
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * For lock_type sanlock, LV lock_args are <version>:<info>
+ * For lock_type dlm, LV lock_args are not used, and lock_args is
+ * just set to "dlm".
+ */
+static int _validate_lv_lock_args(struct logical_volume *lv)
+{
+ int r = 1;
+
+ if (!strcmp(lv->vg->lock_type, "sanlock")) {
+ if (!_validate_lock_args_chars(lv->lock_args)) {
+ log_error(INTERNAL_ERROR "LV %s/%s has invalid lock_args chars",
+ lv->vg->name, display_lvname(lv));
+ return 0;
+ }
+
+ } else if (!strcmp(lv->vg->lock_type, "dlm")) {
+ if (strcmp(lv->lock_args, "dlm")) {
+ log_error(INTERNAL_ERROR "LV %s/%s has invalid lock_args \"%s\"",
+ lv->vg->name, display_lvname(lv), lv->lock_args);
+ r = 0;
+ }
+ }
+
+ return r;
+}
+
int vg_validate(struct volume_group *vg)
{
struct pv_list *pvl;
@@ -2461,6 +2576,12 @@ int vg_validate(struct volume_group *vg)
r = 0;
}
+ if (vg->status & LVM_WRITE_LOCKED) {
+ log_error(INTERNAL_ERROR "VG %s has external flag LVM_WRITE_LOCKED set internally.",
+ vg->name);
+ r = 0;
+ }
+
/* FIXME Also check there's no data/metadata overlap */
if (!(vhash.pvid = dm_hash_create(vg->pv_count))) {
log_error("Failed to allocate pvid hash.");
@@ -2528,12 +2649,32 @@ int vg_validate(struct volume_group *vg)
r = 0;
}
+ dm_list_iterate_items(lvl, &vg->removed_lvs) {
+ if (!(lvl->lv->status & LV_REMOVED)) {
+ log_error(INTERNAL_ERROR "LV %s is not marked as removed while it's part "
+ "of removed LV list for VG %s", lvl->lv->name, vg->name);
+ r = 0;
+ }
+ }
+
/*
* Count all non-snapshot invisible LVs
*/
dm_list_iterate_items(lvl, &vg->lvs) {
lv_count++;
+ if (lvl->lv->status & LV_REMOVED) {
+ log_error(INTERNAL_ERROR "LV %s is marked as removed while it's "
+ "still part of the VG %s", lvl->lv->name, vg->name);
+ r = 0;
+ }
+
+ if (lvl->lv->status & LVM_WRITE_LOCKED) {
+ log_error(INTERNAL_ERROR "LV %s has external flag LVM_WRITE_LOCKED set internally.",
+ lvl->lv->name);
+ r = 0;
+ }
+
dev_name_len = strlen(lvl->lv->name) + vg_name_len + 3;
if (dev_name_len >= NAME_LEN) {
log_error(INTERNAL_ERROR "LV name \"%s/%s\" length %"
@@ -2726,6 +2867,129 @@ int vg_validate(struct volume_group *vg)
if (vg_max_lv_reached(vg))
stack;
+
+ if (!(vhash.lv_lock_args = dm_hash_create(lv_count))) {
+ log_error("Failed to allocate lv_lock_args hash");
+ r = 0;
+ goto out;
+ }
+
+ if (is_lockd_type(vg->lock_type)) {
+ if (!vg->lock_args) {
+ log_error(INTERNAL_ERROR "VG %s with lock_type %s without lock_args",
+ vg->name, vg->lock_type);
+ r = 0;
+ }
+
+ if (vg_is_clustered(vg)) {
+ log_error(INTERNAL_ERROR "VG %s with lock_type %s is clustered",
+ vg->name, vg->lock_type);
+ r = 0;
+ }
+
+ if (vg->system_id && vg->system_id[0]) {
+ log_error(INTERNAL_ERROR "VG %s with lock_type %s has system_id %s",
+ vg->name, vg->lock_type, vg->system_id);
+ r = 0;
+ }
+
+ if (strcmp(vg->lock_type, "sanlock") && strcmp(vg->lock_type, "dlm")) {
+ log_error(INTERNAL_ERROR "VG %s has unknown lock_type %s",
+ vg->name, vg->lock_type);
+ r = 0;
+ }
+
+ if (!_validate_vg_lock_args(vg))
+ r = 0;
+ } else {
+ if (vg->lock_args) {
+ log_error(INTERNAL_ERROR "VG %s has lock_args %s without lock_type",
+ vg->name, vg->lock_args);
+ r = 0;
+ }
+ }
+
+ dm_list_iterate_items(lvl, &vg->lvs) {
+ if (is_lockd_type(vg->lock_type)) {
+ if (lockd_lv_uses_lock(lvl->lv)) {
+ if (vg->skip_validate_lock_args)
+ continue;
+
+ /*
+ * FIXME: make missing lock_args an error.
+ * There are at least two cases where this
+ * check doesn't work correctly:
+ *
+ * 1. When creating a cow snapshot,
+ * (lvcreate -s -L1M -n snap1 vg/lv1),
+ * lockd_lv_uses_lock() uses lv_is_cow()
+ * which depends on lv->snapshot being
+ * set, but it's not set at this point,
+ * so lockd_lv_uses_lock() cannot identify
+ * the LV as a cow_lv, and thinks it needs
+ * a lock when it doesn't. To fix this we
+ * probably need to validate by finding the
+ * origin LV, then finding all its snapshots
+ * which will have no lock_args.
+ *
+ * 2. When converting an LV to a thin pool
+ * without using an existing metadata LV,
+ * (lvconvert --type thin-pool vg/poolX),
+ * there is an intermediate LV created,
+ * probably for the metadata LV, and
+ * validate is called on the VG in this
+ * intermediate state, which finds the
+ * newly created LV which is not yet
+ * identified as a metadata LV, and
+ * does not have any lock_args. To fix
+ * this we might be able to find the place
+ * where the intermediate LV is created,
+ * and set new variable on it like for vgs,
+ * lv->skip_validate_lock_args.
+ */
+ if (!lvl->lv->lock_args) {
+ /*
+ log_verbose("LV %s/%s missing lock_args",
+ vg->name, lvl->lv->name);
+ r = 0;
+ */
+ continue;
+ }
+
+ if (!_validate_lv_lock_args(lvl->lv)) {
+ r = 0;
+ continue;
+ }
+
+ if (!strcmp(vg->lock_type, "sanlock")) {
+ if (dm_hash_lookup(vhash.lv_lock_args, lvl->lv->lock_args)) {
+ log_error(INTERNAL_ERROR "LV %s/%s has duplicate lock_args %s.",
+ vg->name, lvl->lv->name, lvl->lv->lock_args);
+ r = 0;
+ }
+
+ if (!dm_hash_insert(vhash.lv_lock_args, lvl->lv->lock_args, lvl)) {
+ log_error("Failed to hash lvname.");
+ r = 0;
+ }
+
+ }
+ } else {
+ if (lvl->lv->lock_args) {
+ log_error(INTERNAL_ERROR "LV %s/%s shouldn't have lock_args",
+ vg->name, lvl->lv->name);
+ r = 0;
+ }
+ }
+ } else {
+ if (lvl->lv->lock_args) {
+ log_error(INTERNAL_ERROR "LV %s/%s with no lock_type has lock_args %s",
+ vg->name, lvl->lv->name, lvl->lv->lock_args);
+ r = 0;
+ }
+ }
+ }
+
out:
if (vhash.lvid)
dm_hash_destroy(vhash.lvid);
@@ -2733,6 +2997,8 @@ out:
dm_hash_destroy(vhash.lvname);
if (vhash.pvid)
dm_hash_destroy(vhash.pvid);
+ if (vhash.lv_lock_args)
+ dm_hash_destroy(vhash.lv_lock_args);
return r;
}
@@ -2746,8 +3012,19 @@ int vg_write(struct volume_group *vg)
struct dm_list *mdah;
struct pv_to_create *pv_to_create;
struct metadata_area *mda;
+ struct lv_list *lvl;
int revert = 0, wrote = 0;
+ dm_list_iterate_items(lvl, &vg->lvs) {
+ if (lvl->lv->lock_args && !strcmp(lvl->lv->lock_args, "pending")) {
+ if (!lockd_init_lv_args(vg->cmd, vg, lvl->lv, vg->lock_type, &lvl->lv->lock_args)) {
+ log_error("Cannot allocate lock for new LV.");
+ return 0;
+ }
+ lvl->lv->new_lock_args = 1;
+ }
+ }
+
if (!vg_validate(vg))
return_0;
@@ -2816,6 +3093,7 @@ int vg_write(struct volume_group *vg)
}
if (revert || !wrote) {
+ log_error("Failed to write VG %s.", vg->name);
dm_list_uniterate(mdah, &vg->fid->metadata_areas_in_use, &mda->list) {
mda = dm_list_item(mdah, struct metadata_area);
@@ -2913,6 +3191,8 @@ int vg_commit(struct volume_group *vg)
cache_updated = _vg_commit_mdas(vg);
+ lockd_vg_update(vg);
+
if (cache_updated) {
/* Instruct remote nodes to upgrade cached metadata. */
if (!remote_commit_cached_metadata(vg))
@@ -2946,6 +3226,14 @@ int vg_commit(struct volume_group *vg)
void vg_revert(struct volume_group *vg)
{
struct metadata_area *mda;
+ struct lv_list *lvl;
+
+ dm_list_iterate_items(lvl, &vg->lvs) {
+ if (lvl->lv->new_lock_args) {
+ lockd_free_lv(vg->cmd, vg, lvl->lv->name, &lvl->lv->lvid.id[1], lvl->lv->lock_args);
+ lvl->lv->new_lock_args = 0;
+ }
+ }
release_vg(vg->vg_precommitted); /* VG is no longer needed */
vg->vg_precommitted = NULL;
@@ -3163,6 +3451,33 @@ static int _check_mda_in_use(struct metadata_area *mda, void *_in_use)
return 1;
}
+static int _wipe_outdated_pvs(struct cmd_context *cmd, struct volume_group *vg, struct dm_list *to_check)
+{
+ struct pv_list *pvl, *pvl2;
+ char uuid[64] __attribute__((aligned(8)));
+ dm_list_iterate_items(pvl, to_check) {
+ dm_list_iterate_items(pvl2, &vg->pvs) {
+ if (pvl->pv->dev == pvl2->pv->dev)
+ goto next_pv;
+ }
+ if (!id_write_format(&pvl->pv->id, uuid, sizeof(uuid)))
+ return_0;
+ log_warn("WARNING: Removing PV %s (%s) that no longer belongs to VG %s",
+ pv_dev_name(pvl->pv), uuid, vg->name);
+ if (!pv_write_orphan(cmd, pvl->pv))
+ return_0;
+
+ /* Refresh metadata after orphan write */
+ if (!drop_cached_metadata(vg)) {
+ log_error("Unable to drop cached metadata for VG %s while wiping outdated PVs.", vg->name);
+ return 0;
+ }
+next_pv:
+ ;
+ }
+ return 1;
+}
+
/* Caller sets consistent to 1 if it's safe for vg_read_internal to correct
* inconsistent metadata on disk (i.e. the VG write lock is held).
* This guarantees only consistent metadata is returned.
@@ -3196,11 +3511,12 @@ static struct volume_group *_vg_read(struct cmd_context *cmd,
int inconsistent_mda_count = 0;
unsigned use_precommitted = precommitted;
struct dm_list *pvids;
- struct pv_list *pvl, *pvl2;
+ struct pv_list *pvl;
struct dm_list all_pvs;
- char uuid[64] __attribute__((aligned(8)));
unsigned seqno = 0;
int reappeared = 0;
+ struct cached_vg_fmtdata *vg_fmtdata = NULL; /* Additional format-specific data about the vg */
+ unsigned use_previous_vg;
if (is_orphan_vg(vgname)) {
if (use_precommitted) {
@@ -3221,6 +3537,11 @@ static struct volume_group *_vg_read(struct cmd_context *cmd,
*consistent = _repair_inconsistent_vg(correct_vg);
else
*consistent = !reappeared;
+ if (_wipe_outdated_pvs(cmd, correct_vg, &correct_vg->pvs_outdated)) {
+ /* clear the list */
+ dm_list_init(&correct_vg->pvs_outdated);
+ lvmetad_vg_clear_outdated_pvs(correct_vg);
+ }
}
return correct_vg;
}
@@ -3287,12 +3608,20 @@ static struct volume_group *_vg_read(struct cmd_context *cmd,
/* Ensure contents of all metadata areas match - else do recovery */
inconsistent_mda_count=0;
dm_list_iterate_items(mda, &fid->metadata_areas_in_use) {
+ use_previous_vg = 0;
if ((use_precommitted &&
- !(vg = mda->ops->vg_read_precommit(fid, vgname, mda))) ||
+ !(vg = mda->ops->vg_read_precommit(fid, vgname, mda, &vg_fmtdata, &use_previous_vg)) && !use_previous_vg) ||
(!use_precommitted &&
- !(vg = mda->ops->vg_read(fid, vgname, mda, 0)))) {
+ !(vg = mda->ops->vg_read(fid, vgname, mda, &vg_fmtdata, &use_previous_vg, 0)) && !use_previous_vg)) {
inconsistent = 1;
+ vg_fmtdata = NULL;
+ continue;
+ }
+
+ /* Use previous VG because checksum matches */
+ if (!vg) {
+ vg = correct_vg;
continue;
}
@@ -3319,8 +3648,10 @@ static struct volume_group *_vg_read(struct cmd_context *cmd,
}
}
- if (vg != correct_vg)
+ if (vg != correct_vg) {
release_vg(vg);
+ vg_fmtdata = NULL;
+ }
}
fid->ref_count--;
@@ -3436,6 +3767,7 @@ static struct volume_group *_vg_read(struct cmd_context *cmd,
* but we failed to do so (so there's a dangling fid now).
*/
_destroy_fid(&fid);
+ vg_fmtdata = NULL;
inconsistent = 0;
@@ -3466,14 +3798,23 @@ static struct volume_group *_vg_read(struct cmd_context *cmd,
/* Ensure contents of all metadata areas match - else recover */
inconsistent_mda_count=0;
dm_list_iterate_items(mda, &fid->metadata_areas_in_use) {
+ use_previous_vg = 0;
+
if ((use_precommitted &&
- !(vg = mda->ops->vg_read_precommit(fid, vgname,
- mda))) ||
+ !(vg = mda->ops->vg_read_precommit(fid, vgname, mda, &vg_fmtdata, &use_previous_vg)) && !use_previous_vg) ||
(!use_precommitted &&
- !(vg = mda->ops->vg_read(fid, vgname, mda, 0)))) {
+ !(vg = mda->ops->vg_read(fid, vgname, mda, &vg_fmtdata, &use_previous_vg, 0)) && !use_previous_vg)) {
inconsistent = 1;
+ vg_fmtdata = NULL;
continue;
}
+
+ /* Use previous VG because checksum matches */
+ if (!vg) {
+ vg = correct_vg;
+ continue;
+ }
+
if (!correct_vg) {
correct_vg = vg;
if (!_update_pv_list(cmd->mem, &all_pvs, correct_vg)) {
@@ -3516,8 +3857,10 @@ static struct volume_group *_vg_read(struct cmd_context *cmd,
}
}
- if (vg != correct_vg)
+ if (vg != correct_vg) {
release_vg(vg);
+ vg_fmtdata = NULL;
+ }
}
fid->ref_count--;
@@ -3599,28 +3942,10 @@ static struct volume_group *_vg_read(struct cmd_context *cmd,
return NULL;
}
- dm_list_iterate_items(pvl, &all_pvs) {
- dm_list_iterate_items(pvl2, &correct_vg->pvs) {
- if (pvl->pv->dev == pvl2->pv->dev)
- goto next_pv;
- }
- if (!id_write_format(&pvl->pv->id, uuid, sizeof(uuid))) {
- _free_pv_list(&all_pvs);
- release_vg(correct_vg);
- return_NULL;
- }
- log_warn("WARNING: Removing PV %s (%s) that no longer belongs to VG %s",
- pv_dev_name(pvl->pv), uuid, correct_vg->name);
- if (!pv_write_orphan(cmd, pvl->pv)) {
- _free_pv_list(&all_pvs);
- release_vg(correct_vg);
- return_NULL;
- }
-
- /* Refresh metadata after orphan write */
- drop_cached_metadata(correct_vg);
- next_pv:
- ;
+ if (!_wipe_outdated_pvs(cmd, correct_vg, &all_pvs)) {
+ _free_pv_list(&all_pvs);
+ release_vg(correct_vg);
+ return_NULL;
}
}
@@ -3723,6 +4048,16 @@ static struct volume_group *_vg_read_by_vgid(struct cmd_context *cmd,
release_vg(vg);
}
+ /*
+ * When using lvmlockd we should never reach this point.
+ * The VG is locked, then vg_read() is done, which gets
+ * the latest VG from lvmetad, or disk if lvmetad has
+ * been invalidated. When we get here the VG should
+ * always be cached and returned above.
+ */
+ if (lvmlockd_use())
+ log_error(INTERNAL_ERROR "vg_read_by_vgid failed with lvmlockd");
+
/* Mustn't scan if memory locked: ensure cache gets pre-populated! */
if (critical_section())
return_NULL;
@@ -3932,6 +4267,54 @@ struct dm_list *get_vgids(struct cmd_context *cmd, int include_internal)
return lvmcache_get_vgids(cmd, include_internal);
}
+int get_vgnameids(struct cmd_context *cmd, struct dm_list *vgnameids,
+ const char *only_this_vgname, int include_internal)
+{
+ struct vgnameid_list *vgnl;
+ struct format_type *fmt;
+
+ if (only_this_vgname) {
+ if (!(vgnl = dm_pool_alloc(cmd->mem, sizeof(*vgnl)))) {
+ log_error("vgnameid_list allocation failed.");
+ return 0;
+ }
+
+ vgnl->vg_name = dm_pool_strdup(cmd->mem, only_this_vgname);
+ vgnl->vgid = NULL;
+ dm_list_add(vgnameids, &vgnl->list);
+ return 1;
+ }
+
+ if (lvmetad_active()) {
+ /*
+ * This just gets the list of names/ids from lvmetad
+ * and does not populate lvmcache.
+ */
+ lvmetad_get_vgnameids(cmd, vgnameids);
+
+ if (include_internal) {
+ dm_list_iterate_items(fmt, &cmd->formats) {
+ if (!(vgnl = dm_pool_alloc(cmd->mem, sizeof(*vgnl)))) {
+ log_error("vgnameid_list allocation failed.");
+ return 0;
+ }
+
+ vgnl->vg_name = dm_pool_strdup(cmd->mem, fmt->orphan_vg_name);
+ vgnl->vgid = NULL;
+ dm_list_add(vgnameids, &vgnl->list);
+ }
+ }
+ } else {
+ /*
+ * The non-lvmetad case. This function begins by calling
+ * lvmcache_label_scan() to populate lvmcache.
+ */
+ lvmcache_get_vgnameids(cmd, include_internal, vgnameids);
+ }
+
+ return 1;
+}
+
static int _get_pvs(struct cmd_context *cmd, uint32_t warn_flags,
struct dm_list *pvslist, struct dm_list *vgslist)
{
@@ -4079,6 +4462,7 @@ int scan_vgs_for_pvs(struct cmd_context *cmd, uint32_t warn_flags)
int pv_write(struct cmd_context *cmd __attribute__((unused)),
struct physical_volume *pv, int allow_non_orphan)
{
+PFL();
if (!pv->fmt->ops->pv_write) {
log_error("Format does not support writing physical volumes");
return 0;
@@ -4212,6 +4596,37 @@ int vg_check_write_mode(struct volume_group *vg)
}
/*
+ * Return 1 if the VG metadata should be written
+ * *without* the LVM_WRITE flag in the status line, and
+ * *with* the LVM_WRITE_LOCKED flag in the flags line.
+ *
+ * If this is done for a VG, it forces previous versions
+ * of lvm (before the LVM_WRITE_LOCKED flag was added), to view
+ * the VG and its LVs as read-only (because the LVM_WRITE flag
+ * is missing). Versions of lvm that understand the
+ * LVM_WRITE_LOCKED flag know to check the other methods of
+ * access control for the VG, specifically system_id and lock_type.
+ *
+ * So, if a VG has a system_id or lock_type, then the
+ * system_id and lock_type control access to the VG in
+ * addition to its basic writable status. Because previous
+ * lvm versions do not know about system_id or lock_type,
+ * VGs depending on either of these should have LVM_WRITE_LOCKED
+ * instead of LVM_WRITE to prevent the previous lvm versions from
+ * assuming they can write the VG and its LVs.
+ */
+int vg_flag_write_locked(struct volume_group *vg)
+{
+ if (vg->system_id && vg->system_id[0])
+ return 1;
+
+ if (vg->lock_type && vg->lock_type[0] && strcmp(vg->lock_type, "none"))
+ return 1;
+
+ return 0;
+}
+
+/*
* Performs a set of checks against a VG according to bits set in status
* and returns FAILED_* bits for those that aren't acceptable.
*
@@ -4293,6 +4708,218 @@ static struct volume_group *_recover_vg(struct cmd_context *cmd,
return (struct volume_group *)vg;
}
+static int _allow_extra_system_id(struct cmd_context *cmd, const char *system_id)
+{
+ const struct dm_config_node *cn;
+ const struct dm_config_value *cv;
+ const char *str;
+
+ if (!(cn = find_config_tree_array(cmd, local_extra_system_ids_CFG, NULL)))
+ return 0;
+
+ for (cv = cn->v; cv; cv = cv->next) {
+ if (cv->type == DM_CFG_EMPTY_ARRAY)
+ break;
+ /* Ignore invalid data: Warning message already issued by config.c */
+ if (cv->type != DM_CFG_STRING)
+ continue;
+ str = cv->v.str;
+ if (!*str)
+ continue;
+
+ if (!strcmp(str, system_id))
+ return 1;
+ }
+
+ return 0;
+}
+
+static int _access_vg_clustered(struct cmd_context *cmd, struct volume_group *vg)
+{
+ if (vg_is_clustered(vg) && !locking_is_clustered()) {
+ if (!cmd->ignore_clustered_vgs)
+ log_error("Skipping clustered volume group %s", vg->name);
+ else
+ log_verbose("Skipping clustered volume group %s", vg->name);
+ return 0;
+ }
+
+ return 1;
+}
+
+static int _access_vg_lock_type(struct cmd_context *cmd, struct volume_group *vg,
+ uint32_t lockd_state, uint32_t *failure)
+{
+ if (!is_real_vg(vg->name))
+ return 1;
+
+ if (cmd->lockd_vg_disable)
+ return 1;
+
+ /*
+ * Local VG requires no lock from lvmlockd.
+ */
+ if (!is_lockd_type(vg->lock_type))
+ return 1;
+
+ /*
+ * When lvmlockd is not used, lockd VGs are ignored by lvm
+ * and cannot be used, with two exceptions:
+ *
+ * . The --shared option allows them to be revealed with
+ * reporting/display commands.
+ *
+ * . If a command asks to operate on one specifically
+ * by name, then an error is printed.
+ */
+ if (!lvmlockd_use()) {
+ /*
+ * Some reporting/display commands have the --shared option
+ * (like --foreign) to allow them to reveal lockd VGs that
+ * are otherwise ignored. The --shared option must only be
+ * permitted in commands that read the VG for report or display,
+ * not any that write the VG or activate LVs.
+ */
+ if (cmd->include_shared_vgs)
+ return 1;
+
+ /*
+ * Some commands want the error printed by vg_read, others by ignore_vg.
+ * Those using ignore_vg may choose to skip the error.
+ */
+ if (cmd->vg_read_print_access_error) {
+ log_error("Cannot access VG %s with lock type %s that requires lvmlockd.",
+ vg->name, vg->lock_type);
+ }
+
+ *failure |= FAILED_LOCK_TYPE;
+ return 0;
+ }
+
+ /*
+ * The lock request from lvmlockd failed. If the lock was ex,
+ * we cannot continue. If the lock was sh, we could also fail
+ * to continue but since the lock was sh, it means the VG is
+ * only being read, and it doesn't hurt to allow reading with
+ * no lock.
+ */
+ if (lockd_state & LDST_FAIL) {
+ if ((lockd_state & LDST_EX) || cmd->lockd_vg_enforce_sh) {
+ log_error("Cannot access VG %s due to failed lock.", vg->name);
+ *failure |= FAILED_LOCK_MODE;
+ return 0;
+ } else {
+ log_warn("Reading VG %s without a lock.", vg->name);
+ return 1;
+ }
+ }
+
+ return 1;
+}
+
+static int _access_vg_systemid(struct cmd_context *cmd, struct volume_group *vg)
+{
+ /*
+ * LVM1 VGs must not be accessed if a new-style LVM2 system ID is set.
+ */
+ if (cmd->system_id && systemid_on_pvs(vg)) {
+ log_error("Cannot access VG %s with LVM1 system ID %s when host system ID is set.",
+ vg->name, vg->lvm1_system_id);
+ return 0;
+ }
+
+ /*
+ * A VG without a system_id can be accessed by anyone.
+ */
+ if (!vg->system_id || !vg->system_id[0])
+ return 1;
+
+ /*
+ * A few commands allow read-only access to foreign VGs.
+ */
+ if (cmd->include_foreign_vgs)
+ return 1;
+
+ /*
+ * A host can access a VG with a matching system_id.
+ */
+ if (cmd->system_id && !strcmp(vg->system_id, cmd->system_id))
+ return 1;
+
+ /*
+ * A host can access a VG if the VG's system_id is in extra_system_ids list.
+ */
+ if (cmd->system_id && _allow_extra_system_id(cmd, vg->system_id))
+ return 1;
+
+ /*
+ * Allow VG access if the local host has active LVs in it.
+ */
+ if (lvs_in_vg_activated(vg)) {
+ log_warn("WARNING: Found LVs active in VG %s with foreign system ID %s. Possible data corruption.",
+ vg->name, vg->system_id);
+ if (cmd->include_active_foreign_vgs)
+ return 1;
+ return 0;
+ }
+
+ /*
+ * A host without a system_id cannot access a VG with a system_id.
+ */
+ if (!cmd->system_id || cmd->unknown_system_id) {
+ log_error("Cannot access VG %s with system ID %s with unknown local system ID.",
+ vg->name, vg->system_id);
+ return 0;
+ }
+
+ /*
+ * Some commands want the error printed by vg_read, others by ignore_vg.
+ * Those using ignore_vg may choose to skip the error.
+ */
+ if (cmd->vg_read_print_access_error) {
+ log_error("Cannot access VG %s with system ID %s with local system ID %s.",
+ vg->name, vg->system_id, cmd->system_id);
+ return 0;
+ }
+
+ /* Silently ignore foreign vgs. */
+
+ return 0;
+}
+
+/*
+ * FIXME: move _vg_bad_status_bits() checks in here.
+ */
+static int _vg_access_permitted(struct cmd_context *cmd, struct volume_group *vg,
+ uint32_t lockd_state, uint32_t *failure)
+{
+ if (!is_real_vg(vg->name)) {
+ /* Disallow use of LVM1 orphans when a host system ID is set. */
+ if (cmd->system_id && *cmd->system_id && systemid_on_pvs(vg)) {
+ *failure |= FAILED_SYSTEMID;
+ return_0;
+ }
+ return 1;
+ }
+
+ if (!_access_vg_clustered(cmd, vg)) {
+ *failure |= FAILED_CLUSTERED;
+ return 0;
+ }
+
+ if (!_access_vg_lock_type(cmd, vg, lockd_state, failure)) {
+ /* Either FAILED_LOCK_TYPE or FAILED_LOCK_MODE were set. */
+ return 0;
+ }
+
+ if (!_access_vg_systemid(cmd, vg)) {
+ *failure |= FAILED_SYSTEMID;
+ return 0;
+ }
+
+ return 1;
+}
+
/*
* Consolidated locking, reading, and status flag checking.
*
@@ -4306,7 +4933,8 @@ static struct volume_group *_recover_vg(struct cmd_context *cmd,
*/
static struct volume_group *_vg_lock_and_read(struct cmd_context *cmd, const char *vg_name,
const char *vgid, uint32_t lock_flags,
- uint64_t status_flags, uint32_t misc_flags)
+ uint64_t status_flags, uint32_t misc_flags,
+ uint32_t lockd_state)
{
struct volume_group *vg = NULL;
int consistent = 1;
@@ -4352,14 +4980,8 @@ static struct volume_group *_vg_lock_and_read(struct cmd_context *cmd, const cha
goto bad;
}
- if (vg_is_clustered(vg) && !locking_is_clustered()) {
- if (!cmd->ignore_clustered_vgs)
- log_error("Skipping clustered volume group %s", vg->name);
- else
- log_verbose("Skipping clustered volume group %s", vg->name);
- failure |= FAILED_CLUSTERED;
+ if (!_vg_access_permitted(cmd, vg, lockd_state, &failure))
goto bad;
- }
/* consistent == 0 when VG is not found, but failed == FAILED_NOTFOUND */
if (!consistent && !failure) {
@@ -4434,7 +5056,7 @@ bad_no_unlock:
* *consistent = 1.
*/
struct volume_group *vg_read(struct cmd_context *cmd, const char *vg_name,
- const char *vgid, uint32_t flags)
+ const char *vgid, uint32_t flags, uint32_t lockd_state)
{
uint64_t status = UINT64_C(0);
uint32_t lock_flags = LCK_VG_READ;
@@ -4447,7 +5069,7 @@ struct volume_group *vg_read(struct cmd_context *cmd, const char *vg_name,
if (flags & READ_ALLOW_EXPORTED)
status &= ~EXPORTED_VG;
- return _vg_lock_and_read(cmd, vg_name, vgid, lock_flags, status, flags);
+ return _vg_lock_and_read(cmd, vg_name, vgid, lock_flags, status, flags, lockd_state);
}
/*
@@ -4456,9 +5078,9 @@ struct volume_group *vg_read(struct cmd_context *cmd, const char *vg_name,
* request the new metadata to be written and committed).
*/
struct volume_group *vg_read_for_update(struct cmd_context *cmd, const char *vg_name,
- const char *vgid, uint32_t flags)
+ const char *vgid, uint32_t flags, uint32_t lockd_state)
{
- return vg_read(cmd, vg_name, vgid, flags | READ_FOR_UPDATE);
+ return vg_read(cmd, vg_name, vgid, flags | READ_FOR_UPDATE, lockd_state);
}
/*
@@ -4890,3 +5512,21 @@ const struct logical_volume *lv_ondisk(const struct logical_volume *lv)
return lvl->lv;
}
+
/*
 * Check if a lock_type uses lvmlockd.
 * If not (none, clvm), return 0.
 * If so (dlm, sanlock), return 1.
 */

int is_lockd_type(const char *lock_type)
{
	return lock_type &&
	       (!strcmp(lock_type, "dlm") || !strcmp(lock_type, "sanlock"));
}
+
diff --git a/lib/metadata/metadata.h b/lib/metadata/metadata.h
index 48c1b3c85..1a5306bde 100644
--- a/lib/metadata/metadata.h
+++ b/lib/metadata/metadata.h
@@ -72,6 +72,7 @@ struct dm_config_tree;
struct metadata_area;
struct alloc_handle;
struct lvmcache_info;
+struct cached_vg_fmtdata;
/* Per-format per-metadata area operations */
struct metadata_area_ops {
@@ -79,10 +80,14 @@ struct metadata_area_ops {
struct volume_group *(*vg_read) (struct format_instance * fi,
const char *vg_name,
struct metadata_area * mda,
+ struct cached_vg_fmtdata **vg_fmtdata,
+ unsigned *use_previous_vg,
int single_device);
struct volume_group *(*vg_read_precommit) (struct format_instance * fi,
const char *vg_name,
- struct metadata_area * mda);
+ struct metadata_area * mda,
+ struct cached_vg_fmtdata **vg_fmtdata,
+ unsigned *use_previous_vg);
/*
* Write out complete VG metadata. You must ensure internal
* consistency before calling. eg. PEs can't refer to PVs not
@@ -430,9 +435,13 @@ int lv_split_segment(struct logical_volume *lv, uint32_t le);
int add_seg_to_segs_using_this_lv(struct logical_volume *lv, struct lv_segment *seg);
int remove_seg_from_segs_using_this_lv(struct logical_volume *lv, struct lv_segment *seg);
+int for_each_sub_lv_except_pools(struct logical_volume *lv,
+ int (*fn)(struct logical_volume *lv, void *data),
+ void *data);
int for_each_sub_lv(struct logical_volume *lv,
- int (*fn)(struct logical_volume *lv, void *data),
- void *data);
+ int (*fn)(struct logical_volume *lv, void *data),
+ void *data);
+
int move_lv_segments(struct logical_volume *lv_to,
struct logical_volume *lv_from,
uint64_t set_status, uint64_t reset_status);
@@ -451,6 +460,8 @@ struct volume_group *import_vg_from_buffer(const char *buf,
struct format_instance *fid);
struct volume_group *import_vg_from_config_tree(const struct dm_config_tree *cft,
struct format_instance *fid);
+struct volume_group *import_vg_from_lvmetad_config_tree(const struct dm_config_tree *cft,
+ struct format_instance *fid);
/*
* Mirroring functions
@@ -474,6 +485,7 @@ int lv_is_merging_thin_snapshot(const struct logical_volume *lv);
int pool_has_message(const struct lv_segment *seg,
const struct logical_volume *lv, uint32_t device_id);
int pool_below_threshold(const struct lv_segment *pool_seg);
+int pool_check_overprovisioning(const struct logical_volume *lv);
int create_pool(struct logical_volume *lv, const struct segment_type *segtype,
struct alloc_handle *ah, uint32_t stripes, uint32_t stripe_size);
diff --git a/lib/metadata/mirror.c b/lib/metadata/mirror.c
index b4854b06e..c517114b6 100644
--- a/lib/metadata/mirror.c
+++ b/lib/metadata/mirror.c
@@ -78,19 +78,18 @@ struct logical_volume *find_temporary_mirror(const struct logical_volume *lv)
*
* Returns: 1 if available, 0 otherwise
*/
-static int _cluster_mirror_is_available(struct logical_volume *lv)
+int cluster_mirror_is_available(struct cmd_context *cmd)
{
unsigned attr = 0;
- struct cmd_context *cmd = lv->vg->cmd;
const struct segment_type *segtype;
- if (!(segtype = get_segtype_from_string(cmd, "mirror")))
+ if (!(segtype = get_segtype_from_string(cmd, SEG_TYPE_NAME_MIRROR)))
return_0;
if (!segtype->ops->target_present)
return_0;
- if (!segtype->ops->target_present(lv->vg->cmd, NULL, &attr))
+ if (!segtype->ops->target_present(cmd, NULL, &attr))
return_0;
if (!(attr & MIRROR_LOG_CLUSTERED))
@@ -112,9 +111,8 @@ uint32_t lv_mirror_count(const struct logical_volume *lv)
seg = first_seg(lv);
- /* FIXME: RAID10 only supports 2 copies right now */
- if (seg_is_raid10(seg))
- return 2;
+ if (seg_is_any_raid10(seg))
+ return seg->data_copies;
if (lv_is_pvmove(lv))
return seg->area_count;
@@ -160,11 +158,12 @@ struct lv_segment *find_mirror_seg(struct lv_segment *seg)
* For internal use only log only in verbose mode
*/
uint32_t adjusted_mirror_region_size(uint32_t extent_size, uint32_t extents,
- uint32_t region_size, int internal)
+ uint32_t region_size, int internal, int clustered)
{
uint64_t region_max;
+ uint64_t region_min, region_min_pow2;
- region_max = (1 << (ffs((int)extents) - 1)) * (uint64_t) (1 << (ffs((int)extent_size) - 1));
+ region_max = (UINT64_C(1) << (ffs((int)extents) - 1)) * (UINT64_C(1) << (ffs((int)extent_size) - 1));
if (region_max < UINT32_MAX && region_size > region_max) {
region_size = (uint32_t) region_max;
@@ -176,6 +175,42 @@ uint32_t adjusted_mirror_region_size(uint32_t extent_size, uint32_t extents,
PRIu32 " sectors.", region_size);
}
+#ifdef CMIRROR_REGION_COUNT_LIMIT
+ if (clustered) {
+ /*
+ * The CPG code used by cluster mirrors can only handle a
+ * payload of < 1MB currently. (This deficiency is tracked by
+ * http://bugzilla.redhat.com/682771.) The region size for cluster
+ * mirrors must be restricted in such a way as to limit the
+ * size of the bitmap to < 512kB, because there are two bitmaps
+ * which get sent around during checkpointing while a cluster
+ * mirror starts up. Ergo, the number of regions must not
+ * exceed 512k * 8. We also need some room for the other
+ * checkpointing structures as well, so we reduce by another
+ * factor of two.
+ *
+ * This code should be removed when the CPG restriction is
+ * lifted.
+ */
+ region_min = (uint64_t) extents * extent_size / CMIRROR_REGION_COUNT_LIMIT;
+ region_min_pow2 = 1;
+ while (region_min_pow2 < region_min)
+ region_min_pow2 *= 2;
+
+ if (region_size < region_min_pow2) {
+ if (internal)
+ log_print_unless_silent("Increasing mirror region size from %"
+ PRIu32 " to %" PRIu64 " sectors.",
+ region_size, region_min_pow2);
+ else
+ log_verbose("Increasing mirror region size from %"
+ PRIu32 " to %" PRIu64 " sectors.",
+ region_size, region_min_pow2);
+ region_size = region_min_pow2;
+ }
+ }
+#endif /* CMIRROR_REGION_COUNT_LIMIT */
+
return region_size;
}
@@ -332,7 +367,11 @@ static int _init_mirror_log(struct cmd_context *cmd,
backup(log_lv->vg);
/* Wait for events following any deactivation before reactivating */
- sync_local_dev_names(cmd);
+ if (!sync_local_dev_names(cmd)) {
+ log_error("Aborting. Failed to sync local devices before initialising mirror log %s.",
+ display_lvname(log_lv));
+ goto revert_new_lv;
+ }
if (!activate_lv(cmd, log_lv)) {
log_error("Aborting. Failed to activate mirror log.");
@@ -421,7 +460,8 @@ static int _activate_lv_like_model(struct logical_volume *model,
/*
* Delete independent/orphan LV, it must acquire lock.
*/
-static int _delete_lv(struct logical_volume *mirror_lv, struct logical_volume *lv)
+static int _delete_lv(struct logical_volume *mirror_lv, struct logical_volume *lv,
+ int reactivate)
{
struct cmd_context *cmd = mirror_lv->vg->cmd;
struct dm_str_list *sl;
@@ -441,15 +481,21 @@ static int _delete_lv(struct logical_volume *mirror_lv, struct logical_volume *l
}
}
- /* FIXME: the 'model' should be 'mirror_lv' not 'lv', I think. */
- if (!_activate_lv_like_model(lv, lv))
- return_0;
+ if (reactivate) {
+ /* FIXME: the 'model' should be 'mirror_lv' not 'lv', I think. */
+ if (!_activate_lv_like_model(lv, lv))
+ return_0;
- /* FIXME Is this superfluous now? */
- sync_local_dev_names(cmd);
+ /* FIXME Is this superfluous now? */
+ if (!sync_local_dev_names(cmd)) {
+ log_error("Failed to sync local devices when reactivating %s.",
+ display_lvname(lv));
+ return 0;
+ }
- if (!deactivate_lv(cmd, lv))
- return_0;
+ if (!deactivate_lv(cmd, lv))
+ return_0;
+ }
if (!lv_remove(lv))
return_0;
@@ -800,11 +846,11 @@ static int _split_mirror_images(struct logical_volume *lv,
}
/* Remove original mirror layer if it has been converted to linear */
- if (sub_lv && !_delete_lv(lv, sub_lv))
+ if (sub_lv && !_delete_lv(lv, sub_lv, 1))
return_0;
/* Remove the log if it has been converted to linear */
- if (detached_log_lv && !_delete_lv(lv, detached_log_lv))
+ if (detached_log_lv && !_delete_lv(lv, detached_log_lv, 1))
return_0;
return 1;
@@ -853,6 +899,7 @@ static int _remove_mirror_images(struct logical_volume *lv,
struct lv_list *lvl;
struct dm_list tmp_orphan_lvs;
uint32_t orig_removed = num_removed;
+ int reactivate;
if (removed)
*removed = 0;
@@ -865,6 +912,7 @@ static int _remove_mirror_images(struct logical_volume *lv,
if (collapse && (old_area_count - num_removed != 1)) {
log_error("Incompatible parameters to _remove_mirror_images");
return 0;
+
}
num_removed = 0;
@@ -1094,16 +1142,17 @@ static int _remove_mirror_images(struct logical_volume *lv,
}
/* Save or delete the 'orphan' LVs */
+ reactivate = lv_is_active(lv_lock_holder(lv));
if (!collapse) {
dm_list_iterate_items(lvl, &tmp_orphan_lvs)
- if (!_delete_lv(lv, lvl->lv))
+ if (!_delete_lv(lv, lvl->lv, reactivate))
return_0;
}
- if (temp_layer_lv && !_delete_lv(lv, temp_layer_lv))
+ if (temp_layer_lv && !_delete_lv(lv, temp_layer_lv, reactivate))
return_0;
- if (detached_log_lv && !_delete_lv(lv, detached_log_lv))
+ if (detached_log_lv && !_delete_lv(lv, detached_log_lv, reactivate))
return_0;
/* Mirror with only 1 area is 'in sync'. */
@@ -1121,7 +1170,7 @@ static int _remove_mirror_images(struct logical_volume *lv,
if (removed)
*removed = old_area_count - new_area_count;
- log_very_verbose("%" PRIu32 " image(s) removed from %s",
+ log_very_verbose(FMTu32 " image(s) removed from %s",
old_area_count - new_area_count, lv->name);
return 1;
@@ -1442,7 +1491,7 @@ static int _create_mimage_lvs(struct alloc_handle *ah,
return 0;
}
} else {
- if (!lv_add_segment(ah, m * stripes, stripes, img_lvs[m],
+ if (!lv_add_segment(ah, m * stripes, stripes, 1, img_lvs[m],
get_segtype_from_string(lv->vg->cmd,
"striped"),
stripe_size, 0, 0)) {
@@ -1670,7 +1719,7 @@ int fixup_imported_mirrors(struct volume_group *vg)
dm_list_iterate_items(lvl, &vg->lvs) {
dm_list_iterate_items(seg, &lvl->lv->segments) {
if (seg->segtype !=
- get_segtype_from_string(vg->cmd, "mirror"))
+ get_segtype_from_string(vg->cmd, SEG_TYPE_NAME_MIRROR))
continue;
if (seg->log_lv && !add_seg_to_segs_using_this_lv(seg->log_lv, seg))
@@ -1698,15 +1747,15 @@ static int _add_mirrors_that_preserve_segments(struct logical_volume *lv,
if (!(parallel_areas = build_parallel_areas_from_lv(lv, 1, 0)))
return_0;
- if (!(segtype = get_segtype_from_string(cmd, "mirror")))
+ if (!(segtype = get_segtype_from_string(cmd, SEG_TYPE_NAME_MIRROR)))
return_0;
adjusted_region_size = adjusted_mirror_region_size(lv->vg->extent_size,
lv->le_count,
- region_size, 1);
+ region_size, 1,
+ vg_is_clustered(lv->vg));
- if (!(ah = allocate_extents(lv->vg, NULL, segtype,
- 1, mirrors, 0, 0,
+ if (!(ah = allocate_extents(lv->vg, NULL, segtype, 1, mirrors, 0, 0,
lv->le_count, allocatable_pvs, alloc, 0,
parallel_areas))) {
log_error("Unable to allocate mirror extents for %s.", lv->name);
@@ -1862,7 +1911,7 @@ static int _form_mirror(struct cmd_context *cmd, struct alloc_handle *ah,
}
if (!_create_mimage_lvs(ah, mirrors, stripes, stripe_size, lv, img_lvs, log))
- return 0;
+ return_0;
if (!lv_add_mirror_lvs(lv, img_lvs, mirrors,
MIRROR_IMAGE | (lv->status & LOCKED),
@@ -1931,6 +1980,9 @@ static struct logical_volume *_set_up_mirror_log(struct cmd_context *cmd,
return NULL;
}
+
+ first_seg(log_lv)->data_copies = log_count;
+
return log_lv;
}
@@ -1983,7 +2035,7 @@ int add_mirror_log(struct cmd_context *cmd, struct logical_volume *lv,
if (!(parallel_areas = build_parallel_areas_from_lv(lv, 0, 0)))
return_0;
- if (!(segtype = get_segtype_from_string(cmd, "mirror")))
+ if (!(segtype = get_segtype_from_string(cmd, SEG_TYPE_NAME_MIRROR)))
return_0;
if (activation() && segtype->ops->target_present &&
@@ -2056,7 +2108,7 @@ int add_mirror_images(struct cmd_context *cmd, struct logical_volume *lv,
if (!(parallel_areas = build_parallel_areas_from_lv(lv, 0, 0)))
return_0;
- if (!(segtype = get_segtype_from_string(cmd, "mirror")))
+ if (!(segtype = get_segtype_from_string(cmd, SEG_TYPE_NAME_MIRROR)))
return_0;
ah = allocate_extents(lv->vg, NULL, segtype,
@@ -2128,7 +2180,7 @@ int lv_add_mirrors(struct cmd_context *cmd, struct logical_volume *lv,
if (!lv_is_pvmove(lv) && !lv_is_locked(lv) &&
lv_is_active(lv) &&
!lv_is_active_exclusive_locally(lv) && /* lv_is_active_remotely */
- !_cluster_mirror_is_available(lv)) {
+ !cluster_mirror_is_available(lv->vg->cmd)) {
log_error("Shared cluster mirrors are not available.");
return 0;
}
diff --git a/lib/metadata/pool_manip.c b/lib/metadata/pool_manip.c
index 95b5867a3..754e0b492 100644
--- a/lib/metadata/pool_manip.c
+++ b/lib/metadata/pool_manip.c
@@ -449,7 +449,7 @@ int create_pool(struct logical_volume *pool_lv,
}
/* Metadata segment */
- if (!lv_add_segment(ah, stripes, 1, pool_lv, striped, 1, 0, 0))
+ if (!lv_add_segment(ah, stripes, 1, 1, pool_lv, striped, 1, 0, 0))
return_0;
if (!activation())
@@ -501,7 +501,7 @@ int create_pool(struct logical_volume *pool_lv,
goto_bad;
/* Pool data segment */
- if (!lv_add_segment(ah, 0, stripes, pool_lv, striped, stripe_size, 0, 0))
+ if (!lv_add_segment(ah, 0, stripes, 1, pool_lv, striped, stripe_size, 0, 0))
goto_bad;
if (!(data_lv = insert_layer_for_lv(pool_lv->vg->cmd, pool_lv,
diff --git a/lib/metadata/pv_manip.c b/lib/metadata/pv_manip.c
index 6499e361d..ce7f66140 100644
--- a/lib/metadata/pv_manip.c
+++ b/lib/metadata/pv_manip.c
@@ -24,6 +24,7 @@
#include "display.h"
#include "label.h"
#include "archiver.h"
+#include "lvm-signal.h"
static struct pv_segment *_alloc_pv_segment(struct dm_pool *mem,
struct physical_volume *pv,
@@ -694,12 +695,11 @@ const char _really_wipe[] =
* 0 indicates we may not.
*/
static int pvremove_check(struct cmd_context *cmd, const char *name,
- unsigned force_count, unsigned prompt)
+ unsigned force_count, unsigned prompt, struct dm_list *pvslist)
{
struct device *dev;
struct label *label;
struct pv_list *pvl;
- struct dm_list *pvslist;
struct physical_volume *pv = NULL;
int r = 0;
@@ -720,10 +720,6 @@ static int pvremove_check(struct cmd_context *cmd, const char *name,
return 0;
}
- lvmcache_seed_infos_from_lvmetad(cmd);
- if (!(pvslist = get_pvs(cmd)))
- return_0;
-
dm_list_iterate_items(pvl, pvslist)
if (pvl->pv->dev == dev)
pv = pvl->pv;
@@ -765,26 +761,18 @@ static int pvremove_check(struct cmd_context *cmd, const char *name,
r = 1;
out:
- if (pvslist)
- dm_list_iterate_items(pvl, pvslist)
- free_pv_fid(pvl->pv);
return r;
}
int pvremove_single(struct cmd_context *cmd, const char *pv_name,
void *handle __attribute__((unused)), unsigned force_count,
- unsigned prompt)
+ unsigned prompt, struct dm_list *pvslist)
{
struct device *dev;
struct lvmcache_info *info;
int r = 0;
- if (!lock_vol(cmd, VG_ORPHANS, LCK_VG_WRITE, NULL)) {
- log_error("Can't get lock for orphan PVs");
- return 0;
- }
-
- if (!pvremove_check(cmd, pv_name, force_count, prompt))
+ if (!pvremove_check(cmd, pv_name, force_count, prompt, pvslist))
goto out;
if (!(dev = dev_cache_get(pv_name, cmd->filter))) {
@@ -820,9 +808,48 @@ int pvremove_single(struct cmd_context *cmd, const char *pv_name,
r = 1;
out:
+ return r;
+}
+
+int pvremove_many(struct cmd_context *cmd, struct dm_list *pv_names,
+ unsigned force_count, unsigned prompt)
+{
+ int ret = 1;
+ struct dm_list *pvslist = NULL;
+ struct pv_list *pvl;
+ const struct dm_str_list *pv_name;
+
+ if (!lock_vol(cmd, VG_ORPHANS, LCK_VG_WRITE, NULL)) {
+ log_error("Can't get lock for orphan PVs");
+ return 0;
+ }
+
+ lvmcache_seed_infos_from_lvmetad(cmd);
+
+ if (!(pvslist = get_pvs(cmd))) {
+ ret = 0;
+ goto_out;
+ }
+
+ dm_list_iterate_items(pv_name, pv_names) {
+ if (!pvremove_single(cmd, pv_name->str, NULL, force_count, prompt, pvslist)) {
+ stack;
+ ret = 0;
+ }
+ if (sigint_caught()) {
+ ret = 0;
+ goto_out;
+ }
+ }
+
+out:
unlock_vg(cmd, VG_ORPHANS);
- return r;
+ if (pvslist)
+ dm_list_iterate_items(pvl, pvslist)
+ free_pv_fid(pvl->pv);
+
+ return ret;
}
int pvcreate_single(struct cmd_context *cmd, const char *pv_name,
diff --git a/lib/metadata/raid_manip.c b/lib/metadata/raid_manip.c
index 37987281d..563064280 100644
--- a/lib/metadata/raid_manip.c
+++ b/lib/metadata/raid_manip.c
@@ -3,6 +3,7 @@
*
* This file is part of LVM2.
*
+ *
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU Lesser General Public License v.2.1.
@@ -10,6 +11,29 @@
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ *
+ * All raid conversion business in here:
+ *
+ * - takeover, i.e. raid level change (e.g. striped <-> raid6)
+ *
+ * - reshaping, i.e. raid algorithm change (e.g. raid5_ls -> raid5_ra)
+ *
+ * - duplication (e.g. start with a linear LV and add another,
+ * say raid10_offset in a top-level raid1 stack;
+ * N sub LVs possible)
+ *
+ * - unduplication, i.e. tear down duplicated LV raid1 stack
+ * keeping any of the duplicated sub LVs
+ *
+ * - leg images replacement
+ *
+ * - leg images repair
+ *
+ * - raid1 splitting, tracking and merging
+ *
+ *
+ * In general, int functions in this module return 1 on success and 0 on failure.
*/
#include "lib.h"
@@ -23,34 +47,131 @@
#include "lvm-string.h"
#include "lvm-signal.h"
-#if 0
-#include "dump.h"
-#endif
+#define ARRAY_SIZE(a) (sizeof(a) / sizeof(*a))
-/* HM FIXME: REMOVEME: devel output */
-#ifdef USE_PFL
-#define PFL() printf("%s %u\n", __func__, __LINE__);
-#define PFLA(format, arg...) printf("%s %u " format "\n", __func__, __LINE__, arg);
-#else
-#define PFL()
-#define PFLA(format, arg...)
-#endif
+/*
+ * Macros to check actual function arguments are being
+ * provided, provided correctly and display internal error
+ * with @msg on if not
+ */
+/* Return 0 and display @msg if @arg is nonzero */
+#define RETURN_IF_NONZERO(arg, msg) \
+{ \
+ if ((arg)) { \
+ log_error(INTERNAL_ERROR "%s[%u] -- no %s!", __func__, __LINE__, (msg)); \
+ return 0; \
+ } \
+}
-#define ARRAY_SIZE(a) (sizeof(a) / sizeof(*a))
+/* Return 0 and display @msg if @arg is zero */
+#define RETURN_IF_ZERO(arg, msg) \
+ RETURN_IF_NONZERO(!(arg), (msg))
+
+/* If @arg is zero, display @msg and goto err label */
+#define ERR_IF_ZERO(arg, msg) \
+{ \
+ if (!(arg)) { \
+ log_error(INTERNAL_ERROR "%s[%u] -- no %s!", __func__, __LINE__, (msg)); \
+ goto err; \
+ } \
+}
+
+/* Macro to check argument @lv exists */
+#define RETURN_IF_LV_ZERO(lv) \
+ RETURN_IF_ZERO((lv), "lv argument");
+
+/* Macro to check argument @seg exists */
+#define RETURN_IF_SEG_ZERO(seg) \
+ RETURN_IF_ZERO((seg), "lv segment argument");
+
+/* False if (@seg)->area_count, else true and display @msg */
+#define RETURN_IF_SEG_AREA_COUNT_FALSE(seg, s) \
+ RETURN_IF_ZERO((seg)->area_count, "segment areas"); \
+ RETURN_IF_ZERO((s) < seg->area_count, "valid segment area index")
+
+/* Macro to check argument @segtype exists */
+#define RETURN_IF_SEGTYPE_ZERO(segtype) \
+ RETURN_IF_ZERO((segtype), "lv segment type argument");
+
+/* Macro to check @lv and it's first segment @seg exist */
+#define RETURN_IF_LV_SEG_ZERO(lv, seg) \
+ RETURN_IF_LV_ZERO((lv)); \
+ RETURN_IF_SEG_ZERO((seg))
+
+/* Macro to check @lv and the segment type @segtype exist */
+#define RETURN_IF_LV_SEGTYPE_ZERO(lv, segtype) \
+ RETURN_IF_LV_ZERO((lv)); \
+ RETURN_IF_SEGTYPE_ZERO((segtype))
+
+/* Macro to check @lv, it's first segment @seg and the segment type @segtype exist */
+#define RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, seg, segtype) \
+ RETURN_IF_LV_SEG_ZERO((lv), (seg)); \
+ RETURN_IF_SEGTYPE_ZERO((segtype))
+
+/* HM Helper: conditionally return seg_metalv(@seg, @s) to prevent oops */
+static struct logical_volume *_seg_metalv_checked(struct lv_segment *seg, uint32_t s)
+{
+ RETURN_IF_SEG_ZERO(seg);
+
+ return (seg->meta_areas && seg_metatype(seg, s) == AREA_LV) ? seg_metalv(seg, s) : NULL;
+}
/* Ensure minimum region size on @lv */
-static void _ensure_min_region_size(struct logical_volume *lv)
+static int _ensure_min_region_size(const struct logical_volume *lv)
{
- struct lv_segment *seg = first_seg(lv);
+ struct lv_segment *seg;
+ uint32_t min_region_size, region_size;
+
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+ RETURN_IF_ZERO(lv->size, "LV size set");
+ RETURN_IF_ZERO(seg->region_size, "segment region size set");
+
/* MD's bitmap is limited to tracking 2^21 regions */
- uint32_t min_region_size = lv->size / (1 << 21);
- uint32_t region_size = seg->region_size;
+ min_region_size = lv->size / (1 << 21);
+ region_size = seg->region_size;
- while (seg->region_size < min_region_size)
- seg->region_size *= 2;
+ while (region_size < min_region_size)
+ region_size *= 2;
- if (seg->region_size != region_size)
+ if (seg->region_size != region_size) {
log_very_verbose("Setting region_size to %u", seg->region_size);
+ seg->region_size = region_size;
+ }
+
+ return 1;
+}
+
+/* HM Helper: return any alias for segment type of @seg */
+static const char *_get_segtype_alias(const struct segment_type *segtype)
+{
+ const char *r = "\0";
+
+ RETURN_IF_SEG_ZERO(segtype);
+
+ if (!strcmp(segtype->name, SEG_TYPE_NAME_RAID5))
+ r = SEG_TYPE_NAME_RAID5_LS;
+ else if (!strcmp(segtype->name, SEG_TYPE_NAME_RAID6))
+ r = SEG_TYPE_NAME_RAID6_ZR;
+ else if (!strcmp(segtype->name, SEG_TYPE_NAME_RAID10))
+ r = SEG_TYPE_NAME_RAID10_NEAR;
+ else if (!strcmp(segtype->name, SEG_TYPE_NAME_RAID5_LS))
+ r = SEG_TYPE_NAME_RAID5;
+ else if (!strcmp(segtype->name, SEG_TYPE_NAME_RAID6_ZR))
+ r = SEG_TYPE_NAME_RAID6;
+ else if (!strcmp(segtype->name, SEG_TYPE_NAME_RAID10_NEAR))
+ r = SEG_TYPE_NAME_RAID10;
+
+ return r;
+}
+
+/* HM Return "linear" for striped @segtype with 1 area instead of "striped" */
+static const char *_get_segtype_name(const struct segment_type *segtype, unsigned new_image_count)
+{
+ if (!segtype ||
+ (segtype_is_striped(segtype) && new_image_count == 1))
+ return "linear";
+
+ return segtype->name;
}
/*
@@ -58,34 +179,112 @@ static void _ensure_min_region_size(struct logical_volume *lv)
*
* Default region_size on @lv unless already set i
*/
-static void _check_and_init_region_size(struct logical_volume *lv)
+static int _check_and_init_region_size(const struct logical_volume *lv)
{
- struct lv_segment *seg = first_seg(lv);
+ struct lv_segment *seg;
+
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
seg->region_size = seg->region_size ?: get_default_region_size(lv->vg->cmd);
- _ensure_min_region_size(lv);
+
+ return _ensure_min_region_size(lv);
}
/* Return data images count for @total_rimages depending on @seg's type */
-static uint32_t _data_rimages_count(struct lv_segment *seg, uint32_t total_rimages)
+static uint32_t _data_rimages_count(const struct lv_segment *seg, const uint32_t total_rimages)
{
+ RETURN_IF_SEG_ZERO(seg);
+ RETURN_IF_NONZERO(!seg_is_thin(seg) && total_rimages <= seg->segtype->parity_devs,
+ "total rimages count > parity devices");
+
return total_rimages - seg->segtype->parity_devs;
}
+/* HM Helper: return sub LV in @lv by @name and index in areas array in @idx */
+static struct logical_volume *_find_lv_in_sub_lvs(struct logical_volume *lv,
+ const char *name, uint32_t *idx)
+{
+ uint32_t s;
+ struct lv_segment *seg;
+
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+ RETURN_IF_ZERO(idx, "idx argument pointer");
+
+ for (s = 0; s < seg->area_count; s++) {
+ RETURN_IF_ZERO(seg_type(seg, s) == AREA_LV, "sub lv");
+
+ if (!strcmp(name, seg_lv(seg, s)->name)) {
+ *idx = s;
+ return seg_lv(seg, s);
+ }
+ }
+
+ return NULL;
+}
+
+/* HM helper: allow allocation on list of @pvs */
+static void _pvs_allow_allocation(struct dm_list *pvs)
+{
+ struct pv_list *pvl;
+
+ if (pvs)
+ dm_list_iterate_items(pvl, pvs)
+ pvl->pv->status &= ~PV_ALLOCATION_PROHIBITED;
+}
+
+/* HM helper: return first hit strstr() of @str for string in @... */
+static char *_strstr_strings(const char *str, ...)
+{
+ char *substr, *r = NULL;
+ va_list ap;
+
+ RETURN_IF_ZERO(str, "string argument");
+
+ va_start(ap, str);
+ while ((substr = va_arg(ap, char *)))
+ if ((r = strstr(str, substr)))
+ break;
+ va_end(ap);
+
+ return r;
+}
+
+/* HM helper: return top-level lv name for given image @lv */
+static char *_top_level_lv_name(struct logical_volume *lv)
+{
+ char *p, *r;
+
+ RETURN_IF_LV_ZERO(lv);
+ RETURN_IF_ZERO((r = dm_pool_strdup(lv->vg->vgmem, lv->name)),
+ "lv name allocation possible");
+
+ if ((p = _strstr_strings(r, "_rimage_", "_dup_", "_extracted", NULL)))
+ *p = '\0'; /* LV name returned is now that of top-level RAID */
+
+ return r;
+}
+
/*
- * HM
+ * HM helper:
*
* Compare the raid levels in segtype @t1 and @t2
*
- * Return 0 if same, else != 0
+ * Return 1 if same, else 0
*/
static int _cmp_level(const struct segment_type *t1, const struct segment_type *t2)
{
- return strncmp(t1->name, t2->name, 5);
+ RETURN_IF_ZERO(t1, "first segment");
+ RETURN_IF_ZERO(t2, "second segment");
+
+ if ((segtype_is_any_raid10(t1) && !segtype_is_any_raid10(t2)) ||
+ (!segtype_is_any_raid10(t1) && segtype_is_any_raid10(t2)))
+ return 0;
+
+ return !strncmp(t1->name, t2->name, 5);
}
/*
- * HM
+ * HM Helper:
*
* Check for same raid levels in segtype @t1 and @t2
*
@@ -93,85 +292,149 @@ static int _cmp_level(const struct segment_type *t1, const struct segment_type *
*/
static int is_same_level(const struct segment_type *t1, const struct segment_type *t2)
{
-#if 0
- static uint64_t level_flags[] = {
- SEG_RAID0|SEG_RAID0_META,
- SEG_RAID5_LS|SEG_RAID5_LA|SEG_RAID5_RS|SEG_RAID5_RA|SEG_RAID5_N,
- SEG_RAID6_ZR|SEG_RAID6_NC|SEG_RAID6_NR| \
- SEG_RAID6_LS_6|SEG_RAID6_LA_6|SEG_RAID6_RS_6|SEG_RAID6_RA_6|SEG_RAID6_N_6
- };
- unsigned i = ARRAY_SIZE(level_flags);
+ return _cmp_level(t1, t2);
+}
- if (t1->flag & t2->flag)
- return 1;
+static int _lv_is_raid_with_tracking(const struct logical_volume *lv, uint32_t *ss)
+{
+ uint32_t s;
+ const struct lv_segment *seg;
- while (i--)
- if ((t1->flag & level_flags[i]) && (t2->flag & level_flags[i]))
- return 1;
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+
+ if (lv_is_raid(lv))
+ for (s = 0; s < seg->area_count; s++)
+ if (lv_is_visible(seg_lv(seg, s))) {
+ if (!(seg_lv(seg, s)->status & LVM_WRITE)) {
+ if (ss)
+ *ss = s;
+
+ return 1;
+ }
+
+ RETURN_IF_ZERO(0, "read-only tracking LV!");
+ }
return 0;
-#else
- return !_cmp_level(t1, t2);
-#endif
}
-/*
- * HM
- *
- * Check for raid level by segtype going up from @t1 to @t2
- *
- * Return 1 if same, else != 1
- */
-static int is_level_up(const struct segment_type *t1, const struct segment_type *t2)
+/* API function to check for @lv to be a tracking one */
+int lv_is_raid_with_tracking(const struct logical_volume *lv)
{
- if (segtype_is_raid(t1) && segtype_is_striped(t2))
- return 0;
-
- if (segtype_is_striped(t1) && segtype_is_raid(t2))
- return 1;
-
- return _cmp_level(t1, t2) < 0;
+ return _lv_is_raid_with_tracking(lv, NULL);
}
-static int _lv_is_raid_with_tracking(const struct logical_volume *lv,
- struct logical_volume **tracking)
+/* HM Helper: return true in case this is a raid1 top-level LV inserted to do synchronization of 2 given sub LVs */
+static int _lv_is_duplicating(const struct logical_volume *lv)
{
uint32_t s;
- const struct lv_segment *seg = first_seg(lv);
+ struct lv_segment *seg;
- *tracking = NULL;
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
- if (!lv_is_raid(lv))
+ /* Needs to be raid1 with >= 2 legs and the legs must have the proper names suffix */
+ if (!seg_is_raid1(seg))
return 0;
+ /* Sub LVs must be present and duplicated ones and "image" infix may not be present in any of their names */
for (s = 0; s < seg->area_count; s++)
- if (lv_is_visible(seg_lv(seg, s)) &&
- !(seg_lv(seg, s)->status & LVM_WRITE)) {
- *tracking = seg_lv(seg, s);
- return 1;
+ if (seg_type(seg, s) != AREA_LV ||
+ !lv_is_duplicated(seg_lv(seg, s)) ||
+ strstr(seg_lv(seg, s)->name, "image"))
+ return 0;
+
+ return 1;
+}
+
+/* HM Helper: check if @lv is active and display cluster/local message if not */
+static int _lv_is_active(struct logical_volume *lv)
+{
+ RETURN_IF_LV_ZERO(lv);
+
+ if (vg_is_clustered(lv->vg)) {
+ if (!lv_is_active_exclusive_locally(lv)) {
+ log_error("%s in clustered VG must be active exclusive "
+ "locally to perform this operation.",
+ display_lvname(lv));
+ return 0;
}
- return 0;
+ } else if (!lv_is_active(lv)) {
+ log_error("%s must be active to perform this operation.",
+ display_lvname(lv));
+ return 0;
+ }
+
+ return 1;
}
-int lv_is_raid_with_tracking(const struct logical_volume *lv)
+/* HM Helper: activate all lvs on list @lvs */
+static int _activate_lv_list_excl_local(struct dm_list *lvs)
{
- struct logical_volume *tracking;
+ struct lv_list *lvl;
- return _lv_is_raid_with_tracking(lv, &tracking);
+ /* lvs list may be empty here! */
+ RETURN_IF_ZERO(lvs, "lvs list argument");
+
+ dm_list_iterate_items(lvl, lvs)
+ if (!activate_lv_excl_local(lvl->lv->vg->cmd, lvl->lv))
+ return_0;
+ return 1;
}
+/* Helper: return image count of @lv depending on segment type */
uint32_t lv_raid_image_count(const struct logical_volume *lv)
{
- struct lv_segment *seg = first_seg(lv);
+ struct lv_segment *seg;
+
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
return seg_is_raid(seg) ? seg->area_count : 1;
}
+/* Calculate raid rimage extents required based on total @extents for @segtype, @stripes and @data_copies */
+uint32_t raid_rimage_extents(const struct segment_type *segtype,
+ uint32_t extents, uint32_t stripes, uint32_t data_copies)
+{
+ uint64_t r;
+
+ RETURN_IF_ZERO(segtype, "segtype argument");
+
+ if (!extents ||
+ segtype_is_mirror(segtype) ||
+ segtype_is_raid1(segtype) ||
+ segtype_is_raid01(segtype))
+ return extents;
+
+ r = extents;
+ if (segtype_is_any_raid10(segtype))
+ r *= (data_copies ?: 1); /* Caller should ensure data_copies > 0 */
+
+ r = dm_div_up(r, (stripes ?: 1)); /* Caller should ensure stripes > 0 */
+
+PFLA("r=%llu", (unsigned long long) r);
+ return r > UINT_MAX ? 0 : (uint32_t) r;
+}
+
+/* Calculate total extents required to provide @extents to user based on @segtype, @stripes and @data_copies */
+uint32_t raid_total_extents(const struct segment_type *segtype,
+ uint32_t extents, uint32_t stripes, uint32_t data_copies)
+{
+ RETURN_IF_ZERO(segtype, "segtype argument");
+ RETURN_IF_ZERO(extents, "extents > 0");
+
+ return raid_rimage_extents(segtype, extents, stripes, data_copies) * stripes;
+}
+
+/* Activate @sub_lv preserving any exclusive local activation given by @top_lv */
static int _activate_sublv_preserving_excl(struct logical_volume *top_lv,
struct logical_volume *sub_lv)
{
- struct cmd_context *cmd = top_lv->vg->cmd;
+ struct cmd_context *cmd;
+
+ RETURN_IF_ZERO(top_lv, "top level LV");
+ RETURN_IF_ZERO(sub_lv, "sub LV");
+ cmd = top_lv->vg->cmd;
/* If top RAID was EX, use EX */
if (lv_is_active_exclusive_locally(top_lv)) {
@@ -183,8 +446,102 @@ static int _activate_sublv_preserving_excl(struct logical_volume *top_lv,
return 1;
}
+/* Return # of reshape LEs per device for @seg */
+static uint32_t _reshape_len_per_dev(struct lv_segment *seg)
+{
+ RETURN_IF_SEG_ZERO(seg);
+
+ return seg->reshape_len;
+}
+
+/* Return # of reshape LEs per @lv (sum of all sub LVs reshape LEs) */
+static uint32_t _reshape_len_per_lv(struct logical_volume *lv)
+{
+ struct lv_segment *seg;
+
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+
+ return _reshape_len_per_dev(seg) * _data_rimages_count(seg, seg->area_count);
+}
+
/*
- * HM
+ * HM Helper:
+ *
+ * store the allocated reshape length per data image
+ * in the only segment of the top-level RAID @lv and
+ * in the first segment of each sub lv.
+ */
+static int _lv_set_reshape_len(struct logical_volume *lv, uint32_t reshape_len)
+{
+ uint32_t s;
+ struct lv_segment *seg, *data_seg;
+
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+ RETURN_IF_SEG_AREA_COUNT_FALSE(seg, 0);
+ RETURN_IF_ZERO(reshape_len < lv->le_count - 1, "proper reshape_len argument");
+
+ seg->reshape_len = reshape_len;
+
+ for (s = 0; s < seg->area_count; s++) {
+ RETURN_IF_ZERO(seg_type(seg, s) == AREA_LV, "sub lv");
+
+ dm_list_iterate_items(data_seg, &seg_lv(seg, s)->segments) {
+ data_seg->reshape_len = reshape_len;
+ reshape_len = 0;
+ }
+ }
+
+ return 1;
+}
+
+/* HM Helper:
+ *
+ * correct segments logical start extents in all sub LVs of @lv
+ * after having reordered any segments in sub LVs e.g. because of
+ * reshape space (re)allocation.
+ */
+static int _lv_set_image_lvs_start_les(struct logical_volume *lv)
+{
+ uint32_t le, s;
+ struct lv_segment *data_seg, *seg;
+
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+ RETURN_IF_SEG_AREA_COUNT_FALSE(seg, 0);
+
+ for (s = 0; s < seg->area_count; s++) {
+ RETURN_IF_ZERO(seg_type(seg, s) == AREA_LV, "sub lv");
+
+ le = 0;
+ dm_list_iterate_items(data_seg, &(seg_lv(seg, s)->segments)) {
+ data_seg->reshape_len = le ? 0 : seg->reshape_len;
+ data_seg->le = le;
+ le += data_seg->len;
+ }
+ }
+
+ /* At least try merging segments _after_ adjusting start LEs */
+ return lv_merge_segments(lv);
+}
+
+/* HM Helper: put @lv on @removal_lvs resetting its raid image state */
+static int _lv_reset_raid_add_to_list(struct logical_volume *lv, struct dm_list *removal_lvs)
+{
+ struct lv_list *lvl;
+
+ RETURN_IF_ZERO(lv, "lv argument");
+ RETURN_IF_ZERO(removal_lvs, "removal LVs list argument");
+ RETURN_IF_ZERO((lvl = dm_pool_alloc(lv->vg->vgmem, sizeof(*lvl))), "LV list item allocated")
+
+ lvl->lv = lv;
+ dm_list_add(removal_lvs, &lvl->list);
+ lv->status &= ~(RAID_IMAGE|RAID_META|LV_DUPLICATED);
+ lv_set_visible(lv);
+
+ return 1;
+}
+
+/*
+ * HM Helper:
*
* Deactivate and remove the LVs on @removal_lvs list from @vg
*
@@ -194,19 +551,108 @@ static int _deactivate_and_remove_lvs(struct volume_group *vg, struct dm_list *r
{
struct lv_list *lvl;
- dm_list_iterate_items(lvl, removal_lvs) {
-PFLA("lvl->lv->name=%s", lvl->lv->name);
- if (!deactivate_lv(vg->cmd, lvl->lv))
- return_0;
+ RETURN_IF_ZERO(vg, "VG");
+ RETURN_IF_ZERO(removal_lvs, "removal LVs list argument");
-PFLA("lvl->lv->name=%s", lvl->lv->name);
- if (!lv_remove(lvl->lv))
+ dm_list_iterate_items(lvl, removal_lvs)
+ if (!deactivate_lv(vg->cmd, lvl->lv) ||
+ !lv_remove(lvl->lv))
return_0;
+
+ return 1;
+}
+
+/*
+ * HM Helper:
+ *
+ * report health string in @*raid_health for @lv from kernel reporting # of devs in @*kernel_devs
+ */
+static int _get_dev_health(struct logical_volume *lv, uint32_t *kernel_devs,
+ uint32_t *devs_health, uint32_t *devs_in_sync,
+ char **raid_health)
+{
+ unsigned d;
+ char *rh;
+
+ RETURN_IF_ZERO(lv, "lv argument");
+ RETURN_IF_ZERO(kernel_devs, "kernel_devs");
+ RETURN_IF_ZERO(devs_health, "devs_health");
+ RETURN_IF_ZERO(devs_in_sync, "devs_in_sync");
+
+ *devs_health = *devs_in_sync = 0;
+
+ if (!lv_raid_dev_count(lv, kernel_devs)) {
+ log_error("Failed to get device count");
+ return_0;
}
+ if (!lv_raid_dev_health(lv, &rh)) {
+ log_error("Failed to get device health");
+ return_0;
+ }
+
+ d = (unsigned) strlen(rh);
+ while (d--) {
+ (*devs_health)++;
+ if (rh[d] == 'A')
+ (*devs_in_sync)++;
+ }
+
+ if (raid_health)
+ *raid_health = rh;
+
return 1;
}
+/* HM Helper: return 1 in case raid device with @idx is alive and in sync */
+static int _dev_in_sync(struct logical_volume *lv, const uint32_t idx)
+{
+ uint32_t kernel_devs, devs_health, devs_in_sync;
+ char *raid_health;
+ struct lv_segment *seg;
+
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+ RETURN_IF_SEG_AREA_COUNT_FALSE(seg, idx);
+
+ if (!seg_is_raid(seg))
+ return seg->area_count;
+
+ if (!_get_dev_health(lv, &kernel_devs, &devs_health, &devs_in_sync, &raid_health) ||
+ idx >= kernel_devs)
+ return 0;
+
+ return raid_health[idx] == 'A';
+}
+
+/* HM Helper: return number of devices in sync for (raid) @lv */
+static int _devs_in_sync_count(struct logical_volume *lv)
+{
+ uint32_t kernel_devs, devs_health, devs_in_sync;
+ struct lv_segment *seg;
+
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+
+ if (!seg_is_raid(seg))
+ return seg->area_count;
+
+ if (!_get_dev_health(lv, &kernel_devs, &devs_health, &devs_in_sync, NULL))
+ return 0;
+
+ return (int) devs_in_sync;
+}
+
+/* Return 1 if @lv is degraded, else 0 */
+static int _lv_is_degraded(struct logical_volume *lv)
+{
+ struct lv_segment *seg;
+
+ RETURN_IF_ZERO(lv, "lv argument");
+ if (!(seg = first_seg(lv)))
+ return 0;
+
+ return _devs_in_sync_count(lv) < seg->area_count;
+}
+
/*
* _raid_in_sync
* @lv
@@ -217,60 +663,396 @@ PFLA("lvl->lv->name=%s", lvl->lv->name);
*
* Returns: 1 if in-sync, 0 otherwise.
*/
-static int _raid_in_sync(struct logical_volume *lv)
+static int _raid_in_sync(const struct logical_volume *lv)
{
dm_percent_t sync_percent;
- struct lv_segment *seg = first_seg(lv);
+ struct lv_segment *seg;
+
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
if (seg_is_striped(seg) || seg_is_any_raid0(seg))
return 1;
if (!lv_raid_percent(lv, &sync_percent)) {
- log_error("Unable to determine sync status of %s/%s.",
- lv->vg->name, lv->name);
+ log_error("Unable to determine sync status of %s.", display_lvname(lv));
return 0;
}
+PFLA("sync_percent=%d DM_PERCENT_100=%d", sync_percent, DM_PERCENT_100);
+ if (sync_percent == DM_PERCENT_0) {
+ /*
+ * FIXME We repeat the status read here to workaround an
+ * unresolved kernel bug when we see 0 even though the
+ * the array is 100% in sync.
+ * https://bugzilla.redhat.com/1210637
+ */
+ usleep(500000);
+ if (!lv_raid_percent(lv, &sync_percent)) {
+ log_error("Unable to determine sync status of %s/%s.",
+ lv->vg->name, lv->name);
+ return 0;
+ }
+PFLA("sync_percent=%d DM_PERCENT_100=%d", sync_percent, DM_PERCENT_100);
+ if (sync_percent == DM_PERCENT_100)
+ log_warn("WARNING: Sync status for %s is inconsistent.",
+ display_lvname(lv));
+ }
return (sync_percent == DM_PERCENT_100) ? 1 : 0;
}
/*
- * HM
+ * HM API function:
*
- * Remove seg from segments using @lv and set one segment mapped to error target on @lv
+ * Return 1 if RAID @lv is in sync or if @lv is not a RAID LV, else 0.
*
- * Returns 1 on success or 0 on failure
+ * Called via lv_extend/lv_reduce API to prevent
+ * reshaping @lv from being resized.
*/
-static int _remove_and_set_error_target(struct logical_volume *lv, struct lv_segment *seg)
+int lv_raid_in_sync(const struct logical_volume *lv)
{
- lv_set_visible(lv);
+ struct lv_segment *seg;
- if (!remove_seg_from_segs_using_this_lv(lv, seg))
+ RETURN_IF_LV_ZERO(lv);
+
+ if (!(seg = first_seg(lv)))
+ return 1;
+
+ if (seg_is_reshapable_raid(seg)) {
+ if (!lv_is_active(lv)) {
+ log_error("RAID LV %s has to be active to resize it!", display_lvname(lv));
+ return 0;
+ }
+
+ return _raid_in_sync(lv);
+ }
+
+ return 1;
+}
+
+/* HM Helper: start repair on idle/frozen @lv */
+static int _lv_cond_repair(struct logical_volume *lv)
+{
+ char *action;
+
+ RETURN_IF_ZERO(lv, "lv argument");
+
+ if (!lv_raid_sync_action(lv, &action))
+ return 0;
+
+ return (strcmp(action, "idle") &&
+ strcmp(action, "frozen")) ? 1 : lv_raid_message(lv, "repair");
+}
+
+/*
+ * HM Helper:
+ *
+ * Report the number of redundant disks for @segtype, depending on
+ * @total_images and @data_copies, in @*nr.
+ *
+ * Returns 1 on success (with @*nr set), 0 on invalid arguments.
+ */
+static int _seg_get_redundancy(const struct segment_type *segtype, unsigned total_images,
+			       unsigned data_copies, unsigned *nr)
+{
+	/* Validate @nr before any dereference of it below */
+	RETURN_IF_ZERO(nr, "nr argument");
+
+	/* Check @segtype for NULL before segtype_is_thin() dereferences it */
+	if (!segtype) {
+		*nr = 0;
+		return 1;
+	}
+
+	if (segtype_is_thin(segtype)) {
+		*nr = 0;
+		return 1;
+	}
+
+	RETURN_IF_ZERO(total_images, "total images");
+	RETURN_IF_ZERO(data_copies, "data copies");
+
+	if (segtype_is_any_raid10(segtype)) {
+		/*
+		 * HM FIXME: this is the ideal case if (data_copies - 1)
+		 *           fail per 'mirror group'
+		 *
+		 * Note the parentheses: we test "total_images divides
+		 * evenly by data_copies", not "(!total_images) % ...".
+		 */
+		if (!(total_images % data_copies) &&
+		    !segtype_is_raid10_far(segtype))
+			*nr = total_images / data_copies;
+		else
+			*nr = data_copies - 1;
+
+	} else if (segtype_is_raid01(segtype) ||
+		   segtype_is_raid1(segtype) ||
+		   segtype_is_mirror(segtype))
+		*nr = total_images - 1;
+
+	else if (segtype_is_raid4(segtype) ||
+		 segtype_is_any_raid5(segtype) ||
+		 segtype_is_any_raid6(segtype))
+		*nr = segtype->parity_devs;
+
+	else
+		*nr = 0;
+
+	return 1;
+}
+
+/*
+ * HM Helper:
+ *
+ * In case of any resilience-related conversion on @lv, prompt the user unless "-y/--yes" is given on the command line
+ */
+static int _yes_no_conversion(const struct logical_volume *lv,
+ const struct segment_type *new_segtype,
+ int yes, int force, int duplicate,
+ unsigned new_image_count,
+ const unsigned new_data_copies,
+ const unsigned new_stripes,
+ const unsigned new_stripe_size)
+{
+ int data_copies_change, segtype_change, stripes_change, stripe_size_change;
+ unsigned cur_redundancy, new_redundancy;
+ struct lv_segment *seg;
+ struct segment_type *new_segtype_tmp;
+ const struct segment_type *segtype;
+ struct lvinfo info = { 0 };
+ struct cmd_context *cmd;
+
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+ RETURN_IF_ZERO(new_data_copies, "new data copies");
+ RETURN_IF_ZERO((new_segtype_tmp = (struct segment_type *) new_segtype), /* Drop const */
+ "segment type argument");
+ cmd = lv->vg->cmd;
+
+ if (!lv_info(lv->vg->cmd, lv, 0, &info, 1, 0) && driver_version(NULL, 0)) {
+ log_error("Unable to retrieve logical volume information: aborting");
+ return 0;
+ }
+
+ /* If this is a duplicating LV with raid1 on top, the segtype of the respective leg is relevant */
+ if (_lv_is_duplicating(lv)) {
+
+ if (first_seg(seg_lv(seg, 0))->segtype == new_segtype)
+ segtype = first_seg(seg_lv(seg, 1))->segtype;
+ else
+ segtype = first_seg(seg_lv(seg, 0))->segtype;
+
+ } else
+ segtype = seg->segtype;
+
+ segtype_change = new_segtype != segtype;
+ data_copies_change = new_data_copies && (new_data_copies != seg->data_copies);
+ stripes_change = new_stripes && (new_stripes != _data_rimages_count(seg, seg->area_count));
+ stripe_size_change = new_stripe_size && (new_stripe_size != seg->stripe_size);
+ new_image_count = new_image_count ?: lv_raid_image_count(lv);
+
+ /* Get number of redundant disks for current and new segtype */
+ if (!_seg_get_redundancy(segtype, seg->area_count, seg->data_copies, &cur_redundancy) ||
+ !_seg_get_redundancy(new_segtype, new_image_count, new_data_copies, &new_redundancy))
+ return 0;
+
+PFLA("yes=%d cur_redundancy=%u new_redundancy=%u", yes, cur_redundancy, new_redundancy);
+ if (duplicate)
+ ;
+ else if (new_redundancy == cur_redundancy) {
+ if (stripes_change)
+ log_print_unless_silent("Converting active%s %s %s%s%s%s will keep "
+ "resilience of %u disk failure%s",
+ info.open_count ? " and open" : "", display_lvname(lv),
+ segtype != new_segtype ? "from " : "",
+ segtype != new_segtype ? _get_segtype_name(segtype, seg->area_count) : "",
+ segtype != new_segtype ? " to " : "",
+ segtype != new_segtype ? _get_segtype_name(new_segtype, new_image_count) : "",
+ cur_redundancy,
+ (!cur_redundancy || cur_redundancy > 1) ? "s" : "");
+
+ } else if (new_redundancy > cur_redundancy)
+ log_print_unless_silent("Converting active%s %s %s%s%s%s will enhance "
+ "resilience from %u disk failure%s to %u",
+ info.open_count ? " and open" : "", display_lvname(lv),
+ segtype != new_segtype ? "from " : "",
+ segtype != new_segtype ? _get_segtype_name(segtype, seg->area_count) : "",
+ segtype != new_segtype ? " to " : "",
+ segtype != new_segtype ? _get_segtype_name(new_segtype, new_image_count) : "",
+ cur_redundancy,
+ (!cur_redundancy || cur_redundancy > 1) ? "s" : "",
+ new_redundancy);
+
+ else if (new_redundancy &&
+ new_redundancy < cur_redundancy)
+ log_warn("WARNING: Converting active%s %s %s%s%s%s will degrade "
+ "resilience from %u disk failures to just %u",
+ info.open_count ? " and open" : "", display_lvname(lv),
+ segtype != new_segtype ? "from " : "",
+ segtype != new_segtype ? _get_segtype_name(segtype, seg->area_count) : "",
+ segtype != new_segtype ? " to " : "",
+ segtype != new_segtype ? _get_segtype_name(new_segtype, new_image_count) : "",
+ cur_redundancy, new_redundancy);
+
+ else if (!new_redundancy && cur_redundancy)
+ log_warn("WARNING: Converting active%s %s from %s to %s will remove "
+ "all resilience to disk failures",
+ info.open_count ? " and open" : "", display_lvname(lv),
+ _get_segtype_name(segtype, seg->area_count),
+ _get_segtype_name(new_segtype, new_image_count));
+
+
+ /****************************************************************************/
+ /* No --type arg */
+ /* Linear/raid0 with 1 image to raid1 via "-mN" option */
+ if (segtype == new_segtype &&
+ (seg_is_linear(seg) || (seg_is_any_raid0(seg) && seg->area_count == 1)) &&
+ new_image_count > 1 &&
+ !(new_segtype_tmp = get_segtype_from_flag(lv->vg->cmd, SEG_RAID1)))
+ return_0;
+
+ if (!yes) {
+ if (_lv_is_duplicating(lv)) {
+ // const char *add_remove, *to_from;
+ struct add_remove {
+ const char *ar;
+ const char *tf;
+ };
+ static struct add_remove add_remove[2] = {
+ { "add", "to" },
+ { "remove", "from" },
+ };
+ struct add_remove *ar = add_remove + (new_image_count > seg->area_count ? 0 : 1);
+
+PFLA("new_image_count=%u seg->area_count=%u", new_image_count, seg->area_count);
+
+ if (yes_no_prompt("Do you really want to %s a %s duplicated "
+ "sub_lv %s duplicating %s? [y/n]: ",
+ ar->ar, new_segtype->name, ar->tf, display_lvname(lv)) == 'n') {
+ log_error("Logical volume %s NOT converted", display_lvname(lv));
+ return 0;
+ }
+
+ return 1;
+ }
+
+ /* HM FIXME: all these checks or just the add/remove one above? */
+ if (segtype_change &&
+ yes_no_prompt("Do you really want to %s %s with type %s to %s? [y/n]: ",
+ duplicate ? "duplicate" : "convert",
+ display_lvname(lv),
+ _get_segtype_name(segtype, seg->area_count),
+ _get_segtype_name(new_segtype_tmp, new_image_count)) == 'n') {
+ log_error("Logical volume %s NOT converted", display_lvname(lv));
+ return 0;
+ }
+
+ if (data_copies_change &&
+ yes_no_prompt("Do you really want to %s %s with %u to %u data copies %s resilience%s? [y/n]: ",
+ duplicate ? "duplicate" : "convert",
+ display_lvname(lv), seg->data_copies, new_data_copies,
+ seg->data_copies < new_data_copies ? "enhancing" : "reducing",
+ duplicate ? " on the copy": "") == 'n') {
+ log_error("Logical volume %s NOT converted", display_lvname(lv));
+ return 0;
+ }
+
+ if (!duplicate) {
+ if (stripes_change &&
+ yes_no_prompt("Do you really want to %s %s with %u stripes to %u stripes? [y/n]: ",
+ duplicate ? "duplicate" : "convert",
+ display_lvname(lv), _data_rimages_count(seg, seg->area_count), new_stripes) == 'n') {
+ log_error("Logical volume %s NOT converted", display_lvname(lv));
+ return 0;
+ }
+
+ if (stripe_size_change &&
+ yes_no_prompt("Do you really want to %s %s with stripesize %s to stripesize %s? [y/n]: ",
+ duplicate ? "duplicate" : "convert",
+ display_lvname(lv),
+ display_size(cmd, seg->stripe_size),
+ display_size(cmd, new_stripe_size)) == 'n') {
+ log_error("Logical volume %s NOT converted", display_lvname(lv));
+ return 0;
+ }
+ }
+ }
+
+ if (sigint_caught())
+ return_0;
+
+ /* Now archive metadata after the user has confirmed */
+ if (!archive(lv->vg))
return_0;
- return replace_lv_with_error_segment(lv);
+ return 1;
+}
+
+/* HM Helper: prohibit allocation on @pv if @lv already has segments allocated on it */
+static int _avoid_pv_of_lv(struct logical_volume *lv, struct physical_volume *pv)
+{
+ RETURN_IF_ZERO(lv, "lv argument");
+ RETURN_IF_ZERO(pv, "PV");
+
+ if (!(lv->status & PARTIAL_LV) &&
+ lv_is_on_pv(lv, pv))
+ pv->status |= PV_ALLOCATION_PROHIBITED;
+
+ return 1;
+}
+
+/* HM Helper: prohibit allocation on any listed PVs via @data if @lv already has segments allocated on those */
+static int _avoid_pvs_of_lv(struct logical_volume *lv, void *data)
+{
+ struct dm_list *allocate_pvs = (struct dm_list *) data;
+ struct pv_list *pvl;
+
+ RETURN_IF_ZERO(lv, "lv argument");
+ RETURN_IF_ZERO(allocate_pvs, "allocate pv list argument");
+
+ dm_list_iterate_items(pvl, allocate_pvs)
+ if (!_avoid_pv_of_lv(lv, pvl->pv))
+ return 0;
+
+ return 1;
}
/*
- * _raid_remove_top_layer
+ * Prevent any PVs holding other image components of @lv from being used for allocation,
+ * I.e. remove respective PVs from @allocate_pvs
+ */
+static int _avoid_pvs_with_other_images_of_lv(struct logical_volume *lv, struct dm_list *allocate_pvs)
+{
+ RETURN_IF_ZERO(lv, "lv argument");
+ RETURN_IF_ZERO(allocate_pvs, "allocate pv list argument");
+
+ log_debug_metadata("Preventing allocation on PVs of LV %s", display_lvname(lv));
+
+ /* HM FIXME: check fails in case we will ever have mixed AREA_PV/AREA_LV segments */
+ if ((seg_type(first_seg(lv), 0) == AREA_PV ? _avoid_pvs_of_lv(lv, allocate_pvs):
+ for_each_sub_lv(lv, _avoid_pvs_of_lv, allocate_pvs)))
+ return 1;
+
+ log_error("Failed to prevent PVs holding image components "
+ "from LV %s being used for allocation.",
+ display_lvname(lv));
+ return 0;
+}
+
+/*
+ * Helper:
+ *
+ * _convert_raid_to_linear
* @lv
- * @removal_list
+ * @removal_lvs
*
* Remove top layer of RAID LV in order to convert to linear.
* This function makes no on-disk changes. The residual LVs
- * returned in 'removal_list' must be freed by the caller.
+ * returned in 'removal_lvs' must be freed by the caller.
*
* Returns: 1 on succes, 0 on failure
*/
-static int _raid_remove_top_layer(struct logical_volume *lv,
- struct dm_list *removal_list)
+static int _extract_image_component_list(struct lv_segment *seg,
+ uint64_t type, uint32_t idx,
+ struct dm_list *removal_lvs);
+static int _convert_raid_to_linear(struct logical_volume *lv,
+ struct dm_list *removal_lvs)
{
- struct lv_list *lvl_array, *lvl;
struct logical_volume *lv_tmp;
- struct lv_segment *seg = first_seg(lv);
+ struct lv_segment *seg;
- if (!seg_is_mirrored(seg) &&
- !(seg_is_raid4(seg) || seg_is_any_raid5(seg))) {
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+ RETURN_IF_ZERO(removal_lvs, "removal LV list argument");
+
+ if (!seg_is_any_raid0(seg) &&
+ !seg_is_mirrored(seg) &&
+ !seg_is_raid4(seg) && !seg_is_any_raid5(seg) &&
+ !seg_is_raid01(seg)) {
log_error(INTERNAL_ERROR
"Unable to remove RAID layer from segment type %s",
lvseg_name(seg));
@@ -281,129 +1063,102 @@ static int _raid_remove_top_layer(struct logical_volume *lv,
if (seg->area_count != 1) {
log_error(INTERNAL_ERROR
"Unable to remove RAID layer when there"
- " is more than one sub-lv");
+ " is more than one sub LV");
return 0;
}
- if (!(lvl_array = dm_pool_alloc(lv->vg->vgmem, 2 * sizeof(*lvl))))
- return_0;
-
- if (seg->meta_areas) {
- lv_tmp = seg_metalv(seg, 0);
- lv_tmp->status &= ~RAID_META;
- lv_set_visible(lv_tmp);
- lvl_array[0].lv = lv_tmp;
-PFL();
- /* Remove reference from top-layer lv to the rmeta one. */
- if (!remove_seg_from_segs_using_this_lv(lv_tmp, seg))
- return_0;
+ if (seg->meta_areas &&
+ !_extract_image_component_list(seg, RAID_META, 0 /* idx */, removal_lvs))
+ return 0;
- seg_metatype(seg, 0) = AREA_UNASSIGNED;
- dm_list_add(removal_list, &(lvl_array[0].list));
- }
-PFL();
- /* Add remaining last image lv to removal_list */
+ /* Add remaining last image LV to removal_lvs */
lv_tmp = seg_lv(seg, 0);
- lv_tmp->status &= ~RAID_IMAGE;
- lv_set_visible(lv_tmp);
- lvl_array[1].lv = lv_tmp;
- dm_list_add(removal_list, &(lvl_array[1].list));
-PFL();
+ if (!_lv_reset_raid_add_to_list(lv_tmp, removal_lvs))
+ return 0;
+
if (!remove_layer_from_lv(lv, lv_tmp))
return_0;
-PFL();
- lv->status &= ~(MIRRORED | RAID);
+
+ if (!(first_seg(lv)->segtype = get_segtype_from_string(lv->vg->cmd, SEG_TYPE_NAME_STRIPED)))
+ return_0;
+
+ first_seg(lv)->region_size = 0;
+
+ lv->status &= ~(MIRRORED | RAID | RAID_IMAGE | LV_DUPLICATED);
return 1;
}
+/* HM Helper: convert raid01 @lv to striped */
+static int _convert_raid01_to_striped(struct logical_volume *lv,
+ struct dm_list *removal_lvs)
+{
+ struct lv_segment *seg;
+
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+
+ if (!seg_is_raid01(seg))
+ return 0;
+
+ return _convert_raid_to_linear(lv, removal_lvs);
+}
+
/*
- * _clear_lv
- * @lv
+ * HM Helper
+ *
+ * clear first 4K of @lv
*
- * If LV is active:
- * clear first block of device
- * otherwise:
- * activate, clear, deactivate
+ * We're holding an exclusive lock, so we can clear the
+ * first block of the (metadata) LV directly on the respective
+ * PV avoiding activation of the metadata lv altogether and
+ * hence superfluous latencies.
*
* Returns: 1 on success, 0 on failure
+ *
+ * HM FIXME: share with lv_manip.c!
*/
static int _clear_lv(struct logical_volume *lv)
{
- int was_active = lv_is_active_locally(lv);
+ struct lv_segment *seg;
+ struct physical_volume *pv;
+ uint64_t offset;
if (test_mode())
return 1;
- lv->status |= LV_TEMPORARY;
- if (!was_active && !activate_lv_local(lv->vg->cmd, lv)) {
- log_error("Failed to activate localy %s for clearing",
- lv->name);
- return 0;
- }
- lv->status &= ~LV_TEMPORARY;
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+ RETURN_IF_ZERO(seg->area_count == 1, "area count == 1");
+ RETURN_IF_ZERO(seg_type(seg, 0) == AREA_PV, "area PV");
+ RETURN_IF_ZERO((pv = seg_pv(seg, 0)), "physical volume");
+ RETURN_IF_ZERO(pv->pe_start, "no PE start address");
-PFLA("Clearing metadata area of %s/%s", lv->vg->name, lv->name);
- log_verbose("Clearing metadata area of %s/%s",
- lv->vg->name, lv->name);
/*
- * Rather than wiping lv->size, we can simply
- * wipe the first sector to remove the superblock of any previous
- * RAID devices. It is much quicker.
+ * Rather than wiping lv->size, we can simply wipe the first 4KiB
+ * to remove the superblock of any previous RAID devices. It is much
+ * quicker than wiping a potentially larger metadata device completely.
*/
- if (!wipe_lv(lv, (struct wipe_params) { .do_zero = 1, .zero_sectors = 1 })) {
- log_error("Failed to zero %s", lv->name);
- return 0;
- }
-
- if (!was_active && !deactivate_lv(lv->vg->cmd, lv)) {
- log_error("Failed to deactivate %s", lv->name);
- return 0;
- }
+ log_verbose("Clearing metadata area of %s", display_lvname(lv));
+ offset = (pv->pe_start + seg_pe(seg, 0) * lv->vg->extent_size) << 9;
- return 1;
+ return dev_set(pv->dev, offset, 4096, 0);
}
/*
- * HM
+ * HM Helper:
*
- * Wipe all LVs on @lv_list
+ * wipe all LVs first 4 KiB on @lv_list
*
- * Makes on-disk metadata changes!
+ * Does _not_ make any on-disk metadata changes!
*
* Returns 1 on success or 0 on failure
*/
static int _clear_lvs(struct dm_list *lv_list)
{
struct lv_list *lvl;
- struct volume_group *vg = NULL;
- if (dm_list_empty(lv_list)) {
- log_debug_metadata(INTERNAL_ERROR "Empty list of LVs given for clearing");
- return 1;
- }
+ RETURN_IF_ZERO(lv_list, "lv list argument");
+ RETURN_IF_NONZERO(dm_list_empty(lv_list), "LVs given for clearing");
-PFL();
- dm_list_iterate_items(lvl, lv_list) {
- if (!lv_is_visible(lvl->lv)) {
- log_error(INTERNAL_ERROR
- "LVs must be set visible before clearing");
- return 0;
- }
-
- vg = lvl->lv->vg;
- }
-
- /*
- * FIXME: only vg_[write|commit] if LVs are not already written
- * as visible in the LVM metadata (which is never the case yet).
- */
-PFL();
-PFLA("vg_validate(vg)=%d", vg_validate(vg));
-PFL();
- if (!vg || !vg_write(vg) || !vg_commit(vg))
- return_0;
-PFL();
dm_list_iterate_items(lvl, lv_list)
if (!_clear_lv(lvl->lv))
return 0;
@@ -414,27 +1169,41 @@ PFL();
/*
* HM
*
- * Check for maximum supported devices caused by the kernel
- * MD maximum device limits _and_ dm-raid superblock bitfield constraints
- * Wipe all LVs on @lv_list
+ * Check for maximum supported raid/mirror devices imposed by the kernel
+ * maximum device limits _and_ dm-raid superblock bitfield constraints
*
* Returns 1 on success or 0 on failure
*/
-static int _check_maximum_devices(uint32_t num_devices)
+static int _check_max_devices(uint32_t image_count, unsigned max)
{
- if (num_devices > DEFAULT_RAID_MAX_IMAGES) {
- log_error("Unable to handle arrays with more than %u devices",
- DEFAULT_RAID_MAX_IMAGES);
+ RETURN_IF_ZERO(image_count, "image count");
+ RETURN_IF_ZERO(max, "maximum image count");
+
+ if (image_count > max) {
+ log_error("Unable to handle %s with more than %u devices",
+ max == DEFAULT_MIRROR_MAX_IMAGES ? "mirrors" : "raid arrays", max);
return 0;
}
return 1;
}
+static int _check_max_raid_devices(uint32_t image_count)
+{
+ return _check_max_devices(image_count, DEFAULT_RAID_MAX_IMAGES);
+}
+
+static int _check_max_mirror_devices(uint32_t image_count)
+{
+ return _check_max_devices(image_count, DEFAULT_MIRROR_MAX_IMAGES);
+}
+
/* Replace @lv with error segment */
static int _replace_lv_with_error_segment(struct logical_volume *lv)
{
- if (lv && (lv->status & PARTIAL_LV)) {
+ RETURN_IF_ZERO(lv, "lv argument");
+
+ if (lv->status & PARTIAL_LV) {
log_debug("Replacing %s segments with error target", lv->name);
if (!replace_lv_with_error_segment(lv)) {
@@ -452,6 +1221,10 @@ static int _lv_name_add_string_index(struct cmd_context *cmd, const char **lv_na
size_t len;
char *b, *e, *newname, *tmpname;
+ RETURN_IF_ZERO(cmd, "command context argument");
+ RETURN_IF_ZERO(lv_name, "lv name argument");
+ RETURN_IF_ZERO(prefix, "name prefix argument");
+
if (!(tmpname = dm_pool_strdup(cmd->mem, *lv_name)))
return 0;
@@ -471,35 +1244,139 @@ static int _lv_name_add_string_index(struct cmd_context *cmd, const char **lv_na
strcpy(newname, prefix);
strcat(newname, b);
*lv_name = newname;
+
+ return 1;
+}
+
+/*
+ * HM Helper:
+ *
+ * Get @*index from the numeric suffix of the name of @lv
+ * (e.g. "lv_rimage_3" -> 3).
+ *
+ * Returns 1 on success (with @*index set), 0 on a malformed name.
+ */
+static int _lv_name_get_string_index(struct logical_volume *lv, unsigned *index)
+{
+	char *numptr, *n;
+
+	RETURN_IF_ZERO(lv, "lv argument");
+	RETURN_IF_ZERO(index, "index argument");
+
+	if (!(numptr = strrchr(lv->name, '_')))
+		goto err;
+
+	n = ++numptr;
+
+	/*
+	 * Reject an empty suffix (name ending in '_'): the digit loop
+	 * below would not run and atoi("") would silently yield 0.
+	 */
+	if (!*n)
+		goto err;
+
+	while (*n) {
+		if (*n < '0' || *n > '9')
+			goto err;
+		n++;
+	}
+
+	*index = atoi(numptr);
+
+	return 1;
+
+err:
+	log_error("Malformatted image name");
+	return 0;
+}
/*
* Shift image @*name (suffix) from @s to (@s - @missing)
*
* E.g. s=5, missing=2 -> change "*_r(image,meta)_5" to "*_r(image,meta)_3"
+ * - or -
+ * s=5, missing=2 -> change "*_dup_5_*" to "*_dup_3_*"
*/
-static int _shift_image_name(struct lv_segment *seg, char **name, unsigned s, unsigned missing)
+static int __shift_lv_name(char *shift_name, char **name, unsigned s, unsigned missing)
{
+ int r = 0;
unsigned num;
- size_t len;
- char *numptr, *shift_name;
+ ssize_t len, len1;
+ char *numptr;
+
+ RETURN_IF_ZERO(shift_name, "shift name argument");
+ RETURN_IF_ZERO(name, "name argument");
+
+log_very_verbose("Before shifting %s", *name);
+ /* Handle duplicating sub LV names */
+ if ((numptr = strstr(shift_name, "_dup_")) &&
+ (_strstr_strings(shift_name, "_rimage_", "_rmeta_", NULL))) {
+ char *suffix;
+log_very_verbose("shifting duplicating sub LV %s", shift_name);
+
+ numptr += strlen("_dup_");
+ if ((suffix = strchr(numptr, '_')) &&
+ (num = atoi(numptr)) == s) {
+ len = suffix - numptr + 1;
+log_very_verbose("shifting duplicating sub LV %s numptr=%s suffix=%s len=%ld", shift_name, numptr, suffix, len);
+ if ((len1 = dm_snprintf(numptr, len, "%u", num - missing)) < 0)
+ goto out;
+
+ if (len1 < len) {
+ strncpy(*name, shift_name, numptr - shift_name + len1);
+ strcat(*name, suffix);
+
+ } else
+ *name = shift_name;
+
+ r = 1;
+ }
+log_very_verbose("shifting s=%u num=%u", s, num);
- if (!(shift_name = dm_pool_strdup(seg_lv(seg, s - missing)->vg->cmd->mem, *name))) {
- log_error("Memory allocation failed.");
- return 0;
+ /* Handle regular (sub) LV names */
+ } else {
+ if ((numptr = strrchr(shift_name, '_')) &&
+ (num = atoi(numptr + 1)) == s) {
+ *name = shift_name;
+ len = strlen(++numptr) + 1;
+ r = dm_snprintf(numptr, len, "%u", num - missing) < 0 ? 0 : 1;
+ }
}
- if (!(numptr = strrchr(shift_name, '_')) ||
- (num = atoi(numptr + 1)) != s) {
- log_error("Malformatted image name");
+ log_very_verbose("After shifting %s", *name);
+ return r;
+out:
+ log_error("Malformatted image name");
+ return 0;
+}
+
+static int _shift_lv_name(struct logical_volume *lv, unsigned s, unsigned missing)
+{
+ char *shift_name;
+
+ RETURN_IF_ZERO(lv, "lv argument");
+
+ if (!(shift_name = dm_pool_strdup(lv->vg->cmd->mem, lv->name))) {
+ log_error("Memory allocation failed.");
return 0;
}
- *name = shift_name;
- len = strlen(++numptr) + 1;
+ return __shift_lv_name(shift_name, (char **) &lv->name, s, missing);
+}
- return dm_snprintf(numptr, len, "%u", num - missing) < 0 ? 0 : 1;
+/* Change name of @lv with # @s to # (@s - @missing) */
+static int _shift_image_name(struct logical_volume *lv, unsigned s, unsigned missing)
+{
+ struct lv_segment *seg;
+
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+
+ if (lv_is_duplicated(lv) &&
+ (seg_is_raid(seg) || seg_is_mirror(seg))) {
+ uint32_t ss;
+ struct lv_segment *fseg = first_seg(lv);
+
+ for (ss = 0; ss < fseg->area_count; ss++) {
+ if (!_shift_image_name(seg_lv(fseg, ss), s, missing))
+ return 0;
+
+ if (fseg->meta_areas &&
+ (seg_metatype(fseg, ss) != AREA_LV ||
+ !_shift_image_name(seg_metalv(fseg, ss), s, missing)))
+ return 0;
+ }
+ }
+
+ return _shift_lv_name(lv, s, missing);
}
/*
@@ -509,49 +1386,52 @@ static int _shift_image_name(struct lv_segment *seg, char **name, unsigned s, un
* Shift all higher indexed segment areas down to fill in gaps where
* there are 'AREA_UNASSIGNED' areas.
*
+ * We don't need to bother with name reallocation,
+ * because the name length will be less than or equal
+ * when shifting down as opposed to shifting up.
+ *
* Returns: 1 on success, 0 on failure
*/
static int _shift_image_components(struct lv_segment *seg)
{
uint32_t s, missing;
+ RETURN_IF_ZERO(seg, "lv segment argument");
+
if (!seg_is_raid(seg))
return_0;
+ RETURN_IF_ZERO(seg->meta_areas, "meta areas");
+
/* Should not be possible here, but... */
- if (!_check_maximum_devices(seg->area_count))
+ if (!_check_max_raid_devices(seg->area_count))
return 0;
- log_very_verbose("Shifting images in %s", seg->lv->name);
+ log_very_verbose("Shifting images in %s", lvseg_name(seg));
for (s = missing = 0; s < seg->area_count; s++) {
if (seg_type(seg, s) == AREA_UNASSIGNED) {
- if (seg_metatype(seg, s) != AREA_UNASSIGNED) {
- log_error(INTERNAL_ERROR "Metadata segment area"
- " #%d should be AREA_UNASSIGNED", s);
- return 0;
- }
-
+ RETURN_IF_ZERO(seg_metatype(seg, s) == AREA_UNASSIGNED, " unassigned metadata segment area");
missing++;
- continue;
- }
- if (missing) {
+ } else if (missing) {
+ RETURN_IF_ZERO(seg_type(seg, s) == AREA_LV && seg_lv(seg, s), "image lv");
+ RETURN_IF_ZERO(seg_metatype(seg, s) == AREA_LV && seg_metalv(seg, s), "meta lv");
+
log_very_verbose("Shifting %s and %s by %u",
seg_metalv(seg, s)->name,
seg_lv(seg, s)->name, missing);
seg->areas[s - missing] = seg->areas[s];
- if (!_shift_image_name(seg, (char **) &seg_lv(seg, s - missing)->name, s, missing))
+ seg_type(seg, s) = AREA_UNASSIGNED;
+ if (!_shift_image_name(seg_lv(seg, s - missing), s, missing))
return 0;
- if (seg->meta_areas) {
- seg->meta_areas[s - missing] = seg->meta_areas[s];
- if (!_shift_image_name(seg, (char **) &seg_metalv(seg, s - missing)->name, s, missing))
- return 0;
- }
+ seg->meta_areas[s - missing] = seg->meta_areas[s];
+ seg_metatype(seg, s) = AREA_UNASSIGNED;
+ if (!_shift_image_name(seg_metalv(seg, s - missing), s, missing))
+ return 0;
}
-
}
seg->area_count -= missing;
@@ -562,10 +1442,13 @@ static int _shift_image_components(struct lv_segment *seg)
static char *_generate_raid_name(struct logical_volume *lv,
const char *suffix, int count)
{
- const char *format = (count < 0) ? "%s_%s" : "%s_%s_%u";
+ const char *format = (count < 0) ? "%s_%s" : "%s_%s%u";
size_t len = strlen(lv->name) + strlen(suffix) + ((count < 0) ? 2 : 5);
char *name;
+ RETURN_IF_ZERO(lv, "lv argument");
+ RETURN_IF_ZERO(suffix, "name suffix argument");
+
if (!(name = dm_pool_alloc(lv->vg->vgmem, len))) {
log_error("Failed to allocate new name.");
return NULL;
@@ -574,15 +1457,16 @@ static char *_generate_raid_name(struct logical_volume *lv,
if (dm_snprintf(name, len, format, lv->name, suffix, count) < 0)
return_NULL;
-PFLA("name=%s", name);
+ if (count < 0)
+ name[strlen(name) - 1] = '\0';
+
if (!validate_name(name)) {
log_error("New logical volume name \"%s\" is not valid.", name);
return NULL;
}
if (find_lv_in_vg(lv->vg, name)) {
- log_error("Logical volume %s already exists in volume group %s.",
- name, lv->vg->name);
+ dm_pool_free(lv->vg->vgmem, name);
return NULL;
}
@@ -590,17 +1474,25 @@ PFLA("name=%s", name);
}
/*
- * Eliminate the extracted LVs on @removal_list from @vg incl. vg write, commit and backup
+ * Eliminate the extracted LVs on @removal_lvs from @vg incl. vg write, commit and backup
*/
-static int _eliminate_extracted_lvs(struct volume_group *vg, struct dm_list *removal_list)
+static int _eliminate_extracted_lvs_optional_write_vg(struct volume_group *vg,
+ struct dm_list *removal_lvs,
+ int vg_write_requested)
{
- sync_local_dev_names(vg->cmd);
+ RETURN_IF_ZERO(vg, "vg argument");
+
+ if (!removal_lvs || dm_list_empty(removal_lvs))
+ return 1;
+ sync_local_dev_names(vg->cmd);
PFL();
- if (!dm_list_empty(removal_list)) {
- if (!_deactivate_and_remove_lvs(vg, removal_list))
- return 0;
+ if (!_deactivate_and_remove_lvs(vg, removal_lvs))
+ return 0;
+ dm_list_init(removal_lvs);
+PFL();
+ if (vg_write_requested) {
if (!vg_write(vg) || !vg_commit(vg))
return_0;
@@ -608,75 +1500,70 @@ PFL();
log_error("Backup of VG %s failed after removal of image component LVs", vg->name);
}
PFL();
-
return 1;
}
+static int _eliminate_extracted_lvs(struct volume_group *vg, struct dm_list *removal_lvs)
+{
+ return _eliminate_extracted_lvs_optional_write_vg(vg, removal_lvs, 1);
+}
+
/*
- * Reallocate segment areas given by @seg_areas (i.e eith data or metadata areas)
- * in segment @seg to amount in @areas copying the minimum of common areas across
+ * Reallocate segment areas given by @type (i.e. data or metadata areas)
+ * in first segment of @lv to amount in @areas copying the minimum of common areas across
*/
-static int _realloc_seg_areas(struct logical_volume *lv, struct lv_segment *seg,
- uint32_t areas, struct lv_segment_area **seg_areas)
+static int _realloc_seg_areas(struct logical_volume *lv,
+ uint32_t areas, uint64_t type)
{
+ uint32_t s;
+ struct lv_segment *seg;
+ struct lv_segment_area **seg_areas;
struct lv_segment_area *new_areas;
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+ RETURN_IF_ZERO(areas, "areas count");
+
+ switch (type) {
+ case RAID_META:
+ seg_areas = &seg->meta_areas;
+ break;
+ case RAID_IMAGE:
+ seg_areas = &seg->areas;
+ break;
+ default:
+ log_error(INTERNAL_ERROR "Called with bogus type argument");
+ return 0;
+ }
+
if (!(new_areas = dm_pool_zalloc(lv->vg->vgmem, areas * sizeof(*new_areas)))) {
log_error("Allocation of new areas array failed.");
return 0;
}
+ for (s = 0; s < areas; s++)
+ new_areas[s].type = AREA_UNASSIGNED;
+
if (*seg_areas)
memcpy(new_areas, *seg_areas, min(areas, seg->area_count) * sizeof(*new_areas));
*seg_areas = new_areas;
+
return 1;
}
/*
* HM
*
- * Reallocate both data and metadata areas of segmen @seg to new amount in @ares
+ * Reallocate both data and metadata areas of first segment of segment of @lv to new amount in @areas
*/
-static int _realloc_meta_and_data_seg_areas(struct logical_volume *lv, struct lv_segment *seg,
- uint32_t areas)
+static int _realloc_meta_and_data_seg_areas(struct logical_volume *lv, uint32_t areas)
{
- return (_realloc_seg_areas(lv, seg, areas, &seg->meta_areas) &&
- _realloc_seg_areas(lv, seg, areas, &seg->areas)) ? 1 : 0;
-}
+ RETURN_IF_ZERO(lv, "lv argument");
+ RETURN_IF_ZERO(areas, "areas count");
-#if 0
-/*
- * HM
- *
- * Move the end of a partial segment area from @seg_from to @seg_to
- */
-static int _raid_move_partial_lv_segment_area(struct lv_segment *seg_to, uint32_t area_to,
- struct lv_segment *seg_from, uint32_t area_from, uint32_t area_reduction)
-{
- uint32_t pe;
- struct physical_volume *pv;
-
- if (seg_type(seg_from, area_from) != AREA_PV)
- return 0;
-
- pv = seg_pv(seg_from, area_from);
- pe = seg_pe(seg_from, area_from) + seg_from->area_len - area_reduction;;
-
- if (!release_lv_segment_area(seg_from, area_from, area_reduction))
- return_0;
-
- if (!release_lv_segment_area(seg_to, area_to, area_reduction))
- return_0;
-
- if (!set_lv_segment_area_pv(seg_to, area_to, pv, pe))
- return_0;
-
- seg_from->area_len -= area_reduction;
-
- return 1;
+ return (_realloc_seg_areas(lv, areas, RAID_META) &&
+ _realloc_seg_areas(lv, areas, RAID_IMAGE)) ? 1 : 0;
}
-#endif
/*
* _extract_image_component
@@ -684,49 +1571,52 @@ static int _raid_move_partial_lv_segment_area(struct lv_segment *seg_to, uint32_
* @idx: The index in the areas array to remove
* @data: != 0 to extract data dev / 0 extract metadata_dev
* @extracted_lv: The displaced metadata/data LV
+ * @set_error_seg: if set, replace LV of @type at @idx with error segment
*/
-static int _extract_image_component(struct lv_segment *seg,
- uint64_t type, uint32_t idx,
- struct logical_volume **extracted_lv)
+static int _extract_image_component_error_seg(struct lv_segment *seg,
+ uint64_t type, uint32_t idx,
+ struct logical_volume **extracted_lv,
+ int set_error_seg)
{
struct logical_volume *lv;
+ RETURN_IF_ZERO(seg, "lv segment argument");
+ RETURN_IF_ZERO(extracted_lv, "extracted LVs argument");
+ RETURN_IF_NONZERO(set_error_seg < 0 || set_error_seg > 1,
+ "set error segment argument");
+
switch (type) {
- case RAID_META:
- lv = seg_metalv(seg, idx);
- break;
- case RAID_IMAGE:
- lv = seg_lv(seg, idx);
- break;
- default:
- log_error(INTERNAL_ERROR "Bad type provided to %s.", __func__);
- return 0;
+ case RAID_META:
+ lv = seg_metalv(seg, idx);
+ seg_metalv(seg, idx) = NULL;
+ seg_metatype(seg, idx) = AREA_UNASSIGNED;
+ break;
+ case RAID_IMAGE:
+ lv = seg_lv(seg, idx);
+ seg_lv(seg, idx) = NULL;
+ seg_type(seg, idx) = AREA_UNASSIGNED;
+ break;
+ default:
+ log_error(INTERNAL_ERROR "Bad type provided to %s.", __func__);
+ return 0;
}
- if (!lv)
- return 0;
-
- lv->status &= ~type;
- lv->status &= ~RAID;
+ RETURN_IF_ZERO(lv, "sub lv");
- log_very_verbose("Extracting image component %s from %s", lv->name, seg->lv->name);
+ log_very_verbose("Extracting image component %s from %s", lv->name, lvseg_name(seg));
+ lv->status &= ~(type | RAID);
lv_set_visible(lv);
- /* release lv areas */
+ /* remove reference from @seg to @lv */
if (!remove_seg_from_segs_using_this_lv(lv, seg))
return_0;
- switch (type) {
- case RAID_META:
- seg_metatype(seg, idx) = AREA_UNASSIGNED;
- seg_metalv(seg, idx) = NULL;
- break;
- case RAID_IMAGE:
- seg_type(seg, idx) = AREA_UNASSIGNED;
- seg_lv(seg, idx) = NULL;
- }
+ if (!(lv->name = _generate_raid_name(lv, "extracted_", -1)))
+ return_0;
- if (!(lv->name = _generate_raid_name(lv, "extracted", -1)))
+PFLA("set_error_seg=%d", set_error_seg);
+ if (set_error_seg &&
+ !replace_lv_with_error_segment(lv))
return_0;
*extracted_lv = lv;
@@ -734,6 +1624,18 @@ static int _extract_image_component(struct lv_segment *seg,
return 1;
}
+static int _extract_image_component(struct lv_segment *seg,
+ uint64_t type, uint32_t idx,
+ struct logical_volume **extracted_lv,
+ int set_error_seg)
+{
+ RETURN_IF_ZERO(seg, "lv segment argument");
+ RETURN_IF_ZERO(extracted_lv, "extracted LVs argument");
+ RETURN_IF_NONZERO(set_error_seg < 0 || set_error_seg > 1, "set error segment argument");
+
+ return _extract_image_component_error_seg(seg, type, idx, extracted_lv, set_error_seg);
+}
+
/*
* @seg
* @idx: The index in the areas array to remove
@@ -754,21 +1656,80 @@ static int _extract_image_component(struct lv_segment *seg,
* Returns: 1 on success, 0 on failure
*/
static int _extract_image_component_pair(struct lv_segment *seg, uint32_t idx,
- struct lv_list *lvl_array)
+ struct lv_list *lvl_array,
+ struct dm_list *extracted_meta_lvs,
+ struct dm_list *extracted_data_lvs,
+ int set_error_seg)
{
- if (idx >= seg->area_count) {
- log_error(INTERNAL_ERROR "area index too large for segment");
- return 0;
- }
+ RETURN_IF_ZERO(seg, "lv segment argument");
+ RETURN_IF_ZERO(extracted_meta_lvs, "extracted meta LVs list argument");
+ RETURN_IF_ZERO(extracted_data_lvs, "extracted data LVs list argument");
+ RETURN_IF_NONZERO(set_error_seg < 0 || set_error_seg > 1, "set error segment argument");
+ RETURN_IF_SEG_AREA_COUNT_FALSE(seg, idx);
/* Don't change extraction sequence; callers are relying on it */
- if (!_extract_image_component(seg, RAID_META, idx, &lvl_array[0].lv) ||
- !_extract_image_component(seg, RAID_IMAGE, idx, &lvl_array[1].lv))
+ if (extracted_meta_lvs) {
+ if (!_extract_image_component(seg, RAID_META, idx, &lvl_array[0].lv, set_error_seg))
+ return_0;
+
+ dm_list_add(extracted_meta_lvs, &lvl_array[0].list);
+ }
+
+ if (extracted_data_lvs) {
+ if (!_extract_image_component(seg, RAID_IMAGE, idx, &lvl_array[1].lv, set_error_seg))
+ return_0;
+
+ dm_list_add(extracted_data_lvs, &lvl_array[1].list);
+ }
+
+ return 1;
+}
+
+/*
+ * Extract all sub LVs of @type from @seg starting at @idx excluding @end and
+ * put them on @removal_lvs setting mappings to "error" if @error_seg
+ */
+static int _extract_image_component_sublist(struct lv_segment *seg,
+ uint64_t type, uint32_t idx, uint32_t end,
+ struct dm_list *removal_lvs,
+ int error_seg)
+{
+ uint32_t s;
+ struct lv_list *lvl;
+
+ RETURN_IF_ZERO(seg, "seg argument");
+ RETURN_IF_SEG_AREA_COUNT_FALSE(seg, idx);
+ RETURN_IF_NONZERO(end > seg->area_count || end <= idx, "area index wrong for segment");
+
+ if (!(lvl = dm_pool_alloc(seg_lv(seg, idx)->vg->vgmem, sizeof(*lvl) * (end - idx))))
return_0;
+ for (s = idx; s < end; s++) {
+ if (!_extract_image_component_error_seg(seg, type, s, &lvl->lv, error_seg))
+ return 0;
+
+ dm_list_add(removal_lvs, &lvl->list);
+ lvl++;
+ }
+
+ if (!idx && end == seg->area_count) {
+ if (type == RAID_IMAGE)
+ seg->areas = NULL;
+ else
+ seg->meta_areas = NULL;
+ }
+
return 1;
}
+/* Extract all sub LVs of @type from @seg starting with @idx and put them on @removal_lvs */
+static int _extract_image_component_list(struct lv_segment *seg,
+ uint64_t type, uint32_t idx,
+ struct dm_list *removal_lvs)
+{
+ return _extract_image_component_sublist(seg, type, idx, seg->area_count, removal_lvs, 1);
+}
+
/* Add new @lvs to @lv at @area_offset */
static int _add_image_component_list(struct lv_segment *seg, int delete_from_list,
uint64_t lv_flags, struct dm_list *lvs, uint32_t area_offset)
@@ -791,7 +1752,7 @@ static int _add_image_component_list(struct lv_segment *seg, int delete_from_lis
lvl->lv->status &= ~LV_REBUILD;
if (!set_lv_segment_area_lv(seg, s++, lvl->lv, 0 /* le */,
- lvl->lv->status)) {
+ lvl->lv->status)) {
log_error("Failed to add sublv %s", lvl->lv->name);
return 0;
}
@@ -800,34 +1761,6 @@ static int _add_image_component_list(struct lv_segment *seg, int delete_from_lis
return 1;
}
-/* Remove sublvs fo @type from @lv starting at @idx and put them on @removal_list */
-static int _remove_image_component_list(struct lv_segment *seg,
- uint64_t type, uint32_t idx,
- struct dm_list *removal_list)
-{
- uint32_t s;
- unsigned i;
- struct lv_list *lvl_array;
-
- if (idx >= seg->area_count) {
- log_error(INTERNAL_ERROR "area index too large for segment");
- return 0;
- }
-
- if (!(lvl_array = dm_pool_alloc(seg_lv(seg, 0)->vg->vgmem, sizeof(*lvl_array) * (seg->area_count - idx))))
- return_0;
-
- for (i = 0, s = idx; s < seg->area_count; s++) {
- if (!_extract_image_component(seg, type, s, &lvl_array[i].lv))
- return 0;
-
- dm_list_add(removal_list, &lvl_array[i].list);
- i++;
- }
-
- return 1;
-}
-
/*
* Create an LV of specified type. Set visible after creation.
* This function does not make metadata changes.
@@ -843,23 +1776,22 @@ static struct logical_volume *_alloc_image_component(struct logical_volume *lv,
const struct segment_type *segtype;
switch (type) {
- case RAID_META:
- type_suffix = "rmeta";
- break;
- case RAID_IMAGE:
- type_suffix = "rimage";
- status |= LV_REBUILD;
- break;
- default:
- log_error(INTERNAL_ERROR "Bad type provided to %s.", __func__);
- return 0;
+ case RAID_META:
+ type_suffix = "rmeta";
+ break;
+ case RAID_IMAGE:
+ type_suffix = "rimage";
+ status |= LV_REBUILD;
+ break;
+ default:
+ log_error(INTERNAL_ERROR "Bad type provided to %s.", __func__);
+ return 0;
}
if (dm_snprintf(img_name, sizeof(img_name), "%s_%s_%%d",
- alt_base_name ?: lv->name, type_suffix) < 0)
+ alt_base_name ?: lv->name, type_suffix) < 0)
return_0;
-
if (!(tmp_lv = lv_create_empty(img_name, NULL, status, ALLOC_INHERIT, lv->vg))) {
log_error("Failed to allocate new raid component, %s.", img_name);
return 0;
@@ -870,12 +1802,11 @@ static struct logical_volume *_alloc_image_component(struct logical_volume *lv,
if (!(segtype = get_segtype_from_string(lv->vg->cmd, SEG_TYPE_NAME_STRIPED)))
return_0;
- if (!lv_add_segment(ah, first_area, 1, tmp_lv, segtype, 0, status, 0)) {
+ if (!lv_add_segment(ah, first_area, 1 /* areas */, 1 /* data_copies */,
+ tmp_lv, segtype, 0, status, 0)) {
log_error("Failed to add segment to LV, %s", img_name);
return 0;
}
-
- first_seg(tmp_lv)->status |= SEG_RAID;
}
lv_set_visible(tmp_lv);
@@ -884,35 +1815,32 @@ static struct logical_volume *_alloc_image_component(struct logical_volume *lv,
}
/* Calculate absolute amount of metadata device extens based on @rimage_extents, @region_size and @extens_size */
-static uint32_t _raid_rmeta_extents(struct cmd_context *cmd,
- uint32_t rimage_extents, uint32_t region_size, uint32_t extent_size)
+static uint32_t _raid_rmeta_extents(struct cmd_context *cmd, uint32_t rimage_extents,
+ uint32_t region_size, uint32_t extent_size)
{
uint64_t bytes, regions, sectors;
-uint32_t r;
region_size = region_size ?: get_default_region_size(cmd);
- regions = rimage_extents * extent_size / region_size;
+ regions = (uint64_t) rimage_extents * extent_size / region_size;
/* raid and bitmap superblocks + region bytes */
bytes = 2 * 4096 + dm_div_up(regions, 8);
sectors = dm_div_up(bytes, 512);
-PFLA("sectors=%llu", (long long unsigned) sectors);
- r = dm_div_up(sectors, extent_size);
-PFLA("regions=%llu r=%llu", (long long unsigned) regions, (long long unsigned) r);
-return r;
+ return dm_div_up(sectors, extent_size);
}
/*
* Returns raid metadata device size _change_ in extents, algorithm from dm-raid ("raid" target) kernel code.
*/
uint32_t raid_rmeta_extents_delta(struct cmd_context *cmd,
- uint32_t rimage_extents_cur, uint32_t rimage_extents_new,
- uint32_t region_size, uint32_t extent_size)
+ uint32_t rimage_extents_cur, uint32_t rimage_extents_new,
+ uint32_t region_size, uint32_t extent_size)
{
uint32_t rmeta_extents_cur = _raid_rmeta_extents(cmd, rimage_extents_cur, region_size, extent_size);
uint32_t rmeta_extents_new = _raid_rmeta_extents(cmd, rimage_extents_new, region_size, extent_size);
- PFLA("rimage_extents_cur=%u rmeta_extents_cur=%u rimage_extents_new=%u rmeta_extents_new=%u region_size=%u extent_size=%u", rimage_extents_cur, rmeta_extents_cur, rimage_extents_new, rmeta_extents_new, region_size, extent_size);
+
+PFLA("rimage_extents_cur=%u rmeta_extents_cur=%u rimage_extents_new=%u rmeta_extents_new=%u region_size=%u extent_size=%u", rimage_extents_cur, rmeta_extents_cur, rimage_extents_new, rmeta_extents_new, region_size, extent_size);
/* Need minimum size on LV creation */
if (!rimage_extents_cur)
return rmeta_extents_new;
@@ -926,79 +1854,143 @@ uint32_t raid_rmeta_extents_delta(struct cmd_context *cmd,
/* Extending/reducing... */
return rmeta_extents_new > rmeta_extents_cur ?
- rmeta_extents_new - rmeta_extents_cur :
- rmeta_extents_cur - rmeta_extents_new;
+ rmeta_extents_new - rmeta_extents_cur :
+ rmeta_extents_cur - rmeta_extents_new;
}
/*
- * _alloc_rmeta_for_lv
- * @lv
+ * __alloc_rmeta_for_lv
+ * @data_lv
*
- * Allocate a RAID metadata device for the given LV (which is or will
+ * Allocate RAID metadata device for the given LV (which is or will
* be the associated RAID data device). The new metadata device must
* be allocated from the same PV(s) as the data device.
+ *
+ * HM FIXME: try co-allocating on image LVs first, then use @allocate_pvs?
*/
-static int _alloc_rmeta_for_lv(struct logical_volume *data_lv,
- struct logical_volume **meta_lv)
+static int __alloc_rmeta_for_lv(struct logical_volume *data_lv,
+ struct logical_volume **meta_lv,
+ struct dm_list *allocate_pvs)
{
- struct dm_list allocatable_pvs;
+ int r = 1;
+ uint32_t rmeta_extents;
+ char *lv_name;
struct alloc_handle *ah;
- struct lv_segment *seg = first_seg(data_lv);
- char *p, base_name[NAME_LEN];
+ struct lv_segment *seg;
+ struct dm_list pvs;
+ struct segment_type *striped_segtype;
- dm_list_init(&allocatable_pvs);
+ RETURN_IF_LV_SEG_ZERO(data_lv, (seg = first_seg(data_lv)));
+ RETURN_IF_ZERO(meta_lv, "mate LV argument");
+ RETURN_IF_ZERO((striped_segtype = get_segtype_from_string(data_lv->vg->cmd, SEG_TYPE_NAME_STRIPED)),
+ "striped segtype");
- if (!seg_is_linear(seg)) {
- log_error(INTERNAL_ERROR "Unable to allocate RAID metadata "
- "area for non-linear LV, %s", data_lv->name);
- return 0;
- }
+ if (allocate_pvs) {
+ RETURN_IF_NONZERO(dm_list_empty(allocate_pvs), "allocate pvs listed");
- _check_and_init_region_size(data_lv);
+ } else {
+ allocate_pvs = &pvs;
+ dm_list_init(allocate_pvs);
+ if (!get_pv_list_for_lv(data_lv->vg->cmd->mem,
+ data_lv, allocate_pvs)) {
+ log_error("Failed to build list of PVs for %s", display_lvname(data_lv));
+ return 0;
+ }
+ }
- (void) dm_strncpy(base_name, data_lv->name, sizeof(base_name));
- if ((p = strstr(base_name, "_mimage_")) ||
- (p = strstr(base_name, "_rimage_")))
- *p = '\0';
+ if (!_check_and_init_region_size(data_lv))
+ return 0;
- if (!get_pv_list_for_lv(data_lv->vg->cmd->mem,
- data_lv, &allocatable_pvs)) {
- log_error("Failed to build list of PVs for %s/%s",
- data_lv->vg->name, data_lv->name);
+ if (!(lv_name = _top_level_lv_name(data_lv)))
return 0;
- }
- if (!(ah = allocate_extents(data_lv->vg, NULL, seg->segtype,
- 0, 1, 0,
- seg->region_size,
- _raid_rmeta_extents(data_lv->vg->cmd, data_lv->le_count,
- seg->region_size, data_lv->vg->extent_size),
- &allocatable_pvs, data_lv->alloc, 0, NULL)))
+PFLA("lv_name=%s", lv_name);
+ rmeta_extents = _raid_rmeta_extents(data_lv->vg->cmd, data_lv->le_count,
+ 0 /* region_size */, data_lv->vg->extent_size);
+ if (!(ah = allocate_extents(data_lv->vg, NULL, striped_segtype,
+ 0 /* stripes */, 1 /* mirrors */,
+ 0 /* log_count */ , 0 /* region_size */, rmeta_extents,
+ allocate_pvs, data_lv->alloc, 0, NULL)))
return_0;
- if (!(*meta_lv = _alloc_image_component(data_lv, base_name, ah, 0, RAID_META))) {
- alloc_destroy(ah);
- return_0;
- }
+ if ((*meta_lv = _alloc_image_component(data_lv, lv_name, ah, 0, RAID_META))) {
+ /*
+ * Wipe metadata device at beginning in order to avoid
+ * discovering a valid, but unrelated superblock in the kernel.
+ */
+ if (!_clear_lv(*meta_lv))
+ r = 0;
+
+ } else
+ r = 0;
alloc_destroy(ah);
+ return r;
+}
+
+static int _alloc_rmeta_for_lv(struct logical_volume *data_lv,
+ struct logical_volume **meta_lv,
+ struct dm_list *allocate_pvs)
+{
+ RETURN_IF_LV_SEG_ZERO(data_lv, first_seg(data_lv));
+ RETURN_IF_ZERO(meta_lv, "meta_lv argument");
+
+ if (__alloc_rmeta_for_lv(data_lv, meta_lv, NULL))
+ return 1;
+
+ return __alloc_rmeta_for_lv(data_lv, meta_lv, allocate_pvs);
+}
+
+/* HM Helper: allocate a metadata LV for the data LV at @area_offset of @lv and add it hidden to @lv */
+static int _alloc_rmeta_for_lv_add_set_hidden(struct logical_volume *lv, uint32_t area_offset,
+ struct dm_list *allocate_pvs)
+{
+ struct lv_segment *seg;
+ struct dm_list meta_lvs;
+ struct lv_list lvl;
+
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+
+ dm_list_init(&meta_lvs);
+
+ if (!_alloc_rmeta_for_lv(seg_lv(seg, area_offset), &lvl.lv, allocate_pvs))
+ return 0;
+
+ dm_list_add(&meta_lvs, &lvl.list);
+ if (!_add_image_component_list(seg, 1, 0, &meta_lvs, area_offset))
+ return 0;
+
+ lv_set_hidden(lvl.lv);
+
return 1;
}
/*
+ * HM
+ *
* Allocate metadata devs for all @new_data_devs and link them to list @new_meta_lvs
*/
static int _alloc_rmeta_devs_for_rimage_devs(struct logical_volume *lv,
- struct dm_list *new_data_lvs, struct dm_list *new_meta_lvs)
+ struct dm_list *new_data_lvs,
+ struct dm_list *new_meta_lvs,
+ struct dm_list *allocate_pvs)
{
uint32_t a = 0, raid_devs = 0;
struct dm_list *l;
- struct lv_list *lvl, *lvl_array;
+ struct lv_list *lvl, *lvl1, *lvl_array;
+
+ RETURN_IF_ZERO(lv, "lv argument");
+ RETURN_IF_ZERO(new_data_lvs, "new data LVs argument");
+ RETURN_IF_NONZERO(dm_list_empty(new_data_lvs), "new data LVs listed");
+ RETURN_IF_ZERO(new_meta_lvs, "new meta LVs argument");
+ RETURN_IF_NONZERO(!dm_list_empty(new_meta_lvs), "new meta LVs may be listed");
dm_list_iterate(l, new_data_lvs)
raid_devs++;
+PFLA("lv=%s raid_devs=%u", display_lvname(lv), raid_devs);
+
if (!raid_devs)
return 0;
@@ -1007,17 +1999,24 @@ static int _alloc_rmeta_devs_for_rimage_devs(struct logical_volume *lv,
dm_list_iterate_items(lvl, new_data_lvs) {
log_debug_metadata("Allocating new metadata LV for %s",
- lvl->lv->name);
- if (!_alloc_rmeta_for_lv(lvl->lv, &lvl_array[a].lv)) {
+ lvl->lv->name);
+ if (!_alloc_rmeta_for_lv(lvl->lv, &lvl_array[a].lv, allocate_pvs)) {
log_error("Failed to allocate metadata LV for %s in %s",
- lvl->lv->name, lv->vg->name);
+ lvl->lv->name, lv->vg->name);
return 0;
}
dm_list_add(new_meta_lvs, &lvl_array[a].list);
+
+ dm_list_iterate_items(lvl1, new_meta_lvs)
+ if (!_avoid_pvs_with_other_images_of_lv(lvl1->lv, allocate_pvs))
+ return 0;
+
a++;
}
+ _pvs_allow_allocation(allocate_pvs);
+
return 1;
}
@@ -1025,26 +2024,36 @@ static int _alloc_rmeta_devs_for_rimage_devs(struct logical_volume *lv,
* HM
*
* Allocate metadata devs for all data devs of an LV
- A*/
-static int _alloc_rmeta_devs_for_lv(struct logical_volume *lv, struct dm_list *meta_lvs)
+ */
+static int _alloc_rmeta_devs_for_lv(struct logical_volume *lv,
+ struct dm_list *meta_lvs,
+ struct dm_list *allocate_pvs)
{
- uint32_t count = lv_raid_image_count(lv), s;
+ uint32_t s;
struct lv_list *lvl_array;
struct dm_list data_lvs;
- struct lv_segment *seg = first_seg(lv);
+ struct lv_segment *seg;
+
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+ RETURN_IF_ZERO(meta_lvs, "mate LVs list argument");
+ RETURN_IF_NONZERO(seg->meta_areas, "meta LVs may exist");
+ RETURN_IF_SEG_AREA_COUNT_FALSE(seg, 0);
dm_list_init(&data_lvs);
-PFLA("seg->meta_areas=%p", seg->meta_areas);
- if (!(lvl_array = dm_pool_alloc(lv->vg->vgmem, count * sizeof(*lvl_array))))
+ if (!(seg->meta_areas = dm_pool_zalloc(lv->vg->vgmem,
+ seg->area_count * sizeof(*seg->meta_areas))))
+ return 0;
+
+ if (!(lvl_array = dm_pool_alloc(lv->vg->vgmem, seg->area_count * sizeof(*lvl_array))))
return_0;
- for (s = 0; s < count; s++) {
+ for (s = 0; s < seg->area_count; s++) {
lvl_array[s].lv = seg_lv(seg, s);
dm_list_add(&data_lvs, &lvl_array[s].list);
}
- if (!_alloc_rmeta_devs_for_rimage_devs(lv, &data_lvs, meta_lvs)) {
+ if (!_alloc_rmeta_devs_for_rimage_devs(lv, &data_lvs, meta_lvs, allocate_pvs)) {
log_error("Failed to allocate metadata LVs for %s", lv->name);
return 0;
}
@@ -1052,75 +2061,82 @@ PFLA("seg->meta_areas=%p", seg->meta_areas);
return 1;
}
+/* Get rimage length of @lv, i.e. le_count of its first sub LV (all rimages presumed equally sized) */
+static uint32_t _lv_total_rimage_len(struct logical_volume *lv)
+{
+ uint32_t s;
+ struct lv_segment *seg = first_seg(lv);
+
+ for (s = 0; s < seg->area_count; s++)
+ if (seg_type(seg, s) == AREA_LV)
+ return seg_lv(seg, s)->le_count;
+
+ return_0;
+}
+
/*
* Create @count new image component pairs for @lv and return them in
* @new_meta_lvs and @new_data_lvs allocating space if @allocate is set.
*
- * Use @pvs list for allocation if set.
+ * Use @pvs list for allocation if set, else just create empty image LVs.
*/
-static int _alloc_image_components(struct logical_volume *lv, int allocate,
- struct dm_list *pvs, uint32_t count,
- struct dm_list *new_meta_lvs,
- struct dm_list *new_data_lvs)
+static int _alloc_image_components(struct logical_volume *lv,
+ uint32_t count,
+ struct dm_list *meta_lvs,
+ struct dm_list *data_lvs,
+ struct dm_list *allocate_pvs)
{
+ int r = 0;
uint32_t s, extents;
- struct lv_segment *seg = first_seg(lv);
- const struct segment_type *segtype;
- struct alloc_handle *ah = NULL;
+ struct lv_segment *seg;
+ struct alloc_handle *ah;
struct dm_list *parallel_areas;
struct lv_list *lvl_array;
- if (!new_meta_lvs && !new_data_lvs)
- return 0;
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+ RETURN_IF_NONZERO(!meta_lvs && !data_lvs, "data and meta LVs list argument");
if (!(lvl_array = dm_pool_alloc(lv->vg->vgmem, 2 * count * sizeof(*lvl_array))))
return_0;
+ if (!_check_and_init_region_size(lv))
+ return 0;
PFL();
- if (!(parallel_areas = build_parallel_areas_from_lv(lv, 0, 1)))
- return_0;
-PFL();
-
- _check_and_init_region_size(lv);
-
- if (seg_is_raid(seg))
- segtype = seg->segtype;
- else if (!(segtype = get_segtype_from_string(lv->vg->cmd, SEG_TYPE_NAME_RAID1)))
- return_0;
-PFL();
-
/*
- * The number of extents is based on the RAID type. For RAID1/10,
+ * The number of extents is based on the RAID type. For RAID1,
* each of the rimages is the same size - 'le_count'. However
- * for RAID 0/4/5/6, the stripes add together (NOT including the parity
- * devices) to equal 'le_count'. Thus, when we are allocating
+ * for RAID 0/4/5/6/10, the stripes add together (NOT including
+ * any parity devices) to equal 'le_count'. Thus, when we are allocating
* individual devices, we must specify how large the individual device
* is along with the number we want ('count').
*/
- if (allocate) {
- if (new_meta_lvs || new_data_lvs) {
- uint32_t stripes, mirrors, metadata_area_count = count;
+ if (allocate_pvs && !dm_list_empty(allocate_pvs)) {
+ uint32_t data_copies;
+ const struct segment_type *segtype;
+
+ if (!(parallel_areas = build_parallel_areas_from_lv(lv, 0, 1)))
+ return_0;
- /* Amount of extents for the rimage device(s) */
- if (segtype_is_striped_raid(seg->segtype)) {
- stripes = count;
- mirrors = 1;
- extents = count * (lv->le_count / _data_rimages_count(seg, seg->area_count));
+ /* Amount of extents for the rimage device(s) */
+ data_copies = count;
+ extents = _lv_total_rimage_len(lv);
-PFLA("stripes=%u lv->le_count=%u data_rimages_count=%u", stripes, lv->le_count, _data_rimages_count(seg, seg->area_count));
- } else {
- stripes = 1;
- mirrors = count;
- extents = lv->le_count;
- }
+PFLA("count=%u extents=%u lv->le_count=%u seg->area_count=%u seg->area_len=%u data_copies=%u", count, extents, lv->le_count, seg->area_count, seg->area_len, data_copies);
+
+ /* Use raid1 segtype for allocation to get images of the same size as the given ones in @lv */
+ if (!(segtype = get_segtype_from_flag(lv->vg->cmd, SEG_RAID1)))
+ return_0;
+
+ if (!(ah = allocate_extents(lv->vg, NULL, segtype,
+ // data_copies, 1 /* stripes */, count /* metadata_area_count */,
+ 1 /* stripes */, data_copies, count /* metadata_area_count */,
+ 0 /* region_size */, extents,
+ allocate_pvs, lv->alloc, 0, parallel_areas)))
+ return_0;
+
+ } else
+ ah = NULL;
- if (!(ah = allocate_extents(lv->vg, NULL, segtype,
- stripes, mirrors, metadata_area_count,
- seg->region_size, extents,
- pvs, lv->alloc, 0, parallel_areas)))
- return_0;
- }
- }
PFLA("count=%u extents=%u", count, extents);
for (s = 0; s < count; s++) {
@@ -1133,134 +2149,344 @@ PFLA("count=%u extents=%u", count, extents);
/*
* If the segtype is raid0, we may avoid allocating metadata LVs
- * to accompany the data LVs by not passing in @new_meta_lvs
+ * to accompany the data LVs by not passing in @meta_lvs
*/
- if (new_meta_lvs) {
+ if (meta_lvs) {
if (!(lvl_array[s + count].lv = _alloc_image_component(lv, NULL, ah, s + count, RAID_META)))
- goto err;
+ goto_bad;
- dm_list_add(new_meta_lvs, &(lvl_array[s + count].list));
+ dm_list_add(meta_lvs, &lvl_array[s + count].list);
}
- if (new_data_lvs) {
+ if (data_lvs) {
if (!(lvl_array[s].lv = _alloc_image_component(lv, NULL, ah, s, RAID_IMAGE)))
- goto err;
+ goto_bad;
- dm_list_add(new_data_lvs, &(lvl_array[s].list));
+ dm_list_add(data_lvs, &lvl_array[s].list);
+
+ if (lvl_array[s].lv->le_count)
+ first_seg(lvl_array[s].lv)->reshape_len = _reshape_len_per_dev(seg);
}
}
-PFL();
+ r = 1;
+bad:
if (ah)
alloc_destroy(ah);
-PFL();
- return 1;
-err:
- alloc_destroy(ah);
- return 0;
+ return r;
}
-/* Factored out function to allocate an rmeta dev for a linear one */
-static int _alloc_rmeta_for_linear(struct logical_volume *lv, struct dm_list *meta_lvs)
+/*
+ * _raid_extract_images
+ * @lv
+ * @new_image_count: The absolute count of images (e.g. '2' for a 2-way mirror)
+ * @target_pvs: The list of PVs that are candidates for removal
+ * @shift: If set, use _shift_image_components().
+ * Otherwise, leave the [meta_]areas as AREA_UNASSIGNED and
+ * seg->area_count unchanged.
+ * @extracted_[meta|data]_lvs: The LVs removed from the array. If 'shift'
+ * is set, then there will likely be name conflicts.
+ * This function extracts _both_ portions of the indexed image. It
+ * does /not/ commit the results. (IOW, erroring-out requires no unwinding
+ * of operations.)
+ *
+ * Returns: 1 on success, 0 on failure
+ */
+static int _raid_extract_images(struct logical_volume *lv, uint32_t new_image_count,
+ struct dm_list *target_pvs, int shift,
+ struct dm_list *extracted_meta_lvs,
+ struct dm_list *extracted_data_lvs)
{
- struct lv_list *lvl;
+ int inc;
+ unsigned s, extract;
+ struct lv_list *lvl_pairs;
+ struct lv_segment *seg;
+ struct segment_type *error_segtype;
- if (!(lvl = dm_pool_alloc(lv->vg->vgmem, sizeof(*lvl)))) {
- log_error("Memory allocation failed");
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+ RETURN_IF_ZERO(target_pvs, "target pvs list argument");
+ RETURN_IF_ZERO(extracted_meta_lvs, "extracted meta LVs list argument");
+ RETURN_IF_ZERO(extracted_data_lvs, "extracted data LVs list argument");
+
+ extract = seg->area_count - new_image_count;
+
+ if ((s = dm_list_size(target_pvs)) < extract) {
+ log_error("Unable to remove %d images: Only %d device%s given.",
+ extract, s, s == 1 ? "" : "s");
return 0;
}
- if (!_alloc_rmeta_for_lv(lv, &lvl->lv))
+ log_verbose("Extracting %u image%s from %s", extract,
+ extract > 1 ? "s" : "", display_lvname(lv));
+
+ if (!(lvl_pairs = dm_pool_alloc(lv->vg->vgmem, 2 * extract * sizeof(*lvl_pairs))))
return_0;
+PFL();
- dm_list_add(meta_lvs, &lvl->list);
+ if (!(error_segtype = get_segtype_from_string(lv->vg->cmd, "error")))
+ return_0;
+PFL();
+
+ /*
+ * We make two passes over the devices.
+ * - The first pass we look for error LVs to handle them first
+ * - The second pass we look for PVs that match target_pvs and extract them
+ */
+ /* First pass */
+ for (s = seg->area_count; s-- && extract; ) {
+PFLA("s=%u", s);
+ /* Conditions for first pass */
+ if (!((seg->meta_areas && first_seg(seg_metalv(seg, s))->segtype == error_segtype) ||
+ first_seg(seg_lv(seg, s))->segtype == error_segtype))
+ continue;
+PFL();
+ if (!dm_list_empty(target_pvs) && target_pvs != &lv->vg->pvs) {
+ /*
+ * User has supplied a list of PVs, but we
+ * cannot honor that list because error LVs
+ * must come first.
+ */
+ log_error("%s has components with error targets"
+ " that must be removed first: %s.",
+ display_lvname(lv), display_lvname(seg_lv(seg, s)));
+ log_error("Try removing the PV list and rerun the command.");
+ return 0;
+ }
+
+PFL();
+ log_debug("LVs with error segments to be removed: %s %s",
+ display_lvname(seg_metalv(seg, s)), display_lvname(seg_lv(seg, s)));
+
+PFL();
+ if (!_extract_image_component_pair(seg, s, lvl_pairs, extracted_meta_lvs, extracted_data_lvs, 0))
+ return_0;
+
+ lvl_pairs += 2;
+ extract--;
+ }
+
+ /* Second pass */
+ for (s = seg->area_count; target_pvs && s-- && extract; ) {
+ /* Conditions for second pass */
+ if (!_raid_in_sync(lv) &&
+ (!seg_is_mirrored(seg) || !s)) {
+ log_error("Unable to extract %sRAID image"
+ " while RAID array is not in-sync",
+ seg_is_mirrored(seg) ? "primary " : "");
+ return 0;
+ }
+
+ inc = 0;
+
+#if 1
+ if (seg->meta_areas &&
+ lv_is_on_pvs(seg_metalv(seg, s), target_pvs)) {
+#else
+ /* HM FIXME: PARTIAL_LV not set for LVs on replacement PVs ("lvconvert --replace $PV $LV") */
+ if (seg->meta_areas &&
+ (seg_metalv(seg, s)->status & PARTIAL_LV) &&
+ lv_is_on_pvs(seg_metalv(seg, s), target_pvs)) {
+#endif
+ if (!_extract_image_component(seg, RAID_META, s, &lvl_pairs[0].lv, 0))
+ return_0;
+
+ dm_list_add(extracted_meta_lvs, &lvl_pairs[0].list);
+ inc++;
+ }
+
+#if 1
+ if (lv_is_on_pvs(seg_lv(seg, s), target_pvs)) {
+#else
+ /* HM FIXME: PARTIAL_LV not set for LVs on replacement PVs ("lvconvert --replace $PV $LV") */
+ if ((seg_lv(seg, s)->status & PARTIAL_LV) &&
+ lv_is_on_pvs(seg_lv(seg, s), target_pvs)) {
+#endif
+ if (!_extract_image_component(seg, RAID_IMAGE, s, &lvl_pairs[1].lv, 0))
+ return_0;
+
+ dm_list_add(extracted_data_lvs, &lvl_pairs[1].list);
+ inc++;
+ }
+
+ if (inc) {
+ lvl_pairs += 2;
+ extract--;
+ }
+ }
+
+ if (extract) {
+ log_error("Unable to extract enough images to satisfy request");
+ return 0;
+ }
+
+ if (shift && !_shift_image_components(seg)) {
+ log_error("Failed to shift and rename image components");
+ return 0;
+ }
return 1;
}
-/* Return reshape LEs per device for @seg */
-static uint32_t _reshape_les_per_dev(struct lv_segment *seg)
+/*
+ * Change the image count of the raid @lv to @new_image_count
+ * allocating from list @allocate_pvs and putting any removed
+ * LVs on the @removal_lvs list
+ */
+static int _lv_change_image_count(struct logical_volume *lv,
+ uint32_t new_image_count,
+ struct dm_list *allocate_pvs,
+ struct dm_list *removal_lvs)
{
- return seg->reshape_len / _data_rimages_count(seg, seg->area_count);
+ struct lv_segment *seg;
+ struct dm_list meta_lvs, data_lvs;
+ uint32_t old_image_count;
+
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+ RETURN_IF_ZERO(new_image_count, "new image count");
+
+ old_image_count = seg->area_count;
+ if (old_image_count == new_image_count) {
+ log_print_unless_silent("%s already has image count of %d.",
+ display_lvname(lv), new_image_count);
+ return 1;
+ }
+
+ /* Check for maximum supported raid devices */
+ if (!_check_max_raid_devices(new_image_count))
+ return 0;
+
+ dm_list_init(&meta_lvs);
+ dm_list_init(&data_lvs);
+
+ if (old_image_count < new_image_count) {
+ /* Allocate additional meta and data LVs pair(s) */
+ RETURN_IF_ZERO(allocate_pvs , "allocate pvs list argument");
+ RETURN_IF_NONZERO(dm_list_empty(allocate_pvs), "allocate pvs listed");
+
+ log_debug_metadata("Allocating additional data and metadata LV pair%s for LV %s",
+ new_image_count - old_image_count > 1 ? "s" : "", display_lvname(lv));
+ if (!_alloc_image_components(lv, new_image_count - old_image_count,
+ &meta_lvs, &data_lvs, allocate_pvs)) {
+ log_error("Failed to allocate additional data and metadata LV pair for %s", display_lvname(lv));
+ return_0;
+ }
+
+ log_debug_metadata("Clearing newly allocated metadata LVs of %s", display_lvname(lv));
+ if (!_clear_lvs(&meta_lvs)) {
+ log_error("Failed to clear newly allocated metadata LVs of %s", display_lvname(lv));
+ return_0;
+ }
+
+ /* Grow areas arrays for metadata and data devs for adding new image component pairs */
+ log_debug_metadata("Reallocating areas arrays of %s", display_lvname(lv));
+ if (!_realloc_meta_and_data_seg_areas(lv, new_image_count)) {
+ log_error("Relocation of areas arrays for %s failed", display_lvname(lv));
+ return_0;
+ }
+
+ log_debug_metadata("Adding new data and metadata LVs to %s", display_lvname(lv));
+ if (!_add_image_component_list(seg, 1, 0, &meta_lvs, old_image_count) ||
+ !_add_image_component_list(seg, 1, LV_REBUILD, &data_lvs, old_image_count)) {
+ log_error("Failed to add new data and metadata LVs to %s", display_lvname(lv));
+ return_0;
+ }
+
+ } else {
+ RETURN_IF_ZERO(removal_lvs, "removal LVs list argument");
+
+ /*
+ * Extract all image and any metadata LVs past new_image_count
+ *
+ * No need to reallocate data and metadata areas
+ * on reduction of image component pairs
+ */
+ log_debug_metadata("Extracting data and metadata LVs from %s", display_lvname(lv));
+ if (!_raid_extract_images(lv, new_image_count, allocate_pvs,
+ 0 /* Don't shift */, removal_lvs, removal_lvs)) {
+ log_error("Failed to extract data and metadata LVs from %s", display_lvname(lv));
+ return 0;
+ }
+ }
+
+ /* Must update area count after resizing it */
+ seg->area_count = new_image_count;
+
+ return 1;
}
/*
- * Relocate @out_of_place_les_per_disk from @lv's data images begin <-> end depending on @to_end
+ * Relocate @out_of_place_les_per_disk from @lv's data images begin <-> end depending on @where
*
- * to_end != 0 -> begin -> end
- * to_end == 0 -> end -> begin
+ * @where:
+ * alloc_begin -> end -> begin
+ * alloc_end -> begin -> end
*/
-static int _relocate_reshape_space(struct logical_volume *lv, int to_end)
+enum alloc_where { alloc_begin, alloc_end, alloc_anywhere, alloc_none };
+static int _lv_relocate_reshape_space(struct logical_volume *lv, enum alloc_where where)
{
- uint32_t le, end, s, len_per_dlv;
- struct logical_volume *dlv;
- struct lv_segment *seg = first_seg(lv);
- struct lv_segment *data_seg;
- struct dm_list *where;
+ uint32_t le, begin, end, s;
+ struct logical_volume *dlv;
+ struct dm_list *insert;
+ struct lv_segment *data_seg, *seg;
- if (!seg->reshape_len ||
- !(len_per_dlv = _reshape_les_per_dev(seg))) {
- log_error(INTERNAL_ERROR "No reshape space to relocate");
- return 0;
- }
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+ RETURN_IF_ZERO(_reshape_len_per_dev(seg), "reshape space to relocate");
-PFLA("seg->area_count=%u", seg->area_count);
/*
* Move the reshape LEs of each stripe (i.e. the data image sub lv)
- * in the first/last segments across to new segments of just use
- * them in case size fits
+ * in the first/last segment(s) across to the opposite end of the
+ * address space
*/
+PFLA("seg->area_count=%u", seg->area_count);
for (s = 0; s < seg->area_count; s++) {
+ RETURN_IF_ZERO(seg_type(seg, s) == AREA_LV, "sub lv");
dlv = seg_lv(seg, s);
- /* Move to the end -> start from 0 and end with reshape LEs */
- if (to_end) {
- le = 0;
- end = len_per_dlv;
-
- /* Move to the beginning -> from "end - reshape LEs" to end */
- } else {
- le = dlv->le_count - len_per_dlv;
+ switch (where) {
+ case alloc_begin:
+ /* Move to the beginning -> start moving to the beginning from "end - reshape LEs" to end */
+ begin = dlv->le_count - _reshape_len_per_dev(seg);
end = dlv->le_count;
+ break;
+ case alloc_end:
+ /* Move to the end -> start moving to the end from 0 and end with reshape LEs */
+ begin = 0;
+ end = _reshape_len_per_dev(seg);
+ break;
+ default:
+ log_error(INTERNAL_ERROR "bogus reshape space reallocation request [%d]", where);
+ return 0;
}
-PFLA("len_per_dlv=%u le=%u end=%u", len_per_dlv, le, end);
-dm_list_iterate_items(data_seg, &dlv->segments)
-PFLA("1. dlv=%s data_seg->le=%u data_seg->len=%u pe=%u", dlv->name, data_seg->le, data_seg->len, seg_pe(data_seg, 0));
-
-
/* Ensure segment boundary at begin/end of reshape space */
- if (!lv_split_segment(dlv, to_end ? end : le))
+ if (!lv_split_segment(dlv, begin ?: end))
return_0;
-dm_list_iterate_items(data_seg, &dlv->segments)
-PFLA("2. dlv=%s data_seg->le=%u data_seg->len=%u pe=%u", dlv->name, data_seg->le, data_seg->len, seg_pe(data_seg, 0));
-
- /* Find start segment */
- data_seg = find_seg_by_le(dlv, le);
+ /* Select destination to move to (begin/end) */
+ insert = begin ? dlv->segments.n : &dlv->segments;
+ RETURN_IF_ZERO((data_seg = find_seg_by_le(dlv, begin)), "data segment found");
+ le = begin;
while (le < end) {
- struct lv_segment *n = dm_list_item(data_seg->list.n, struct lv_segment);
+ struct dm_list *n = data_seg->list.n;
le += data_seg->len;
- /* select destination to move to (begin/end) */
- where = to_end ? &dlv->segments : dlv->segments.n;
- dm_list_move(where, &data_seg->list);
- data_seg = n;
- }
-dm_list_iterate_items(data_seg, &dlv->segments)
-PFLA("3. dlv=%s data_seg->le=%u data_seg->len=%u pe=%u", dlv->name, data_seg->le, data_seg->len, seg_pe(data_seg, 0));
+ dm_list_move(insert, &data_seg->list);
+
+ /* If moving to the begin, adjust insertion point so that we don't reverse order */
+ if (begin)
+ insert = data_seg->list.n;
+
+ data_seg = dm_list_item(n, struct lv_segment);
+ }
- /* Adjust starting LEs of data lv segments after move */;
le = 0;
dm_list_iterate_items(data_seg, &dlv->segments) {
+ data_seg->reshape_len = le ? 0 : _reshape_len_per_dev(seg);
data_seg->le = le;
le += data_seg->len;
}
-dm_list_iterate_items(data_seg, &dlv->segments)
-PFLA("4. dlv=%s data_seg->le=%u data_seg->len=%u pe=%u", dlv->name, data_seg->le, data_seg->len, seg_pe(data_seg, 0));
-
}
return 1;
@@ -1281,7 +2507,7 @@ PFLA("4. dlv=%s data_seg->le=%u data_seg->len=%u pe=%u", dlv->name, data_seg->le
* - we have to reshape forwards
* (true for adding disks to a raid set) ->
* add extent to each component image upfront
- * or move an exisiting one at the end across;
+ * or move an existing one at the end across;
* kernel will set component devs data_offset to
* the passed in one and new_data_offset to 0,
* i.e. the data starts at offset 0 after the reshape
@@ -1289,7 +2515,7 @@ PFLA("4. dlv=%s data_seg->le=%u data_seg->len=%u pe=%u", dlv->name, data_seg->le
* - we have to reshape backwards
* (true for removing disks form a raid set) ->
* add extent to each component image by the end
- * or use already exisiting one from a previous reshape;
+ * or use already existing one from a previous reshape;
* kernel will leave the data_offset of each component dev
* at 0 and set new_data_offset to the passed in one,
* i.e. the data will be at offset new_data_offset != 0
@@ -1310,69 +2536,98 @@ PFLA("4. dlv=%s data_seg->le=%u data_seg->len=%u pe=%u", dlv->name, data_seg->le
*
* Does not write metadata!
*/
-enum alloc_where { alloc_begin, alloc_end, alloc_anywhere };
static int _lv_alloc_reshape_space(struct logical_volume *lv,
enum alloc_where where,
+ enum alloc_where *where_it_was,
struct dm_list *allocate_pvs)
{
/* Reshape LEs per disk minimum one MiB for now... */
- uint32_t out_of_place_les_per_disk = max(2048ULL / (unsigned long long) lv->vg->extent_size, 1ULL);
+ uint32_t out_of_place_les_per_disk = (uint32_t) max(2048ULL / (unsigned long long) lv->vg->extent_size, 1ULL);
uint64_t data_offset, dev_sectors;
- struct lv_segment *seg = first_seg(lv);
+ struct lv_segment *seg;
+
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
/* Get data_offset and dev_sectors from the kernel */
+ /* FIXME: dev_sectors superfluous? */
if (!lv_raid_offset_and_sectors(lv, &data_offset, &dev_sectors)) {
- log_error("Can't get data offset and dev size for %s/%s from kernel",
- lv->vg->name, lv->name);
+ log_error("Can't get data offset and dev size for %s from kernel",
+ display_lvname(lv));
return 0;
}
+
PFLA("data_offset=%llu dev_sectors=%llu seg->reshape_len=%u out_of_place_les_per_disk=%u lv->le_count=%u", (unsigned long long) data_offset, (unsigned long long) dev_sectors, seg->reshape_len, out_of_place_les_per_disk, lv->le_count);
/*
* Check if we have reshape space allocated or extend the LV to have it
*
- * fist_seg(lv)->reshape_len (only segment of top level raid LV)
+ * first_seg(lv)->reshape_len (only segment of top level raid LV)
* is accounting for the data rimages so that unchanged
- * lv_extend()/lv_reduce() can be used to allocate/free
+ * lv_extend()/lv_reduce() can be used to allocate/free,
+ * because seg->len etc. still holds the whole size as before
+ * including the reshape space
*/
- if (!seg->reshape_len) {
- uint32_t reshape_len = out_of_place_les_per_disk * _data_rimages_count(seg, seg->area_count);
-
- if (!lv_extend(lv, seg->segtype,
- _data_rimages_count(seg, seg->area_count),
- seg->stripe_size,
- 1, seg->region_size,
- reshape_len /* # of reshape LEs to add */,
- allocate_pvs, lv->alloc, 0))
+ if (_reshape_len_per_dev(seg)) {
+ if (!_lv_set_reshape_len(lv, _reshape_len_per_dev(seg)))
return 0;
- /* Store the allocated reshape length per LV in the only segment of the top-level RAID LV */
- seg->reshape_len = reshape_len;
+ } else {
+ uint32_t data_rimages = _data_rimages_count(seg, seg->area_count);
+ uint32_t reshape_len = out_of_place_les_per_disk * data_rimages;
+ uint32_t prev_rimage_len = _lv_total_rimage_len(lv);
+ uint64_t lv_size = lv->size;
+
+ RETURN_IF_ZERO(allocate_pvs , "allocate pvs list argument");
+ RETURN_IF_NONZERO(dm_list_empty(allocate_pvs), "allocate pvs listed");
+
+PFLA("lv->le_count=%u seg->len=%u seg->area_len=%u", lv->le_count, seg->len, seg->area_len);
+PFLA("data_rimages=%u area_count=%u reshape_len=%u", data_rimages, seg->area_count, reshape_len);
+PFLA("first_seg(seg_lv(seg, 0)->reshape_len=%u", first_seg(seg_lv(seg, 0))->reshape_len);
+PFLA("first_seg(seg_lv(seg, 0)->len=%u", first_seg(seg_lv(seg, 0))->len);
+
+ if (!lv_extend(lv, seg->segtype, data_rimages,
+ seg->stripe_size, 1, seg->region_size,
+ reshape_len /* # of reshape LEs to add */,
+ allocate_pvs, lv->alloc, 0)) {
+ log_error("Failed to allocate out-of-place reshape space for %s.",
+ display_lvname(lv));
+ return 0;
+ }
+PFL();
+ lv->size = lv_size;
+
+ /* pay attention to lv_extend maybe having allocated more because of layout specific rounding */
+ if (!_lv_set_reshape_len(lv, _lv_total_rimage_len(lv) - prev_rimage_len))
+ return 0;
}
- /* Don't set any offset in case we fail relocating reshape space */
+ /* Preset data offset in case we fail relocating reshape space below */
seg->data_offset = 0;
+ if (where_it_was)
+ *where_it_was = where;
+
/*
* Handle reshape space relocation
*/
+PFLA("data_offset=%llu", (unsigned long long) data_offset);
switch (where) {
case alloc_begin:
- /* Kernel says we have it at the end -> relocate it to the begin */
- if (!data_offset &&
- !_relocate_reshape_space(lv, 0))
+ /* If kernel says data is at data_offset == 0 -> relocate reshape space at the end to the begin */
+ if (!data_offset && !_lv_relocate_reshape_space(lv, where))
return_0;
+
break;
case alloc_end:
- /* Kernel says we have it at the beginning -> relocate it to the end */
- if (data_offset &&
- !_relocate_reshape_space(lv, 1))
+ /* If kernel says data is at data_offset > 0 -> relocate reshape space at the begin to the end */
+ if (data_offset && !_lv_relocate_reshape_space(lv, where))
return_0;
+
break;
case alloc_anywhere:
- /* We don't care were the space is */
+ /* We don't care where the space is, kernel will just toggle data_offset accordingly */
break;
default:
@@ -1380,164 +2635,261 @@ PFLA("data_offset=%llu dev_sectors=%llu seg->reshape_len=%u out_of_place_les_per
return 0;
}
+ if (where_it_was && where != alloc_anywhere)
+ *where_it_was = data_offset ? alloc_begin : alloc_end;
+
/* Inform kernel about the reshape length in sectors */
- seg->data_offset = _reshape_les_per_dev(seg) * lv->vg->extent_size;
+ /* FIXME: avoid seg->data_offset used to put it on the table line in favour of seg->reshape_len? */
+ seg->data_offset = _reshape_len_per_dev(seg) * lv->vg->extent_size;
+PFLA("seg->data_offset=%llu", (unsigned long long) seg->data_offset);
- /* At least try merging segments */
- return lv_merge_segments(lv);
+ return _lv_set_image_lvs_start_les(lv);
}
-/* Remove any reshape space from the data lvs of @lv */
-static int _lv_free_reshape_space(struct logical_volume *lv)
+/* Remove any reshape space from the data LVs of @lv */
+static int _lv_free_reshape_space_with_status(struct logical_volume *lv, enum alloc_where *where)
{
- struct lv_segment *seg = first_seg(lv);
+ struct lv_segment *seg;
+
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+
+ if (_reshape_len_per_dev(seg)) {
+ uint32_t total_reshape_len = _reshape_len_per_lv(lv);
+PFL();
+
+ /* The allocator will have added times #data_copies stripes, so we need to lv_reduce() less visible size */
+ if (seg_is_any_raid10(seg)) {
+ RETURN_IF_NONZERO(total_reshape_len % seg->data_copies, "divisibility by # of data copies");
+ total_reshape_len /= seg->data_copies;
+ }
- if (seg->reshape_len) {
/*
- * Got reshape space on request to free it ->
- * if at the beginning of the data LVs remap it
- * to the end in order to lvreduce it
+ * Got reshape space on request to free it.
+ *
+ * If it happens to be at the beginning of
+ * the data LVs, remap it to the end in order
+ * to be able to free it via lv_reduce().
*/
- if (!_lv_alloc_reshape_space(lv, alloc_end, NULL))
+ if (!_lv_alloc_reshape_space(lv, alloc_end, where, NULL))
return_0;
- if (!lv_reduce(lv, seg->reshape_len))
+ if (!lv_reduce(lv, total_reshape_len))
return_0;
- seg->reshape_len = 0;
- }
-
- return 1;
-}
-
+ if (!_lv_set_reshape_len(lv, 0))
+ return 0;
-/* Correct LV names for @data_lvs in case of a linear @lv */
-static int _correct_data_lv_names(struct logical_volume *lv, uint32_t count, struct dm_list *data_lvs)
-{
- struct dm_list *l;
- struct lv_list *lvl, *lvl_n;
+ seg->data_offset = 0;
- dm_list_iterate(l, data_lvs) {
- lvl = dm_list_item(l, struct lv_list);
-
- if (l == dm_list_last(data_lvs)) {
- if (!(lvl->lv->name = _generate_raid_name(lv, "rimage", count)))
- return_0;
- continue;
- }
-
- lvl_n = dm_list_item(l->n, struct lv_list);
- lvl->lv->name = lvl_n->lv->name;
- }
+ } else if (where)
+ *where = alloc_none;
return 1;
}
-/* Return length of unsigned @idx as a string */
-static unsigned _unsigned_str_len(unsigned idx)
+static int _lv_free_reshape_space(struct logical_volume *lv)
{
- unsigned r = 0;
-
- do {
- r++;
- } while ((idx /= 10));
-
- return r;
+ return _lv_free_reshape_space_with_status(lv, NULL);
}
-/* Create an rimage string suffix with @idx appended */
-static const char *_generate_rimage_suffix(struct logical_volume *lv, unsigned idx)
+/*
+ * Convert @lv to raid1 by making the linear lv
+ * the one data sub LV of a new top-level lv
+ */
+static struct lv_segment *_convert_lv_to_raid1(struct logical_volume *lv, const char *suffix)
{
- const char *type = "_rimage";
- char *suffix;
- size_t len = strlen(type) + _unsigned_str_len(idx) + 1;
+ int thin;
+ struct lv_segment *seg, *seg1;;
+ uint32_t le_count;
+ uint64_t flags;
- if (!(suffix = dm_pool_alloc(lv->vg->vgmem, len))) {
- log_error("Failed to allocate name suffix.");
- return 0;
- }
+ RETURN_IF_LV_SEG_ZERO(lv, first_seg(lv));
+ thin = lv_is_thin_volume(lv);
- if (dm_snprintf(suffix, len, "%s%u", type, idx) < 0)
- return_0;
+ le_count = lv->le_count - _reshape_len_per_lv(lv);
+ flags = RAID | LVM_READ | (lv->status & LVM_WRITE);
- return suffix;
-}
+ log_debug_metadata("Inserting layer LV on top of %s", display_lvname(lv));
+ if (!insert_layer_for_lv(lv->vg->cmd, lv, flags, suffix))
+ return NULL;
-/* Insert RAID layer on top of @lv with suffix counter @idx */
-static int _insert_raid_layer_for_lv(struct logical_volume *lv, const char *sfx, unsigned idx)
-{
- uint64_t flags = RAID | LVM_READ | LVM_WRITE;
- const char *suffix = sfx ?: _generate_rimage_suffix(lv, idx);
+ /* First segment has changed because of layer insertion */
+ RETURN_IF_ZERO((seg = first_seg(lv)), "lv raid segment after layer insertion");
+ RETURN_IF_ZERO(seg_lv(seg, 0), "first sub LV");
+ RETURN_IF_ZERO((seg1 = first_seg(seg_lv(seg, 0))), "first sub LV segment");
- if (!insert_layer_for_lv(lv->vg->cmd, lv, flags, suffix))
- return 0;
+ seg->status |= SEG_RAID;
+PFLA("seg1->lv=%s", display_lvname(seg1->lv));
+ seg1->lv->status |= RAID_IMAGE | flags;
+ seg1->lv->status &= ~LV_REBUILD;
- seg_lv(first_seg(lv), 0)->status |= RAID_IMAGE | flags;
+ /* Correct thin volume flags not covered by insert_layer_for_lv() */
+ if (thin) {
+ seg1->status |= SEG_THIN_VOLUME;
+ seg1->lv->status |= THIN_VOLUME;
+ lv->status &= ~THIN_VOLUME;
+ }
- return 1;
-}
+ /* Set raid1 segtype, so that the following image allocation works */
+ if (!(seg->segtype = get_segtype_from_flag(lv->vg->cmd, SEG_RAID1)))
+ return NULL;
-/* Convert linear @lv to raid1 */
-static int _convert_linear_to_raid(struct logical_volume *lv)
-{
- struct lv_segment *seg = first_seg(lv);
- uint32_t region_size = seg->region_size;
- uint32_t stripe_size = seg->stripe_size;
+ lv->status |= RAID;
+ lv->le_count = seg->len = seg->area_len = le_count;
- if (!_insert_raid_layer_for_lv(lv, "_rimage_0", 0))
+ if (!_check_and_init_region_size(lv))
return 0;
- /* Segment has changed */
- seg = first_seg(lv);
- seg_lv(seg, 0)->status |= RAID_IMAGE | LVM_READ | LVM_WRITE;
- seg->region_size = region_size;
- seg->stripe_size = stripe_size;
- _check_and_init_region_size(lv);
-
- return 1;
+ return seg;
}
/* Reset any rebuild or reshape disk flags on @lv, first segment already passed to the kernel */
-static int _reset_flags_passed_to_kernel(struct logical_volume *lv, int *flag_cleared)
+static int _reset_flags_passed_to_kernel(struct logical_volume *lv, int write_requested)
{
+ int flags_cleared = 0;
uint32_t s;
- struct lv_segment *seg = first_seg(lv);
+ struct logical_volume *slv;
+ struct lv_segment *seg;
+ uint64_t reset_flags = LV_REBUILD | LV_RESHAPE_DELTA_DISKS_PLUS | LV_RESHAPE_DELTA_DISKS_MINUS;
+
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
- *flag_cleared = 0;
for (s = 0; s < seg->area_count; s++) {
- if ((seg_metalv(seg, s)->status & (LV_REBUILD|LV_RESHAPE_DELTA_DISKS_PLUS|LV_RESHAPE_DELTA_DISKS_MINUS)) ||
- (seg_lv(seg, s)->status & (LV_REBUILD|LV_RESHAPE_DELTA_DISKS_PLUS|LV_RESHAPE_DELTA_DISKS_MINUS))) {
- seg_metalv(seg, s)->status &= ~(LV_REBUILD|LV_RESHAPE_DELTA_DISKS_PLUS|LV_RESHAPE_DELTA_DISKS_MINUS);
- seg_lv(seg, s)->status &= ~(LV_REBUILD|LV_RESHAPE_DELTA_DISKS_PLUS|LV_RESHAPE_DELTA_DISKS_MINUS);
+ if (seg_type(seg, s) == AREA_PV)
+ continue;
- *flag_cleared = 1;
+ RETURN_IF_ZERO((slv = seg_lv(seg, s)), "sub LV");
+
+ if (slv->status & LV_RESHAPE_DELTA_DISKS_MINUS) {
+ slv->status |= LV_RESHAPE_REMOVED;
+ if (seg->meta_areas) {
+ RETURN_IF_ZERO(seg_metatype(seg, s) == AREA_LV, "sub meta lv");
+ seg_metalv(seg, s)->status |= LV_RESHAPE_REMOVED;
+ }
+ }
+
+ if (slv->status & reset_flags) {
+ slv->status &= ~reset_flags;
+ flags_cleared++;
}
}
+ /* Reset passed in data offset (reshaping) */
if (seg->data_offset) {
seg->data_offset = 0;
- *flag_cleared = 1;
+ flags_cleared++;
}
- if (*flag_cleared) {
+ if (write_requested && flags_cleared) {
if (!vg_write(lv->vg) || !vg_commit(lv->vg)) {
- log_error("Failed to clear flags for %s/%s components",
- lv->vg->name, lv->name);
+ log_error("Failed to clear flags for %s components in metadata",
+ display_lvname(lv));
return 0;
}
- backup(lv->vg);
+ if (!backup(lv->vg)) {
+ log_error("Failed to backup metadata for VG %s", lv->vg->name);
+ return 0;
+ }
}
return 1;
}
+/*
+ * HM Helper:
+ *
+ * Updates and reloads metadata, clears any flags passed to the kernel,
+ * eliminates any residual LVs and updates and reloads metadata again.
+ *
+ * @lv mandatory argument, rest variable:
+ *
+ * @lv [ @removal_lvs [ @fn_pre_on_lv @fn_pre_data [ @fn_post_on_lv @fn_post_data ] ] ]
+ *
+ * Run optional variable args function fn_pre_on_lv with fn_pre_data on @lv before first metadata update
+ * and optional variable args function fn_post_on_lv with fn_post_data on @lv before second metadata update
+ *
+ * This minimally involves 2 metadata commits or more, depending on
+ * pre and post functions carrying out any additional ones or not.
+ *
+ * WARNING: needs to be called with at least 3 arguments to suit va_list processing!
+ */
+typedef int (*fn_on_lv_t)(struct logical_volume *lv, void *data);
+static int _lv_update_and_reload(struct logical_volume *lv, int recurse);
+static int _vg_write_lv_suspend_vg_commit(struct logical_volume *lv);
+static int _lv_update_reload_fns_reset_eliminate_lvs(struct logical_volume *lv, ...)
+{
+ int r = 0;
+ va_list ap;
+ fn_on_lv_t fn_pre_on_lv = NULL, fn_post_on_lv;
+ void *fn_pre_data, *fn_post_data;
+ struct lv_segment *seg;
+ struct dm_list *removal_lvs;
+
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+
+ va_start(ap, lv);
+ removal_lvs = va_arg(ap, struct dm_list *);
+ if ((fn_post_on_lv = va_arg(ap, fn_on_lv_t))) {
+ fn_post_data = va_arg(ap, void *);
+ if ((fn_pre_on_lv = va_arg(ap, fn_on_lv_t)))
+ fn_pre_data = va_arg(ap, void *);
+ }
+
+ /* Call any @fn_pre_on_lv before the first update and reload call (e.g. to rename LVs) */
+ if (fn_pre_on_lv && !fn_pre_on_lv(lv, fn_pre_data))
+ goto err;
+PFL();
+ /* Update metadata and reload mappings including flags (e.g. LV_REBUILD) */
+ if (!lv_update_and_reload(lv))
+ goto err;
+PFL();
+ /* Eliminate any residual LV and don't commit the metadata */
+ if (!_eliminate_extracted_lvs_optional_write_vg(lv->vg, removal_lvs, 0))
+ goto err;
+PFL();
+ /*
+ * Now that any 'REBUILD' or 'RESHAPE_DELTA_DISKS' etc.
+ * has/have made its/their way to the kernel, we must
+ * remove the flag(s) so that the individual devices are
+ * not rebuilt/reshaped/taken over upon every activation.
+ *
+ * Writes and commits metadata if any flags have been reset
+ * and if successful, performs metadata backup.
+ */
+ /* Avoid vg write+commit in vvv and do it here _once_ in case of fn_on_lv() being called */
+ log_debug_metadata("Clearing any flags for %s passed to the kernel", display_lvname(lv));
+ if (!_reset_flags_passed_to_kernel(lv, 0))
+ goto err;
+PFL();
+ /* Call any @fn_post_on_lv before the second update and reload call (e.g. to rename LVs back) */
+ if (fn_post_on_lv && !fn_post_on_lv(lv, fn_post_data))
+ goto err;
+PFL();
+ log_debug_metadata("Updating metadata and reloading mappings for %s", display_lvname(lv));
+ if (!lv_update_and_reload(lv)) {
+ log_error(INTERNAL_ERROR "Update and reload of LV %s failed", display_lvname(lv));
+ goto err;
+ }
+PFL();
+ r = 1;
+err:
+ va_end(ap);
+ return r;
+}
+
/* Area reorder helper: swap 2 LV segment areas @a1 and @a2 */
-static void _swap_areas(struct lv_segment_area *a1, struct lv_segment_area *a2)
+static int _swap_areas(struct lv_segment_area *a1, struct lv_segment_area *a2)
{
- struct lv_segment_area tmp = *a1;
+ struct lv_segment_area tmp;
+#if 0
char *tmp_name;
+#endif
+
+ RETURN_IF_ZERO(a1, "first segment area argument");
+ RETURN_IF_ZERO(a2, "first segment area argument");
+ tmp = *a1;
*a1 = *a2;
*a2 = tmp;
#if 0
@@ -1546,295 +2898,173 @@ static void _swap_areas(struct lv_segment_area *a1, struct lv_segment_area *a2)
a1->u.lv.lv->name = a2->u.lv.lv->name;
a2->u.lv.lv->name = tmp_name;
#endif
+ return 1;
}
/*
- * Reorder the areas in the first segment of @lv to suit raid10/raid0 layout
+ * Reorder the areas in the first segment of @seg to suit raid10_{near,far}/raid0 layout.
*
- * Examples with 6 disks indexed 0..5:
+ * raid10_{near,far} can only be reordered to raid0 if !mod(#total_devs, #mirrors)
*
- * raid0 (012345) -> raid10 (031425) order
- * idx 024135
- * raid10 (012345) -> raid0 (024135/135024) order depending on mirror selection
- * idx 031425
+ * Examples with 6 disks indexed 0..5 with 3 stripes:
+ * raid0 (012345) -> raid10_{near,far} (031425) order
+ * idx 024135
+ * raid10_{near,far} (012345) -> raid0 (024135/135024) order depending on mirror leg selection (TBD)
+ * idx 031425
* _or_ (variations possible)
- * idx 304152
+ * idx 304152
*
- */
-static void _lv_raid10_reorder_seg_areas(struct logical_volume *lv, int to_raid10)
-{
- unsigned s, ss, xchg;
- struct lv_segment *seg = first_seg(lv);
- uint32_t half_areas = seg->area_count / 2;
- short unsigned idx[seg->area_count];
-unsigned i = 0;
-
- /* Set up positional index array */
- if (to_raid10)
- for (s = 0; s < seg->area_count; s++)
- idx[s] = s < half_areas ? s * 2 : (s - half_areas) * 2 + 1;
- else
-#if 1
- for (s = 0; s < seg->area_count; s++)
- idx[s < half_areas ? s * 2 : (s - half_areas) * 2 + 1] = s;
-#else
- /* This selection casues image name suffixes to start > 0 and needs names shifting! */
- for (s = 0; s < seg->area_count; s++)
- idx[s < half_areas ? s * 2 + 1 : (s - half_areas) * 2] = s;
-#endif
- /* Sort areas */
- do {
- xchg = seg->area_count;
-
- for (s = 0; s < seg->area_count ; s++)
- if (idx[s] == s)
- xchg--;
-
- else {
- _swap_areas(seg->areas + s, seg->areas + idx[s]);
- _swap_areas(seg->meta_areas + s, seg->meta_areas + idx[s]);
- ss = idx[idx[s]];
- idx[idx[s]] = idx[s];
- idx[s] = ss;
- }
-i++;
- } while (xchg);
-
-PFLA("%d iterations", i);
-for (s = 0; s < seg->area_count; s++)
-PFLA("seg_lv(seg, %u)->name=%s", s, seg_lv(seg, s)->name);
-}
-
-/*
- * Add raid rmeta/rimage pair(s) to @lv to get to
- * absolute @new_count using @pvs to allocate from
+ * Examples 3 stripes with 9 disks indexed 0..8 to create a 3 striped raid0 with 3 data_copies per leg:
+ * vvv
+ * raid0 (012345678) -> raid10 (034156278) order
+ * v v v
+ * raid10 (012345678) -> raid0 (036124578) order depending on mirror leg selection (TBD)
*
*/
-static int _raid_add_images(struct logical_volume *lv,
- const struct segment_type *segtype,
- uint32_t new_count, struct dm_list *pvs)
+enum raid0_raid10_conversion { reorder_to_raid10_near, reorder_from_raid10_near };
+static int _reorder_raid10_near_seg_areas(struct lv_segment *seg, enum raid0_raid10_conversion conv)
{
- struct lv_segment *seg = first_seg(lv);
- int add_all_rmeta = 0, linear, flag_cleared;
- int reshape_disks = (seg_is_striped_raid(seg) && segtype && is_same_level(seg->segtype, segtype));
- uint32_t s;
- uint32_t old_count = lv_raid_image_count(lv);
- uint32_t count = new_count - old_count;
- uint64_t lv_flags = LV_REBUILD;
- struct dm_list data_lvs, meta_lvs;
-
-PFLA("seg->meta_areas=%p", seg->meta_areas);
- segtype = segtype ?: seg->segtype;
-PFLA("segtype->name=%s seg->segtype->name=%s, seg->area_count=%u new_count=%u old_count=%u count=%u", segtype->name, seg->segtype->name, seg->area_count, new_count, old_count, count);
-
- if (!(linear = seg_is_linear(seg)) &&
- !seg_is_raid(seg)) {
- log_error("Unable to add RAID images to %s of segment type %s",
- lv->name, lvseg_name(seg));
+ unsigned dc, idx1, idx1_sav, idx2, s, ss, str, xchg;
+ uint32_t *idx, stripes;
+ unsigned i = 0;
+
+ /* Internal sanity checks... */
+ RETURN_IF_ZERO(seg, "lv segment argument");
+ RETURN_IF_ZERO(conv == reorder_to_raid10_near || conv == reorder_from_raid10_near,
+ "supported reordering requested");
+ RETURN_IF_NONZERO((conv == reorder_to_raid10_near && !(seg_is_striped(seg) || seg_is_any_raid0(seg))) ||
+ (conv == reorder_from_raid10_near && !seg_is_raid10_near(seg)),
+ "proper segment types to reorder");
+ RETURN_IF_ZERO(seg->data_copies > 1, "#data_copies > 1");
+ if ((stripes = seg->area_count) % seg->data_copies) {
+ log_error("Can't convert raid10_near LV %s with number of stripes not divisable by number of data copies",
+ display_lvname(seg->lv));
return 0;
}
-PFL();
- if (lv->status & LV_NOTSYNCED) {
- log_error("Can't add image to out-of-sync RAID LV:"
- " use 'lvchange --resync' first.");
- return 0;
- }
+ stripes /= seg->data_copies;
+PFLA("seg->data_copies=%u stripes=%u", seg->data_copies, stripes);
- dm_list_init(&data_lvs); /* For data image additions */
- dm_list_init(&meta_lvs); /* For metadata image additions */
+ if (!(idx = dm_pool_zalloc(seg_lv(seg, 0)->vg->vgmem, seg->area_count * sizeof(*idx))))
+ return 0;
-PFLA("seg->meta_areas=%p", seg->meta_areas);
- /*
- * If the segtype is linear, then we must allocate a metadata
- * LV to accompany it.
- */
- if (linear) {
-PFL();
+ /* Set up positional index array */
+ switch (conv) {
+ case reorder_to_raid10_near:
/*
- * A complete resync will be done because of
- * the new raid1 set, no need to mark each sub-lv
+ * raid0 (012 345) with 3 stripes/2 data copies -> raid10 (031425)
+ *
+ * _reorder_raid10_near_seg_areas 2137 idx[0]=0
+ * _reorder_raid10_near_seg_areas 2137 idx[1]=2
+ * _reorder_raid10_near_seg_areas 2137 idx[2]=4
+ * _reorder_raid10_near_seg_areas 2137 idx[3]=1
+ * _reorder_raid10_near_seg_areas 2137 idx[4]=3
+ * _reorder_raid10_near_seg_areas 2137 idx[5]=5
*
- * -> reset rebuild flag
+ * raid0 (012 345 678) with 3 stripes/3 data copies -> raid10 (036147258)
*
- * Need to add an rmeta device to the
- * given linear device as well
+ * _reorder_raid10_near_seg_areas 2137 idx[0]=0
+ * _reorder_raid10_near_seg_areas 2137 idx[1]=3
+ * _reorder_raid10_near_seg_areas 2137 idx[2]=6
+ *
+ * _reorder_raid10_near_seg_areas 2137 idx[3]=1
+ * _reorder_raid10_near_seg_areas 2137 idx[4]=4
+ * _reorder_raid10_near_seg_areas 2137 idx[5]=7
+ * _reorder_raid10_near_seg_areas 2137 idx[6]=2
+ * _reorder_raid10_near_seg_areas 2137 idx[7]=5
+ * _reorder_raid10_near_seg_areas 2137 idx[8]=8
*/
+ /* idx[from] = to */
+ for (s = ss = 0; s < seg->area_count; s++)
+ if (s < stripes)
+ idx[s] = s * seg->data_copies;
- /* HM FIXME: avoid lv_flags altogether and better always define rebuild settings? */
- lv_flags = 0;
- add_all_rmeta = 1;
-
- /* Allocate an rmeta device to pair with the linear image */
- if (!_alloc_rmeta_for_linear(lv, &meta_lvs))
- return 0;
-
- /*
- * In case this is a conversion from raid0 to raid4/5/6,
- * add the metadata image LVs for the raid0 rimage LVs
- * presumably they don't exist already.
- */
- } else if (!seg->meta_areas) {
- add_all_rmeta = 1;
-
- if (!_alloc_rmeta_devs_for_lv(lv, &meta_lvs))
- return 0;
- }
-
+ else {
+ uint32_t factor = s % stripes;
-PFLA("seg->segtype->flags=%X lv_flags=%lX", seg->segtype->flags, lv_flags);
- /* Allocate the additional meta and data lvs requested */
- if (!_alloc_image_components(lv, 1, pvs, count, &meta_lvs, &data_lvs))
- return_0;
-PFL();
- /*
- * If linear, we must correct data LV names. They are off-by-one
- * because the linear volume hasn't taken its proper name of "_rimage_0"
- * yet. This action must be done before '_clear_lvs' because it
- * commits the LVM metadata before clearing the LVs.
- */
- if (linear) {
-PFL();
- if (!_correct_data_lv_names(lv, count, &data_lvs))
- return 0;
- if (!_convert_linear_to_raid(lv))
- return 0;
+ if (!factor)
+ ss++;
- /* Need access to the new first segment after the linear -> raid1 conversion */
- seg = first_seg(lv);
+ idx[s] = ss + factor * seg->data_copies;
+ }
- /* If we convert to raid1 via "lvconvert -mN", set segtype */
- if (old_count != new_count &&
- segtype == seg->segtype &&
- !(segtype = get_segtype_from_string(lv->vg->cmd, SEG_TYPE_NAME_RAID1)))
- return 0;
+ break;
- seg->segtype = segtype;
- }
+ case reorder_from_raid10_near:
+ /*
+ * Order depending on mirror leg selection (TBD)
+ *
+ * raid10 (012345) with 3 stripes/2 data copies -> raid0 (024135/135024)
+ * raid10 (012345678) with 3 stripes/3 data copies -> raid0 (036147258/147036258/...)
+ */
+ /* idx[from] = to */
PFL();
- /* Metadata LVs must be cleared before being added to the array */
- log_debug_metadata("Clearing newly allocated metadata LVs");
- if (!_clear_lvs(&meta_lvs))
- goto fail;
+ for (s = 0; s < seg->area_count; s++)
+ idx[s] = -1; /* = unused */
+ idx1 = 0;
+ idx2 = stripes;
+ for (str = 0; str < stripes; str++) {
PFL();
- /*
- * FIXME: It would be proper to activate the new LVs here, instead of having
- * them activated by the suspend. However, this causes residual device nodes
- * to be left for these sub-lvs.
- */
-
- /* Grow areas arrays for metadata and data devs */
- log_debug_metadata("Reallocating areas arrays");
- if (!_realloc_meta_and_data_seg_areas(lv, seg, new_count)) {
- log_error("Relocation of areas arrays failed.");
- return 0;
- }
-
- seg->area_count = new_count;
+ idx1_sav = idx1;
+ for (dc = 0; dc < seg->data_copies; dc++) {
+ struct logical_volume *slv;
PFL();
- /*
- * Set segment areas for metadata sub_lvs adding
- * an extra meta area when converting from linear
- */
- log_debug_metadata("Adding new metadata LVs");
- if (!_add_image_component_list(seg, 0, 0, &meta_lvs, add_all_rmeta ? 0 : old_count)) {
- seg->area_count = old_count;
- goto fail;
- }
-
- /* Set segment areas for data sub_lvs */
- log_debug_metadata("Adding new data LVs");
- if (!_add_image_component_list(seg, 0, lv_flags, &data_lvs, old_count)) {
- if (!_remove_image_component_list(seg, RAID_META, add_all_rmeta ? 0 : old_count, &meta_lvs))
- return 0;
-
- seg->area_count = old_count;
- goto fail;
- }
+ s = str * seg->data_copies + dc;
+ slv = seg_lv(seg, s);
+ idx[s] = ((slv->status & PARTIAL_LV) || idx1 != idx1_sav) ? idx2++ : idx1++;
+ }
- /* Reorder the areas in case this is a raid0 -> raid10 conversion */
- if (seg_is_any_raid0(seg) && segtype_is_raid10(segtype)) {
- log_debug_metadata("Redordering areas for raid0 -> raid10 takeover");
- _lv_raid10_reorder_seg_areas(lv, 1);
- }
+ if (idx1 == idx1_sav) {
+ log_error("Failed to find a valid mirror in stripe %u!", str);
+ return 0;
+ }
+ }
- /*
- * Conversion from linear to raid1 -> set rebuild flags properly
- *
- * We might as well clear all flags and the raid set
- * will be resnchronized properly because it is new,
- * but this shows proper status chars.
- */
- if (linear || (seg_is_any_raid0(seg) && old_count == 1)) {
- seg_lv(seg, 0)->status &= ~LV_REBUILD;
-
- for (s = old_count; s < new_count; s++)
- seg_lv(seg, s)->status |= LV_REBUILD;
+ break;
- } else if (reshape_disks) {
- uint32_t plus_extents = count * (lv->le_count / _data_rimages_count(seg, old_count));
-PFL();
- /*
- * Reshape adding image component pairs:
- *
- * - reset rebuild flag on new image LVs
- * - set delta disks plus flag on new image LVs
- */
- for (s = old_count; s < new_count; s++) {
-PFL();
- seg_lv(seg, s)->status &= ~LV_REBUILD;
- seg_lv(seg, s)->status |= LV_RESHAPE_DELTA_DISKS_PLUS;
- }
-PFL();
- /* Reshape adding image component pairs -> change sizes accordingly */
-PFLA("lv->le_count=%u data_rimages=%u plus_extents=%u", lv->le_count, _data_rimages_count(seg, old_count), plus_extents);
- lv->le_count += plus_extents;
- lv->size = lv->le_count * lv->vg->extent_size;
- seg->len += plus_extents;
- seg->area_len += plus_extents;
- seg->reshape_len = seg->reshape_len / _data_rimages_count(seg, old_count) *
- _data_rimages_count(seg, new_count);
- if (old_count == 2 && !seg->stripe_size)
- seg->stripe_size = DEFAULT_STRIPESIZE;
-PFLA("lv->le_count=%u", lv->le_count);
+ default:
+ return 0;
}
+PFL()
+for (s = 0; s < seg->area_count ; s++)
+PFLA("idx[%u]=%d", s, idx[s]);
- seg->segtype = segtype;
+ /* Sort areas */
+ do {
+ xchg = seg->area_count;
-PFL();
- if (!lv_update_and_reload_origin(lv)) {
- if (!_remove_image_component_list(seg, RAID_META, add_all_rmeta ? 0 : old_count, &meta_lvs) ||
- !_remove_image_component_list(seg, RAID_IMAGE, old_count, &data_lvs))
- return 0;
+ for (s = 0; s < seg->area_count ; s++)
+ if (idx[s] == s)
+ xchg--;
- goto fail;
- }
+ else {
+ if (!_swap_areas(seg->areas + s, seg->areas + idx[s]) ||
+ !_swap_areas(seg->meta_areas + s, seg->meta_areas + idx[s]))
+ return 0;
-PFL();
- /*
- * Now that the 'REBUILD' or 'RESHAPE_DELTA_DISKS' has/have made its/their
- * way to the kernel, we must remove the flag(s) so that the individual
- * devices are not rebuilt/reshaped upon every activation.
- */
- if (!_reset_flags_passed_to_kernel(lv, &flag_cleared))
- return 0;
-PFL();
- return flag_cleared ? lv_update_and_reload_origin(lv) : 1;
+ ss = idx[idx[s]];
+ idx[idx[s]] = idx[s];
+ idx[s] = ss;
+ }
+ i++;
+ } while (xchg);
-fail:
-PFL();
- /* Cleanly remove newly-allocated LVs that failed insertion attempt */
- if (!_eliminate_extracted_lvs(lv->vg, &meta_lvs) ||
- !_eliminate_extracted_lvs(lv->vg, &data_lvs))
- return_0;
+for (s = 0; s < seg->area_count ; s++)
+PFLA("s=%u idx[s]=%u", s, idx[s]);
+PFLA("%d iterations", i);
+for (s = 0; s < seg->area_count; s++)
+PFLA("seg_lv(seg, %u)->name=%s", s, seg_lv(seg, s)->name);
- return 0;
+ return 1;
}
/* Write vg of @lv, suspend @lv and commit the vg */
static int _vg_write_lv_suspend_vg_commit(struct logical_volume *lv)
{
+ RETURN_IF_ZERO(lv, "lv argument");
+ RETURN_IF_ZERO(first_seg(lv), "raid segment");
+
if (!vg_write(lv->vg)) {
log_error("Failed to write changes to %s in %s",
lv->name, lv->vg->name);
@@ -1842,8 +3072,8 @@ static int _vg_write_lv_suspend_vg_commit(struct logical_volume *lv)
}
if (!suspend_lv(lv->vg->cmd, lv)) {
- log_error("Failed to suspend %s/%s before committing changes",
- lv->vg->name, lv->name);
+ log_error("Failed to suspend %s before committing changes",
+ display_lvname(lv));
vg_revert(lv->vg);
return 0;
}
@@ -1857,261 +3087,49 @@ static int _vg_write_lv_suspend_vg_commit(struct logical_volume *lv)
return 1;
}
+/****************************************************************************/
/*
- * _raid_extract_images
- * @lv
- * @new_count: The absolute count of images (e.g. '2' for a 2-way mirror)
- * @target_pvs: The list of PVs that are candidates for removal
- * @shift: If set, use _shift_image_components().
- * Otherwise, leave the [meta_]areas as AREA_UNASSIGNED and
- * seg->area_count unchanged.
- * @extracted_[meta|data]_lvs: The LVs removed from the array. If 'shift'
- * is set, then there will likely be name conflicts.
+ * HM
*
- * This function extracts _both_ portions of the indexed image. It
- * does /not/ commit the results. (IOW, erroring-out requires no unwinding
- * of operations.)
+ * Add/remove metadata areas to/from raid0
*
- * Returns: 1 on success, 0 on failure
+ * Update metadata and reload mappings if @update_and_reload
*/
-static int _raid_extract_images(struct logical_volume *lv, uint32_t new_count,
- struct dm_list *target_pvs, int shift,
- struct dm_list *extracted_meta_lvs,
- struct dm_list *extracted_data_lvs)
+static int _alloc_and_add_rmeta_devs_for_lv(struct logical_volume *lv, struct dm_list *allocate_pvs)
{
- int ss;
- unsigned s, extract, i, lvl_idx = 0;
- struct lv_list *lvl_array;
- struct lv_segment *seg = first_seg(lv);
- struct segment_type *error_segtype;
-
- extract = seg->area_count - new_count;
- log_verbose("Extracting %u image%s from %s/%s", extract,
- (extract > 1) ? "s" : "", lv->vg->name, lv->name);
+ struct lv_segment *seg;
+ struct dm_list meta_lvs;
+struct lv_list *lvl;
- if ((s = dm_list_size(target_pvs)) < extract) {
- log_error("Unable to remove %d images: Only %d device%s given.",
- extract, s, (s == 1) ? "" : "s");
- return 0;
- }
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
- if (!(lvl_array = dm_pool_alloc(lv->vg->vgmem, 2 * extract * sizeof(*lvl_array))))
- return_0;
+ dm_list_init(&meta_lvs);
- if (!(error_segtype = get_segtype_from_string(lv->vg->cmd, "error")))
+ log_debug_metadata("Allocating metadata LVs for %s", display_lvname(lv));
+ if (!_alloc_rmeta_devs_for_lv(lv, &meta_lvs, allocate_pvs)) {
+ log_error("Failed to allocate metadata LVs for %s", display_lvname(lv));
return_0;
-
- /*
- * We make two passes over the devices.
- * - The first pass we look for error LVs
- * - The second pass we look for PVs that match target_pvs
- */
- for (ss = (seg->area_count * 2) - 1; (ss >= 0) && extract; ss--) {
- s = ss % seg->area_count;
-
- if (ss / seg->area_count) {
- /* Conditions for first pass */
- if ((first_seg(seg_lv(seg, s))->segtype != error_segtype) &&
- (first_seg(seg_metalv(seg, s))->segtype != error_segtype))
- continue;
-
- if (!dm_list_empty(target_pvs) &&
- (target_pvs != &lv->vg->pvs)) {
- /*
- * User has supplied a list of PVs, but we
- * cannot honor that list because error LVs
- * must come first.
- */
- log_error("%s has components with error targets"
- " that must be removed first: %s.",
- display_lvname(lv), display_lvname(seg_lv(seg, s)));
-
- log_error("Try removing the PV list and rerun"
- " the command.");
- return 0;
- }
-
- log_debug("LVs with error segments to be removed: %s %s",
- display_lvname(seg_metalv(seg, s)),
- display_lvname(seg_lv(seg, s)));
-
- } else {
- /* Conditions for second pass */
- if (!target_pvs ||
- !lv_is_on_pvs(seg_lv(seg, s), target_pvs) ||
- !lv_is_on_pvs(seg_metalv(seg, s), target_pvs))
- continue;
-
- if (!_raid_in_sync(lv) &&
- (!seg_is_mirrored(seg) || !s)) {
- log_error("Unable to extract %sRAID image"
- " while RAID array is not in-sync",
- seg_is_mirrored(seg) ? "primary " : "");
- return 0;
- }
- }
-
-PFLA("seg_lv(seg, %u)=%s", s, seg_lv(seg,s)->name);
- if (!_extract_image_component_pair(seg, s, lvl_array + lvl_idx)) {
- log_error("Failed to extract %s from %s", seg_lv(seg, s)->name, lv->name);
- return 0;
- }
-
- lvl_idx += 2;
- extract--;
}
- if (shift && !_shift_image_components(seg)) {
- log_error("Failed to shift and rename image components");
- return 0;
- }
-
- for (i = 0; i < lvl_idx; i += 2) {
- dm_list_add(extracted_meta_lvs, &lvl_array[i].list);
- dm_list_add(extracted_data_lvs, &lvl_array[i + 1].list);
+dm_list_iterate_items(lvl, &meta_lvs)
+PFLA("meta_lv=%s", lvl->lv->name);
+ /* Metadata LVs must be cleared before being added to the array */
+ log_debug_metadata("Clearing newly allocated metadata LVs for %s", display_lvname(lv));
+ if (!_clear_lvs(&meta_lvs)) {
+ log_error("Failed to initialize metadata LVs for %s", display_lvname(lv));
+ return_0;
}
- if (extract) {
- log_error("Unable to extract enough images to satisfy request");
- return 0;
+ /* Set segment areas for metadata sub_lvs */
+ log_debug_metadata("Adding newly allocated metadata LVs to %s", display_lvname(lv));
+ if (!_add_image_component_list(seg, 1, 0, &meta_lvs, 0)) {
+ log_error("Failed to add newly allocated metadata LVs to %s", display_lvname(lv));
+ return_0;
}
return 1;
}
-/* Remove image component pairs from @lv defined by @new_count (< old_count) */
-static int _raid_remove_images(struct logical_volume *lv,
- const struct segment_type *segtype,
- uint32_t new_count, struct dm_list *pvs)
-{
- struct lv_segment *seg = first_seg(lv);
- int reshape_disks = (seg_is_striped_raid(seg) && segtype && is_same_level(seg->segtype, segtype));
- unsigned old_count = seg->area_count;
- struct dm_list removal_list;
- struct lv_list *lvl;
-
- /* HM FIXME: TESTME: allow to remove out-of-sync dedicated parity/Q syndrome devices */
- if (seg_is_striped_raid(seg) &&
- (lv->status & LV_NOTSYNCED) &&
- !((seg_is_raid5_n(seg) || seg_is_raid6_n_6(seg)) &&
- old_count - new_count == 1)) {
- log_error("Can't remove image(s) from out-of-sync striped RAID LV:"
- " use 'lvchange --resync' first.");
- return 0;
- }
-
-PFLA("seg->segtype=%s segtype=%s new_count=%u", seg->segtype->name, segtype->name, new_count);
- dm_list_init(&removal_list);
-
- /* If we convert away from raid4/5/6/10 -> remove any reshape space */
- if (!(segtype_is_raid10(segtype) ||
- segtype_is_raid4(segtype) ||
- segtype_is_any_raid5(segtype) ||
- segtype_is_any_raid6(segtype)) &&
- !_lv_free_reshape_space(lv)) {
- log_error(INTERNAL_ERROR "Failed to remove reshape space from %s/%s",
- lv->vg->name, lv->name);
- return 0;
- }
-PFL();
- /* Reorder the areas in case this is a raid10 -> raid0 conversion */
- if (seg_is_raid10(seg) && segtype_is_any_raid0(segtype)) {
- log_debug_metadata("Reordering areas for raid0 -> raid10 takeover");
- _lv_raid10_reorder_seg_areas(lv, 0);
- }
-PFL();
-
- /* Extract all image and any metadata lvs past new_count */
- if (!_raid_extract_images(lv, new_count, pvs, 1,
- &removal_list, &removal_list)) {
- log_error("Failed to extract images from %s/%s",
- lv->vg->name, lv->name);
- return 0;
- }
-PFL();
- /* Shrink areas arrays for metadata and data devs after the extration */
- if (!_realloc_meta_and_data_seg_areas(lv, seg, new_count)) {
- log_error("Relocation of areas arrays failed.");
- return 0;
- }
-
- /* Set before any optional removal of metadata devs following immediately */
- seg->area_count = new_count;
-
- /*
- * In case this is a conversion to raid0 (i.e. no metadata devs),
- * remove the metadata image LVs.
- */
- if (segtype_is_raid0(segtype) &&
- seg->meta_areas &&
- !_remove_image_component_list(seg, RAID_META, 0, &removal_list))
- return 0;
-PFL();
-
- /* raid0* does not have a bitmap -> no region size */
- if (segtype_is_any_raid0(segtype))
- seg->region_size = 0;
-
-PFL();
- /* Reshape adding image component pairs -> change sizes accordingly */
- if (reshape_disks) {
- uint32_t minus_extents = (old_count - new_count) * (lv->le_count / _data_rimages_count(seg, old_count));
-
-PFLA("lv->le_count=%u data_rimages=%u minus_extents=%u", lv->le_count, _data_rimages_count(seg, old_count), minus_extents);
- lv->le_count -= minus_extents;
- lv->size = lv->le_count * lv->vg->extent_size;
- seg->len -= minus_extents;
- seg->area_len -= minus_extents;
- seg->reshape_len = seg->reshape_len / _data_rimages_count(seg, old_count) *
- _data_rimages_count(seg, new_count);
-PFLA("lv->le_count=%u", lv->le_count);
- }
-
- /* Convert to linear? */
- if (segtype_is_linear(segtype)) { /* new_count == 1 */
- if (!(seg->segtype = get_segtype_from_string(lv->vg->cmd, SEG_TYPE_NAME_RAID1)))
- return_0;
-
- if (!_raid_remove_top_layer(lv, &removal_list)) {
- log_error("Failed to remove RAID layer"
- " after linear conversion");
- return 0;
- }
-
- lv->status &= ~(LV_NOTSYNCED | LV_WRITEMOSTLY);
- seg->stripe_size = 0;
- seg->writebehind = 0;
- }
-
-PFL();
- seg->segtype = segtype;
- if (!_vg_write_lv_suspend_vg_commit(lv))
- return 0;
-
- /*
- * We activate the extracted sub-LVs first so they are
- * renamed and won't conflict with the remaining sub-LVs.
- */
-PFL();
- dm_list_iterate_items(lvl, &removal_list) {
- if (!activate_lv_excl_local(lv->vg->cmd, lvl->lv)) {
- log_error("Failed to resume extracted LVs");
- return 0;
- }
- }
-
-PFL();
- if (!resume_lv(lv->vg->cmd, lv)) {
- log_error("Failed to resume %s/%s after committing changes",
- lv->vg->name, lv->name);
- return 0;
- }
-
-PFL();
- /* Eliminate the residual LVs, write VG, commit it and take a backup */
- return _eliminate_extracted_lvs(lv->vg, &removal_list);
-}
-
/*
* HM
*
@@ -2119,148 +3137,125 @@ PFL();
*
* Update metadata and reload mappings if @update_and_reload
*/
-static int _raid0_add_or_remove_metadata_lvs(struct logical_volume *lv, int update_and_reload)
+static int _raid0_add_or_remove_metadata_lvs(struct logical_volume *lv,
+ int update_and_reload,
+ struct dm_list *allocate_pvs,
+ struct dm_list *removal_lvs)
{
- const char *raid0_type_name;
- struct lv_segment *seg = first_seg(lv);
- struct dm_list removal_mlvs;
+ uint64_t raid_type_flag;
+ struct lv_segment *seg;
- dm_list_init(&removal_mlvs);
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
if (seg->meta_areas) {
+PFL();
+ RETURN_IF_ZERO(removal_lvs, "removal LVs list argument");
log_debug_metadata("Extracting metadata LVs");
- if (!_remove_image_component_list(seg, RAID_META, 0, &removal_mlvs))
+ if (!_extract_image_component_list(seg, RAID_META, 0, removal_lvs)) {
+ log_error(INTERNAL_ERROR "Failed to extract metadata LVs");
return 0;
+ }
PFL();
+ raid_type_flag = SEG_RAID0;
seg->meta_areas = NULL;
- raid0_type_name = SEG_TYPE_NAME_RAID0;
-
-PFL();
} else {
- struct dm_list meta_lvs;
-
- dm_list_init(&meta_lvs);
-
- if (!(seg->meta_areas = dm_pool_zalloc(lv->vg->vgmem,
- seg->area_count * sizeof(*seg->meta_areas))))
+ if (!_alloc_and_add_rmeta_devs_for_lv(lv, allocate_pvs))
return 0;
- if (!_alloc_rmeta_devs_for_lv(lv, &meta_lvs))
- return 0;
-
- /* Metadata LVs must be cleared before being added to the array */
- log_debug_metadata("Clearing newly allocated metadata LVs");
- if (!_clear_lvs(&meta_lvs)) {
- log_error("Failed to initialize metadata LVs");
- return 0;
- }
-
- /* Set segment areas for metadata sub_lvs */
- if (!_add_image_component_list(seg, 1, 0, &meta_lvs, 0))
- return 0;
-
- raid0_type_name = SEG_TYPE_NAME_RAID0_META;
+ raid_type_flag = SEG_RAID0_META;
}
-PFL();
- if (!(seg->segtype = get_segtype_from_string(lv->vg->cmd, raid0_type_name)))
+ if (!(seg->segtype = get_segtype_from_flag(lv->vg->cmd, raid_type_flag)))
return_0;
PFL();
- if (update_and_reload &&
- !lv_update_and_reload_origin(lv))
- return_0;
+ if (update_and_reload) {
+ if (!lv_update_and_reload_origin(lv))
+ return_0;
+
+ /* If any residual LVs, eliminate them, write VG, commit it and take a backup */
+ return _eliminate_extracted_lvs(lv->vg, removal_lvs);
+ }
PFL();
- /* If any residual LVs, eliminate them, write VG, commit it and take a backup */
- return dm_list_empty(&removal_mlvs) ? 1 : _eliminate_extracted_lvs(lv->vg, &removal_mlvs);
+ return 1;
}
-/*
- * lv_raid_change_image_count
- * @lv
- * @new_count: The absolute count of images (e.g. '2' for a 2-way mirror)
- * @pvs: The list of PVs that are candidates for removal (or empty list)
- *
- * RAID arrays have 'images' which are composed of two parts, they are:
- * - 'rimage': The data/parity holding portion
- * - 'rmeta' : The metadata holding portion (i.e. superblock/bitmap area)
- * This function adds or removes _both_ portions of the image and commits
- * the results.
- *
- * Returns: 1 on success, 0 on failure
- */
-static int _lv_raid_change_image_count(struct logical_volume *lv,
- const struct segment_type *segtype,
- uint32_t new_count, struct dm_list *pvs)
+/* Set segment area data image LVs from @data_lvs with @status in @lv and give them proper names */
+static int _set_lv_areas_from_data_lvs_and_create_names(struct logical_volume *lv,
+ struct dm_list *data_lvs,
+ uint64_t status)
{
- uint32_t old_count = lv_raid_image_count(lv);
+ uint32_t s = 0;
+ char **name;
+ const char *suffix = (status & RAID_IMAGE) ? "rimage_" : "rmeta_";
+ struct lv_list *lvl, *tlvl;
+ struct lv_segment *seg;
- if (old_count == new_count) {
- log_warn("%s/%s already has image count of %d.",
- lv->vg->name, lv->name, new_count);
- return 1;
- }
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+ RETURN_IF_ZERO(data_lvs, "data LVs list argument");
- segtype = segtype ?: first_seg(lv)->segtype;
-PFLA("segtype=%s old_count=%u new_count=%u", segtype->name, old_count, new_count);
+ dm_list_iterate_items_safe(lvl, tlvl, data_lvs) {
+PFLA("lv=%s", display_lvname(lvl->lv));
+ dm_list_del(&lvl->list);
+ lv_set_hidden(lvl->lv);
- /* Check for maximum supported raid devices */
- if (!_check_maximum_devices(new_count))
- return 0;
+ if (!set_lv_segment_area_lv(seg, s, lvl->lv, 0, status | RAID_IMAGE))
+ return_0;
- /*
- * LV must be either in-active or exclusively active
- */
- if (lv_is_active(lv) &&
- vg_is_clustered(lv->vg) &&
- !lv_is_active_exclusive_locally(lv)) {
- log_error("%s/%s must be active exclusive locally to"
- " perform this operation.", lv->vg->name, lv->name);
- return 0;
+ name = (status & RAID_IMAGE) ? (char **) &seg_lv(seg, s)->name :
+ (char **) &seg_metalv(seg, s)->name;
+ if (!(*name = _generate_raid_name(lv, suffix, s++))) {
+ log_error("Failed to allocate new data image LV name for %s", display_lvname(lv));
+ return 0;
+ }
}
- return (old_count > new_count ? _raid_remove_images : _raid_add_images)(lv, segtype, new_count, pvs);
-}
-
-int lv_raid_change_image_count(struct logical_volume *lv,
- uint32_t new_count, struct dm_list *pvs)
-{
- return _lv_raid_change_image_count(lv, NULL, new_count, pvs);
+ return 1;
}
-int lv_raid_split(struct logical_volume *lv, const char *split_name,
- uint32_t new_count, struct dm_list *splittable_pvs)
+/*
+ * HM API function:
+ *
+ * split off raid1 images of @lv, prefix with @split_name or select duplicated LV by @split_name,
+ * leave @new_image_count in the raid1 set and find them on @splittable_pvs
+ *
+ * HM FIXME: enhance to split off selectable sub LV (for raid01)
+ */
+static int _raid_split_duplicate(struct logical_volume *lv, int yes,
+ const char *split_name, uint32_t new_image_count);
+int lv_raid_split(struct logical_volume *lv, int yes,
+ const char *split_name, uint32_t new_image_count,
+ struct dm_list *splittable_pvs)
{
+ uint32_t split_count, s;
struct lv_list *lvl;
- struct dm_list removal_list, data_list;
- struct cmd_context *cmd = lv->vg->cmd;
- struct logical_volume *tracking;
+ struct dm_list meta_lvs, data_lvs;
+ struct cmd_context *cmd;
+ struct logical_volume *tracking, *split_lv = NULL;
+ struct lv_segment *seg;
struct dm_list tracking_pvs;
- dm_list_init(&removal_list);
- dm_list_init(&data_list);
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+ RETURN_IF_NONZERO(!seg_is_mirrored(seg) && !seg_is_raid01(seg),
+ "mirrored/raid10 segment to split off");
+ RETURN_IF_SEG_AREA_COUNT_FALSE(seg, new_image_count);
+ RETURN_IF_ZERO(new_image_count, "images left, rejecting request");
+ RETURN_IF_ZERO(split_name, "split name argument");
+ cmd = lv->vg->cmd;
- if (!new_count) {
- log_error("Unable to split all images from %s/%s",
- lv->vg->name, lv->name);
- return 0;
- }
+ dm_list_init(&meta_lvs);
+ dm_list_init(&data_lvs);
- if (!seg_is_mirrored(first_seg(lv)) ||
- segtype_is_raid10(first_seg(lv)->segtype)) {
- log_error("Unable to split logical volume of segment type, %s",
- lvseg_name(first_seg(lv)));
+ if (!_lv_is_active((lv)))
return 0;
- }
- if (vg_is_clustered(lv->vg) && !lv_is_active_exclusive_locally(lv)) {
- log_error("%s/%s must be active exclusive locally to"
- " perform this operation.", lv->vg->name, lv->name);
- return 0;
- }
+ /* Special case for splitting off an image of a duplicating LV */
+ if (_lv_is_duplicating(lv))
+ return _raid_split_duplicate(lv, yes, split_name, new_image_count);
+ /* raid1 leg split from here... */
if (find_lv_in_vg(lv->vg, split_name)) {
log_error("Logical Volume \"%s\" already exists in %s",
split_name, lv->vg->name);
@@ -2268,8 +3263,8 @@ int lv_raid_split(struct logical_volume *lv, const char *split_name,
}
if (!_raid_in_sync(lv)) {
- log_error("Unable to split %s/%s while it is not in-sync.",
- lv->vg->name, lv->name);
+ log_error("Unable to split %s while it is not in-sync.",
+ display_lvname(lv));
return 0;
}
@@ -2277,8 +3272,9 @@ int lv_raid_split(struct logical_volume *lv, const char *split_name,
* We only allow a split while there is tracking if it is to
* complete the split of the tracking sub-LV
*/
- if (_lv_is_raid_with_tracking(lv, &tracking)) {
- if (!lv_is_on_pvs(tracking, splittable_pvs)) {
+ log_debug_metadata("Check if LV %s is tracking changes", display_lvname(lv));
+ if (_lv_is_raid_with_tracking(lv, &s)) {
+ if (!lv_is_on_pvs((tracking = seg_lv(seg, s)), splittable_pvs)) {
log_error("Unable to split additional image from %s "
"while tracking changes for %s",
lv->name, tracking->name);
@@ -2293,47 +3289,103 @@ int lv_raid_split(struct logical_volume *lv, const char *split_name,
return_0;
}
- if (!_raid_extract_images(lv, new_count, splittable_pvs, 1,
- &removal_list, &data_list)) {
- log_error("Failed to extract images from %s/%s",
- lv->vg->name, lv->name);
- return 0;
- }
+ split_count = seg->area_count - new_image_count;
-
- /* Convert to linear? */
- if (new_count == 1 && !_raid_remove_top_layer(lv, &removal_list)) {
- log_error("Failed to remove RAID layer after linear conversion");
+ log_debug_metadata("Extracting image components from LV %s", display_lvname(lv));
+ if (!_raid_extract_images(lv, new_image_count, splittable_pvs, 0 /* Don't shift */,
+ &meta_lvs, &data_lvs)) {
+ log_error("Failed to extract images from %s",
+ display_lvname(lv));
return 0;
}
- /* Rename all extracted rimages */
- dm_list_iterate_items(lvl, &data_list)
+ /* Rename all extracted rimages with @split_name prefix */
+ log_debug_metadata("Rename all extracted LVs of LV %s to split name", display_lvname(lv));
+ dm_list_iterate_items(lvl, &data_lvs)
if (!_lv_name_add_string_index(cmd, &lvl->lv->name, split_name))
return 0;
- if (!_vg_write_lv_suspend_vg_commit(lv))
- return 0;
+ /* Split off multiple images as a new raid1 LV */
+ if (split_count > 1) {
+ uint64_t status = RAID | LVM_READ | LVM_WRITE;
+ struct lv_segment *raid1_seg;
- dm_list_iterate_items(lvl, &data_list)
- if (!activate_lv_excl_local(cmd, lvl->lv))
+ log_print_unless_silent("Splitting off %u images into new raid1 LV %s/%s",
+ split_count, lv->vg->name, split_name);
+
+ /* Create empty LV with @split_name to add segment and images */
+ log_debug_metadata("Creating new raid1 LV");
+ if (!(split_lv = lv_create_empty(split_name, NULL, status | VISIBLE_LV, ALLOC_INHERIT, lv->vg))) {
+ log_error("Failed to create new raid1 LV %s/%s.", lv->vg->name, split_name);
return_0;
+ }
- dm_list_iterate_items(lvl, &removal_list)
- if (!activate_lv_excl_local(cmd, lvl->lv))
+ /* Create the one top-level segment for our new raid1 split LV and add it to the LV */
+ log_debug_metadata("Creating new raid1 segment for slit off image component pairs of %s",
+ display_lvname(lv));
+ if (!(raid1_seg = alloc_lv_segment(seg->segtype, split_lv, 0, seg->len, 0, status,
+ seg->stripe_size, NULL,
+ split_count, seg->area_len,
+ split_count, 0, seg->region_size, 0, NULL))) {
+ log_error("Failed to create raid1 segment for %s", display_lvname(split_lv));
return_0;
+ }
+ dm_list_add(&split_lv->segments, &raid1_seg->list);
+ raid1_seg->data_copies = split_count;
- if (!resume_lv(lv->vg->cmd, lv_lock_holder(lv))) {
- log_error("Failed to resume %s/%s after committing changes",
- lv->vg->name, lv->name);
+ /* Set new raid1 segment area data and metadata image LVs and give them proper names */
+ log_debug_metadata("setting areas of new raid1 segment");
+ if(!_set_lv_areas_from_data_lvs_and_create_names(split_lv, &data_lvs, RAID_IMAGE) ||
+ !_set_lv_areas_from_data_lvs_and_create_names(split_lv, &meta_lvs, RAID_META))
+ return 0;
+
+ split_lv->le_count = seg->len;
+ split_lv->size = seg->len * lv->vg->extent_size;
+ }
+
+ /* Adjust numbers of raid1 areas and data copies (i.e. sub LVs) */
+ seg->area_count = seg->data_copies = new_image_count;
+
+ /* Convert to linear? */
+ if (new_image_count == 1) {
+ log_debug_metadata("Converting LV %s with one image to linear", display_lvname(lv));
+ if (!_convert_raid_to_linear(lv, &meta_lvs)) {
+ log_error("Failed to remove RAID layer after linear conversion");
+ return 0;
+ }
+ }
+
+ log_debug_metadata("Supending LV %s and commiting metadata", display_lvname(lv));
+ if (!_vg_write_lv_suspend_vg_commit(lv))
+ return 0;
+
+ log_debug_metadata("Activating split off LVs");
+ if (!_activate_lv_list_excl_local(&data_lvs) ||
+ !_activate_lv_list_excl_local(&meta_lvs))
+ return_0;
+
+ log_debug_metadata("Resuming LV %s after split", display_lvname(lv));
+ if (!resume_lv(cmd, lv_lock_holder(lv))) {
+ log_error("Failed to resume %s after committing changes",
+ display_lvname(lv));
return 0;
}
- return _eliminate_extracted_lvs(lv->vg, &removal_list);
-}
+ log_debug_metadata("Removing extracted metadata LVs of LV %s", display_lvname(lv));
+ if (!_eliminate_extracted_lvs(lv->vg, &meta_lvs))
+ return 0;
+
+ if (split_lv && !activate_lv_excl_local(cmd, split_lv))
+ return 0;
+ log_print_unless_silent("LV %s split off successfully from %s",
+ display_lvname(split_lv ?: dm_list_item(data_lvs.n, struct lv_list)->lv), display_lvname(lv));
+ return 1;
+}
/*
+ * API function:
+ *
* lv_raid_split_and_track
* @lv
* @splittable_pvs
@@ -2347,19 +3399,23 @@ int lv_raid_split(struct logical_volume *lv, const char *split_name,
* Returns: 1 on success, 0 on error
*/
int lv_raid_split_and_track(struct logical_volume *lv,
+ int yes,
+ const char *sub_lv_name,
struct dm_list *splittable_pvs)
{
int s;
- struct lv_segment *seg = first_seg(lv);
+ struct logical_volume *split_lv;
+ struct lv_segment *seg;
+ struct volume_group *vg;
- if (!seg_is_mirrored(seg)) {
- log_error("Unable to split images from non-mirrored RAID");
- return 0;
- }
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+ RETURN_IF_NONZERO(!seg_is_mirrored(seg) && !seg_is_raid01(seg),
+ "mirrored/raid10 segment to split off");
+ vg = lv->vg;
if (!_raid_in_sync(lv)) {
- log_error("Unable to split image from %s/%s while not in-sync",
- lv->vg->name, lv->name);
+ log_error("Unable to split image from %s while not in-sync",
+ display_lvname(lv));
return 0;
}
@@ -2369,38 +3425,143 @@ int lv_raid_split_and_track(struct logical_volume *lv,
return 0;
}
- for (s = seg->area_count - 1; s >= 0; --s) {
- if (!lv_is_on_pvs(seg_lv(seg, s), splittable_pvs))
+ /* Find sub LV by name if given, else select last raid1 leg */
+ for (s = seg->area_count - 1; s >= 0; s--) {
+ if (sub_lv_name &&
+ !strstr(sub_lv_name, seg_lv(seg, s)->name))
continue;
- lv_set_visible(seg_lv(seg, s));
- seg_lv(seg, s)->status &= ~LVM_WRITE;
- break;
+
+ if (lv_is_on_pvs(seg_lv(seg, s), splittable_pvs)) {
+ split_lv = seg_lv(seg, s);
+ split_lv->status &= ~LVM_WRITE;
+ lv_set_visible(split_lv);
+ break;
+ }
}
if (s < 0) {
- log_error("Unable to find image to satisfy request");
+ log_error("No image found to satisfy request");
return 0;
}
- if (!lv_update_and_reload(lv))
+ if (!yes && yes_no_prompt("Do you really want to split off tracking LV %s from %s [y/n]: ",
+ display_lvname(split_lv), display_lvname(lv)) == 'n')
+ return 0;
+
+ if (sigint_caught())
return_0;
+
+ /*
+ * Check restrictions to keep resilience with just 2 raid1 legs
+ *
+ * A 2-legged raid1 flat mirror will lose all resilience if we allow one leg to
+ * be tracked, because only one remaining leg will receive any writes.
+ *
+ * A duplicating LV (i.e. raid1 top-level with variations of layouts as 2 sub LVs)
+ * _may_ still keep resilience, presumably the remaining leg is raid1/4/5/10, because
+ * data redundancy is being ensured within the remaining raid sub LV.
+ */
+ if (seg->area_count < 3) {
+ int duplicating = _lv_is_duplicating(lv), redundant = 0;
+
+ if (duplicating) {
+ struct lv_segment *seg1;
+
+ RETURN_IF_LV_SEG_ZERO(seg_lv(seg, !s), (seg1 = first_seg(seg_lv(seg, !s))));
+
+ /* Allow for 2-legged tracking, presumably the duplicated LV left is resilient */
+ if (!_lv_is_degraded(lv) &&
+ seg_is_raid(seg1) &&
+ !seg_is_any_raid0(seg1))
+ redundant = 1;
+ else
+ log_error("Split would cause complete loss of redundancy with "
+ "one %s%s duplicating leg %s",
+ _lv_is_degraded(lv) ? "degraded " : "",
+ lvseg_name(seg1), display_lvname(seg_lv(seg, !s)));
+
+ } else
+ log_error("Tracking an image in 2-way raid1 LV %s will cause loss of redundancy!",
+ display_lvname(lv));
+
+ if (!redundant) {
+ log_error("Run \"lvconvert %s %s\" to have 3 legs before splitting of %s and redo",
+ duplicating ? "--dup ..." : "-m2",
+ display_lvname(lv), display_lvname(split_lv));
+ return 0;
+ }
+ }
+
+ if (_lv_is_degraded(lv)) {
+ log_error("Splitting off degraded LV %s rejected to limit danger of data loss; repair first",
+ display_lvname(lv));
+ return 0;
+ }
+
+ if (!vg_write(vg) || !vg_commit(vg) || !backup(vg))
+ return_0;
+
+ /* Suspend+resume the tracking LV to create its devnode */
+ if (!suspend_lv(vg->cmd, split_lv) || !resume_lv(vg->cmd, split_lv)) {
+ log_error("Failed to suspend+resume %s after committing changes",
+ display_lvname(split_lv));
+ return 0;
+ }
+
log_print_unless_silent("%s split from %s for read-only purposes.",
- seg_lv(seg, s)->name, lv->name);
+ split_lv->name, lv->name);
+ log_print_unless_silent("Use 'lvconvert --merge %s' to merge back into %s",
+ display_lvname(split_lv), display_lvname(lv));
+ return 1;
+}
- /* Activate the split (and tracking) LV */
- if (!_activate_sublv_preserving_excl(lv, seg_lv(seg, s)))
+/* HM Helper: */
+static int _lv_update_and_reload(struct logical_volume *lv, int recurse)
+{
+ uint32_t s;
+ struct logical_volume *lv1;
+ struct lv_segment *seg;
+
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+
+ if (!recurse && !_vg_write_lv_suspend_vg_commit(lv))
return_0;
- log_print_unless_silent("Use 'lvconvert --merge %s/%s' to merge back into %s",
- lv->vg->name, seg_lv(seg, s)->name, lv->name);
+ for (s = 0; s < seg->area_count; s++) {
+ if ((lv1 = seg_lv(seg, s))) {
+PFLA("lv1=%s recurse=%d", display_lvname(lv1), recurse);
+ if (seg_type(first_seg(lv1), 0) == AREA_LV && lv_is_duplicated(lv1)) {
+ if (!_lv_update_and_reload(lv1, 1))
+ return_0;
+
+ } else if (!_activate_sublv_preserving_excl(lv, lv1))
+ return_0;
+ }
+
+ if ((lv1 = _seg_metalv_checked(seg, s)) &&
+ !_activate_sublv_preserving_excl(lv, lv1))
+ return_0;
+ }
+
+ if (!resume_lv(lv->vg->cmd, lv_lock_holder(lv))) {
+ log_error("Failed to resume %s after committing changes",
+ display_lvname(lv));
+ return 0;
+ }
+
return 1;
}
+/*
+ * API function:
+ *
+ * merge split of tracking @image_lv back into raid1 set
+ */
int lv_raid_merge(struct logical_volume *image_lv)
{
uint32_t s;
- char *p, *lv_name;
+ char *lv_name;
struct lv_list *lvl;
struct logical_volume *lv;
struct logical_volume *meta_lv = NULL;
@@ -2408,15 +3569,13 @@ int lv_raid_merge(struct logical_volume *image_lv)
struct volume_group *vg = image_lv->vg;
struct logical_volume *tracking;
- if (!(lv_name = dm_pool_strdup(vg->vgmem, image_lv->name)))
- return_0;
+ RETURN_IF_ZERO(image_lv, "image LV argument");
- if (!(p = strstr(lv_name, "_rimage_"))) {
- log_error("Unable to merge non-mirror image %s.",
+ if (!(lv_name = _top_level_lv_name(image_lv))) {
+ log_error("Unable to merge non-{mirror,duplicating} image %s.",
display_lvname(image_lv));
return 0;
}
- *p = '\0'; /* lv_name is now that of top-level RAID */
if (!(lvl = find_lv_in_vg(vg, lv_name))) {
log_error("Unable to find containing RAID array for %s.",
@@ -2425,23 +3584,27 @@ int lv_raid_merge(struct logical_volume *image_lv)
}
lv = lvl->lv;
- seg = first_seg(lv);
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+ RETURN_IF_NONZERO(!seg_is_mirrored(seg) && !seg_is_raid01(seg),
+ "mirrored/raid10 to merge into, rejecting request");
+ RETURN_IF_ZERO(seg->meta_areas, "metadata LV areas");
- if (!seg_is_raid1(seg)) {
- log_error("%s is no RAID1 array - refusing to merge.",
- display_lvname(lv));
+ if (!_lv_is_raid_with_tracking(lv, &s)) {
+ log_error("%s is not a tracking LV.", display_lvname(lv));
return 0;
}
- if (!_lv_is_raid_with_tracking(lv, &tracking)) {
- log_error("%s is not a tracking LV.",
- display_lvname(lv));
+ if ((tracking = seg_lv(seg, s)) != image_lv) {
+ log_error("%s is not the tracking LV of %s but %s is.",
+ display_lvname(image_lv), display_lvname(lv), display_lvname(tracking));
return 0;
}
- if (tracking != image_lv) {
- log_error("%s is not the tracking LV of %s but %s is.",
- display_lvname(image_lv), display_lvname(lv), display_lvname(tracking));
+ /* Image LVs should not be resizable directly, but... */
+ /* HM FIXME: duplicating sub LVs can have different size! */
+ if (seg->len > image_lv->le_count) {
+ log_error(INTERNAL_ERROR "The image LV %s of %s has invalid size!",
+ display_lvname(image_lv), display_lvname(lv));
return 0;
}
@@ -2450,86 +3613,101 @@ int lv_raid_merge(struct logical_volume *image_lv)
display_lvname(image_lv));
}
- for (s = 0; s < seg->area_count; ++s)
- if (seg_lv(seg, s) == image_lv)
- meta_lv = seg_metalv(seg, s);
-
- if (!meta_lv) {
- log_error("Failed to find meta for %s in RAID array %s.",
- display_lvname(image_lv),
- display_lvname(lv));
+ if (!(meta_lv = seg_metalv(seg, s))) {
+ log_error("Failed to find metadata LV for %s in %s.",
+ display_lvname(image_lv), display_lvname(lv));
return 0;
}
- if (!deactivate_lv(vg->cmd, meta_lv)) {
- log_error("Failed to deactivate %s before merging.",
- display_lvname(meta_lv));
- return 0;
- }
+ image_lv->status |= (LVM_WRITE|RAID_IMAGE);
+ lv_set_hidden(image_lv);
- if (!deactivate_lv(vg->cmd, image_lv)) {
- log_error("Failed to deactivate %s before merging.",
+ if (!lv_update_and_reload(lv))
+ return_0;
+
+ /* Suspend+resume the tracking LV to remove its devnode */
+ if (!suspend_lv(lv->vg->cmd, image_lv) || !resume_lv(lv->vg->cmd, image_lv)) {
+ log_error("Failed to suspend+resume %s after committing changes",
display_lvname(image_lv));
return 0;
}
- lv_set_hidden(image_lv);
- image_lv->status |= (lv->status & LVM_WRITE);
- image_lv->status |= RAID_IMAGE;
+ log_print_unless_silent("LV %s successfully merged back into %s",
+ display_lvname(image_lv), display_lvname(lv));
+ return 1;
+}
- if (!lv_update_and_reload(lv))
- return_0;
+/*
+ * Adjust all data sub LVs of @lv to mirror
+ * or raid name depending on @direction
+ * adjusting their LV status
+ */
+enum mirror_raid_conv { mirror_to_raid1 = 0, raid1_to_mirror };
+static int _adjust_data_lvs(struct logical_volume *lv, enum mirror_raid_conv direction)
+{
+ uint32_t s;
+ char *p;
+ struct lv_segment *seg;
+ static struct {
+ char type_char;
+ uint64_t set_flag;
+ uint64_t reset_flag;
+ } conv[] = {
+ { 'r', RAID_IMAGE , MIRROR_IMAGE },
+ { 'm', MIRROR_IMAGE, RAID_IMAGE }
+ };
+
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+
+ for (s = 0; s < seg->area_count; ++s) {
+ struct logical_volume *dlv = seg_lv(seg, s);
+
+ if (!(p = _strstr_strings(dlv->name, "_mimage_", "_rimage_", NULL))) {
+ log_error(INTERNAL_ERROR "name lags image part");
+ return 0;
+ }
+
+ *(p + 1) = conv[direction].type_char;
+ log_debug_metadata("data LV renamed to %s", dlv->name);
+
+ dlv->status &= ~conv[direction].reset_flag;
+ dlv->status |= conv[direction].set_flag;
+ }
- log_print_unless_silent("%s/%s successfully merged back into %s/%s",
- vg->name, image_lv->name, vg->name, lv->name);
return 1;
}
/*
- * Convert @lv with "mirror" mapping to "raid1".
+ * Convert @lv with "mirror" mapping to "raid1"
+ * optionally changing number of data_copies
+ * defined by @new_image_count.
*
* Returns: 1 on success, 0 on failure
*/
-static int _convert_mirror_to_raid1(struct logical_volume *lv,
- const struct segment_type *new_segtype)
+static int _convert_mirror_to_raid(struct logical_volume *lv,
+ const struct segment_type *new_segtype,
+ uint32_t new_image_count,
+ uint32_t new_region_size,
+ struct dm_list *allocate_pvs,
+ int update_and_reload,
+ struct dm_list *removal_lvs)
{
- uint32_t s;
- struct lv_segment *seg = first_seg(lv);
- struct lv_list lvl_array[seg->area_count], *lvl;
- struct dm_list meta_lvs;
- struct lv_segment_area *meta_areas;
- char *new_name;
+ struct lv_segment *seg;
- dm_list_init(&meta_lvs);
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, (seg = first_seg(lv)), new_segtype);
- if (!(meta_areas = dm_pool_zalloc(lv->vg->vgmem,
- lv_mirror_count(lv) * sizeof(*meta_areas)))) {
- log_error("Failed to allocate meta areas memory.");
+ if (!seg_is_mirrored(seg)) {
+ log_error(INTERNAL_ERROR "mirror conversion supported only");
return 0;
}
-#if 0
- /* HM FIXME: has been called in lvconvert already */
- if (!archive(lv->vg))
- return_0;
-#endif
- for (s = 0; s < seg->area_count; s++) {
- log_debug_metadata("Allocating new metadata LV for %s",
- seg_lv(seg, s)->name);
- if (!_alloc_rmeta_for_lv(seg_lv(seg, s), &(lvl_array[s].lv))) {
- log_error("Failed to allocate metadata LV for %s in %s",
- seg_lv(seg, s)->name, lv->name);
- return 0;
- }
- dm_list_add(&meta_lvs, &(lvl_array[s].list));
- }
-
- log_debug_metadata("Clearing newly allocated metadata LVs");
- if (!_clear_lvs(&meta_lvs)) {
- log_error("Failed to initialize metadata LVs");
+ new_image_count = new_image_count ?: seg->area_count;
+ if (new_image_count < 2) {
+ log_error("can't reduce to lees than 2 data_copies");
return 0;
}
+ /* Remove any mirror log */
if (seg->log_lv) {
log_debug_metadata("Removing mirror log, %s", seg->log_lv->name);
if (!remove_mirror_log(lv->vg->cmd, lv, NULL, 0)) {
@@ -2538,128 +3716,110 @@ static int _convert_mirror_to_raid1(struct logical_volume *lv,
}
}
- seg->meta_areas = meta_areas;
- s = 0;
- dm_list_iterate_items(lvl, &meta_lvs) {
- log_debug_metadata("Adding %s to %s", lvl->lv->name, lv->name);
-
- /* Images are known to be in-sync */
- lvl->lv->status &= ~LV_REBUILD;
- first_seg(lvl->lv)->status &= ~LV_REBUILD;
- lv_set_hidden(lvl->lv);
-
- if (!set_lv_segment_area_lv(seg, s, lvl->lv, 0,
- lvl->lv->status)) {
- log_error("Failed to add %s to %s",
- lvl->lv->name, lv->name);
- return 0;
- }
+ /* Rename all data sub LVs from "*_mimage_*" to "*_rimage_*" and set their status */
+ log_debug_metadata("Adjust data LVs of %s", display_lvname(lv));
+ if (!_adjust_data_lvs(lv, mirror_to_raid1))
+ return 0;
- s++;
- }
+ seg->region_size = new_region_size;
- for (s = 0; s < seg->area_count; ++s) {
- if (!(new_name = _generate_raid_name(lv, "rimage", s)))
- return_0;
- log_debug_metadata("Renaming %s to %s", seg_lv(seg, s)->name, new_name);
- seg_lv(seg, s)->name = new_name;
- seg_lv(seg, s)->status &= ~MIRROR_IMAGE;
- seg_lv(seg, s)->status |= RAID_IMAGE;
- }
+ /* Allocate metadata devs for all mimage ones (writes+commits metadata) */
+ if (!_alloc_and_add_rmeta_devs_for_lv(lv, allocate_pvs))
+ return 0;
init_mirror_in_sync(1);
- log_debug_metadata("Setting new segtype for %s", lv->name);
seg->segtype = new_segtype;
- lv->status &= ~MIRROR;
- lv->status &= ~MIRRORED;
+ seg->data_copies = new_image_count;
+ lv->status &= ~(MIRROR | MIRRORED);
lv->status |= RAID;
seg->status |= RAID;
- if (!lv_update_and_reload(lv))
- return_0;
+ /* Change image pair count to requested # of images */
+ if (new_image_count != seg->area_count) {
+ log_debug_metadata("Changing image count to %u on %s",
+ new_image_count, display_lvname(lv));
+ if (!_lv_change_image_count(lv, new_image_count, allocate_pvs, removal_lvs))
+ return 0;
+ }
- return 1;
+ return update_and_reload ? _lv_update_reload_fns_reset_eliminate_lvs(lv, removal_lvs, NULL) : 1;
}
/*
- * Convert @lv with "raid1" mapping to "mirror".
+ * Convert @lv with "raid1" mapping to "mirror"
+ * optionally changing number of data_copies
+ * defined by @new_image_count.
*
* Returns: 1 on success, 0 on failure
*/
static int _convert_raid1_to_mirror(struct logical_volume *lv,
const struct segment_type *new_segtype,
- struct dm_list *allocatable_pvs)
+ uint32_t new_image_count,
+ uint32_t new_region_size,
+ struct dm_list *allocate_pvs,
+ int update_and_reload,
+ struct dm_list *removal_lvs)
{
- uint32_t s;
- uint32_t image_count = lv_raid_image_count(lv);
- char *new_name;
- struct lv_segment *seg = first_seg(lv);
- struct dm_list removal_mlvs;
- struct lv_list *lvl_array;
+ struct lv_segment *seg;
+
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, (seg = first_seg(lv)), new_segtype);
- if (image_count > DEFAULT_MIRROR_MAX_IMAGES) {
- log_error("Unable to convert mirror LV %s/%s with %u images",
- lv->vg->name, lv->name, image_count);
- log_error("Please reduce to the maximum of %u images with \"lvconvert -m%u %s/%s\"",
- DEFAULT_MIRROR_MAX_IMAGES, DEFAULT_MIRROR_MAX_IMAGES - 1, lv->vg->name, lv->name);
+ if (!seg_is_raid1(seg)) {
+ log_error(INTERNAL_ERROR "raid1 conversion supported only");
return 0;
}
- dm_list_init(&removal_mlvs);
-
- /* Allocate for number of metadata LVs */
- if (!(lvl_array = dm_pool_alloc(lv->vg->vgmem, seg->area_count * sizeof(*lvl_array))))
- return_0;
-
- /* Remove rmeta LVs */
- if (seg->meta_areas) {
- for (s = 0; s < seg->area_count; s++) {
- struct logical_volume *mlv = lvl_array[s].lv = seg_metalv(seg, s);
-
- dm_list_add(&removal_mlvs, &lvl_array[s].list);
- if (!(new_name = _generate_raid_name(mlv, "extracted", -1)))
- return_0;
- log_debug_metadata("Extracting and Renaming %s to %s", mlv->name, new_name);
- if (!_remove_and_set_error_target(mlv, seg))
- return 0;
-
- mlv->name = new_name;
- }
-
- seg->meta_areas = NULL;
+ if ((new_image_count = new_image_count ?: seg->area_count) < 2) {
+ log_error("can't reduce to lees than 2 data_copies");
+ return 0;
}
- /* Add mirrored mirror_log LVs */
- if (!add_mirror_log(lv->vg->cmd, lv, 1, seg->region_size, allocatable_pvs, lv->vg->alloc)) {
- log_error("Unable to add mirror log to %s/%s", lv->vg->name, lv->name);
+ if (!_check_max_mirror_devices(new_image_count)) {
+ log_error("Unable to convert %s LV %s with %u images to %s",
+ SEG_TYPE_NAME_RAID1, display_lvname(lv), new_image_count, SEG_TYPE_NAME_MIRROR);
+ log_error("At least reduce to the maximum of %u images with \"lvconvert -m%u %s\"",
+ DEFAULT_MIRROR_MAX_IMAGES, DEFAULT_MIRROR_MAX_IMAGES - 1, display_lvname(lv));
return 0;
}
- for (s = 0; s < seg->area_count; ++s) {
- struct logical_volume *dlv = seg_lv(seg, s);
-
- if (!(new_name = _generate_raid_name(lv, "mimage", s)))
- return_0;
- log_debug_metadata("Renaming %s to %s", dlv->name, new_name);
- dlv->name = new_name;
- dlv->status &= ~RAID_IMAGE;
- dlv->status |= MIRROR_IMAGE;
+ /* Change image pair count to requested # of images */
+ if (new_image_count != seg->area_count) {
+ log_debug_metadata("Changing image count to %u on %s",
+ new_image_count, display_lvname(lv));
+ if (!_lv_change_image_count(lv, new_image_count, allocate_pvs, removal_lvs))
+ return 0;
}
+ /* Remove rmeta LVs */
+ log_debug_metadata("Extracting and renaming metadata LVs");
+ if (!_extract_image_component_list(seg, RAID_META, 0, removal_lvs))
+ return 0;
+
+ seg->meta_areas = NULL;
+
+ /* Rename all data sub LVs from "*_rimage_*" to "*_mimage_*" and set their status */
+ log_debug_metadata("Adjust data LVs of %s", display_lvname(lv));
+ if (!_adjust_data_lvs(lv, raid1_to_mirror))
+ return 0;
- log_debug_metadata("Setting new segtype %s for %s", new_segtype->name, lv->name);
seg->segtype = new_segtype;
- lv->status |= (MIRROR | MIRRORED);
+ seg->region_size = new_region_size;
+ seg->data_copies = new_image_count;
lv->status &= ~RAID;
seg->status &= ~RAID;
+ lv->status |= (MIRROR | MIRRORED);
- if (!lv_update_and_reload(lv))
- return_0;
+PFL();
+ /* Add mirror_log LV (should happen with image allocation) */
+ if (!add_mirror_log(lv->vg->cmd, lv, 1, seg->region_size, allocate_pvs, lv->vg->alloc)) {
+ log_error("Unable to add mirror log to %s", display_lvname(lv));
+ return 0;
+ }
- /* Eliminate the residual LVs, write VG, commit it and take a backup */
- return _eliminate_extracted_lvs(lv->vg, &removal_mlvs);
+PFL();
+ return update_and_reload ? _lv_update_reload_fns_reset_eliminate_lvs(lv, removal_lvs, NULL) : 1;
}
/* BEGIN: striped -> raid0 conversion */
@@ -2668,39 +3828,42 @@ static int _convert_raid1_to_mirror(struct logical_volume *lv,
*
* Helper convert striped to raid0
*
- * For @lv, empty hidden LVs in @new_data_lvs have been created by the caller.
+ * For @lv, empty hidden LVs in @data_lvs have been created by the caller.
*
* All areas from @lv segments are being moved to new
- * segments allocated with area_count=1 for @new_data_lvs.
+ * segments allocated with area_count=1 for @data_lvs.
*
* Returns: 1 on success, 0 on failure
*/
static int _striped_to_raid0_move_segs_to_raid0_lvs(struct logical_volume *lv,
- struct dm_list *new_data_lvs)
+ struct dm_list *data_lvs)
{
- uint32_t area_idx = 0, le;
+ uint32_t s = 0, le;
struct logical_volume *dlv;
- struct lv_segment *seg_from, *seg_new, *tmp;
- struct dm_list *l;
+ struct lv_segment *seg_from, *seg_new;
+ struct lv_list *lvl;
struct segment_type *segtype;
+ RETURN_IF_ZERO(lv, "lv argument");
+ RETURN_IF_ZERO(data_lvs, "data LVs list pointer argument");
+ RETURN_IF_NONZERO(dm_list_empty(data_lvs), "data LVs listed");
+
if (!(segtype = get_segtype_from_string(lv->vg->cmd, SEG_TYPE_NAME_STRIPED)))
return_0;
- dm_list_iterate(l, new_data_lvs) {
- dlv = (dm_list_item(l, struct lv_list))->lv;
-
+ dm_list_iterate_items(lvl, data_lvs) {
+ dlv = lvl->lv;
le = 0;
dm_list_iterate_items(seg_from, &lv->segments) {
uint64_t status = RAID | SEG_RAID | (seg_from->status & (LVM_READ | LVM_WRITE));
/* Allocate a segment with one area for each segment in the striped LV */
if (!(seg_new = alloc_lv_segment(segtype, dlv,
- le, seg_from->area_len - seg_from->reshape_len,
- seg_from->reshape_len, status,
- seg_from->stripe_size, NULL, 1 /* area_count */,
- seg_from->area_len, seg_from->chunk_size,
- 0 /* region_size */, 0, NULL)))
+ le, seg_from->area_len,
+ 0 /* reshape_len */, status,
+ 0 /* stripe_size */, NULL, 1 /* area_count */,
+ seg_from->area_len, 1 /* data_copies */,
+ 0 /* chunk_size */, 0 /* region_size */, 0, NULL)))
return_0;
seg_type(seg_new, 0) = AREA_UNASSIGNED;
@@ -2708,70 +3871,59 @@ static int _striped_to_raid0_move_segs_to_raid0_lvs(struct logical_volume *lv,
le += seg_from->area_len;
/* Move the respective area across to our new segment */
- if (!move_lv_segment_area(seg_new, 0, seg_from, area_idx))
+ if (!move_lv_segment_area(seg_new, 0, seg_from, s))
return_0;
}
- /* Adjust le count and lv size */
+ /* Adjust le count and LV size */
dlv->le_count = le;
dlv->size = (uint64_t) le * lv->vg->extent_size;
- area_idx++;
+ s++;
}
/* Remove the empty segments from the striped LV */
- dm_list_iterate_items_safe(seg_from, tmp, &lv->segments)
- dm_list_del(&seg_from->list);
+ dm_list_init(&lv->segments);
return 1;
}
/*
- * Helper convert striped to raid0
+ * HM Helper: check that @lv has one stripe zone, i.e. same stripe count in all of its segments
*
- * Add list of data device in @new_data_devs to @lv
- *
- * Returns: 1 on success, 0 on failure
+ * Superfluous if different stripe zones will ever be supported
*/
-static int _striped_to_raid0_alloc_raid0_segment(struct logical_volume *lv,
- uint32_t area_count,
- struct lv_segment *seg)
-{
- struct lv_segment *seg_new;
- struct segment_type *segtype = get_segtype_from_string(lv->vg->cmd, SEG_TYPE_NAME_RAID0);
-
-PFLA("seg->stripe_size=%u seg->chunk_size=%u", seg->stripe_size, seg->chunk_size);
-
- /* Allocate single segment to hold the image component areas */
- if (!(seg_new = alloc_lv_segment(segtype, lv,
- 0 /* le */, lv->le_count /* len */,
- 0 /* reshape_len */,
- seg->status,
- seg->stripe_size, NULL /* log_lv */,
- area_count, lv->le_count, seg->chunk_size,
- 0 /* seg->region_size */, 0u /* extents_copied */ ,
- NULL /* pvmove_source_seg */)))
- return_0;
+static int _lv_has_one_stripe_zone(struct logical_volume *lv)
+{
+ struct lv_segment *seg;
+ unsigned area_count;
-PFLA("seg_new->stripe_size=%u seg_new->chunk_size=%u", seg_new->stripe_size, seg_new->chunk_size);
- /* Add new segment to LV */
- dm_list_add(&lv->segments, &seg_new->list);
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+
+ area_count = seg->area_count;
+ dm_list_iterate_items(seg, &lv->segments)
+ if (seg->area_count != area_count)
+ return 0;
return 1;
}
-/* Check that @lv has one stripe one, i.e. same stripe count in all of its segements */
-static int _lv_has_one_stripe_zone(struct logical_volume *lv)
+/* HM Helper: check that @lv has segments with just @areas */
+static int _lv_has_segments_with_n_areas(struct logical_volume *lv, unsigned areas)
{
struct lv_segment *seg;
- unsigned area_count = first_seg(lv)->area_count;
+
+ RETURN_IF_ZERO(lv, "lv argument");
dm_list_iterate_items(seg, &lv->segments)
- if (seg->area_count != area_count)
+ if (seg->area_count != areas) {
+ log_error("Called on %s with segments != %u area", display_lvname(lv), areas);
return 0;
+ }
return 1;
}
+
/*
* HM
*
@@ -2785,123 +3937,161 @@ static int _lv_has_one_stripe_zone(struct logical_volume *lv)
*
* Returns: 1 on success, 0 on failure
*/
-static int _convert_striped_to_raid0(struct logical_volume *lv,
- int alloc_metadata_devs,
- int update_and_reload)
+static struct lv_segment *_convert_striped_to_raid0(struct logical_volume *lv,
+ int alloc_metadata_devs,
+ int update_and_reload,
+ struct dm_list *allocate_pvs)
{
- struct lv_segment *data_lv_seg, *seg = first_seg(lv);
- struct dm_list new_meta_lvs;
- struct dm_list new_data_lvs;
- struct dm_list *l;
- unsigned area_count = seg->area_count;
+ uint32_t area_count, area_len = 0, stripe_size;
+ struct lv_segment *seg, *raid0_seg;
+ struct segment_type *segtype;
+ struct dm_list data_lvs;
+
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+ RETURN_IF_SEG_AREA_COUNT_FALSE(seg, (area_count = seg->area_count) - 1);
+ RETURN_IF_ZERO(allocate_pvs || dm_list_empty(allocate_pvs), "PVs to allocate");
if (!seg_is_striped(seg)) {
- log_error("Cannot convert non-striped LV %s/%s to raid0", lv->vg->name, lv->name);
- return 0;
+ log_error(INTERNAL_ERROR "Cannot convert non-%s LV %s to %s",
+ SEG_TYPE_NAME_STRIPED, display_lvname(lv), SEG_TYPE_NAME_RAID0);
+ return NULL;
}
- /* Check for not yet supported varying area_count on multi-segment striped LVs */
+ dm_list_iterate_items(seg, &lv->segments)
+ area_len += seg->area_len;
+
+ seg = first_seg(lv);
+ stripe_size = seg->stripe_size;
+
+ /* Check for not (yet) supported varying area_count on multi-segment striped LVs */
if (!_lv_has_one_stripe_zone(lv)) {
- log_error("Cannot convert striped LV %s/%s with varying stripe count to raid0",
- lv->vg->name, lv->name);
- return 0;
+ log_error("Cannot convert striped LV %s with varying stripe count to raid0",
+ display_lvname(lv));
+ return NULL;
}
- dm_list_init(&new_meta_lvs);
- dm_list_init(&new_data_lvs);
-
- /* FIXME: insert_layer_for_lv() not suitable */
- /* Allocate empty rimage components in order to be able to support multi-segment "striped" LVs */
- if (!_alloc_image_components(lv, 0, NULL, area_count, NULL, &new_data_lvs)) {
- log_error("Failed to allocate empty image components for raid0 LV %s/%s.", lv->vg->name, lv->name);
- return_0;
+ if (!seg->stripe_size ||
+ (seg->stripe_size & (seg->stripe_size - 1))) {
+ log_error("Cannot convert striped LV %s with non-power of 2 stripe size %u",
+ display_lvname(lv), seg->stripe_size);
+ log_error("Please use \"lvconvert --duplicate ...\"");
}
- /* Image components are being allocated with LV_REBUILD preset and raid0 does not need it */
- dm_list_iterate(l, &new_data_lvs)
- (dm_list_item(l, struct lv_list))->lv->status &= ~LV_REBUILD;
+ if (!(segtype = get_segtype_from_flag(lv->vg->cmd, SEG_RAID0)))
+ return_NULL;
- /* Move the AREA_PV areas across to the new rimage components */
- if (!_striped_to_raid0_move_segs_to_raid0_lvs(lv, &new_data_lvs)) {
- log_error("Failed to insert linear LVs underneath %s/%s.", lv->vg->name, lv->name);
- return_0;
+ /* Allocate empty rimage components */
+ dm_list_init(&data_lvs);
+ if (!_alloc_image_components(lv, area_count, NULL, &data_lvs, NULL)) {
+ log_error("Failed to allocate empty image components for raid0 LV %s.",
+ display_lvname(lv));
+ return_NULL;
}
- /* Allocate new top-level LV segment using credentials of first ne data lv for stripe_size... */
- data_lv_seg = first_seg(dm_list_item(dm_list_first(&new_data_lvs), struct lv_list)->lv);
- data_lv_seg->stripe_size = seg->stripe_size;
- if (!_striped_to_raid0_alloc_raid0_segment(lv, area_count, data_lv_seg)) {
- log_error("Failed to allocate new raid0 segement for LV %s/%s.", lv->vg->name, lv->name);
- return_0;
+ /* Move the AREA_PV areas across to the new rimage components; empties lv->segments */
+ if (!_striped_to_raid0_move_segs_to_raid0_lvs(lv, &data_lvs)) {
+ log_error("Failed to insert linear LVs underneath %s.", display_lvname(lv));
+ return_NULL;
}
- /* Get reference to new allocated raid0 segment _before_ adding the data lvs */
- seg = first_seg(lv);
+ /*
+ * Allocate single segment to hold the image component
+ * areas based on the first data LVs properties derived
+ * from the first new raid0 LVs first segment
+ */
+ seg = first_seg(dm_list_item(dm_list_first(&data_lvs), struct lv_list)->lv);
+ if (!(raid0_seg = alloc_lv_segment(segtype, lv,
+ 0 /* le */, lv->le_count /* len */,
+ 0 /* reshape_len */, seg->status,
+ stripe_size, NULL /* log_lv */,
+ area_count, area_len,
+ 1 /* data_copies */, 0 /* chunk_size */,
+ 0 /* seg->region_size */, 0u /* extents_copied */ ,
+ NULL /* pvmove_source_seg */))) {
+ log_error("Failed to allocate new raid0 segement for LV %s.", display_lvname(lv));
+ return_NULL;
+ }
- /* Add data lvs to the top-level lvs segment */
- if (!_add_image_component_list(seg, 1, 0, &new_data_lvs, 0))
- return 0;
+ /* Add new single raid0 segment to emptied LV segments list */
+ dm_list_add(&lv->segments, &raid0_seg->list);
- if (!(seg->segtype = get_segtype_from_string(lv->vg->cmd, SEG_TYPE_NAME_RAID0)))
- return 0;
+ /* Add data LVs to the top-level LVs segment; resets LV_REBUILD flag on them */
+ if (!_add_image_component_list(raid0_seg, 1, 0, &data_lvs, 0))
+ return NULL;
lv->status |= RAID;
- /* Allocate metadata lvs if requested */
- if (alloc_metadata_devs) {
- if (!_raid0_add_or_remove_metadata_lvs(lv, update_and_reload))
- return 0;
+ /* Allocate metadata LVs if requested */
+ if (alloc_metadata_devs && !_raid0_add_or_remove_metadata_lvs(lv, 0, allocate_pvs, NULL))
+ return NULL;
- } else if (update_and_reload &&
- !lv_update_and_reload(lv))
- return 0;
+ if (update_and_reload && !lv_update_and_reload(lv))
+ return NULL;
- return 1;
+ return raid0_seg;
}
/* END: striped -> raid0 conversion */
-
/* BEGIN: raid0 -> striped conversion */
-
-/* HM Helper: walk the segment lvs of a segment @seg and find smallest area at offset @area_le */
-static uint32_t _smallest_segment_lvs_area(struct lv_segment *seg, uint32_t area_le)
+/* HM Helper: walk the segment LVs of a segment @seg and find smallest area at offset @area_le */
+static uint32_t _smallest_segment_lvs_area(struct lv_segment *seg,
+ uint32_t area_le, uint32_t *area_len)
{
- uint32_t r = ~0, s;
+ uint32_t s;
+
+ RETURN_IF_ZERO(seg, "lv segment argument");
+ RETURN_IF_NONZERO(area_le >= seg->area_len, "area logical extent argument");
+ RETURN_IF_ZERO(area_len, "area length argument");
+
+ *area_len = ~0U;
- /* Find smallest segment of each of the data image lvs at offset area_le */
+ /* Find smallest segment of each of the data image LVs at offset area_le */
for (s = 0; s < seg->area_count; s++) {
struct lv_segment *seg1 = find_seg_by_le(seg_lv(seg, s), area_le);
- r = min(r, seg1->le + seg1->len - area_le);
+ if (!seg1) {
+ log_error(INTERNAL_ERROR "Segment at logical extent %u not found in LV %s!",
+ area_le, display_lvname(seg_lv(seg, s)));
+ return 0;
+ }
+
+ *area_len = min(*area_len, seg1->len);
+
+PFLA("Segment at logical extent %u, len=%u found in LV %s, area_len=%u!",
+area_le, seg1->len, display_lvname(seg_lv(seg, s)), *area_len);
+
}
- return r;
+ return 1;
}
-/* HM Helper: Split segments in segment LVs in all areas of @seg at offset @area_le) */
+/* HM Helper: Split segments in segment LVs in all areas of @seg at offset @area_le */
static int _split_area_lvs_segments(struct lv_segment *seg, uint32_t area_le)
{
uint32_t s;
- /* Make sure that there's segments starting at area_le all data LVs */
- if (area_le < seg_lv(seg, 0)->le_count)
- for (s = 0; s < seg->area_count; s++)
- if (!lv_split_segment(seg_lv(seg, s), area_le)) {
- log_error(INTERNAL_ERROR "splitting data lv segment");
- return_0;
- }
+ RETURN_IF_ZERO(seg, "lv segment argument");
+
+ /* Make sure that there's segments starting at area_le in all data LVs */
+ for (s = 0; s < seg->area_count; s++)
+ if (area_le < seg_lv(seg, s)->le_count &&
+ !lv_split_segment(seg_lv(seg, s), area_le))
+ return_0;
return 1;
}
/* HM Helper: allocate a new striped segment and add it to list @new_segments */
-static int _alloc_new_striped_segment(struct logical_volume *lv,
- uint32_t le, uint32_t area_len,
- struct dm_list *new_segments)
+static int _alloc_and_add_new_striped_segment(struct logical_volume *lv,
+ uint32_t le, uint32_t area_len,
+ struct dm_list *new_segments)
{
- struct lv_segment *seg = first_seg(lv), *new_seg;
+ struct lv_segment *seg, *new_seg;
struct segment_type *striped_segtype;
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+ RETURN_IF_ZERO(new_segments, "new segments argument");
+
if (!(striped_segtype = get_segtype_from_string(lv->vg->cmd, SEG_TYPE_NAME_STRIPED)))
return_0;
@@ -2909,7 +4099,7 @@ static int _alloc_new_striped_segment(struct logical_volume *lv,
if (!(new_seg = alloc_lv_segment(striped_segtype, lv, le, area_len * seg->area_count,
0 /* seg->reshape_len */, seg->status & ~RAID,
seg->stripe_size, NULL, seg->area_count,
- area_len, seg->chunk_size, 0, 0, NULL)))
+ area_len, 1 /* data_copies */ , seg->chunk_size, 0, 0, NULL)))
return_0;
dm_list_add(new_segments, &new_seg->list);
@@ -2925,8 +4115,8 @@ static int _alloc_new_striped_segment(struct logical_volume *lv,
* moved to @new_segments allocated.
*
* The metadata+data component LVs are being mapped to an
- * error target and linked to @removal_lvs for callers
- * disposal.
+ * error target and linked to @removal_lvs for disposal
+ * by the caller.
*
* Returns: 1 on success, 0 on failure
*/
@@ -2934,12 +4124,10 @@ static int _raid0_to_striped_retrieve_segments_and_lvs(struct logical_volume *lv
struct dm_list *removal_lvs)
{
uint32_t s, area_le, area_len, le;
- struct lv_segment *seg = first_seg(lv), *seg_to;
+ struct lv_segment *data_seg, *seg, *seg_to;
struct dm_list new_segments;
- struct lv_list *lvl_array, *lvl;
- if (!(lvl_array = dm_pool_alloc(lv->vg->vgmem, 2 * seg->area_count * sizeof(*lvl_array))))
- return_0;
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
dm_list_init(&new_segments);
@@ -2949,22 +4137,22 @@ static int _raid0_to_striped_retrieve_segments_and_lvs(struct logical_volume *lv
*/
area_le = le = 0;
while (le < lv->le_count) {
- area_len = _smallest_segment_lvs_area(seg, area_le);
+ if (!_smallest_segment_lvs_area(seg, area_le, &area_len))
+ return_0;
+
+PFLA("le=%u area_len=%u area_le=%u area_count=%u", le, area_len, area_le, seg->area_count);
area_le += area_len;
- if (!_split_area_lvs_segments(seg, area_le))
- return 0;
- if (!_alloc_new_striped_segment(lv, le, area_len, &new_segments))
+ if (!_split_area_lvs_segments(seg, area_le) ||
+ !_alloc_and_add_new_striped_segment(lv, le, area_len, &new_segments))
return 0;
- le += area_len * seg->area_count;
+ le = area_le * seg->area_count;
}
/* Now move the prepared split areas across to the new segments */
area_le = 0;
dm_list_iterate_items(seg_to, &new_segments) {
- struct lv_segment *data_seg;
-
for (s = 0; s < seg->area_count; s++) {
data_seg = find_seg_by_le(seg_lv(seg, s), area_le);
@@ -2977,36 +4165,19 @@ static int _raid0_to_striped_retrieve_segments_and_lvs(struct logical_volume *lv
area_le += data_seg->len;
}
- /* Loop the areas and set any metadata LVs and all data LVs to error segments and remove them */
- for (s = 0; s < seg->area_count; s++) {
- /* If any metadata lvs -> remove them */
- lvl = &lvl_array[seg->area_count + s];
- if (seg->meta_areas &&
- (lvl->lv = seg_metalv(seg, s))) {
- dm_list_add(removal_lvs, &lvl->list);
- if (!_remove_and_set_error_target(lvl->lv, seg))
- return_0;
- }
-
-
- lvl = &lvl_array[s];
- lvl->lv = seg_lv(seg, s);
- dm_list_add(removal_lvs, &lvl->list);
- if (!_remove_and_set_error_target(lvl->lv, seg))
- return_0;
- }
+ /* Extract any metadata LVs and the empty data LVs for disposal by the caller */
+ log_debug_metadata("Extracting image comonent pairs");
+ if ((seg->meta_areas && !_extract_image_component_list(seg, RAID_META, 0, removal_lvs)) ||
+ !_extract_image_component_list(seg, RAID_IMAGE, 0, removal_lvs))
+ return_0;
/*
* Remove the one segment holding the image component areas
- * from the top-level LV and add the new segments to it
+ * from the top-level LV, then add the new segments to it
*/
dm_list_del(&seg->list);
dm_list_splice(&lv->segments, &new_segments);
- lv->status &= ~RAID;
- lv->status |= LVM_READ | LVM_WRITE;
- lv_set_visible(lv);
-
return 1;
}
@@ -3019,40 +4190,52 @@ static int _raid0_to_striped_retrieve_segments_and_lvs(struct logical_volume *lv
*
* Returns: 1 on success, 0 on failure
*
- * HM FIXME: check last_seg(lv)->reshape_len and reduce LV aprropriately
*/
static int _convert_raid0_to_striped(struct logical_volume *lv,
- const struct segment_type *new_segtype)
+ int update_and_reload,
+ struct dm_list *removal_lvs)
{
- struct lv_segment *seg = first_seg(lv);
- struct dm_list removal_lvs;
-
- if (!new_segtype)
- return 1;
+ struct lv_segment *seg;
- dm_list_init(&removal_lvs);
-PFLA("seg->segtype=%s", seg->segtype->name);
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
/* Caller should ensure, but... */
- if (!seg_is_any_raid0(seg) &&
- !((segtype_is_linear(new_segtype) && seg->area_count == 1) ||
- segtype_is_striped(new_segtype))) {
- log_error(INTERNAL_ERROR "Can't cope with %s -> %s", seg->segtype->name, new_segtype->name);
+ if (!seg_is_any_raid0(seg)) {
+ log_error(INTERNAL_ERROR "Cannot convert non-%s LV %s to %s",
+ SEG_TYPE_NAME_RAID0, display_lvname(lv), SEG_TYPE_NAME_STRIPED);
return 0;
}
-PFL();
+
+ /* Reshape space should be freed already, but... */
+ if (!_lv_free_reshape_space(lv)) {
+ log_error(INTERNAL_ERROR "Failed to free reshape space of %s", display_lvname(lv));
+ return 0;
+ }
+
+ /* Remove metadata devices */
+ if (seg_is_raid0_meta(seg) &&
+ !_raid0_add_or_remove_metadata_lvs(lv, 0 /* update_and_reload */, NULL, removal_lvs))
+ return_0;
+
/* Move the AREA_PV areas across to new top-level segments of type "striped" */
- if (!_raid0_to_striped_retrieve_segments_and_lvs(lv, &removal_lvs)) {
+ if (!_raid0_to_striped_retrieve_segments_and_lvs(lv, removal_lvs)) {
log_error("Failed to retrieve raid0 segments from %s.", lv->name);
return_0;
}
-PFL();
- if (!lv_update_and_reload(lv))
+
+ lv->status &= ~RAID;
+
+ if (!(first_seg(lv)->segtype = get_segtype_from_string(lv->vg->cmd, SEG_TYPE_NAME_STRIPED)))
return_0;
-PFL();
- /* Eliminate the residual LVs, write VG, commit it and take a backup */
- return _eliminate_extracted_lvs(lv->vg, &removal_lvs);
+ if (update_and_reload) {
+ if (!lv_update_and_reload(lv))
+ return_0;
+
+ /* Eliminate the residual LVs, write VG, commit it and take a backup */
+ return _eliminate_extracted_lvs(lv->vg, removal_lvs);
+ }
+ return 1;
}
/* END: raid0 -> striped conversion */
@@ -3076,80 +4259,642 @@ PFL();
static int _reshaped_state(struct logical_volume *lv, const unsigned dev_count,
unsigned *devs_health, unsigned *devs_in_sync)
{
- unsigned d;
uint32_t kernel_devs;
- char *raid_health;
- *devs_health = *devs_in_sync = 0;
+ RETURN_IF_ZERO(lv, "lv argument");
+ RETURN_IF_ZERO(devs_health, "sevices health argument");
+ RETURN_IF_ZERO(devs_in_sync, "sevices in-sync argument");
- if (!lv_raid_dev_count(lv, &kernel_devs)) {
- log_error("Failed to get device count");
- return_0;
+ if (!_get_dev_health(lv, &kernel_devs, devs_health, devs_in_sync, NULL))
+ return 0;
+
+PFLA("kernel_devs=%u dev_count=%u", kernel_devs, dev_count);
+ if (kernel_devs == dev_count)
+ return 1;
+
+ return kernel_devs < dev_count ? 2 : 3;
+}
+
+/*
+ * Return new length for @lv based on @old_image_count and @new_image_count in @*len
+ *
+ * Subtracts any reshape space and provide data lenght only!
+ */
+static int _lv_reshape_get_new_len(struct logical_volume *lv,
+ uint32_t old_image_count, uint32_t new_image_count,
+ uint32_t *len)
+{
+ struct lv_segment *seg;
+ uint32_t di_old, di_new;
+ uint32_t old_lv_reshape_len, new_lv_reshape_len;
+ uint64_t r;
+
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+ RETURN_IF_ZERO(len, "reshape length pointer argument");
+ RETURN_IF_ZERO((di_old = _data_rimages_count(seg, old_image_count)), "old data images");
+ RETURN_IF_ZERO((di_new = _data_rimages_count(seg, new_image_count)), "new data images");
+
+ old_lv_reshape_len = _reshape_len_per_dev(seg) * _data_rimages_count(seg, old_image_count);
+ new_lv_reshape_len = _reshape_len_per_dev(seg) * _data_rimages_count(seg, new_image_count);
+
+ r = (uint64_t) lv->le_count;
+ r -= old_lv_reshape_len;
+ RETURN_IF_NONZERO((r = new_lv_reshape_len + r * di_new / di_old) > UINT_MAX, "proper new segment length!");
+ *len = (uint32_t) r;
+
+ return 1;
+}
+
+/*
+ * Extend/reduce size of @lv and it's first segment during reshape to @extents
+ */
+static int _reshape_adjust_to_size(struct logical_volume *lv,
+ uint32_t old_image_count, uint32_t new_image_count)
+{
+ struct lv_segment *seg;
+ uint32_t new_le_count;
+
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+
+ if (!_lv_reshape_get_new_len(lv, old_image_count, new_image_count, &new_le_count))
+ return 0;
+
+ /* Externally visible LV size w/o reshape space */
+ lv->le_count = seg->len = new_le_count;
+ lv->size = (lv->le_count - new_image_count * _reshape_len_per_dev(seg)) * lv->vg->extent_size;
+ /* seg->area_len does not change */
+
+ if (old_image_count < new_image_count) {
+ _lv_set_reshape_len(lv, _reshape_len_per_dev(seg));
+
+PFLA("lv->size=%s seg->len=%u seg->area_len=%u seg->area_count=%u old_image_count=%u new_image_count=%u", display_size(lv->vg->cmd, lv->size), seg->len, seg->area_len, seg->area_count, old_image_count, new_image_count);
+ /* Extend from raid1 mapping */
+ if (old_image_count == 2 &&
+ !seg->stripe_size)
+ seg->stripe_size = DEFAULT_STRIPESIZE;
+
+ /* Reduce to raid1 mapping */
+ } else if (new_image_count == 2)
+ seg->stripe_size = 0;
+
+ return 1;
+}
+
+/*
+ * HM Helper:
+ *
+ * Reshape: add disks to existing raid lv
+ *
+ */
+static int _raid_reshape_add_images(struct logical_volume *lv,
+ const struct segment_type *new_segtype,
+ int yes, int force,
+ uint32_t old_image_count, uint32_t new_image_count,
+ const unsigned new_stripes, const unsigned new_stripe_size,
+ struct dm_list *allocate_pvs)
+{
+ uint32_t grown_le_count, current_le_count, s;
+ struct volume_group *vg;
+ struct logical_volume *slv;
+ struct lv_segment *seg;
+ struct lvinfo info = { 0 };
+
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, (seg = first_seg(lv)), new_segtype);
+ vg = lv->vg;
+
+ if (!lv_info(vg->cmd, lv, 0, &info, 1, 0) && driver_version(NULL, 0)) {
+ log_error("lv_info failed: aborting");
+ return 0;
}
- if (!lv_raid_dev_health(lv, &raid_health)) {
- log_error("Failed to get device health");
- return_0;
+ if (seg->segtype != new_segtype)
+ log_print_unless_silent("Ignoring layout change on device adding reshape");
+PFL();
+ if (!_lv_reshape_get_new_len(lv, old_image_count, new_image_count, &grown_le_count))
+ return 0;
+PFL();
+ current_le_count = lv->le_count - _reshape_len_per_lv(lv);
+ grown_le_count -= _reshape_len_per_dev(seg) * _data_rimages_count(seg, new_image_count);
+ log_warn("WARNING: Adding stripes to active%s logical volume %s "
+ "will grow it from %u to %u extents!",
+ info.open_count ? " and open" : "",
+ display_lvname(lv), current_le_count, grown_le_count);
+ log_print_unless_silent("Run \"lvresize -l%u %s\" to shrink it or use the additional capacity",
+ current_le_count, display_lvname(lv));
+ if (!_yes_no_conversion(lv, new_segtype, yes, force, 0, new_image_count,
+ seg->data_copies, new_stripes, new_stripe_size))
+ return 0;
+PFL();
+ /* Allocate new image component pairs for the additional stripes and grow LV size */
+ log_debug_metadata("Adding %u data and metadata image LV pair%s to %s",
+ new_image_count - old_image_count, new_image_count - old_image_count > 1 ? "s" : "",
+ display_lvname(lv));
+ if (!_lv_change_image_count(lv, new_image_count, allocate_pvs, NULL))
+ return 0;
+PFL();
+ /* Reshape adding image component pairs -> change sizes/counters accordingly */
+ if (!_reshape_adjust_to_size(lv, old_image_count, new_image_count)) {
+ log_error("Failed to adjust LV %s to new size!", display_lvname(lv));
+ return 0;
}
+PFL();
+ /* Allocate forward out of place reshape space at the beginning of all data image LVs */
+ log_debug_metadata("(Re)allocating reshape space for %s", display_lvname(lv));
+ if (!_lv_alloc_reshape_space(lv, alloc_begin, NULL, allocate_pvs))
+ return 0;
- d = (unsigned) strlen(raid_health);
- while (d--) {
- (*devs_health)++;
- if (raid_health[d] == 'A')
- (*devs_in_sync)++;
+PFLA("lv->size=%s", display_size(vg->cmd, lv->size));
+PFLA("lv->le_count=%u", lv->le_count);
+PFLA("seg->len=%u", first_seg(lv)->len);
+ /*
+ * Reshape adding image component pairs:
+ *
+ * - reset rebuild flag on new image LVs
+ * - set delta disks plus flag on new image LVs
+ */
+ log_debug_metadata("Setting delta disk flag on new data LVs of %s",
+ display_lvname(lv));
+ if (old_image_count < seg->area_count) {
+ if (!vg_write(vg) || !vg_commit(vg) || !backup(vg)) {
+ log_error("metadata commit/backup failed");
+ return 0;
+ }
+
+ for (s = old_image_count; s < seg->area_count; s++) {
+ slv = seg_lv(seg, s);
+PFLA("seg_lv(seg, %u)=%s", s, slv);
+ slv->status &= ~LV_REBUILD;
+ slv->status |= LV_RESHAPE_DELTA_DISKS_PLUS;
+ if (!activate_lv_excl_local(vg->cmd, slv) ||
+ !activate_lv_excl_local(vg->cmd, seg_metalv(seg, s)))
+ return_0;
+ }
}
- if (kernel_devs == dev_count)
+ return 1;
+}
+
+/*
+ * HM Helper:
+ *
+ * Reshape: remove disks from existing raid lv
+ *
+ */
+static int _raid_reshape_remove_images(struct logical_volume *lv,
+ const struct segment_type *new_segtype,
+ int yes, int force,
+ uint32_t old_image_count, uint32_t new_image_count,
+ const unsigned new_stripes, const unsigned new_stripe_size,
+ struct dm_list *allocate_pvs, struct dm_list *removal_lvs)
+{
+ uint32_t active_lvs, current_le_count, reduced_le_count, removed_lvs, s;
+ uint64_t extend_le_count;
+ unsigned devs_health, devs_in_sync;
+ struct lv_segment *seg;
+ struct lvinfo info = { 0 };
+
+PFL();
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, (seg = first_seg(lv)), new_segtype);
+PFL();
+ switch (_reshaped_state(lv, new_image_count, &devs_health, &devs_in_sync)) {
+ case 3:
+ /*
+ * Disk removal reshape step 1:
+ *
+ * we got more disks active than requested via @new_stripes
+ *
+ * -> flag the ones to remove
+ *
+ */
+PFL();
+ if (seg->segtype != new_segtype)
+ log_print_unless_silent("Ignoring layout change on device removing reshape");
+
+ if (!lv_info(lv->vg->cmd, lv, 0, &info, 1, 0) && driver_version(NULL, 0)) {
+ log_error("lv_info failed: aborting");
+ return 0;
+ }
+
+ if (!_lv_reshape_get_new_len(lv, old_image_count, new_image_count, &reduced_le_count))
+ return 0;
+
+ reduced_le_count -= seg->reshape_len * _data_rimages_count(seg, new_image_count);
+ current_le_count = lv->le_count - seg->reshape_len * _data_rimages_count(seg, old_image_count);
+ extend_le_count = current_le_count * current_le_count / reduced_le_count;
+PFLA("new_image_count=%u _data_rimages_count(seg, new_image_count)=%u current_le_count=%u", new_image_count, _data_rimages_count(seg, new_image_count), current_le_count);
+ log_warn("WARNING: Removing stripes from active%s logical "
+ "volume %s will shrink it from %s to %s!",
+ info.open_count ? " and open" : "", display_lvname(lv),
+ display_size(lv->vg->cmd, (uint64_t) current_le_count * lv->vg->extent_size),
+ display_size(lv->vg->cmd, (uint64_t) reduced_le_count * lv->vg->extent_size));
+ log_warn("THIS MAY DESTROY (PARTS OF) YOUR DATA!");
+ if (!yes)
+ log_warn("Interrupt the conversion and run \"lvresize -y -l%u %s\" to "
+ "keep the current size if not done already!",
+ (uint32_t) extend_le_count, display_lvname(lv));
+ log_print_unless_silent("If that leaves the logical volume larger than %u extents due to stripe rounding,",
+ reduced_le_count);
+ log_print_unless_silent("you may want to grow the content afterwards (filesystem etc.)");
+ log_warn("WARNING: too remove freed stripes after the conversion has finished, you have to run \"lvconvert --stripes %u %s\"",
+ new_stripes, display_lvname(lv));
+
+ if (!_yes_no_conversion(lv, new_segtype, yes, force, 0, new_image_count,
+ seg->data_copies, new_stripes, new_stripe_size))
+ return 0;
+
+ if (!force) {
+ log_warn("WARNING: Can't remove stripes without --force option");
+ return 0;
+ }
+
+ /*
+ * Allocate backward out of place reshape space at the
+ * _end_ of all data image LVs, because MD reshapes backwards
+ * to remove disks from a raid set
+ */
+ if (!_lv_alloc_reshape_space(lv, alloc_end, NULL, allocate_pvs))
+ return 0;
+
+ /* Flag all disks past new images as delta disks minus to kernel */
+ for (s = new_image_count; s < old_image_count; s++)
+ seg_lv(seg, s)->status |= LV_RESHAPE_DELTA_DISKS_MINUS;
+
+ if (seg_is_any_raid5(seg) && new_image_count == 2)
+ seg->data_copies = 2;
+
+ break;
+
+ case 1:
+ /*
+ * Disk removal reshape step 2:
+ *
+ * we got the proper (smaller) amount of devices active
+ * for a previously finished disk removal reshape
+ *
+ * -> remove the freed up images and reduce LV size
+ *
+ */
+PFL();
+ for (active_lvs = removed_lvs = s = 0; s < seg->area_count; s++) {
+ struct logical_volume *slv;
+
+ RETURN_IF_NONZERO(seg_type(seg, s) != AREA_LV ||
+ !(slv = seg_lv(seg, s)), "image sub lv");
+ if (slv->status & LV_RESHAPE_REMOVED)
+ removed_lvs++;
+ else
+ active_lvs++;
+ }
+
+ RETURN_IF_ZERO(devs_in_sync == active_lvs, "correct kernel/lvm active LV count");
+ RETURN_IF_ZERO(active_lvs + removed_lvs == old_image_count, "correct kernel/lvm total LV count");
+
+ /* Reshape removing image component pairs -> change sizes accordingly */
+ if (!_reshape_adjust_to_size(lv, old_image_count, new_image_count)) {
+ log_error("Failed to adjust LV %s to new size!", display_lvname(lv));
+ return 0;
+ }
+
+ log_debug_metadata("Removing %u data and metadata image LV pair%s from %s",
+ old_image_count - new_image_count, old_image_count - new_image_count > 1 ? "s" : "",
+ display_lvname(lv));
+ if (!_lv_change_image_count(lv, new_image_count, allocate_pvs, removal_lvs))
+ return 0;
+
+ break;
+
+ default:
+PFL();
+ log_error(INTERNAL_ERROR "Bad return provided to %s.", __func__);
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * HM Helper:
+ *
+ * adjust @lv size for lv_{extend/reduce} to work.
+ *
+ * Set all segments type (should be just one) to raid0_meta
+ * for resize (for extend, last one would suffice, but...).
+ */
+static int _adjust_raid10_lv_size(struct logical_volume *lv,
+ const struct segment_type *new_segtype,
+ const struct segment_type *raid0_meta_segtype,
+ const uint32_t data_copies, int to_exposed_size)
+{
+ struct lv_segment *seg, *seg1;
+ const struct segment_type *segtype;
+
+ if (data_copies == 1)
return 1;
- return kernel_devs < dev_count ? 2 : 3;
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, (seg = first_seg(lv)), new_segtype);
+ RETURN_IF_ZERO(raid0_meta_segtype, "no valid raid0_meta_segtype argument");
+ RETURN_IF_ZERO(data_copies > 1, "valid #data_copies");
+ RETURN_IF_ZERO(to_exposed_size == 0 || to_exposed_size == 1,
+ "valid to_exposed_size argument");
+
+ if (to_exposed_size) {
+ lv->le_count /= data_copies;
+ segtype = new_segtype;
+ seg->data_copies = data_copies;
+ } else {
+ lv->le_count *= data_copies;
+ segtype = raid0_meta_segtype;
+ seg->data_copies = 1;
+ }
+
+ seg->len = lv->le_count;
+ dm_list_iterate_items(seg1, &lv->segments)
+ seg1->segtype = segtype;
+
+ return 1;
+}
+
+/*
+ * HM Helper:
+ *
+ * resize striped/raid0/raid10 number of slices in @lv to hold raid10 @new_data_copies >= 2 and <= stripes
+ *
+ * This function could be used to change number of data copies
+ * on raid10_{new,offset} as well, but MD kernel does not
+ * supprt that (yet).
+ */
+static int _lv_raid10_resize_data_copies(struct logical_volume *lv,
+ const struct segment_type *new_segtype,
+ const uint32_t new_data_copies,
+ struct dm_list *allocate_pvs)
+{
+ uint32_t data_copies, extents_per_data_copy, extents;
+ struct lv_segment *seg;
+ const struct segment_type *raid0_meta_segtype, *segtype;
+
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, (seg = first_seg(lv)), new_segtype);
+ /* HM FIXME: accept raid10* once (if ever) MD kernel supports it */
+ RETURN_IF_ZERO(seg_is_striped(seg) || seg_is_any_raid0(seg) || seg_is_raid10_far(seg),
+ "processable segment type");
+ RETURN_IF_SEG_AREA_COUNT_FALSE(seg, new_data_copies - 1);
+ RETURN_IF_ZERO(seg->area_count > 1, "area count > 1");
+ RETURN_IF_NONZERO((seg_is_striped(seg) || seg_is_any_raid0(seg)) && seg->data_copies != 1,
+ "#data_copies == 1 with striped/raid0");
+ RETURN_IF_NONZERO(seg_is_raid10(seg) && seg->data_copies < 2,
+ "#data_copies < 2 with raid10_");
+ RETURN_IF_NONZERO(new_data_copies == seg->data_copies, "change in #data_copies");
+ RETURN_IF_NONZERO(lv->le_count % seg->area_count, "divisibility of LV size by stripes");
+ RETURN_IF_ZERO((raid0_meta_segtype = get_segtype_from_flag(lv->vg->cmd, SEG_RAID0_META)),
+ "raid0_meta segment type found?");
+
+ segtype = seg->segtype;
+ data_copies = seg->data_copies;
+ extents_per_data_copy = lv->le_count; /* Netto extents exposed = size per data copy needed */
+ extents = (new_data_copies > data_copies ? /* Extents to extend/reduce LV */
+ new_data_copies - data_copies :
+ data_copies - new_data_copies) * extents_per_data_copy;
+
+ /* Adjust to total (internal) LV size */
+ if (!_adjust_raid10_lv_size(lv, new_segtype, raid0_meta_segtype, data_copies, 0))
+ return 0;
+
+ log_debug_metadata("%sing %s LV %s before conversion to %s",
+ new_data_copies > seg->data_copies ? "Extend": "Reduc",
+ segtype->name, display_lvname(lv), new_segtype->name);
+
+ if (new_data_copies > data_copies) {
+ if (!lv_extend(lv, raid0_meta_segtype,
+ seg->area_count, seg->stripe_size, 1, 0,
+ extents, allocate_pvs, lv->alloc, 0)) {
+ log_error("Failed to extend %s LV %s before conversion to %s",
+ segtype->name, display_lvname(lv), new_segtype->name);
+ return 0;
+ }
+
+ } else if (!lv_reduce(lv, extents)) {
+ log_error("Failed to reduce %s LV %s", segtype->name, display_lvname(lv));
+ return 0;
+ }
+
+ /* Adjust to externally LV exposed size */
+ return _adjust_raid10_lv_size(lv, new_segtype, raid0_meta_segtype, new_data_copies, 1);
+}
+
+/*
+ * HM Helper:
+ *
+ * Reshape: keep disks in RAID @lv but change stripe size or data copies
+ *
+ */
+static int _raid_reshape_keep_disks(struct logical_volume *lv,
+ const struct segment_type *new_segtype,
+ int yes, int force,
+ int *force_repair,
+ const int new_data_copies, const unsigned new_stripe_size,
+ struct dm_list *allocate_pvs)
+{
+ int alloc_reshape_space = 1;
+ enum alloc_where where = alloc_anywhere;
+ struct lv_segment *seg;
+
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, (seg = first_seg(lv)), new_segtype);
+ RETURN_IF_ZERO(force_repair, "force repair pointer argument");
+ RETURN_IF_ZERO(allocate_pvs || dm_list_empty(allocate_pvs), "PVs to allocate");
+
+PFLA("seg->data_copies=%u new_data_copies=%u", seg->data_copies, new_data_copies);
+ if (!_yes_no_conversion(lv, new_segtype, yes, force, 0,
+ seg->area_count, seg->data_copies,
+ seg->area_count - seg->segtype->parity_devs, new_stripe_size))
+ return 0;
+PFL();
+ /* Check a request to change the number of data copies in a raid10 LV */
+ if (seg->data_copies != new_data_copies) {
+ if (seg_is_raid10_far(seg)) {
+ /*
+ * Ensure resynchronisation of new data copies
+ * No reshape space needed, because raid10_far uses distinct stripe zones
+ * for its data copies rather than rotating them in individual stripes.
+ */
+ *force_repair = new_data_copies > seg->data_copies;
+ alloc_reshape_space = 0;
+
+ if (!_lv_raid10_resize_data_copies(lv, new_segtype, new_data_copies, allocate_pvs))
+ return 0;
+
+ } else if (seg_is_any_raid10(seg)) {
+ /* Ensure resynchronisation */
+ *force_repair = 1;
+ if (new_data_copies > seg->data_copies) {
+ where = alloc_end;
+ if (!_lv_raid10_resize_data_copies(lv, new_segtype, new_data_copies, allocate_pvs))
+ return 0;
+ } else {
+ where = alloc_begin;
+ seg->data_copies = new_data_copies;
+ }
+ }
+ }
+
+ /*
+ * Reshape layout or chunksize:
+ *
+ * Allocate free out of place reshape space unless raid10_far.
+ *
+ * If other raid10, allocate it appropriatly.
+ *
+ * Allocate it anywhere for raid4/5 to avoid remapping
+ * it in case it is already allocated.
+ *
+ * The dm-raid target is able to use the space whereever it
+ * is found by appropriately selecting forward or backward reshape.
+ */
+ if (alloc_reshape_space &&
+ !_lv_alloc_reshape_space(lv, where, NULL, allocate_pvs))
+ return 0;
+
+ seg->segtype = new_segtype;
+
+ return 1;
+}
+
+/* Helper: callback function to activate any new image component pairs @lv */
+static int _activate_sub_lvs(struct logical_volume *lv, uint32_t start_idx)
+{
+ uint32_t s;
+ struct logical_volume *lv1;
+ struct lv_segment *seg;
+ struct cmd_context *cmd;
+
+ /* seg->area_count may be 0 here! */
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+ cmd = lv->vg->cmd;
+
+ log_debug_metadata("Activating %u image component%s of LV %s",
+ seg->area_count - start_idx, seg->meta_areas ? "pairs" : "s",
+ display_lvname(lv));
+ for (s = start_idx; s < seg->area_count; s++) {
+ if (seg_type(seg, s) == AREA_LV &&
+ (lv1 = seg_lv(seg, s)) &&
+ !activate_lv_excl_local(cmd, lv1))
+ return 0;
+ if ((lv1 = _seg_metalv_checked(seg, s)) &&
+ !activate_lv_excl_local(cmd, lv1))
+ return 0;
+ }
+
+ return activate_lv_excl_local(cmd, lv);
+
+ return 1;
+}
+
+/* Helper: callback function to activate any new image component pairs @lv */
+static int _pre_raid_reshape(struct logical_volume *lv, void *data)
+{
+ uint32_t old_image_count;
+ struct lv_segment *seg;
+
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+ RETURN_IF_ZERO((old_image_count = *((uint32_t *) data)), "proper data argument");
+
+ /* Activate any new image component pairs */
+ if (old_image_count < seg->area_count)
+ return _activate_sub_lvs(lv, old_image_count);
+
+ return 1;
}
/*
* Reshape logical volume @lv by adding/removing stripes
* (absolute new stripes given in @new_stripes), changing
- * stripe size set in @new_stripe_size.
- * Any PVs listed in @allocate_pvs will be tried for
- * allocation of new stripes.
+ * layout (e.g. raid5_ls -> raid5_ra) or changing
+ * stripe size to @new_stripe_size.
+ *
+ * In case of disk addition, any PVs listed in
+ * mandatory @allocate_pvs will be used for allocation of
+ * new stripes.
*/
-
static int _raid_reshape(struct logical_volume *lv,
- const struct segment_type *new_segtype,
- int yes, int force,
- const unsigned new_stripes,
- const unsigned new_stripe_size,
- struct dm_list *allocate_pvs)
+ const struct segment_type *new_segtype,
+ int yes, int force,
+ const unsigned new_data_copies,
+ const unsigned new_region_size,
+ const unsigned new_stripes,
+ const unsigned new_stripe_size,
+ struct dm_list *allocate_pvs)
{
- int r;
- int flag_cleared, update_and_reload = 1, too_few = 0;
- uint32_t new_len;
- struct lv_segment *seg = first_seg(lv);
- unsigned old_dev_count = seg->area_count;
- unsigned new_dev_count = new_stripes + seg->segtype->parity_devs;
+ int force_repair = 0, r, too_few = 0;
unsigned devs_health, devs_in_sync;
- struct lvinfo info = { 0 };
+ uint32_t new_image_count, old_image_count;
+ enum alloc_where where;
+ struct lv_segment *seg;
+ struct dm_list removal_lvs;
-PFLA("old_dev_count=%u new_dev_count=%u", old_dev_count, new_dev_count);
- if (seg->segtype == new_segtype &&
- old_dev_count == new_dev_count &&
- seg->stripe_size == new_stripe_size) {
- log_error("Nothing to do");
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, (seg = first_seg(lv)), new_segtype);
+ if (!seg_is_reshapable_raid(seg))
return 0;
- }
- if (segtype_is_any_raid0(new_segtype) &&
- (old_dev_count != new_dev_count || seg->stripe_size != new_stripe_size)) {
- log_error("Can't reshape raid0");
- log_error("You may want to convert to raid4/5/6 first");
+ RETURN_IF_ZERO(is_same_level(seg->segtype, new_segtype), "reshape request");
+ RETURN_IF_NONZERO(!seg_is_reshapable_raid(seg) && !seg_is_raid1(seg), "reshapable/processable segment");
+ RETURN_IF_ZERO((old_image_count = seg->area_count), "old device count calculated");
+ RETURN_IF_NONZERO((new_image_count = new_stripes + seg->segtype->parity_devs) < 2 && !seg_is_raid1(seg),
+ "raid set with less than parity devices");
+ RETURN_IF_ZERO(allocate_pvs , "allocate pvs list argument");
+
+ if (!_raid_in_sync(lv)) {
+ log_error("Unable to convert %s while it is not in-sync",
+ display_lvname(lv));
return 0;
}
+PFLA("old_image_count=%u new_image_count=%u new_region_size=%u", old_image_count, new_image_count, new_region_size);
+ dm_list_init(&removal_lvs);
+
+ if (seg->segtype == new_segtype &&
+ seg->data_copies == new_data_copies &&
+ seg->region_size == new_region_size &&
+ old_image_count == new_image_count &&
+ seg->stripe_size == new_stripe_size) {
+ /*
+ * No change in segment type, image count, region or stripe size has been requested ->
+ * user requests this to remove any reshape space from the @lv
+ */
+ if (!_lv_free_reshape_space_with_status(lv, &where)) {
+ log_error(INTERNAL_ERROR "Failed to free reshape space of %s",
+ display_lvname(lv));
+ return 0;
+ }
+
+ if (where == alloc_none) {
+ log_print_unless_silent("LV %s does not have reshape space allocated",
+ display_lvname(lv));
+ return 0;
+ }
+
+ /*
+ * Only in case reshape space was freed at the beginning,
+ * which is indicated by "where == alloc_begin",
+ * tell kernel to adjust data_offsets on raid devices to 0
+ *
+ * Special value '1' for seg->data_offset will be
+ * changed to 0 when emitting the segment line
+ */
+ log_print_unless_silent("No change in RAID LV %s layout, freeing reshape space", display_lvname(lv));
+ if (where == alloc_begin)
+ seg->data_offset = 1;
+
+ if (!_lv_update_reload_fns_reset_eliminate_lvs(lv, NULL, NULL))
+ return_0;
+
+ log_print_unless_silent("%s", display_lvname(lv));
+ return 1;
+ }
+
/* raid4/5 with N image component pairs (i.e. N-1 stripes): allow for raid4/5 reshape to 2 devices, i.e. raid1 layout */
if (seg_is_raid4(seg) || seg_is_any_raid5(seg)) {
if (new_stripes < 1)
too_few = 1;
- /* raid6 device count: check for 2 stripes minimum */
+ /* raid6 (raid10 can't shrink reshape) device count: check for 2 stripes minimum */
} else if (new_stripes < 2)
too_few = 1;
@@ -3158,31 +4903,31 @@ PFLA("old_dev_count=%u new_dev_count=%u", old_dev_count, new_dev_count);
return 0;
}
- seg->stripe_size = new_stripe_size;
- switch ((r = _reshaped_state(lv, old_dev_count, &devs_health, &devs_in_sync))) {
+ switch ((r = _reshaped_state(lv, old_image_count, &devs_health, &devs_in_sync))) {
case 1:
/*
- * old_dev_count == kernel_dev_count
+ * old_image_count == kernel_dev_count
*
* Check for device health
*/
if (devs_in_sync < devs_health) {
- log_error("Can't reshape out of sync LV %s/%s", lv->vg->name, lv->name);
+ log_error("Can't reshape out of sync LV %s", display_lvname(lv));
return 0;
}
-PFL()
- /* device count and health are good -> ready to add disks */
+PFL();
+ /* device count and health are good -> ready to go */
break;
case 2:
- if (devs_in_sync == new_dev_count)
+PFLA("devs_in_sync=%u old_image_count=%u new_image_count=%u", devs_in_sync,old_image_count, new_image_count);
+ if (devs_in_sync == new_image_count)
break;
/* Possible after a shrinking reshape and forgotten device removal */
log_error("Device count is incorrect. "
- "Forgotten \"lvconvert --stripes %d %s/%s\" to remove %u images after reshape?",
- devs_in_sync - seg->segtype->parity_devs, lv->vg->name, lv->name,
- old_dev_count - devs_in_sync);
+ "Forgotten \"lvconvert --stripes %d %s\" to remove %u images after reshape?",
+ devs_in_sync - seg->segtype->parity_devs, display_lvname(lv),
+ old_image_count - devs_in_sync);
return 0;
default:
@@ -3191,289 +4936,220 @@ PFL()
}
/* Handle disk addition reshaping */
- if (old_dev_count < new_dev_count) {
+ if (old_image_count < new_image_count) {
PFL();
- /* Conversion to raid1 */
- if (old_dev_count == 2)
- new_segtype = seg->segtype;
-
- if (!lv_info(lv->vg->cmd, lv, 0, &info, 1, 0) && driver_version(NULL, 0)) {
- log_error("lv_info failed: aborting");
- return 0;
- }
-
- new_len = _data_rimages_count(seg, new_dev_count) * (seg->len / _data_rimages_count(seg, seg->area_count));
- log_warn("WARNING: Adding stripes to active%s logical volume %s/%s will grow "
- "it from %u to %u extents!\n"
- "You may want to run \"lvresize -l%u %s/%s\" to shrink it after\n"
- "the conversion has finished or make use of the gained capacity",
- info.open_count ? " and open" : "",
- lv->vg->name, lv->name, seg->len, new_len,
- new_len, lv->vg->name, lv->name);
- if (!yes && yes_no_prompt("WARNING: Do you really want to add %u stripes to %s/%s extending it? [y/n]: ",
- new_dev_count - old_dev_count, lv->vg->name, lv->name) == 'n') {
- log_error("Logical volume %s/%s NOT converted to extend", lv->vg->name, lv->name);
+ if (!_raid_reshape_add_images(lv, new_segtype, yes, force,
+ old_image_count, new_image_count,
+ new_stripes, new_stripe_size, allocate_pvs))
return 0;
- }
- if (sigint_caught())
- return_0;
- /*
- * Allocate free forward out of place reshape space at the beginning of all data image LVs
- */
- if (!_lv_alloc_reshape_space(lv, alloc_begin, allocate_pvs))
- return 0;
-
- if (!_lv_raid_change_image_count(lv, new_segtype, new_dev_count, allocate_pvs))
+ /* Handle disk removal reshaping */
+ } else if (old_image_count > new_image_count) {
+ if (!_raid_reshape_remove_images(lv, new_segtype, yes, force,
+ old_image_count, new_image_count,
+ new_stripes, new_stripe_size,
+ allocate_pvs, &removal_lvs))
return 0;
- update_and_reload = 0;
-
- if (seg->segtype != new_segtype)
- log_warn("Ignoring layout change on device adding reshape");
-
/*
- * HM FIXME: I don't like the flow doing this here and in _raid_add_images on addition
+ * Handle raid set layout reshaping
+ * (e.g. raid5_ls -> raid5_n or stripe size change or change #data_copies on raid10)
*/
+ } else if (!_raid_reshape_keep_disks(lv, new_segtype, yes, force, &force_repair,
+ new_data_copies, new_stripe_size, allocate_pvs))
+ return 0;
- /* Handle disk removal reshaping */
- } else if (old_dev_count > new_dev_count) {
- uint32_t s;
-
- switch (_reshaped_state(lv, new_dev_count, &devs_health, &devs_in_sync)) {
- case 3:
- /*
- * Disk removal reshape step 1:
- *
- * we got more disks active than requested via @new_stripes
- *
- * -> flag the ones to remove
- *
- */
PFL();
- if (!lv_info(lv->vg->cmd, lv, 0, &info, 1, 0) && driver_version(NULL, 0)) {
- log_error("lv_info failed: aborting");
- return 0;
- }
- new_len = _data_rimages_count(seg, new_dev_count) *
- (seg->len / _data_rimages_count(seg, seg->area_count));
-PFLA("new_dev_count=%u _data_rimages_count(seg, new_dev_count)=%u new_len=%u", new_dev_count, _data_rimages_count(seg, new_dev_count), new_len);
- log_warn("WARNING: Removing stripes from active%s logical volume %s/%s will shrink "
- "it from %u to %u extents!\n"
- "THIS MAY DESTROY (PARTS OF) YOUR DATA!\n"
- "You may want to run \"lvresize -y -l%u %s/%s\" _before_ the conversion starts!\n"
- "If that leaves the logical volume larger than %u extents, grow the filesystem etc. as well",
- info.open_count ? " and open" : "",
- lv->vg->name, lv->name, seg->len, new_len,
- seg->len * _data_rimages_count(seg, seg->area_count) / _data_rimages_count(seg, new_dev_count),
- lv->vg->name, lv->name, new_len);
-
- if (!yes && yes_no_prompt("Do you really want to remove %u stripes from %s/%s? [y/n]: ",
- old_dev_count - new_dev_count, lv->vg->name, lv->name) == 'n') {
- log_error("Logical volume %s/%s NOT converted to reduce", lv->vg->name, lv->name);
- return 0;
- }
- if (sigint_caught())
- return_0;
-
- if (!force) {
- log_warn("WARNING: Can't remove stripes without --force option");
- return 0;
- }
-
- /*
- * Allocate free backward out of place reshape space at the end of all data image LVs
- */
- if (!_lv_alloc_reshape_space(lv, alloc_end, allocate_pvs))
- return 0;
-
- for (s = new_dev_count; s < old_dev_count; s++)
- seg_lv(seg, s)->status |= LV_RESHAPE_DELTA_DISKS_MINUS;
+ seg->stripe_size = new_stripe_size;
- update_and_reload = 1;
+ /* HM FIXME: workaround for not resetting "nosync" flag */
+ init_mirror_in_sync(0);
+PFLA("new_segtype=%s seg->area_count=%u", new_segtype->name, seg->area_count);
- if (seg->segtype != new_segtype)
- log_warn("Ignoring layout change on device removing reshape");
+ /* _pre_raid_reshape to acivate any added image component pairs to avoid unsafe table loads */
+ if (!_lv_update_reload_fns_reset_eliminate_lvs(lv, &removal_lvs,
+ _pre_raid_reshape, &old_image_count, NULL))
+ return 0;
- break;
+ return force_repair ? _lv_cond_repair(lv) : 1;
+}
- case 1:
- /*
- * Disk removal reshape step 2:
- *
- * we got the proper (smaller) amount of devices active
- * for a previously finished disk removal reshape
- *
- * -> remove the freed up images
- *
- */
-PFL();
- if (!_lv_raid_change_image_count(lv, new_segtype, new_dev_count, allocate_pvs))
- return 0;
+/*
+ * Check for reshape request defined by:
+ *
+ * - raid type is reshape capable
+ * - no raid level change
+ * - # of stripes requested to change
+ * (i.e. add/remove disks from a striped raid set)
+ * -or-
+ * - stripe size change requestd
+ * (e.g. 32K -> 128K)
+ *
+ * Returns:
+ *
+ * 0 -> no reshape request
+ * 1 -> allowed reshape request
+ * 2 -> prohibited reshape request
+ * 3 -> allowed region size change request
+ */
+static int _reshape_requested(const struct logical_volume *lv, const struct segment_type *segtype,
+ const int data_copies, const uint32_t region_size,
+ const uint32_t stripes, const uint32_t stripe_size)
+{
+ struct lv_segment *seg;
- if (!vg_write(lv->vg) || !vg_commit(lv->vg)) {
- log_error("Failed to write reshaped %s/%s", lv->vg->name, lv->name);
- return 0;
- }
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, (seg = first_seg(lv)), segtype);
- backup(lv->vg);
+ /* No layout change -> allow for removal of reshape space */
+ if (seg->segtype == segtype &&
+ data_copies == seg->data_copies &&
+ region_size == seg->region_size &&
+ stripes == _data_rimages_count(seg, seg->area_count) &&
+ stripe_size == seg->stripe_size)
+ return 1;
- update_and_reload = 0;
- break;
+ /* Ensure region size is >= stripe size */
+ if (!seg_is_striped(seg) &&
+ !seg_is_any_raid0(seg) &&
+ (region_size || stripe_size) &&
+ ((region_size ?: seg->region_size) < (stripe_size ?: seg->stripe_size))) {
+ log_error("region size may not be smaller than stripe size on LV %s",
+ display_lvname(lv));
+ return 2;
+ }
- default:
+PFLA("data_copies=%u seg->data_copies=%u stripes=%u seg->area_count=%u", data_copies, seg->data_copies, stripes, seg->area_count);
+ if ((_lv_is_duplicating(lv) || lv_is_duplicated(lv)) &&
+ ((seg_is_raid1(seg) ? 0 : (stripes != _data_rimages_count(seg, seg->area_count))) ||
+ data_copies != seg->data_copies))
+ goto err;
PFL();
- log_error(INTERNAL_ERROR "Bad return provided to %s.", __func__);
- return 0;
+ if ((!seg_is_striped(seg) && segtype_is_raid10_far(segtype)) ||
+ (seg_is_raid10_far(seg) && !segtype_is_striped(segtype))) {
+ if (data_copies == seg->data_copies &&
+ region_size == seg->region_size) {
+ log_error("Can't convert raid10_far");
+ goto err;
}
-
- /* Handle raid set layout reshaping (e.g. raid5_ls -> raid5_n) */
- } else {
- /*
- * Allocate free data image LVs space for out-of-place reshape anywhere
- */
- if (!_lv_alloc_reshape_space(lv, alloc_anywhere, allocate_pvs))
- return 0;
-
- seg->segtype = new_segtype;
- update_and_reload = 1;
}
-PFLA("new_segtype=%s seg->area_count=%u", new_segtype->name, seg->area_count);
+ if (seg_is_raid10_far(seg)) {
+ if (stripes != _data_rimages_count(seg, seg->area_count)) {
+ log_error("Can't change stripes in raid10_far");
+ goto err;
+ }
- if (update_and_reload) {
- if (!lv_update_and_reload_origin(lv))
- return_0;
+ if (stripe_size != seg->stripe_size) {
+ log_error("Can't change stripe size in raid10_far");
+ goto err;
+ }
+ }
PFL();
- /* HM FIXME: i don't like the flow doing this here and in _raid_add_images on addition of component images */
- /*
- * Now that the 'RESHAPE_DELTA_DISKS_MINUS' has made its way to
- * the kernel, we must remove the flag so that the individual devices
- * are not reshaped upon every activation.
- */
- if (!_reset_flags_passed_to_kernel(lv, &flag_cleared))
- return_0;
-
- if (flag_cleared &&
- !lv_update_and_reload_origin(lv))
- return_0;
+ /* region_size may change on any raid LV but raid0 including raid10_far */
+ if (region_size &&
+ region_size != seg->region_size &&
+ segtype == seg->segtype) {
+ int may = 1;
+
+ if (seg_is_raid10_far(seg) &&
+ (stripes != _data_rimages_count(seg, seg->area_count) ||
+ stripe_size != seg->stripe_size))
+ may = 0;
+PFLA("may=%d", may);
+ return may ? 3 : 2;
+ }
PFL();
+ if (seg_is_any_raid10(seg) && seg->area_count > 2 &&
+ stripes && stripes < seg->area_count - seg->segtype->parity_devs) {
+ log_error("Can't remove stripes from raid10");
+ goto err;
}
-
- return 1;
-}
-
-/* Process one level up takeover on @lv to @segtype allocating fron @allocate_pvs */
-static int _raid_takeover(struct logical_volume *lv,
- int up,
- const struct segment_type *segtype,
- struct dm_list *allocate_pvs,
- const char *error_msg)
-{
- struct lv_segment *seg = first_seg(lv);
- uint32_t new_count = seg->area_count + segtype->parity_devs - seg->segtype->parity_devs;
-
- /* Make sure to set default region size on takeover from raid0 */
- _check_and_init_region_size(lv);
-
-PFLA("segtype=%s old_count=%u new_count=%u", segtype->name, seg->area_count, new_count);
- /* Takeover raid4 <-> raid5_n */
- if (new_count == seg->area_count) {
PFL();
- if (seg->area_count == 2 ||
- ((segtype_is_raid5_n(seg->segtype) && segtype_is_raid4(segtype)) ||
- (segtype_is_raid4(seg->segtype) && segtype_is_raid5_n(segtype)))) {
- seg->segtype = segtype;
- return lv_update_and_reload(lv);
- }
-
+ /* This segment type is not reshapable */
+ if (!seg_is_reshapable_raid(seg))
return 0;
+PFL();
+ /* Switching raid levels is a takeover, no reshape */
+ if (!is_same_level(seg->segtype, segtype))
+ return 0;
+PFL();
+ if ((seg_is_raid10_near(seg) || seg_is_raid10_offset(seg)) &&
+ data_copies != seg->data_copies) {
+ log_error("Can't change number of data copies on %s LV %s",
+ lvseg_name(seg), display_lvname(lv));
+ goto err;
}
- if (seg_is_any_raid5(seg) && segtype_is_raid1(segtype) && seg->area_count != 2) {
- uint32_t remove_count = seg->area_count - 2;
+ /* raid10_{near,offset} case */
+ if ((seg_is_raid10_near(seg) && segtype_is_raid10_offset(segtype)) ||
+ (seg_is_raid10_offset(seg) && segtype_is_raid10_near(segtype))) {
+ if (stripes >= seg->area_count)
+ return 1;
- log_error("Device count is incorrect. "
- "Forgotten \"lvconvert --stripes 1 %s/%s\" to remove %u image%s after reshape?",
- lv->vg->name, lv->name, remove_count, remove_count > 1 ? "s" : "");
- return 0;
+ goto err;
}
-
+PFL();
/*
- * Takeover of raid sets with 2 image component pairs (2-legged):
- *
- * - in case of raid1 -> raid5, takeover will run a degraded 2 disk raid5 set with the same content
- * in each leg (i.e. a redundant raid1 mapping) which will get an additional disk allocated afterwards
- * and reloaded starting reshaping to reach the raid4/5 layout.
- *
- * - in case of raid4/raid5_n -> all set, just reload
- *
- * - in case of raid1 -> raid0, remove the second leg and conditionally the meta device
- * of the first leg if raid0 requested and reload
+ * raid10_far is not reshapable in MD at all;
+ * lvm/dm adds reshape capability to add/remove data_copies
*/
- if (!seg_is_any_raid0(seg) && seg->area_count == 2) {
- int valid_conversion = 0;
+ if (seg_is_raid10_far(seg) && segtype_is_raid10_far(segtype)) {
+ if (stripes && stripes == seg->area_count &&
+ data_copies > 1 &&
+ data_copies <= seg->area_count &&
+ data_copies != seg->data_copies)
+ return 1;
- /* Current segtype is raid1 */
- if (seg_is_raid1(seg)) {
- if (segtype_is_any_raid0(segtype))
- valid_conversion = 1;
+ goto err;
- if (segtype_is_raid4(segtype) || segtype_is_any_raid5(segtype))
- valid_conversion = 1;
- }
+ } else if (seg_is_any_raid10(seg) && segtype_is_any_raid10(segtype) &&
+ data_copies > 1 && data_copies != seg->data_copies)
+ goto err;
+PFL();
+ /* raid10_{near,offset} can't reshape removing devices, just add some */
+ if (seg_is_any_raid10(seg) &&
+ seg->segtype == segtype) {
+ if (stripes &&
+ (stripes < seg->area_count || stripes < seg->data_copies)) {
+ log_error("Can't reshape %s LV %s removing devices.",
+ lvseg_name(seg), display_lvname(lv));
+ goto err;
- /* Current segtype is raid4 or any raid5 */
- if (seg_is_raid4(seg) || seg_is_any_raid5(seg))
- if (segtype_is_any_raid0(segtype) || segtype_is_raid1(segtype))
- valid_conversion = 1;
+ } else
+ return 1;
+ }
- if (!valid_conversion) {
- log_error(error_msg, lv->vg->name, lv->name);
- return 0;
- }
PFL();
- /* raid1 does not preset stripe size */
- if (!seg->stripe_size &&
- !(seg->stripe_size = find_config_tree_int(lv->vg->cmd, global_raid_stripe_size_default_CFG, NULL)))
- return 0;
-
+ /* Change layout (e.g. raid5_ls -> raid5_ra) keeping # of stripes */
+ if (seg->segtype != segtype) {
+PFL();
+ if (stripes && stripes != _data_rimages_count(seg, seg->area_count))
+ goto err;
PFL();
- if (segtype_is_any_raid0(segtype))
- new_count--;
+ if (stripe_size && stripe_size != seg->stripe_size)
+ goto err;
- else {
- seg->segtype = segtype;
- // return lv_update_and_reload_origin(lv);
- return resume_lv(lv->vg->cmd, lv_lock_holder(lv));
- }
+PFL();
+ return 1;
}
+PFL();
+ if (stripes && stripes == _data_rimages_count(seg, seg->area_count)) {
+ log_error("LV %s already has %u stripes.",
+ display_lvname(lv), stripes);
+ return 2;
+ }
PFL();
- /*
- * The top-level LV is being reloaded and the VG
- * written and committed in the course of this call
- */
- return _lv_raid_change_image_count(lv, segtype, new_count, allocate_pvs);
-}
+ return (region_size || stripes || stripe_size) ? 1 : 0;
-/* Process one level up takeover on @lv to @segtype */
-static int _raid_level_up(struct logical_volume *lv,
- const struct segment_type *segtype,
- struct dm_list *allocate_pvs)
-{
- return _raid_takeover(lv, 1, segtype, allocate_pvs,
- "raid1 set %s/%s has to have 2 operational disks.");
-}
+err:
+ if (lv_is_duplicated(lv))
+ log_error("Conversion of duplicating sub LV %s rejected", display_lvname(lv));
+ else
+ log_error("Use \"lvconvert --duplicate --type %s ... %s", segtype->name, display_lvname(lv));
-/* Process one level down takeover on @lv to @segtype */
-static int _raid_level_down(struct logical_volume *lv,
- const struct segment_type *segtype,
- struct dm_list *allocate_pvs)
-{
- return _raid_takeover(lv, 0, segtype, allocate_pvs,
- "raid1/4/5 set %s/%s has to have 1 mirror/stripe. Use \"lvconvert --stripes 1 ...\"");
+ return 2;
}
/*
@@ -3482,26 +5158,26 @@ static int _raid_level_down(struct logical_volume *lv,
* TAKEOVER: copes with all raid level switches aka takeover of @lv
*
* Overwrites the users "--type level_algorithm" request (e.g. --type raid6_zr)
- * with the appropriate, constrained one to allow for takeover.
+ * with the appropriate, constrained one to allow for takeover (e.g. raid6_n_6).
*
* raid0 can take over:
- * raid4 - if all data disks are active.
- * raid5 - providing it is Raid4 layout and one disk is faulty
- * raid10 - assuming we have all necessary active disks
- * raid1 - with (N -1) mirror drives faulty
+ * raid4
+ * raid5
+ * raid10_{near,far} - assuming we have all necessary active disks
+ * raid1
*
* raid1 can take over:
* raid5 with 2 devices, any layout or chunk size
*
- * raid10 can take over:
- * raid0 - providing it has only two drives
+ * raid10_{near,far} can take over:
+ * raid0 - with any number of drives
*
* raid4 can take over:
- * raid0 - if there is only one strip zone
- * raid5 - if layout is right
+ * raid0 - if there is only one stripe zone
+ * raid5 - if layout is right (parity on last disk)
*
* raid5 can take over:
- * raid0 - if there is only one strip zone - make it a raid4 layout
+ * raid0 - if there is only one stripe zone - make it a raid4 layout
* raid1 - if there are two drives. We need to know the chunk size
* raid4 - trivial - just use a raid4 layout.
* raid6 - Providing it is a *_6 layout
@@ -3509,845 +5185,4514 @@ static int _raid_level_down(struct logical_volume *lv,
* raid6 currently can only take over a (raid4/)raid5. We map the
* personality to an equivalent raid6 personality
* with the Q block at the end.
+ *
+ *
+ * DUPLICATE:
+ *
+ * restrictions on --mirrors/--stripes/--stripesize are checked
*/
+#define ALLOW_NONE 0x0
+#define ALLOW_DATA_COPIES 0x1
+#define ALLOW_STRIPES 0x2
+#define ALLOW_STRIPE_SIZE 0x4
+#define ALLOW_REGION_SIZE 0x8
+
+struct possible_takeover_reshape_type {
+ /* First 2 have to stay... */
+ const uint64_t possible_types;
+ const uint32_t options;
+ const uint64_t current_types;
+ const uint32_t current_areas;
+};
+
+struct possible_duplicate_type {
+ /* First 2 have to stay... */
+ const uint64_t possible_types;
+ const uint32_t options;
+ const uint32_t new_areas;
+};
+
struct possible_type {
- const uint64_t current_type;
+ /* ..to be handed back via this struct */
const uint64_t possible_types;
+ const uint32_t options;
};
+
+static struct possible_takeover_reshape_type _possible_takeover_reshape_types[] = {
+ /* striped -> */
+ { .current_types = SEG_AREAS_STRIPED, /* linear, i.e. seg->area_count = 1 */
+ .possible_types = SEG_RAID1,
+ .current_areas = 1,
+ .options = ALLOW_REGION_SIZE|ALLOW_DATA_COPIES },
+ { .current_types = SEG_AREAS_STRIPED, /* linear, i.e. seg->area_count = 1 */
+ .possible_types = SEG_RAID0|SEG_RAID0_META,
+ .current_areas = 1,
+ .options = ALLOW_STRIPE_SIZE },
+ { .current_types = SEG_AREAS_STRIPED, /* linear, i.e. seg->area_count = 1 */
+ .possible_types = SEG_RAID4|SEG_RAID5_LS|SEG_RAID5_LA|SEG_RAID5_RS|SEG_RAID5_RA|SEG_RAID5_N,
+ .current_areas = 1,
+ .options = ALLOW_REGION_SIZE },
+ { .current_types = SEG_AREAS_STRIPED, /* linear, i.e. seg->area_count = 1 */
+ .possible_types = SEG_RAID10_NEAR,
+ .current_areas = 1,
+ .options = ALLOW_REGION_SIZE },
+ { .current_types = SEG_AREAS_STRIPED,
+ .possible_types = SEG_RAID10_NEAR|SEG_RAID10_FAR,
+ .current_areas = ~0,
+ .options = ALLOW_DATA_COPIES|ALLOW_REGION_SIZE },
+ { .current_types = SEG_AREAS_STRIPED, /* striped, i.e. seg->area_count > 1 */
+ .possible_types = SEG_RAID01,
+ .current_areas = ~0U,
+ .options = ALLOW_REGION_SIZE|ALLOW_DATA_COPIES },
+ { .current_types = SEG_AREAS_STRIPED, /* striped, i.e. seg->area_count > 1 */
+ .possible_types = SEG_RAID0|SEG_RAID0_META,
+ .current_areas = ~0U,
+ .options = ALLOW_NONE },
+ { .current_types = SEG_AREAS_STRIPED, /* striped, i.e. seg->area_count > 1 */
+ .possible_types = SEG_RAID4|SEG_RAID5_N|SEG_RAID6_N_6,
+ .current_areas = ~0U,
+ .options = ALLOW_REGION_SIZE },
+
+ /* raid0* -> */
+ { .current_types = SEG_RAID0|SEG_RAID0_META, /* seg->area_count = 1 */
+ .possible_types = SEG_RAID1,
+ .current_areas = 1,
+ .options = ALLOW_DATA_COPIES|ALLOW_REGION_SIZE },
+ { .current_types = SEG_RAID0|SEG_RAID0_META, /* seg->area_count > 1 */
+ .possible_types = SEG_RAID10_NEAR|SEG_RAID10_FAR|SEG_RAID01,
+ .current_areas = ~0U,
+ .options = ALLOW_DATA_COPIES|ALLOW_REGION_SIZE },
+ { .current_types = SEG_RAID0|SEG_RAID0_META, /* seg->area_count > 1 */
+ .possible_types = SEG_RAID4|SEG_RAID5_LS|SEG_RAID5_LA|SEG_RAID5_RS|SEG_RAID5_RA|SEG_RAID5_N|SEG_RAID6_N_6,
+ .current_areas = ~0U,
+ .options = ALLOW_REGION_SIZE },
+ { .current_types = SEG_RAID0|SEG_RAID0_META, /* raid0 striped, i.e. seg->area_count > 0 */
+ .possible_types = SEG_AREAS_STRIPED|SEG_RAID0|SEG_RAID0_META,
+ .current_areas = ~0U,
+ .options = ALLOW_NONE },
+
+ /* raid1 -> */
+ { .current_types = SEG_RAID1,
+ .possible_types = SEG_RAID1|SEG_MIRROR,
+ .current_areas = ~0U,
+ .options = ALLOW_DATA_COPIES|ALLOW_REGION_SIZE },
+ { .current_types = SEG_RAID1, /* Only if seg->area_count = 2 */
+ .possible_types = SEG_RAID10_NEAR|SEG_RAID4| \
+ SEG_RAID5_LS|SEG_RAID5_LA|SEG_RAID5_RS|SEG_RAID5_RA|SEG_RAID5_N,
+ .current_areas = 2,
+ .options = ALLOW_NONE },
+ { .current_types = SEG_RAID1, /* seg->area_count != 2 */
+ .possible_types = SEG_RAID10_NEAR,
+ .current_areas = ~0U,
+ .options = ALLOW_REGION_SIZE },
+ { .current_types = SEG_RAID1, /* seg->area_count != 2 allowing for -m0 */
+ .possible_types = SEG_AREAS_STRIPED,
+ .current_areas = ~0U,
+ .options = ALLOW_DATA_COPIES },
+
+ /* mirror -> raid1 */
+ { .current_types = SEG_MIRROR,
+ .possible_types = SEG_MIRROR|SEG_RAID1,
+ .current_areas = ~0U,
+ .options = ALLOW_DATA_COPIES },
+
+ /* raid4 */
+ { .current_types = SEG_RAID4,
+ .possible_types = SEG_RAID1,
+ .current_areas = 2,
+ .options = ALLOW_NONE },
+ { .current_types = SEG_RAID4,
+ .possible_types = SEG_AREAS_STRIPED|SEG_RAID0|SEG_RAID0_META|SEG_RAID5_N|SEG_RAID6_N_6,
+ .current_areas = ~0U,
+ .options = ALLOW_NONE },
+
+ /* raid5 -> */
+ { .current_types = SEG_RAID5_LS|SEG_RAID5_LA|SEG_RAID5_RS|SEG_RAID5_RA|SEG_RAID5_N,
+ .possible_types = SEG_RAID1,
+ .current_areas = 2,
+ .options = ALLOW_NONE },
+ { .current_types = SEG_RAID5_LS|SEG_RAID5_LA|SEG_RAID5_RS|SEG_RAID5_RA|SEG_RAID5_N,
+ .possible_types = SEG_RAID5_LS|SEG_RAID5_LA|SEG_RAID5_RS|SEG_RAID5_RA|SEG_RAID5_N,
+ .current_areas = ~0U,
+ .options = ALLOW_STRIPES },
+
+ { .current_types = SEG_RAID5_LS,
+ .possible_types = SEG_RAID6_LS_6,
+ .current_areas = ~0U,
+ .options = ALLOW_NONE },
+ { .current_types = SEG_RAID5_LS,
+ .possible_types = SEG_RAID5_LS|SEG_RAID5_N|SEG_RAID5_LA|SEG_RAID5_RS|SEG_RAID5_RA,
+ .current_areas = ~0U,
+ .options = ALLOW_STRIPE_SIZE },
+ { .current_types = SEG_RAID5_RS,
+ .possible_types = SEG_RAID6_RS_6,
+ .current_areas = ~0U,
+ .options = ALLOW_NONE },
+ { .current_types = SEG_RAID5_RS,
+ .possible_types = SEG_RAID5_RS|SEG_RAID5_N|SEG_RAID5_LS|SEG_RAID5_LA|SEG_RAID5_RA,
+ .current_areas = ~0U,
+ .options = ALLOW_STRIPE_SIZE },
+ { .current_types = SEG_RAID5_LA,
+ .possible_types = SEG_RAID6_LA_6,
+ .current_areas = ~0U,
+ .options = ALLOW_NONE },
+ { .current_types = SEG_RAID5_LA,
+ .possible_types = SEG_RAID5_LA|SEG_RAID5_N|SEG_RAID5_LS|SEG_RAID5_RS|SEG_RAID5_RA,
+ .current_areas = ~0U,
+ .options = ALLOW_STRIPE_SIZE },
+ { .current_types = SEG_RAID5_RA,
+ .possible_types = SEG_RAID6_RA_6,
+ .current_areas = ~0U,
+ .options = ALLOW_NONE },
+ { .current_types = SEG_RAID5_RA,
+ .possible_types = SEG_RAID5_RA|SEG_RAID5_N|SEG_RAID5_LS|SEG_RAID5_LA|SEG_RAID5_RS,
+ .current_areas = ~0U,
+ .options = ALLOW_STRIPE_SIZE },
+ { .current_types = SEG_RAID5_N,
+ .possible_types = SEG_RAID5_N|SEG_RAID4| \
+ SEG_RAID5_LA|SEG_RAID5_LS|SEG_RAID5_RS|SEG_RAID5_RA,
+ .current_areas = ~0U,
+ .options = ALLOW_STRIPE_SIZE },
+ { .current_types = SEG_RAID5_N,
+ .possible_types = SEG_AREAS_STRIPED|SEG_RAID0|SEG_RAID0_META|SEG_RAID6_N_6,
+ .current_areas = ~0U,
+ .options = ALLOW_NONE },
+
+ /* raid6 -> */
+ { .current_types = SEG_RAID6_ZR,
+ .possible_types = SEG_RAID6_ZR|SEG_RAID6_LS_6|SEG_RAID6_LA_6|SEG_RAID6_RS_6|SEG_RAID6_RA_6| \
+ SEG_RAID6_NC|SEG_RAID6_NR|SEG_RAID6_N_6,
+ .current_areas = ~0U,
+ .options = ALLOW_STRIPE_SIZE },
+ { .current_types = SEG_RAID6_NC,
+ .possible_types = SEG_RAID6_NC|SEG_RAID6_LS_6|SEG_RAID6_LA_6|SEG_RAID6_RS_6|SEG_RAID6_RA_6| \
+ SEG_RAID6_NR|SEG_RAID6_ZR|SEG_RAID6_N_6,
+ .current_areas = ~0U,
+ .options = ALLOW_STRIPE_SIZE },
+ { .current_types = SEG_RAID6_NR,
+ .possible_types = SEG_RAID6_NR|SEG_RAID6_LS_6|SEG_RAID6_LA_6|SEG_RAID6_RS_6|SEG_RAID6_RA_6| \
+ SEG_RAID6_NC|SEG_RAID6_ZR|SEG_RAID6_N_6,
+ .current_areas = ~0U,
+ .options = ALLOW_STRIPE_SIZE },
+ { .current_types = SEG_RAID6_LS_6,
+ .possible_types = SEG_RAID6_LS_6|SEG_RAID6_LA_6|SEG_RAID6_RS_6|SEG_RAID6_RA_6| \
+ SEG_RAID6_NC|SEG_RAID6_NR|SEG_RAID6_ZR|SEG_RAID6_N_6|SEG_RAID5_LS,
+ .current_areas = ~0U,
+ .options = ALLOW_STRIPE_SIZE },
+ { .current_types = SEG_RAID6_RS_6,
+ .possible_types = SEG_RAID6_RS_6|SEG_RAID6_LS_6|SEG_RAID6_LA_6|SEG_RAID6_RA_6| \
+ SEG_RAID6_NC|SEG_RAID6_NR|SEG_RAID6_ZR|SEG_RAID6_N_6|SEG_RAID5_RS,
+ .current_areas = ~0U,
+ .options = ALLOW_STRIPE_SIZE },
+ { .current_types = SEG_RAID6_LA_6,
+ .possible_types = SEG_RAID6_LA_6|SEG_RAID6_LS_6|SEG_RAID6_RS_6|SEG_RAID6_RA_6| \
+ SEG_RAID6_NC|SEG_RAID6_NR|SEG_RAID6_ZR|SEG_RAID6_N_6|SEG_RAID5_LA,
+ .current_areas = ~0U,
+ .options = ALLOW_STRIPE_SIZE },
+ { .current_types = SEG_RAID6_RA_6,
+ .possible_types = SEG_RAID6_RA_6|SEG_RAID6_LS_6|SEG_RAID6_LA_6|SEG_RAID6_RS_6| \
+ SEG_RAID6_NC|SEG_RAID6_NR|SEG_RAID6_ZR|SEG_RAID6_N_6|SEG_RAID5_RA,
+ .current_areas = ~0U,
+ .options = ALLOW_STRIPE_SIZE },
+ { .current_types = SEG_RAID6_N_6,
+ .possible_types = SEG_RAID6_N_6|SEG_RAID6_LS_6|SEG_RAID6_LA_6|SEG_RAID6_RS_6|SEG_RAID6_RA_6| \
+ SEG_RAID6_NR|SEG_RAID6_NC|SEG_RAID6_ZR,
+ .current_areas = ~0U,
+ .options = ALLOW_STRIPE_SIZE },
+ { .current_types = SEG_RAID6_N_6,
+ .possible_types = SEG_AREAS_STRIPED|SEG_RAID0|SEG_RAID0_META|SEG_RAID4|SEG_RAID5_N,
+ .current_areas = ~0U,
+ .options = ALLOW_NONE },
+
+ /* raid10_near <-> raid10_near*/
+ { .current_types = SEG_RAID10_NEAR,
+ .possible_types = SEG_RAID10_NEAR,
+ .current_areas = ~0U,
+ .options = ALLOW_DATA_COPIES|ALLOW_STRIPES },
+
+ /* raid10_far <-> raid10_far */
+ { .current_types = SEG_RAID10_FAR,
+ .possible_types = SEG_RAID10_FAR,
+ .current_areas = ~0U,
+ .options = ALLOW_DATA_COPIES },
+
+ /* raid10 -> striped/raid0 */
+ { .current_types = SEG_RAID10_NEAR|SEG_RAID10_FAR,
+ .possible_types = SEG_AREAS_STRIPED|SEG_RAID0|SEG_RAID0_META,
+ .current_areas = ~0U,
+ .options = ALLOW_NONE },
+
+ /* raid10 (2 legs) -> raid1 */
+ { .current_types = SEG_RAID10_NEAR,
+ .possible_types = SEG_RAID1,
+ .current_areas = 2,
+ .options = ALLOW_REGION_SIZE },
+
+ /* raid01 -> striped */
+ { .current_types = SEG_RAID01,
+ .possible_types = SEG_AREAS_STRIPED,
+ .current_areas = ~0U,
+ .options = ALLOW_NONE },
+
+ /* raid01 -> raid01 */
+ { .current_types = SEG_RAID01,
+ .possible_types = SEG_RAID01,
+ .current_areas = ~0U,
+ .options = ALLOW_DATA_COPIES },
+
+ /* raid01 -> raid10 */
+ { .current_types = SEG_RAID01,
+ .possible_types = SEG_RAID10,
+ .current_areas = ~0U,
+ .options = ALLOW_DATA_COPIES|ALLOW_REGION_SIZE },
+
+ /* raid10 -> raid01 */
+ { .current_types = SEG_RAID10_NEAR,
+ .possible_types = SEG_RAID01,
+ .current_areas = ~0U,
+ .options = ALLOW_DATA_COPIES|ALLOW_REGION_SIZE },
+
+ /* END */
+ { .current_types = 0 }
+};
+
+static struct possible_duplicate_type _possible_duplicate_types[] = {
+ { .possible_types = SEG_RAID1|SEG_MIRROR,
+ .options = ALLOW_DATA_COPIES|ALLOW_REGION_SIZE,
+ .new_areas = ~0U },
+ { .possible_types = SEG_THIN_VOLUME,
+ .options = ALLOW_NONE,
+ .new_areas = ~0 },
+ { .possible_types = SEG_AREAS_STRIPED|SEG_RAID0|SEG_RAID0_META,
+ .options = ALLOW_NONE,
+ .new_areas = 1 },
+ { .possible_types = SEG_AREAS_STRIPED|SEG_RAID0|SEG_RAID0_META,
+ .options = ALLOW_STRIPES|ALLOW_STRIPE_SIZE,
+ .new_areas = ~0U },
+ { .possible_types = SEG_RAID01|SEG_RAID10_NEAR|SEG_RAID10_FAR|SEG_RAID10_OFFSET,
+ .options = ALLOW_DATA_COPIES|ALLOW_STRIPES|ALLOW_STRIPE_SIZE|ALLOW_REGION_SIZE,
+ .new_areas = ~0U },
+ { .possible_types = SEG_RAID4|SEG_RAID5_LS|SEG_RAID5_LA|SEG_RAID5_RS|SEG_RAID5_RA|SEG_RAID5_N| \
+ SEG_RAID6_ZR|SEG_RAID6_NC|SEG_RAID6_NR| \
+ SEG_RAID6_LS_6|SEG_RAID6_LA_6|SEG_RAID6_RS_6|SEG_RAID6_RA_6|SEG_RAID6_N_6,
+ .options = ALLOW_STRIPES|ALLOW_STRIPE_SIZE|ALLOW_REGION_SIZE,
+ .new_areas = ~0U },
+
+ /* END */
+ { .possible_types = 0 }
+};
+
/*
- * Check, if @new_segtype is possible to convert to for given segment type of @lv
+ * Return possible_type struct for current type in @seg
*
* HM FIXME: complete?
*/
-static int _is_possible_segtype(struct logical_volume *lv,
- const struct segment_type *new_segtype)
-{
- unsigned cn;
- const struct lv_segment *seg = first_seg(lv);
- struct possible_type pt[] = {
- { .current_type = SEG_AREAS_STRIPED, /* linear, i.e. seg->area_count = 1 */
- .possible_types = SEG_RAID1|SEG_RAID10|SEG_RAID4|SEG_RAID5_LS|SEG_RAID5_LA| \
- SEG_RAID5_RS|SEG_RAID5_RA|SEG_RAID5_N },
- { .current_type = SEG_AREAS_STRIPED, /* striped, i.e. seg->area_count > 1 */
- .possible_types = SEG_RAID0|SEG_RAID0_META|SEG_RAID10|SEG_RAID4|SEG_RAID5_N|SEG_RAID6_N_6 },
- { .current_type = SEG_RAID0|SEG_RAID0_META,
- .possible_types = SEG_RAID1|SEG_RAID10|SEG_RAID4|SEG_RAID5_LS|SEG_RAID5_LA| \
- SEG_RAID5_RS|SEG_RAID5_RA|SEG_RAID5_N },
- { .current_type = SEG_RAID0|SEG_RAID0_META,
- .possible_types = SEG_AREAS_STRIPED|SEG_RAID10|SEG_RAID4|SEG_RAID5_N|SEG_RAID6_N_6},
- { .current_type = SEG_RAID1,
- .possible_types = SEG_RAID10|SEG_RAID4|SEG_RAID5_LS|SEG_RAID5_LA|SEG_RAID5_RS|SEG_RAID5_RA|SEG_RAID5_N },
- { .current_type = SEG_RAID4,
- .possible_types = SEG_AREAS_STRIPED|SEG_RAID0|SEG_RAID0_META|SEG_RAID5_N|SEG_RAID6_N_6 },
- { .current_type = SEG_RAID5,
- .possible_types = SEG_RAID1|SEG_RAID5_N|SEG_RAID5_LS|SEG_RAID5_RS|SEG_RAID5_LA|SEG_RAID5_RA|SEG_RAID6_LS_6 },
- { .current_type = SEG_RAID5_LS,
- .possible_types = SEG_RAID1|SEG_RAID5|SEG_RAID5_N|SEG_RAID5_RS|SEG_RAID5_LA|SEG_RAID5_RA|SEG_RAID6_LS_6 },
- { .current_type = SEG_RAID5_RS,
- .possible_types = SEG_RAID1|SEG_RAID5|SEG_RAID5_N|SEG_RAID5_LS|SEG_RAID5_LA| SEG_RAID5_RA|SEG_RAID6_RS_6 },
- { .current_type = SEG_RAID5_LA,
- .possible_types = SEG_RAID1|SEG_RAID5|SEG_RAID5_N|SEG_RAID5_LS|SEG_RAID5_RS|SEG_RAID5_RA|SEG_RAID6_LA_6 },
- { .current_type = SEG_RAID5_RA,
- .possible_types = SEG_RAID1|SEG_RAID5|SEG_RAID5_N|SEG_RAID5_LS|SEG_RAID5_RS|SEG_RAID5_LA|SEG_RAID6_RA_6 },
- { .current_type = SEG_RAID5_N,
- .possible_types = SEG_AREAS_STRIPED|SEG_RAID0|SEG_RAID0_META|SEG_RAID1|SEG_RAID4| \
- SEG_RAID5|SEG_RAID5_LS|SEG_RAID5_RS|SEG_RAID5_LA|SEG_RAID5_RA|SEG_RAID6_N_6 },
- { .current_type = SEG_RAID6_ZR,
- .possible_types = SEG_RAID6_NC|SEG_RAID6_NR|SEG_RAID6_N_6 },
- { .current_type = SEG_RAID6_NC,
- .possible_types = SEG_RAID6_ZR|SEG_RAID6_NR|SEG_RAID6_N_6,},
- { .current_type = SEG_RAID6_NR,
- .possible_types = SEG_RAID6_ZR|SEG_RAID6_NC|SEG_RAID6_N_6 },
- { .current_type = SEG_RAID6_N_6,
- .possible_types = SEG_RAID6_LS_6|SEG_RAID6_LA_6|SEG_RAID6_RS_6|SEG_RAID6_RA_6| \
- SEG_RAID6_ZR|SEG_RAID6_NR|SEG_RAID6_NC| \
- SEG_RAID5_N|SEG_RAID0|SEG_RAID0_META|SEG_RAID4|SEG_AREAS_STRIPED },
- { .current_type = SEG_RAID6_LS_6,
- .possible_types = SEG_RAID6_LA_6|SEG_RAID6_RS_6|SEG_RAID6_LA_6| \
- SEG_RAID6_ZR|SEG_RAID6_NR|SEG_RAID6_NC|SEG_RAID5_LS },
- { .current_type = SEG_RAID6_RS_6,
- .possible_types = SEG_RAID6_LS_6|SEG_RAID6_LA_6|SEG_RAID6_RA_6| \
- SEG_RAID6_ZR|SEG_RAID6_NR|SEG_RAID6_NC|SEG_RAID5_RS },
- { .current_type = SEG_RAID6_LA_6,
- .possible_types = SEG_RAID6_LS_6|SEG_RAID6_RS_6|SEG_RAID6_RA_6| \
- SEG_RAID6_ZR|SEG_RAID6_NR|SEG_RAID6_NC|SEG_RAID5_LA },
- { .current_type = SEG_RAID6_RA_6,
- .possible_types = SEG_RAID6_LS_6|SEG_RAID6_LA_6|SEG_RAID6_RS_6| \
- SEG_RAID6_ZR|SEG_RAID6_NR|SEG_RAID6_NC|SEG_RAID5_RA },
- { .current_type = SEG_RAID10,
- .possible_types = SEG_AREAS_STRIPED|SEG_RAID0|SEG_RAID0_META }
- };
+static struct possible_takeover_reshape_type *__get_possible_takeover_reshape_type(const struct lv_segment *seg_from,
+ const struct segment_type *segtype_to,
+ struct possible_type *last_pt)
+{
+ struct possible_takeover_reshape_type *lpt = (struct possible_takeover_reshape_type *) last_pt;
+ struct possible_takeover_reshape_type *pt = lpt ? lpt + 1 : _possible_takeover_reshape_types;
+
+ RETURN_IF_ZERO(seg_from, "segment from argument");
+
+PFLA("seg_from=%s segtype_to=%s", lvseg_name(seg_from), segtype_to ? segtype_to->name : "NIL");
+
+ for ( ; pt->current_types; pt++) {
+ if ((seg_from->segtype->flags & pt->current_types) &&
+ (segtype_to ? (segtype_to->flags & pt->possible_types) : 1))
+ if (seg_from->area_count <= pt->current_areas)
+ return pt;
+ }
+
+ return NULL;
+}
+
+static struct possible_duplicate_type *__get_possible_duplicate_type(const struct segment_type *segtype_to,
+ uint32_t new_image_count,
+ struct possible_type *last_pt)
+{
+ struct possible_duplicate_type *lpt = (struct possible_duplicate_type *) last_pt;
+ struct possible_duplicate_type *pt = lpt ? lpt + 1 : _possible_duplicate_types;
+
+ RETURN_IF_ZERO(segtype_to, "segment type to argument");
+
+ for ( ; pt->possible_types; pt++) {
+ if (segtype_to->flags & pt->possible_types)
+ if (new_image_count <= pt->new_areas)
+ return pt;
+ }
+
+ return NULL;
+}
+
+static struct possible_type *_get_possible_type(const struct lv_segment *seg_from,
+ const struct segment_type *segtype_to,
+ uint32_t new_image_count,
+ struct possible_type *last_pt)
+{
+ RETURN_IF_ZERO(seg_from, "segment from argument");
+
+ return new_image_count ?
+ (struct possible_type *) __get_possible_duplicate_type(segtype_to, new_image_count, last_pt) :
+ (struct possible_type *) __get_possible_takeover_reshape_type(seg_from, segtype_to, last_pt);
+}
+
+/*
+ * Return allowed options (--stripes, ...) for conversion from @seg_from -> @seg_to
+ */
+static int _get_allowed_conversion_options(const struct lv_segment *seg_from,
+ const struct segment_type *segtype_to,
+ uint32_t new_image_count, uint32_t *options)
+{
+ struct possible_type *pt;
+
+ RETURN_IF_ZERO(seg_from, "segment from argument");
+ RETURN_IF_ZERO(options, "options argument");
+
+ if ((pt = _get_possible_type(seg_from, segtype_to, new_image_count, NULL))) {
+ *options = pt->options;
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * Log any possible conversions for @lv
+ */
+/* HM FIXME: use log_info? */
+typedef int (*type_flag_fn_t)(void *data);
+/* HM Helper: loop through @pt->flags calling @tfn with argument @data */
+static int _process_type_flags(struct logical_volume *lv, struct possible_type *pt, type_flag_fn_t tfn, void *data)
+{
+ unsigned i;
+ uint64_t t;
+ const struct lv_segment *seg;
+ const struct segment_type *segtype;
+
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+
+ for (i = 0; i < 64; i++) {
+ t = 1ULL << i;
+ if ((t & pt->possible_types) &&
+ !(t & seg->segtype->flags) &&
+ ((segtype = get_segtype_from_flag(lv->vg->cmd, t))))
+ if (!tfn(data ?: (void *) segtype))
+ return 0;
+ }
+
+ return 1;
+}
+
+/* HM Helper: callback to increment unsigned possible conversion types in @*data */
+static int _count_possible_conversions(void *data)
+{
+ unsigned *possible_conversions;
-PFLA("seg->segtype=%s new_segtype=%s", seg->segtype->name, new_segtype->name);
- for (cn = 0; cn < ARRAY_SIZE(pt); cn++)
- if (seg->segtype->flags & pt[cn].current_type) {
-PFLA("current segtype=%s new_segtype=%s", seg->segtype->name);
- /* Skip to striped */
- if (seg_is_striped(seg) && (pt[cn].possible_types & SEG_RAID1))
- continue;
+ RETURN_IF_ZERO((possible_conversions = data), "data pointer argument");
- /* Skip to striped raid0 */
- if (seg_is_any_raid0(seg) && seg->area_count > 1 && (pt[cn].possible_types & SEG_RAID1))
- continue;
+ (*possible_conversions)++;
- return (new_segtype->flags & pt[cn].possible_types) ? 1 : 0;
+ return 1;
+}
+
+/* HM Helper: callback to log possible conversion to segment type in @*data */
+static int _log_possible_conversion(void *data)
+{
+ struct segment_type *segtype = data;
+
+ RETURN_IF_SEGTYPE_ZERO(segtype);
+
+ log_warn("%s%s%s", segtype->name, segtype->descr ? " -> " : "", segtype->descr ?: "");
+
+ return 1;
+}
+
+static int _log_possible_conversion_types(struct logical_volume *lv, const struct segment_type *new_segtype)
+{
+ int duplicating;
+ unsigned possible_conversions = 0;
+ const struct lv_segment *seg;
+ struct possible_type *pt = NULL;
+
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+ duplicating = _lv_is_duplicating(lv);
+
+ /* Count any possible segment types @seg can be directly converted to */
+ while ((pt = _get_possible_type(seg, NULL, 0, pt)))
+ RETURN_IF_ZERO(_process_type_flags(lv, pt, _count_possible_conversions, &possible_conversions),
+ "flags processed");
+
+ if (possible_conversions) {
+ if (!duplicating && !silent_mode()) {
+ const char *alias = _get_segtype_alias(seg->segtype);
+
+ log_print("Converting %s from %s%s%s%c (without --duplicate) is "
+ "directly possible to the following layout%s:",
+ display_lvname(lv), _get_segtype_name(seg->segtype, seg->area_count),
+ *alias ? " (same as " : "", alias, *alias ? ')' : 0,
+ possible_conversions > 1 ? "s" : "");
+
+ pt = NULL;
+
+ /* Print any possible segment types @seg can be directly converted to */
+ while ((pt = _get_possible_type(seg, NULL, 0, pt)))
+ RETURN_IF_ZERO(_process_type_flags(lv, pt, _log_possible_conversion, NULL),
+ "flags processed");
}
+ } else
+ log_warn("Direct conversion of %s LV %s is not possible",
+ lvseg_name(seg), display_lvname(lv));
+
+ if (duplicating)
+ log_warn("To add more duplicated sub LVs, use \"lvconvert --duplicate ...\"");
+ else
+ log_warn("To convert to other arbitrary layouts by duplication, use \"lvconvert --duplicate ...\"");
+
+ return 0;
+}
+
+/*
+ * Find takeover raid flag for segment type flag of @seg
+ */
+/* Segment type flag correspondence raid5 <-> raid6 conversions */
+static uint64_t _r5_to_r6[][2] = {
+ { SEG_RAID5_LS, SEG_RAID6_LS_6 },
+ { SEG_RAID5_LA, SEG_RAID6_LA_6 },
+ { SEG_RAID5_RS, SEG_RAID6_RS_6 },
+ { SEG_RAID5_RA, SEG_RAID6_RA_6 },
+ { SEG_RAID5_N, SEG_RAID6_N_6 },
+};
+
+
+/* Return segment type flag for raid5 -> raid6 conversions */
+static uint64_t _get_r56_flag(const struct lv_segment *seg, unsigned idx)
+{
+ unsigned elems = ARRAY_SIZE(_r5_to_r6);
+
+ RETURN_IF_ZERO(seg, "lv segment argument");
+ RETURN_IF_NONZERO(idx > 1, "proper index");
+
+ while (elems--)
+ if (seg->segtype->flags & _r5_to_r6[elems][idx])
+ return _r5_to_r6[elems][!idx];
+
+ return 0;
+}
+
+/* Return segment type flag for raid5 -> raid6 conversions */
+static uint64_t _raid_seg_flag_5_to_6(const struct lv_segment *seg)
+{
+ RETURN_IF_ZERO(seg, "lv segment argument");
+
+ return _get_r56_flag(seg, 0);
+}
+
+/* Return segment type flag for raid6 -> raid5 conversions */
+static uint64_t _raid_seg_flag_6_to_5(const struct lv_segment *seg)
+{
+ RETURN_IF_ZERO(seg, "lv segment argument");
+
+PFL();
+ return _get_r56_flag(seg, 1);
+}
+/******* END: raid <-> raid conversion *******/
+
+
+
+/****************************************************************************/
+/****************************************************************************/
+/****************************************************************************/
+/* Construction site of takeover handler function jump table solution */
+
+/* Display error message and return 0 if @lv is not synced, else 1 */
+static int _lv_is_synced(struct logical_volume *lv)
+{
+ RETURN_IF_ZERO(lv, "lv argument");
+
+ if (lv->status & LV_NOTSYNCED) {
+ log_error("Can't convert out-of-sync LV %s"
+ " use 'lvchange --resync %s' first",
+ display_lvname(lv), display_lvname(lv));
+ return 0;
+ }
+
+ return 1;
+}
+
+/* Begin: various conversions between layers (aka MD takeover) */
+/*
+ * takeover function argument list definition
+ *
+ * All takeover functions and helper functions
+ * to support them have this list of arguments
+ */
+#define TAKEOVER_FN_ARGUMENTS \
+ struct logical_volume *lv, \
+ const struct segment_type *new_segtype, \
+ int yes, int force, \
+ unsigned new_image_count, \
+ const unsigned new_data_copies, \
+ const unsigned new_stripes, \
+ unsigned new_stripe_size, \
+ unsigned new_region_size, \
+ struct dm_list *allocate_pvs
+
+#if 0
+ unsigned new_extents,
+#endif
+/*
+ * a matrix with types from -> types to holds
+ * takeover function pointers this prototype
+ */
+typedef int (*takeover_fn_t)(TAKEOVER_FN_ARGUMENTS);
+
+/*
+ * Return takeover function table index for @segtype.
+ *
+ * Maps a segment type to its row/column index in the takeover jump table:
+ * 0 = linear, 1 = striped, 2 = mirror, 3 = raid0, 4 = raid0_meta,
+ * 5 = raid1, 6 = raid4/raid5 variants, 7 = raid6 variants,
+ * 8 = raid10 variants, 9 = raid01.
+ */
+static unsigned _takeover_fn_idx(const struct segment_type *segtype, uint32_t area_count)
+{
+	static uint64_t _segtype_to_idx[] = {
+		0, /* linear, seg->area_count = 1 */
+		SEG_AREAS_STRIPED,
+		SEG_MIRROR,
+		SEG_RAID0,
+		SEG_RAID0_META,
+		SEG_RAID1,
+		/* Fix: SEG_RAID5_LS was listed twice in this mask */
+		SEG_RAID4|SEG_RAID5_LS|SEG_RAID5_LA|SEG_RAID5_RS|SEG_RAID5_RA|SEG_RAID5_N,
+		SEG_RAID6_LS_6|SEG_RAID6_LA_6|SEG_RAID6_RS_6|SEG_RAID6_RA_6| \
+		SEG_RAID6_NC|SEG_RAID6_NR|SEG_RAID6_ZR|SEG_RAID6_N_6,
+		SEG_RAID10_NEAR|SEG_RAID10_FAR|SEG_RAID10_OFFSET,
+		SEG_RAID01
+	};
+	unsigned r = ARRAY_SIZE(_segtype_to_idx);
+
+	RETURN_IF_ZERO(segtype, "segment type argument");
+	RETURN_IF_ZERO(area_count, "area count != 0 argument");
+
+PFLA("segtype=%s area_count=%u", segtype->name, area_count);
+	/* Linear special case: striped with a single area is linear (index 0) */
+	if (segtype_is_striped(segtype) && area_count == 1)
+		return 0;
+
+	/* Scan masks from highest index down; first flag match wins */
+	while (r-- > 0)
+		if (segtype->flags & _segtype_to_idx[r]) {
+PFLA("r=%u", r);
+			return r;
+		}
+
+	return 0;
+}
+
+/* Macro to define raid takeover helper function header */
+#define TAKEOVER_FN(function_name) \
+static int function_name(TAKEOVER_FN_ARGUMENTS)
+
+/* Macro to spot takeover helper functions easily */
+#define TAKEOVER_HELPER_FN(function_name) TAKEOVER_FN(function_name)
+#define TAKEOVER_HELPER_FN_REMOVAL_LVS(function_name) \
+static int function_name(TAKEOVER_FN_ARGUMENTS, struct dm_list *removal_lvs)
+
+/*
+ * noop and error takeover handler functions
+ * to allow for logging that an LV already
+ * has the requested type or that the requested
+ * conversion is not possible
+ */
+/* Noop takeover handler for @lv: logs that LV already is of the requested type */
+TAKEOVER_FN(_noop)
+{
+	RETURN_IF_LV_SEG_ZERO(lv, first_seg(lv));
+
+	log_warn("Logical volume %s already is of requested type %s",
+		 display_lvname(lv), lvseg_name(first_seg(lv)));
+
+	/* NOTE(review): returns 0 (failure) even though nothing is wrong with
+	 * the LV itself -- confirm callers expect "nothing to do" to fail */
+	return 0;
+}
+
+/* Error takeover handler for @lv: logs what's (im)possible to convert to (and maybe added later) */
+TAKEOVER_FN(_error)
+{
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ _log_possible_conversion_types(lv, new_segtype);
+
return 0;
}
+/****************************************************************************/
/*
- * Check, if @new_segtype is possible to convert to for
- * given segment type of @lv to reach any @final_segtype.
+ * Conversion via creation of a new LV to put
+ * top-level raid1 on top of the initial mapping and
+ * N additional ones with arbitrary supported layout.
+ */
+/*
+ * Create a new LV @lv_name in @vg with type @segtype.
+ *
+ * @data_copies/@region_size/@stripes/@stripe_size parameterize the layout;
+ * @extents is the LV size (virtual size when @pool_lv_name names a pool);
+ * @change selects the activation mode; @zero requests zeroing; @pvs
+ * restricts allocation (NULL -> all PVs of @vg).
+ *
+ * Inconsistent layout requests are adjusted with a warning rather than
+ * rejected.  Returns the new LV or NULL on error.
+ */
+static struct logical_volume *_lv_create(struct volume_group *vg, const char *lv_name,
+					 const struct segment_type *segtype,
+					 const uint32_t data_copies, const uint32_t region_size,
+					 const uint32_t stripes, const uint32_t stripe_size,
+					 uint32_t extents, enum activation_change change,
+					 int zero,
+					 const char *pool_lv_name,
+					 struct dm_list *pvs)
+{
+	struct logical_volume *r;
+	struct lvcreate_params lp = {
+		.activate = change,
+		.alloc = ALLOC_INHERIT,
+		.extents = pool_lv_name ? 0 : extents,
+		.virtual_extents = pool_lv_name ? extents : 0,
+		.major = -1,
+		.minor = -1,
+		.log_count = 0,
+		.lv_name = lv_name,
+		.mirrors = data_copies,
+		.nosync = 0,
+		.permission = LVM_READ | LVM_WRITE,
+		/* HM FIXME: inherit on stacking LVs? */
+		.read_ahead = DM_READ_AHEAD_AUTO,
+		.region_size = region_size,
+		.segtype = segtype,
+		.stripes = stripes,
+		.stripe_size = stripe_size,
+		.tags = DM_LIST_HEAD_INIT(lp.tags),
+		.temporary = 0,
+		.zero = zero,
+		.pool_name = pool_lv_name,
+	};
+
+	RETURN_IF_ZERO(vg, "vg argument");
+	RETURN_IF_ZERO(lv_name, "lv name argument");
+	RETURN_IF_ZERO(extents, "extents != 0 argument");
+	RETURN_IF_ZERO(data_copies, "data copies argument");
+	RETURN_IF_ZERO(stripes, "stripes argument");
+	RETURN_IF_ZERO(segtype, "new segment argument");
+
+	lp.pvh = pvs ?: &vg->pvs;
+
+PFLA("lv_name=%s segtype=%s data_copies=%u stripes=%u region_size=%u stripe_size=%u extents=%u",
+     lv_name, segtype->name, data_copies, stripes, region_size, stripe_size, extents);
+
+	if (segtype_is_striped(segtype) && stripes == 1) {
+		/* "striped" with a single stripe is plain linear */
+		lp.mirrors = lp.stripes = 1;
+		lp.stripe_size = 0;
+
+	/* Caller should ensure all this... */
+	} else if (segtype_is_raid1(segtype) && stripes != 1) {
+		log_warn("Adjusting stripes to 1 for raid1"); /* message fix: was "1i" */
+		lp.stripes = 1;
+	}
+
+	else if (segtype_is_striped_raid(segtype)) {
+		if (stripes < 2) {
+			log_warn("Adjusting stripes to the minimum of 2");
+			lp.stripes = 2;
+		}
+		if (!lp.stripe_size) {
+			/* stripe_size presumably in 512-byte sectors: 64 == 32KiB -- TODO confirm units */
+			log_warn("Adjusting stripesize to 32KiB");
+			lp.stripe_size = 64;
+		}
+	}
+
+	else if (segtype_is_any_raid10(segtype)) {
+		if (data_copies < 2)
+			lp.mirrors = 2;
+
+		if (data_copies > stripes) {
+			log_error("raid10 data_copies may not be more than stripes (i.e. -mN with N < #stripes)");
+			return_NULL;
+		}
+
+	} else if (segtype_is_mirror(segtype)) {
+		lp.mirrors = data_copies > 1 ? data_copies : 2;
+		lp.log_count = 1;
+		lp.stripes = 1;
+		lp.stripe_size = 0;
+	}
+
+	log_debug_metadata("Creating new logical volume %s/%s.", vg->name, lp.lv_name);
+
+	/* Suppress lv_create_single() output; restore verbosity afterwards */
+	init_silent(1);
+	r = lv_create_single(vg, &lp);
+	init_silent(0);
+
+	return r;
+}
+
+/* Helper: create a unique name from @lv->name and string @(suffix + 1) adding a number */
+static char *_generate_unique_raid_name(struct logical_volume *lv, const char *suffix)
+{
+	char *name;
+	uint32_t s = 0;
+
+	RETURN_IF_ZERO(lv, "lv argument");
+	RETURN_IF_ZERO(suffix, "name suffix argument");
+
+	/* Loop until we found an available one */
+	/* NOTE(review): the loop only exits with a non-NULL @name, so the error
+	 * branch below is unreachable; and if _generate_raid_name() fails
+	 * persistently (e.g. allocation failure) this never terminates --
+	 * confirm the intended loop condition. */
+	while (!(name = _generate_raid_name(lv, suffix, s)))
+		s++;
+
+	if (!name)
+		log_error("Failed to create unique sub LV name for %s", display_lvname(lv));
+
+	return name;
+}
+
+/* Helper: rename single @lv replacing the first occurrence of substring @from with @to
+ * (size computation below handles @from/@to of differing lengths) */
+static int _rename_lv(struct logical_volume *lv, const char *from, const char *to)
+{
+	size_t sz;
+	char *name, *p;
+
+	RETURN_IF_ZERO(lv, "lv argument");
+	RETURN_IF_ZERO((name = (char *) lv->name), "lv name");
+
+	/* Catch being called on non-raid namespace */
+	if (!(p = strstr(lv->name, from)))
+		return 1;
+
+	/* New length: prefix before @from + @to + tail after @from + NUL */
+	sz = p - lv->name + strlen(to) + (strlen(p) - strlen(from)) + 1;
+	RETURN_IF_ZERO((name = dm_pool_alloc(lv->vg->vgmem, sz)), "space for name");
+
+	/* Copy prefix and @to unterminated; the final strcpy of the
+	 * NUL-terminated tail also terminates the new name. */
+	sz = p - lv->name;
+	strncpy(name, lv->name, sz);
+	strncpy(name + sz, to, strlen(to));
+	strcpy(name + sz + strlen(to), p + strlen(from));
+	lv->name = name;
+
+	return 1;
+}
+
+/* HM Helper: rename @lv to string @to and replace substring @from with @to in all sub LV names */
+static int _rename_lv_and_sub_lvs(struct logical_volume *lv, const char *from, const char *to)
+{
+	uint32_t s;
+	struct logical_volume *mlv;
+	struct lv_segment *seg;
+
+	RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+
+	/* Rename metadata (if present) and data sub LV of every area */
+	if (seg->area_count > 1)
+		for (s = 0; s < seg->area_count; s++) {
+			if ((mlv = _seg_metalv_checked(seg, s)) &&
+			    !_rename_lv(mlv, from, to))
+				return 0;
+			if (!_rename_lv(seg_lv(seg, s), from, to))
+				return 0;
+		}
+
+	/* NOTE(review): aliases @to directly into lv->name -- caller must pass
+	 * a string with suitable lifetime (e.g. vg mempool allocated); verify
+	 * at call sites. */
+	lv->name = to;
+
+	return 1;
+}
+
+/* Get maximum name index suffix from all sub LVs of @lv and report in @*max_idx */
+static int _get_max_sub_lv_name_index(struct logical_volume *lv, uint32_t *max_idx)
+{
+	uint32_t s, idx;
+	struct lv_segment *seg;
+
+	RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+	RETURN_IF_SEG_AREA_COUNT_FALSE(seg, 0);
+	RETURN_IF_ZERO(max_idx, "max index argument");
+
+	*max_idx = 0;
+
+	for (s = 0; s < seg->area_count; s++) {
+		/* Any non-LV area has no sub LV name to parse -> error out */
+		if (seg_type(seg, s) != AREA_LV)
+			return 0;
+
+		if (!_lv_name_get_string_index(seg_lv(seg, s), &idx))
+			return 0;
+
+		if (*max_idx < idx)
+			*max_idx = idx;
+	}
+
+	return 1;
+}
+
+/*
+ * Prepare first segment of @lv to suit _shift_image_components()
+ *
+ * Being called with areas arrays one larger than seg->area_count
+ * and all slots shifted to the front with the last one unassigned.
*
- * HM FIXME: complete?
+ * HM FIXME: simplify
*/
-static int _adjust_segtype(struct logical_volume *lv,
- struct segment_type **new_segtype,
- const struct segment_type *final_segtype)
-{
- if (!_is_possible_segtype(lv, *new_segtype)) {
- const char *interim_type = "", *type;
- const struct lv_segment *seg = first_seg(lv);
-
- if (seg_is_striped(seg) || seg_is_any_raid0(seg)) {
- if (segtype_is_any_raid5(*new_segtype))
- interim_type = "raid5_n";
- else if (segtype_is_any_raid6(*new_segtype))
- interim_type = "raid6_n_6";
-
- } else if (seg_is_any_raid6(seg)) {
- if (segtype_is_any_raid5(*new_segtype))
- interim_type = "raid6_ls_6, raid6_la_6, raid6_rs_6, raid6_ra_6 or raid6_n_6";
- else
- interim_type = "raid6_n_6";
-
- } else if (seg_is_raid4(seg) || seg_is_any_raid5(seg)) {
- if (((final_segtype && (segtype_is_linear(final_segtype) ||
- segtype_is_striped(final_segtype))) ||
- segtype_is_any_raid0(*new_segtype)) &&
- seg->area_count == 2)
- goto ok;
- else
- interim_type = "raid5_n";
+static int _prepare_seg_for_name_shift(struct logical_volume *lv)
+{
+ int s;
+ uint32_t idx, max_idx;
+ struct lv_segment *seg;
+
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+ if (!_get_max_sub_lv_name_index(lv, &max_idx))
+ return 0;
- } else if (seg_is_striped(seg))
- interim_type = "raid5_n";
+ seg->area_count++;
+ RETURN_IF_SEG_AREA_COUNT_FALSE(seg, max_idx);
- else {
- log_error("Can't takeover %s to %s", seg->segtype->name, (*new_segtype)->name);
+ for (s = seg->area_count - 1; s > -1; s--) {
+ if (seg_type(seg, s) != AREA_LV)
+ continue;
+
+ RETURN_IF_ZERO(seg_metatype(seg, s) == AREA_LV, "valid metadata sub LV")
+
+ if (!_lv_name_get_string_index(seg_lv(seg, s), &idx))
return 0;
+
+ RETURN_IF_SEG_AREA_COUNT_FALSE(seg, idx);
+
+ if (idx != s) {
+ seg->areas[idx] = seg->areas[s];
+ seg->meta_areas[idx] = seg->meta_areas[s];
+ seg_type(seg, s) = seg_metatype(seg, s) = AREA_UNASSIGNED;
}
+ }
- /* Adjust to interim type */
- type = strrchr(interim_type, ' ');
- type = type ? type + 1 : interim_type;
- if (!(*new_segtype = get_segtype_from_string(lv->vg->cmd, type)))
- return_0;
+ return 1;
+}
- log_warn("Conversion to %s is possible", interim_type);
- log_warn("Selecting %s", (*new_segtype)->name);
+/* HM Helper: rename sub LVs to avoid conflict on creation of new metadata LVs */
+enum rename_dir { to_flat = 0, from_flat, from_dup, to_dup };
+static int _rename_sub_lvs(struct logical_volume *lv, enum rename_dir dir)
+{
+ int type;
+ static const int *ft, from_to[][2] = { { 0, 2 }, { 2, 0 } };
+ uint32_t s;
+ static const char *names[][4] = {
+ { "_rimage_", "_rmeta_", "__rimage_", "__rmeta_" }, /* flat */
+ { "_dup_", "_rmeta_", "__dup_", "__rmeta_" }, /* dup */
+ };
+ struct logical_volume *mlv;
+ struct lv_segment *seg;
+
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+ if (seg_is_thin(seg))
+ return 1;
+
+ RETURN_IF_SEG_AREA_COUNT_FALSE(seg, 0);
+PFL();
+ if (!lv_is_raid(lv) && !lv_is_raid_image(lv))
+ return 1;
+PFL();
+ RETURN_IF_NONZERO(dir < to_flat || dir > to_dup, "valid rename request");
+ type = dir / 2; /* flat or dup names */
+ RETURN_IF_ZERO(type < ARRAY_SIZE(names), "valid type");
+ ft = from_to[!(dir % 2)]; /* from or to indexes */
+
+ log_debug_metadata("Renaming %s sub LVs to avoid name collision", display_lvname(lv));
+ for (s = 0; s < seg->area_count; s++) {
+ if ((mlv = _seg_metalv_checked(seg, s)) &&
+ !_rename_lv(mlv, names[type][ft[0]+1], names[type][ft[1]+1]))
+ return 0;
+
+ if (seg_type(seg, s) == AREA_LV &&
+ !_rename_lv(seg_lv(seg, s), names[type][ft[0]], names[type][ft[1]]))
+ return 0;
}
-ok:
+PFL();
return 1;
}
+/* Rename @dlv and @mlv to/from the "__dup"/"__rmeta" namespace to avoid name collisions during name shift */
+static int _rename_split_duplicate_lv_and_sub_lvs(struct logical_volume *dlv,
+						  struct logical_volume *mlv,
+						  enum rename_dir dir)
+{
+	const char *in[] = { "_dup_", "__dup_" };	/* data image name infixes */
+	const char *mn[] = { "_rmeta_", "__rmeta_" };	/* metadata image name infixes */
+	int d;
+
+	/* d == 1: "_xxx_" -> "__xxx_" (to_dup); d == 0: reverse (from_dup) */
+	switch (dir) {
+	case to_dup:
+		d = 1; break;
+	case from_dup:
+		d = 0; break;
+	default:
+		RETURN_IF_ZERO(0, "proper direction to rename");
+	}
+
+	return _rename_sub_lvs(dlv, dir) &&
+	       _rename_lv(dlv, in[!d], in[d]) &&
+	       _rename_lv(mlv, mn[!d], mn[d]);
+}
+
/*
- * Convert a RAID set in @lv to another RAID level and algorithm defined
- * by @requested_segtype, stripe size set by @new_stripe_size or number
- * of RAID devices requested by @new_stripes.
+ * HM Helper:
*
- * Returns: 1 on success, 0 on failure
+ * remove layer from @lv keeping @sub_lv,
+ * add lv to be removed to @removal_lvs,
+ * rename from flat "_rimage_|_rmeta_" namespace
+ * to "__rimage_|__rmeta_" to avoid name collision,
+ * reset duplicated flag and make visible
*/
-static int _convert_raid_to_raid(struct logical_volume *lv,
- struct segment_type *new_segtype,
- const struct segment_type *final_segtype,
- int yes, int force,
- const unsigned new_stripes,
- const unsigned new_stripe_size,
- struct dm_list *allocate_pvs)
+static int _remove_duplicating_layer(struct logical_volume *lv,
+ struct dm_list *removal_lvs)
{
- struct lv_segment *seg = first_seg(lv);
- unsigned stripes = new_stripes ?: _data_rimages_count(seg, seg->area_count);
- unsigned stripe_size = new_stripe_size ?: seg->stripe_size;
+ struct logical_volume *slv;
+ struct lv_segment *seg;
-PFLA("seg->segtype=%s new_segtype=%s final_segtype=%s stripes=%u new_stripes=%u", seg->segtype->name, new_segtype->name, final_segtype ? final_segtype->name : "NULL", stripes, new_stripes);
- if (new_segtype == seg->segtype &&
- stripes == _data_rimages_count(seg, seg->area_count) &&
- stripe_size == seg->stripe_size) {
-PFLA("stripes=%u stripe_size=%u seg->stripe_size=%u", stripes, stripe_size, seg->stripe_size);
- log_error("Nothing to do");
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+ RETURN_IF_ZERO(seg->area_count == 1, "no single area");
+ RETURN_IF_ZERO((slv = seg_lv(seg, 0)), "no first sub LV");
+PFLA("lv=%s", display_lvname(lv));
+PFLA("slv=%s", display_lvname(slv));
+ /* Ensure proper size of LV, sub LV may be larger due to rounding. */
+ lv->le_count = slv->le_count;
+ lv->size = lv->le_count * lv->vg->extent_size;
+PFLA("lv->le_count=%u lv->size=%llu", lv->le_count, (unsigned long long) lv->size);
+
+ if (!_lv_reset_raid_add_to_list(slv, removal_lvs))
+ return 0;
+PFLA("first_seg(slv)=%s", first_seg(slv) ? lvseg_name(first_seg(slv)) : "");
+ /* Remove the raid1 layer from the LV */
+ if (!remove_layer_from_lv(lv, slv))
+ return_0;
+
+ /* Adjust any thin volume flag not addressed by remove_layer_from_lv() */
+ slv->status &= ~THIN_VOLUME;
+
+PFLA("first_seg(lv)=%s", first_seg(lv) ? lvseg_name(first_seg(lv)) : "");
+ /* HM FIXME: in case of _lv_reduce() recursion bugs, this may hit */
+ RETURN_IF_ZERO((seg = first_seg(lv)), "first segment!?");
+PFL();
+ if (!_rename_sub_lvs(lv, from_flat))
+ return 0;
+PFL();
+ lv->status &= ~LV_DUPLICATED;
+ if (seg_is_thin(seg))
+ lv->status |= THIN_VOLUME;
+
+ lv_set_visible(lv);
+
+ return 1;
+}
+
+/* HM Helper: callback to rename duplicated @lv and its sub LVs to the "__" namespace to avoid collisions */
+static int _pre_raid_split_duplicate_rename_lv_and_sub_lvs(struct logical_volume *lv, void *data)
+{
+	uint32_t s;
+	struct lv_segment *seg;
+
+	RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+	RETURN_IF_NONZERO(data, "data argument allowed");
+
+	/* Rename all remaining sub LVs temporarily to allow for name shift w/o name collision */
+	log_debug_metadata("Renaming split duplicate LV and sub LVs of %s", display_lvname(lv));
+	for (s = 0; s < seg->area_count; s++)
+		if (!_rename_split_duplicate_lv_and_sub_lvs(seg_lv(seg, s), seg_metalv(seg, s), to_dup))
+			return 0;
+
+	return 1;
+}
+
+/* HM Helper: callback to rename duplicated @lv and its sub LVs back from the "__" infixed namespace */
+static int _post_raid_split_duplicate_rename_lv_and_sub_lvs(struct logical_volume *lv, void *data)
+{
+	uint32_t s;
+	struct lv_segment *seg;
+	struct logical_volume *split_lv;
+
+	RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+	RETURN_IF_ZERO((split_lv = data), "valid split LV");
+	RETURN_IF_SEG_AREA_COUNT_FALSE(seg, 0);
+
+	/* Commit/reload the split-off LV before renaming the remaining sub LVs */
+	if (!lv_update_and_reload(split_lv))
+		return_0;
+
+	/* Rename all remaining sub LVs back from their temporary "__" names */
+	log_debug_metadata("Renaming duplicate LV and sub LVs of %s", display_lvname(lv));
+	for (s = 0; s < seg->area_count; s++)
+		if (seg_type(seg, s) == AREA_LV &&
+		    !_rename_split_duplicate_lv_and_sub_lvs(seg_lv(seg, s), seg_metalv(seg, s), from_dup))
+			return 0;
+
+	/* Shift area numerical indexes down */
+	log_debug_metadata("Shifting image components of %s", display_lvname(lv));
+
+	return _prepare_seg_for_name_shift(lv) &&
+	       _shift_image_components(seg);
+}
+
+/* HM Helper: callback to extract last metadata image of @lv and remove top-level raid1 layer */
+static int _pre_raid_split_duplicate_remove_layer(struct logical_volume *lv, void *data)
+{
+	struct dm_list *removal_lvs;
+	struct lv_segment *seg;
+
+	RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+	RETURN_IF_ZERO((removal_lvs = data), "removal LVs list"); /* message fix: was "remova" */
+
+	log_debug_metadata("Extracting last metadata LV of %s", display_lvname(lv));
+	if (!_extract_image_component_sublist(seg, RAID_META, 0, 1, removal_lvs, 1))
+		return 0;
+
+	log_debug_metadata("Removing top-level raid1 LV %s", display_lvname(lv));
+	return _remove_duplicating_layer(lv, removal_lvs);
+}
+
+/* HM Helper: callback to rename all sub LVs of @lv (if raid) back to the flat namespace */
+static int _post_raid_split_duplicate_rename_sub_lvs(struct logical_volume *lv, void *data)
+{
+	struct logical_volume *split_lv;
+
+	RETURN_IF_ZERO((split_lv = data), "split LV");
+
+	/* Commit/reload the split-off LV first */
+	if (!lv_update_and_reload(split_lv))
+		return 0;
+
+	log_debug_metadata("Renaming sub LVs of %s", display_lvname(lv));
+
+	return _rename_sub_lvs(lv, to_flat);
+}
+
+/*
+ * HM Helper:
+ *
+ * split off a sub LV of a duplicating top-level raid1 @lv
+ *
+ * HM FIXME: allow for splitting off duplicated lv with "lvconvert --splitmirrors N # (N > 1)"?
+ * need ***sub_lv_names for this?
+ */
+static int _valid_name_requested(struct logical_volume **lv, const char **sub_lv_name,
+ int layout_properties_requested, const char *what);
+static int _raid_split_duplicate(struct logical_volume *lv, int yes,
+ const char *split_name, uint32_t new_image_count)
+{
+ uint32_t s;
+ void *fn_pre_data;
+ const char *lv_name;
+ struct dm_list removal_lvs;
+ struct lv_segment *seg;
+ struct logical_volume *split_lv = NULL;
+ fn_on_lv_t fn_pre_on_lv, fn_post_on_lv;
+
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+ RETURN_IF_SEG_AREA_COUNT_FALSE(seg, 0);
+ RETURN_IF_ZERO(_lv_is_duplicating(lv), "Called with non-duplicating LV");
+ RETURN_IF_ZERO(split_name, "split name argument");
+ RETURN_IF_ZERO(seg->meta_areas, "metadata segment areas");
+
+ if (!_lv_is_active((lv)))
+ return 0;
+
+ dm_list_init(&removal_lvs);
+
+ /* If user passed in the sub LV name to split off and no --name option, use it */
+ if (!_valid_name_requested(&lv, &split_name, 0 /* properties */, "split"))
+ return 0;
+
+ /* Try to find @split_name amongst sub LVs */
+ if (!(split_lv = _find_lv_in_sub_lvs(lv, split_name, &s))) {
+ log_error("No sub LV %s to split off duplicating LV %s", split_name, display_lvname(lv));
return 0;
}
- /* Check + apply stripe size change */
- if (stripe_size &&
- (stripe_size & (stripe_size - 1) ||
- stripe_size < 8)) {
- log_error("Invalid stripe size on %s", lv->name);
+ /* HM FIXME: limited to one sub LV to split off */
+ if (seg->area_count - new_image_count != 1) {
+ log_error("Only one duplicated sub LV can be split off duplicating LV %s at once",
+ display_lvname(lv));
return 0;
}
- if (seg->stripe_size != stripe_size) {
- if (seg_is_striped(seg) || seg_is_any_raid0(seg)) {
- log_error("Cannot change stripe size on \"%s\"", lv->name);
+ /* Create unique split LV name to use (previous splits may exist) */
+ RETURN_IF_ZERO((lv_name = _generate_unique_raid_name(lv, "split_")), "unique LV name created");
+
+ /* Allow for intentionally splitting off unsynchronized LV in case user e.g. created a duplicated LV in error */
+ if (!_dev_in_sync(lv, s)) {
+ log_warn("Splitting off unsynchronized sub LV %s!",
+ display_lvname(split_lv));
+ if (yes_no_prompt("Do you really want to split off out-of-sync %s sub LV %s into %s/%s [y/n]: ",
+ lvseg_name(first_seg(split_lv)), display_lvname(split_lv),
+ lv->vg->name, lv_name) == 'n')
return 0;
- }
- if (stripe_size > lv->vg->extent_size) {
- log_error("Stripe size for %s too large for volume group extent size", lv->name);
+ } else if (!_raid_in_sync(lv) &&
+ _devs_in_sync_count(lv) < 2) {
+ log_error("Can't split off %s into %s/%s when LV %s is not in sync",
+ split_name, lv->vg->name, lv_name, display_lvname(lv));
+ return 0;
+
+ } else if (!yes && yes_no_prompt("Do you really want to split off %s sub LV %s into %s/%s [y/n]: ",
+ lvseg_name(first_seg(split_lv)), display_lvname(split_lv),
+ lv->vg->name, lv_name) == 'n')
+ return 0;
+
+ if (sigint_caught())
+ return_0;
+
+ log_debug_metadata("Extract metadata image for split LV %s", split_name);
+ if (!_extract_image_component_sublist(seg, RAID_META, s, s + 1, &removal_lvs, 1))
+ return 0;
+
+ /* remove reference from @seg to @split_lv */
+ if (!remove_seg_from_segs_using_this_lv(split_lv, seg))
+ return 0;
+
+ seg_type(seg, s) = AREA_UNASSIGNED;
+
+ log_debug_metadata("Rename duplicated LV %s and and any of its sub LVs before splitting them off",
+ display_lvname(split_lv));
+ if (!_rename_lv_and_sub_lvs(split_lv, split_lv->name, lv_name))
+ return 0;
+
+ seg->area_count--;
+ seg->data_copies--;
+ RETURN_IF_ZERO(seg->area_count == seg->data_copies, "valid data copies");
+
+ lv_set_visible(split_lv);
+ split_lv->status &= ~(LV_NOTSYNCED|LV_DUPLICATED);
+
+ /* Shift areas down if not last one */
+ for ( ; s < seg->area_count; s++) {
+ seg->areas[s] = seg->areas[s + 1];
+ seg->meta_areas[s] = seg->meta_areas[s + 1];
+ }
+
+ /* We have more than one sub LVs -> set up pre/post fns to rename and shift down */
+ if (seg->area_count > 1) {
+ fn_pre_data = NULL;
+ fn_pre_on_lv = _pre_raid_split_duplicate_rename_lv_and_sub_lvs;
+ fn_post_on_lv = _post_raid_split_duplicate_rename_lv_and_sub_lvs;
+
+ /* We are down to the last sub LV -> set up pre/post fns to remove the top-level raid1 mapping */
+ } else {
+ log_print_unless_silent("The duplicating layer will be removed keeping %s sub LV %s for %s",
+ lvseg_name(first_seg(seg_lv(seg, 0))),
+ display_lvname(seg_lv(seg, 0)), display_lvname(lv));
+
+ fn_pre_data = &removal_lvs;
+ fn_pre_on_lv = _pre_raid_split_duplicate_remove_layer;
+ fn_post_on_lv = _post_raid_split_duplicate_rename_sub_lvs;
+ }
+
+ log_debug_metadata("Updating VG metadata; reloading %s and activating split LV %s",
+ display_lvname(lv), display_lvname(split_lv));
+ return _lv_update_reload_fns_reset_eliminate_lvs(lv, &removal_lvs,
+ fn_post_on_lv, split_lv,
+ fn_pre_on_lv, fn_pre_data);
+}
+
+/* HM Helper: callback to rename any sub LVs of @lv to the flat namespace (@data unused) */
+static int _post_raid_unduplicate_rename_sub_lvs(struct logical_volume *lv, void *data)
+{
+	/* Nothing to rename unless a raid LV remained after layer removal */
+	if (!lv_is_raid(lv))
+		return 1;
+
+	return _rename_sub_lvs(lv, to_flat) &&
+	       lv_update_and_reload(lv);
+}
+
+/*
+ * HM Helper:
+ *
+ * extract all rmeta images of the top-level @lv and all but the
+ * @keep_idx data image and put the extracted ones on @removal_lvs.
+ */
+static int _lv_extract_all_images_but_one(struct logical_volume *lv,
+					  uint32_t keep_idx,
+					  struct dm_list *removal_lvs)
+{
+	struct lv_segment *seg;
+
+	RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+	RETURN_IF_ZERO(removal_lvs, "removal LVs list pointer argument");
+
+	/* Extract every metadata image, then the data images below and above @keep_idx */
+	if (!_extract_image_component_list(seg, RAID_META, 0, removal_lvs) ||
+	    (keep_idx &&
+	     !_extract_image_component_sublist(seg, RAID_IMAGE, 0, keep_idx, removal_lvs, 0)) ||
+	    (keep_idx < seg->area_count - 1 &&
+	     !_extract_image_component_sublist(seg, RAID_IMAGE, keep_idx + 1, seg->area_count, removal_lvs, 0))) {
+		log_error(INTERNAL_ERROR "Failed to extract top-level LVs %s images", display_lvname(lv));
+		return 0;
+	}
+
+	/* If we don't keep the first sub LV, move sub LV at @keep_idx areas across */
+	if (keep_idx)
+		seg->areas[0] = seg->areas[keep_idx];
+
+	seg->area_count = 1;
+
+	return 1;
+}
+
+/*
+ * HM Helper:
+ *
+ * remove top-level raid1 @lv and replace with requested @sub_lv_name
+ */
+static int _raid_unduplicate(struct logical_volume *lv,
+ int yes, const char *sub_lv_name)
+{
+ uint32_t keep_idx;
+ struct logical_volume *keep_lv;
+ struct lv_segment *seg, *seg1;
+ struct dm_list removal_lvs;
+
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+ RETURN_IF_SEG_AREA_COUNT_FALSE(seg, 0);
+ RETURN_IF_ZERO(sub_lv_name, "sub LV name");
+
+ if (!_lv_is_duplicating(lv)) {
+ log_error(INTERNAL_ERROR "Called with non-duplicating LV %s",
+ display_lvname(lv));
+ return 0;
+ }
+
+ /* Find the requested sub LV by name */
+ if (!(keep_lv = _find_lv_in_sub_lvs(lv, sub_lv_name, &keep_idx))) {
+ log_error("Duplicated sub LV name %s does not exist in duplicating LV %s",
+ sub_lv_name, display_lvname(lv));
+ return 0;
+ }
+
+ /* Keeping a leg other than the master requires it to be fully in sync! */
+ if (keep_idx && !_raid_in_sync(lv)) {
+ log_error("Can't convert to duplicated sub LV %s when LV %s is not in sync",
+ display_lvname(keep_lv), display_lvname(lv));
+ return 0;
+ }
+
+ if (!yes)
+ log_warn("Really unduplicate LV %s keeping %s LV %s? Check resilience!",
+ display_lvname(lv), lvseg_name(first_seg(keep_lv)), display_lvname(keep_lv));
+ if (lv->le_count != keep_lv->le_count)
+ log_warn("You may want to resize your LV content (e.g. filesystem) after unduplicating it");
+ if (!yes) {
+ RETURN_IF_ZERO((seg1 = first_seg(keep_lv)), "segment in induplicate LV");
+
+ if (yes_no_prompt("Do you want to convert %s to type %s thus "
+ "unduplicating it and removing %u duplicated LV(s)? [y/n]: ",
+ display_lvname(lv),
+ _get_segtype_name(seg1->segtype, seg1->area_count),
+ seg->area_count - 1) == 'n')
return 0;
- }
- if (stripe_size > seg->region_size) {
- log_error("New stripe size for %s is larger than region size", lv->name);
+ if (sigint_caught())
+ return_0;
+ }
+
+ dm_list_init(&removal_lvs);
+
+ /*
+ * Extract all rmeta images of the raid1 top-level LV
+ * and all but the @keep_idx indexed data image.
+ */
+ if (!_lv_extract_all_images_but_one(lv, keep_idx, &removal_lvs))
+ return 0;
+
+ /* Remove top-level raid1 layer keeping first sub LV and update+reload LV */
+ return _remove_duplicating_layer(lv, &removal_lvs) &&
+ _lv_update_reload_fns_reset_eliminate_lvs(lv, &removal_lvs,
+ _post_raid_unduplicate_rename_sub_lvs, lv, NULL);
+
+}
+
+/*
+ * HM Helper:
+ *
+ * create a new duplicating LV for @lv based on parameters
+ * @new_segtype, ... and utilize PVs on @allocate_pvs list
+ * for allocation
+ *
+ * If creation succeeds but any other step fails, try removing
+ * it so that the user only has to remove any created *_dup_* lv
+ * manually in case of a crash.
+ *
+ * Returns the new hidden LV (flagged RAID_IMAGE|LV_DUPLICATED) or NULL.
+ */
+static struct logical_volume *_dup_lv_create(struct logical_volume *lv,
+					     const struct segment_type *new_segtype,
+					     const char *lv_name,
+					     const uint32_t new_data_copies, const uint32_t region_size,
+					     const uint32_t new_stripes, const uint32_t new_stripe_size,
+					     const uint32_t extents, const char *pool_lv_name,
+					     struct dm_list *allocate_pvs)
+{
+	struct logical_volume *r;
+
+	RETURN_IF_LV_SEGTYPE_ZERO(lv, new_segtype);
+
+	log_debug_metadata("Creating unique LV name for destination sub LV");
+	RETURN_IF_ZERO(lv_name, "lv_name argument");
+
+	/* Create the destination LV deactivated, then change names and activate to avoid unsafe table loads */
+	log_debug_metadata("Creating destination sub LV");
+	if (!(r = _lv_create(lv->vg, lv_name, new_segtype, new_data_copies, region_size,
+			     new_stripes, new_stripe_size, extents, CHANGE_ALN, 0 /* zero */,
+			     pool_lv_name, allocate_pvs))) {
+		log_error("Failed to create destination LV %s/%s", lv->vg->name, lv_name);
+		return NULL;	/* fix: was "return 0" in a pointer-returning function */
+	}
+
+	if (extents != r->le_count)
+		log_warn("Destination LV with %u extents is larger than source "
+			 "with %u due to stripe boundary rounding", r->le_count, extents);
+
+	/* Mark as hidden, duplicated raid image sub LV */
+	r->status |= RAID_IMAGE | LV_DUPLICATED;
+	lv_set_hidden(r);
+
+	return r;
+}
+
+/* Helper: callback function to rename metadata sub LVs of top-level duplicating @lv */
+static int _pre_raid_duplicate_rename_metadata_sub_lvs(struct logical_volume *lv, void *data)
+{
+	uint32_t s;
+	struct logical_volume *dup_lv, *lv1;
+	struct lv_segment *seg;
+
+	RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+	RETURN_IF_ZERO((dup_lv = data), "duplicated LV argument");
+	RETURN_IF_ZERO(seg->meta_areas, "metadata areas");
+
+	/* Rename top-level raid1 metadata sub LVs to their temporary names */
+	log_debug_metadata("Renaming sub LVs of %s to temporary names",
+			   display_lvname(lv));
+	for (s = 0; s < seg->area_count; s++)
+		if ((lv1 = _seg_metalv_checked(seg, s)) &&
+		    !_rename_lv(lv1, "_rmeta_", "__rmeta_"))
			return 0;
+
+	/* Activate the sub LVs of the new duplicated destination LV */
+	return _activate_sub_lvs(dup_lv, 0);
+}
+
+/* HM Helper: callback function to rename metadata sub LVs of top-level duplicating @lv back */
+static int _post_raid_duplicate_rename_metadata_sub_lvs_back(struct logical_volume *lv, void *data)
+{
+	uint32_t s;
+	struct lv_segment *seg;
+	struct logical_volume *mlv;
+
+	RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+	RETURN_IF_NONZERO(data, "duplicated LV argument allowed");
+	RETURN_IF_ZERO(seg->meta_areas, "metadata areas");
+
+	/* Rename top-level raid1 metadata sub LVs back to their final names */
+	log_debug_metadata("Renaming sub LVs of %s to final names",
+			   display_lvname(lv));
+	for (s = 0; s < seg->area_count; s++)
+		if ((mlv = _seg_metalv_checked(seg, s)) &&
+		    !_rename_lv(mlv, "__rmeta_", "_rmeta_"))
+			return 0;
+
+	return 1;
+}
+
+/*
+ * HM Helper: raid to raid conversion by duplication
+ *
+ * Inserts a layer on top of the given @lv (if not duplicating already),
+ * creates and allocates a destination LV of ~ the same size (may be rounded)
+ * with the requested @new_segtype and properties (e.g. stripes).
+ */
+static int _raid_duplicate(struct logical_volume *lv,
+ const struct segment_type *new_segtype,
+ int yes, int force,
+ const int new_data_copies,
+ const uint32_t new_region_size,
+ const uint32_t new_stripes,
+ const uint32_t new_stripe_size,
+ const char *pool_lv_name,
+ struct dm_list *allocate_pvs)
+{
+ int duplicating;
+ uint32_t data_copies = new_data_copies, extents, new_area_idx, raid1_image_count, s;
+ char *lv_name, *p, *suffix;
+ struct logical_volume *dup_lv;
+ struct lv_segment *seg;
+
+ /* new_segtype is allowed to be naught */
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+ RETURN_IF_NONZERO(!seg_is_thin(seg) && !seg->area_count, "lv segment areas");
+ RETURN_IF_ZERO(data_copies, "data copies argument");
+ RETURN_IF_ZERO(allocate_pvs, "allocate pvs");
+
+ raid1_image_count = seg->area_count + 1;
+
+ if (data_copies < 2 &&
+ (segtype_is_mirror(new_segtype) ||
+ segtype_is_raid1(new_segtype) ||
+ segtype_is_any_raid10(new_segtype))) {
+ data_copies = seg->data_copies;
+ log_warn("Adjusting data copies to %u", data_copies);
+ }
+
+ if (!(duplicating = _lv_is_duplicating(lv)) &&
+ lv_is_duplicated(lv)) {
+ log_error("Can't duplicate duplicated sub LV %s", display_lvname(lv));
+ if ((p = strchr(lv->name, '_'))) {
+ *p = '\0';
+ log_error("Use \"lvconvert --duplicate --type ...\" on top-level duplicating LV %s!",
+ display_lvname(lv));
+ *p = '_';
}
+
+ return 0;
}
- /* linear/raid1 do not preset stripe size */
- if (!seg->stripe_size &&
- !(seg->stripe_size = find_config_tree_int(lv->vg->cmd, global_raid_stripe_size_default_CFG, NULL)))
+ if (duplicating && !_raid_in_sync(lv)) {
+ log_error("Duplicating LV %s must be in-sync before adding another duplicated sub LV",
+ display_lvname(lv));
return 0;
+ }
-PFLA("seg->segtype=%s new_segtype->name=%s", seg->segtype->name, new_segtype->name);
- if (!is_same_level(seg->segtype, new_segtype) &&
- !_adjust_segtype(lv, &new_segtype, final_segtype))
+ log_warn("A new duplicated %s LV will be allocated and LV %s will be synced to it.",
+ _get_segtype_name(new_segtype, new_stripes), display_lvname(lv));
+ log_warn("When unduplicating LV %s or splitting off a sub LV from %s, you can select any sub LV providing its name via:",
+ display_lvname(lv), display_lvname(lv));
+ log_warn("'lvconvert --unduplicate --name sub-lv-name %s' or 'lvconvert --splitmirror 1 --name sub-lv-name %s'",
+ display_lvname(lv), display_lvname(lv));
+ if (!_yes_no_conversion(lv, new_segtype, yes, force, 1, raid1_image_count, data_copies, new_stripes, new_stripe_size))
return 0;
/*
- * raid0 <-> raid0_meta adding metadata image devices
- * on converting from raid0 -> raid0_meta or removing
- * them going the other way.
+ * Creation of destination LV with intended layout and insertion of raid1 top-layer from here on
*/
- if ((seg_is_raid0(seg) && segtype_is_raid0_meta(new_segtype)) ||
- (seg_is_raid0_meta(seg) && segtype_is_raid0(new_segtype)))
- return _raid0_add_or_remove_metadata_lvs(lv, 1);
+ if (segtype_is_raid1(new_segtype) &&
+ new_data_copies < 2)
+ new_segtype = get_segtype_from_string(lv->vg->cmd, SEG_TYPE_NAME_STRIPED);
+
+ /* Requested size has to be netto, i.e. w/o reshape space */
+ extents = lv->le_count - _reshape_len_per_lv(lv);
/*
- * Staying on the same level -> reshape required to change
- * stripes (i.e. # of disks), stripe size or algorithm
+ * By default, prevent any PVs holding image components from
+ * being used for allocation unless --force provided or
+ * unless new segment type is thin.
*/
- if (is_same_level(seg->segtype, new_segtype))
- return _raid_reshape(lv, new_segtype, yes, force, stripes, stripe_size, allocate_pvs);
+ if (!force && !segtype_is_thin(new_segtype)) {
+ log_debug_metadata("Avoiding coallocation on PVs holding other LVs of %s",
+ display_lvname(lv));
+ if (!_avoid_pvs_with_other_images_of_lv(lv, allocate_pvs))
+ return 0;
+ }
/*
- * HM
- *
- * Up/down takeover of raid levels
+ * Get name for new duplicated LV
*
- * In order to takeover the raid set level N to M (M > N) in @lv, all existing
- * rimages in that set need to be paired with rmeta devs (if not yet present)
- * to store superblocks and bitmaps of the to be taken over raid0/raid1/raid4/raid5/raid6
- * set plus another rimage/rmeta pair has to be allocated for dedicated xor/q.
- *
- * In order to postprocess the takeover of a raid set from level M to M (M > N)
- * in @lv, the last rimage/rmeta devs pair need to be droped in the metadata.
+ * Pick a unique sub LV name when already duplicating
+ * The initial duplicated LV shall be suffixed with '1',
+ * because the master leg shall get '0'
*/
+ if (duplicating) {
+ RETURN_IF_ZERO((lv_name = _generate_unique_raid_name(lv, "dup_")), "unique LV name created");
+ } else {
+ RETURN_IF_ZERO((lv_name = _generate_raid_name(lv, "dup_", 1)), "lv_name created");
+ }
-PFLA("segtype=%s new_segtype->name=%s", seg->segtype->name, new_segtype->name);
- /* Down convert from raid4/5 to linear in case of more than 2 legs */
- if (segtype_is_linear(new_segtype) && seg->area_count > 2) {
- log_error("Can't convert striped %s/%s from %s to %s directly",
- lv->vg->name, lv->name, seg->segtype->name, new_segtype->name);
- log_error("Convert to single stripe first!");
+ if (!(dup_lv = _dup_lv_create(lv, new_segtype, lv_name,
+ new_data_copies, new_region_size,
+ new_stripes, new_stripe_size,
+ extents, pool_lv_name, allocate_pvs)))
return 0;
+
+ /* HM FIXME: remove dup_lv in case of any following failure */
+
+ /* If not yet duplicating -> add the top-level raid1 mapping with given LV as master leg */
+ if (!duplicating) {
+ char *first_name;
+
+ log_debug_metadata("Creating unique LV name for source sub LV");
+ ERR_IF_ZERO((first_name = _generate_raid_name(lv, "dup_", 0)), "first sub LV name created");
+ ERR_IF_ZERO((suffix = strstr(first_name, "_dup_")), "source prefix found");
+ log_debug_metadata("Inserting layer LV on top of source LV %s", display_lvname(lv));
+ lv->status |= LV_DUPLICATED; /* set duplicated flag on LV before it moves a level down */
+ ERR_IF_ZERO((seg = _convert_lv_to_raid1(lv, suffix)), "conversion to raid1 possible");
+ seg->meta_areas = NULL;
+ }
+
+ /* Grow areas arrays for data and metadata devs to add new duplicated LV */
+ log_debug_metadata("Reallocating areas array of %s", display_lvname(lv));
+ ERR_IF_ZERO(_realloc_meta_and_data_seg_areas(lv, seg->area_count + 1),
+ "reallocation of areas array possible");
+
+ new_area_idx = seg->area_count;
+ seg->area_count++; /* Must update area count after resizing it */
+ seg->data_copies = seg->area_count;
+
+ /* Set @layer_lv as the LV of @area of @lv */
+ log_debug_metadata("Add duplicated LV %s to top-level LV %s as raid1 leg %u",
+ display_lvname(dup_lv), display_lvname(lv), new_area_idx);
+ if (!set_lv_segment_area_lv(seg, new_area_idx, dup_lv, dup_lv->le_count, dup_lv->status)) {
+ log_error("Failed to add duplicated sub LV %s to LV %s",
+ display_lvname(dup_lv), display_lvname(lv));
+ goto err;
+ }
+
+ _pvs_allow_allocation(allocate_pvs);
+
+PFLA("seg->area_count=%u", seg->area_count);
+ /* If not duplicating yet, allocate first top-level raid1 metadata LV */
+ if (!duplicating) {
+ struct logical_volume *mlv;
+
+ if (!_alloc_rmeta_for_lv_add_set_hidden(lv, 0, allocate_pvs))
+ goto err;
+
+ mlv = seg_metalv(seg, 0);
+ if (!_avoid_pvs_with_other_images_of_lv(mlv, allocate_pvs))
+ goto err;
}
- if (!(is_level_up(seg->segtype, new_segtype) ?
- _raid_level_up : _raid_level_down)(lv, new_segtype, allocate_pvs))
+PFL();
+ /* Allocate new metadata LV for duplicated sub LV */
+ if (!_alloc_rmeta_for_lv_add_set_hidden(lv, new_area_idx, allocate_pvs))
return 0;
-PFLA("seg->segtype=%s new_segtype->name=%s", seg->segtype->name, new_segtype->name);
+PFL();
+ _pvs_allow_allocation(allocate_pvs);
- return 1;
-} /* _convert_raid_to_raid() */
-/******* END: raid <-> raid conversion *******/
+ for (s = 0; s < new_area_idx; s++)
+ seg_lv(seg, s)->status &= ~LV_REBUILD;
+PFL();
+ dup_lv->status |= (LV_REBUILD|LV_NOTSYNCED);
+ lv_set_visible(lv);
+
+PFLA("lv0->le_count=%u lv1->le_count=%u", seg_lv(seg, 0)->le_count, seg_lv(seg, 1)->le_count);
+ init_mirror_in_sync(0);
+
+ /* _post_raid_duplicate_rename_metadata_sub_lvs_back() will be called between the 2 update+reload calls */
+ return _lv_update_reload_fns_reset_eliminate_lvs(lv, NULL,
+ _post_raid_duplicate_rename_metadata_sub_lvs_back, NULL,
+ _pre_raid_duplicate_rename_metadata_sub_lvs, dup_lv);
+err:
+ lv_remove(dup_lv);
+ return 0;
+}
/*
- * Linear/raid0 <-> raid0/1/4/5 conversions of @lv defined by @new_segtype
+ * Begin takeover helper functions
*/
-static int _convert_linear_or_raid0_to_raid0145(struct logical_volume *lv,
- const struct segment_type *new_segtype,
- uint32_t new_image_count,
- uint32_t new_stripes,
- uint32_t new_stripe_size,
- struct dm_list *allocate_pvs)
+/* Helper: linear -> raid0* */
+TAKEOVER_HELPER_FN(_linear_raid0)
{
- int convert;
- struct lv_segment *seg = first_seg(lv);
+ struct lv_segment *seg;
+ struct dm_list meta_lvs;
- /* linear -> raid1 with N > 1 images */
- if ((convert = seg_is_linear(seg) &&
- (segtype_is_raid1(new_segtype) || new_image_count > 1)))
- /* "lvconvert --type raid1 ..." does not set new_image_count */
- new_image_count = new_image_count > 1 ? new_image_count : 2;
-
- /* linear -> raid4/5 with 2 images */
- else if ((convert = (seg_is_linear(seg) &&
- (segtype_is_raid4(new_segtype) || segtype_is_any_raid5(new_segtype))))) {
- new_image_count = 2;
-
- /* raid0 with _one_ image -> raid1/4/5 with 2 images */
- } else if ((convert = (seg_is_any_raid0(seg) && seg->area_count == 1 &&
- ((segtype_is_raid1(new_segtype) || new_image_count == 2) ||
- segtype_is_raid4(new_segtype) ||
- segtype_is_any_raid5(new_segtype))))) {
- if (seg->segtype == new_segtype)
- if (!(new_segtype = get_segtype_from_string(lv->vg->cmd, SEG_TYPE_NAME_RAID1)))
- return_0;
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, (seg = first_seg(lv)), new_segtype);
+ RETURN_IF_SEG_AREA_COUNT_FALSE(seg, 0);
+
+ dm_list_init(&meta_lvs);
- new_image_count = 2;
-
- /* raid1 with N images -> linear with one image */
- } else if ((convert = (seg_is_raid1(seg) && segtype_is_linear(new_segtype))))
- new_image_count = 1;
-
- /* raid1 with N images -> raid0 with 1 image */
- else if ((convert = (seg_is_raid1(seg) && segtype_is_any_raid0(new_segtype))))
- new_image_count = 1;
-
- /* raid1 <-> raid10/4/5 with 2 images */
- else if ((convert = ((seg_is_raid1(seg) || seg_is_raid4(seg) || seg_is_any_raid5(seg)) &&
- seg->area_count == 2 &&
- !new_stripes &&
- (segtype_is_raid1(new_segtype) ||
- segtype_is_raid10(new_segtype) ||
- segtype_is_raid4(new_segtype) ||
- segtype_is_any_raid5(new_segtype))))) {
- if (seg->segtype == new_segtype) {
- log_error("No change requested");
+ if ((!seg_is_linear(seg) && !seg_is_any_raid0(seg)) ||
+ seg->area_count != 1 ||
+ new_image_count != 1) {
+ log_error(INTERNAL_ERROR "Can't convert non-(linear|raid0) LV or from/to image count != 1");
+ return 0;
+ }
+
+ /* Archive metadata */
+ if (!archive(lv->vg))
+ return_0;
+
+ /* Convert any linear segment to raid1 by inserting a layer and presetting segtype as raid1 */
+ if (seg_is_linear(seg)) {
+ log_debug_metadata("Converting logical volume %s to raid",
+ display_lvname(lv));
+ if (!(seg = _convert_lv_to_raid1(lv, "_rimage_0")))
return 0;
- }
+ }
- if (new_image_count != 2)
- log_warn("Ignoring new image count");
+ /* raid0_meta: need to add an rmeta device to pair it with the given linear device as well */
+ if (segtype_is_raid0_meta(new_segtype)) {
+ log_debug_metadata("Adding raid metadata device to %s",
+ display_lvname(lv));
+ if (!_alloc_and_add_rmeta_devs_for_lv(lv, allocate_pvs))
+ return 0;
+ }
- if (new_stripes)
- log_warn("Ignoring stripes");
+ /* HM FIXME: overloading force argument here! */
+ /* We may be called to convert to !raid0*, i.e. an interim conversion on the way to raid4/5/6 */
+ if (force)
+ return 1;
- if (new_stripe_size)
- log_warn("Ignoring stripe size");
+ seg->segtype = new_segtype;
+ seg->region_size = 0;
+ seg->stripe_size = new_stripe_size;
- seg->segtype = new_segtype;
+ log_debug_metadata("Updating metadata and reloading mappings for %s",
+ display_lvname(lv));
- return lv_update_and_reload(lv);
+ return lv_update_and_reload_origin(lv);
+}
- /* raid10 with 2 images -> raid1 with 2 images */
- } else if (seg_is_raid10(seg) && seg->area_count == 2 &&
- segtype_is_raid1(new_segtype)) {
- seg->segtype = new_segtype;
+/* Helper: linear/raid0 with 1 image <-> raid1/4/5 takeover handler for @lv */
+TAKEOVER_HELPER_FN(_linear_raid14510)
+{
+ struct lv_segment *seg;
+ struct dm_list data_lvs, meta_lvs;
+ struct segment_type *segtype;
- return lv_update_and_reload(lv);
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, (seg = first_seg(lv)), new_segtype);
+ RETURN_IF_SEG_AREA_COUNT_FALSE(seg, 0);
-#if 0
- /* raid10 with N > 2 images -> raid10 with M images */
- } else if ((convert = (seg_is_raid10(seg) && seg->segtype == new_segtype &&
- new_image_count != seg->area_count))) {
- /* Need to support raid10_copies and raid_format! */
- ;
+ dm_list_init(&data_lvs);
+ dm_list_init(&meta_lvs);
+
+ if ((segtype_is_raid4(new_segtype) || segtype_is_any_raid5(new_segtype)) &&
+ (seg->area_count != 1 || new_image_count != 2)) {
+ log_error("Can't convert %s from %s to %s != 2 images",
+ display_lvname(lv), SEG_TYPE_NAME_LINEAR, new_segtype->name);
+ return 0;
+ }
+#if 1
+ /* HM FIXME: elaborate this raid4 restriction more! */
+ if (segtype_is_raid4(new_segtype)) {
+ log_error("Can't convert %s from %s to %s, please use %s",
+ display_lvname(lv), SEG_TYPE_NAME_LINEAR,
+ SEG_TYPE_NAME_RAID4, SEG_TYPE_NAME_RAID5);
+ return 0;
+ }
#endif
+ if (!_yes_no_conversion(lv, new_segtype, yes, force, 0, new_image_count, new_data_copies, 0, 0))
+ return 0;
+
+ /* Archive metadata */
+ if (!archive(lv->vg))
+ return_0;
+
+ new_image_count = new_image_count > 1 ? new_image_count : 2;
+
+ /* HM FIXME: overloading force argument to avoid metadata update in _linear_raid0() */
+ /* Use helper _linear_raid0() to create the initial raid0_meta with one image pair up */
+ if (!(segtype = get_segtype_from_flag(lv->vg->cmd, SEG_RAID0_META)) ||
+ !_linear_raid0(lv, segtype, 0, 1 /* force */, 1, 1 /* data_copies */, 0, 0, 0, allocate_pvs))
+ return 0;
+
+ /* Allocate the additional meta and data LVs requested */
+ log_debug_metadata("Allocating %u additional data and metadata image pairs for %s",
+ new_image_count - 1, display_lvname(lv));
+ if (!_lv_change_image_count(lv, new_image_count, allocate_pvs, NULL))
+ return 0;
+
+ seg = first_seg(lv);
+ seg->segtype = new_segtype;
+ seg->data_copies = new_data_copies;
+ seg->stripe_size = new_stripe_size;
+ seg->region_size = new_region_size;
+
+ return _lv_update_reload_fns_reset_eliminate_lvs(lv, NULL, NULL);
+}
+
+/* Helper: striped/raid0* -> raid4/5/6/10 */
+TAKEOVER_HELPER_FN(_striped_raid0_raid45610)
+{
+ struct lv_segment *seg;
+
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, (seg = first_seg(lv)), new_segtype);
+ RETURN_IF_ZERO(seg->area_count, "area count");
- /* raid4/5 with 2 images -> linear/raid0 with 1 image */
- } else if ((convert = (seg_is_raid4(seg) || seg_is_any_raid5(seg)) && seg->area_count == 2 &&
- (segtype_is_linear(new_segtype) || segtype_is_any_raid0(new_segtype))))
- new_image_count = 1;
+PFLA("data_copies=%u", new_data_copies);
- /* No way to convert raid4/5/6 with > 2 images -> linear! */
- else if ((seg_is_raid4(seg) || seg_is_any_raid5(seg) || seg_is_any_raid6(seg)) &&
- segtype_is_linear(new_segtype))
+ if (segtype_is_raid10_offset(new_segtype)) {
+ log_error("Can't convert LV %s to %s",
+ display_lvname(lv), new_segtype->name);
return 0;
+ }
- if (!seg->stripe_size)
- seg->stripe_size = new_stripe_size ?: DEFAULT_STRIPESIZE;
+ RETURN_IF_NONZERO(segtype_is_any_raid10(new_segtype) && new_data_copies < 2, "#data_copies > 1");
- if (convert) {
- if ((segtype_is_raid0(new_segtype) || segtype_is_raid1(new_segtype)) && new_stripes) {
- log_error("--stripes N incompatible with raid0/1");
+ if (new_data_copies > (segtype_is_raid10_far(new_segtype) ? seg->area_count : new_image_count)) {
+ log_error("N number of data_copies \"--mirrors N-1\" may not be larger than number of stripes");
+ return 0;
+ }
+
+ if (new_stripes && new_stripes != seg->area_count) {
+ log_error("Can't restripe LV %s during conversion", display_lvname(lv));
+ return 0;
+ }
+
+ if (!_yes_no_conversion(lv, new_segtype, yes, force, 0, new_image_count, new_data_copies, 0, 0))
+ return 0;
+
+ /* Archive metadata */
+ if (!archive(lv->vg))
+ return_0;
+
+ /* This helper can be used to convert from striped/raid0* -> raid10 too */
+ if (seg_is_striped(seg)) {
+ log_debug_metadata("Coverting LV %s from %s to %s",
+ display_lvname(lv), SEG_TYPE_NAME_STRIPED, SEG_TYPE_NAME_RAID0);
+ if (!(seg = _convert_striped_to_raid0(lv, 1 /* alloc_metadata_devs */, 0 /* update_and_reload */, allocate_pvs)))
+ return 0;
+ }
+PFL();
+ /* Add metadata LVs */
+ if (seg_is_raid0(seg)) {
+ log_debug_metadata("Adding metadata LVs to %s", display_lvname(lv));
+ if (!_raid0_add_or_remove_metadata_lvs(lv, 0 /* !update_and_reload */, allocate_pvs, NULL))
return 0;
+ }
+PFL();
+ /* For raid10_far, we don't need additional image component pairs, just a size extension */
+ if (!segtype_is_raid10_far(new_segtype)) {
+ /* Add the additional component LV pairs */
+ log_debug_metadata("Adding component LV pairs to %s", display_lvname(lv));
+ if (!_lv_change_image_count(lv, new_image_count, allocate_pvs, NULL))
+ return 0;
+ }
+PFL();
+ /* If this is any raid5 conversion request -> enforce raid5_n, because we convert from striped/raid0* */
+ if (segtype_is_any_raid5(new_segtype)) {
+ if (!segtype_is_raid5_n(new_segtype)) {
+ log_warn("Overwriting requested raid type %s with %s to allow for conversion",
+ new_segtype->name, SEG_TYPE_NAME_RAID5_N);
+ if (!(new_segtype = get_segtype_from_flag(lv->vg->cmd, SEG_RAID5_N)))
+ return 0;
}
+PFL();
+
+ /* If this is any raid6 conversion request -> enforce raid6_n_6, because we convert from striped/raid0* */
+ } else if (segtype_is_any_raid6(new_segtype)) {
+ if (!segtype_is_raid6_n_6(new_segtype)) {
+ log_warn("Overwriting requested raid type %s with %s to allow for conversion",
+ new_segtype->name, SEG_TYPE_NAME_RAID6_N_6);
+ if (!(new_segtype = get_segtype_from_flag(lv->vg->cmd, SEG_RAID6_N_6)))
+ return 0;
+ }
+PFL();
+
+ /* If this is a raid10 conversion request -> reorder the areas to suit it */
+ /* If user wants raid10_offset, reshape afterwards */
+ } else if (segtype_is_raid10_near(new_segtype)) {
+ seg->data_copies = new_data_copies;
+
+ log_debug_metadata("Reordering areas for raid0 -> raid10 takeover");
+ if (!_reorder_raid10_near_seg_areas(seg, reorder_to_raid10_near))
+ return 0;
+PFL();
-PFLA("linear/raid1/4/5 new_image_count=%u stripe_size=%u", new_image_count, seg->stripe_size);
- return _lv_raid_change_image_count(lv, new_segtype, new_image_count, allocate_pvs);
+ } else if (segtype_is_raid10_far(new_segtype)) {
+ seg->data_copies = 1;
+ if (!_lv_raid10_resize_data_copies(lv, new_segtype, new_data_copies, allocate_pvs))
+ return 0;
+PFL();
+ } else if (!segtype_is_raid4(new_segtype)) {
+ /* Can't convert striped/raid0* to e.g. raid10_offset */
+ log_error("Can't convert %s to %s", display_lvname(lv), new_segtype->name);
+ return 0;
}
+PFL();
+ seg->segtype = new_segtype;
+ seg->data_copies = new_data_copies;
+ seg->region_size = new_region_size;
+
+ if (!_check_and_init_region_size(lv))
+ return 0;
+
+ log_debug_metadata("Updating VG metadata and reloading %s LV %s",
+ lvseg_name(seg), display_lvname(lv));
+ if (!_lv_update_reload_fns_reset_eliminate_lvs(lv, NULL, NULL))
+ return 0;
+PFL();
+
+ /* If conversion to raid10, there are no rebuild images/slices -> trigger repair */
+ if ((seg_is_raid10_near(seg) || seg_is_raid10_far(seg)) &&
+ !_lv_cond_repair(lv))
+ return 0;
+PFL();
return 1;
}
-/* Return "linear" for striped @segtype instead of "striped" */
-static const char *_get_segtype_name(const struct segment_type *segtype, unsigned new_image_count)
+
+/* raid0 -> linear */
+TAKEOVER_HELPER_FN(_raid0_linear)
{
- return (segtype_is_striped(segtype) && new_image_count == 1) ? "linear" : segtype->name;
+ struct lv_segment *seg;
+ struct dm_list removal_lvs;
+
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, (seg = first_seg(lv)), new_segtype);
+ RETURN_IF_ZERO(seg->area_count == 1, "area count == 1");
+
+ dm_list_init(&removal_lvs);
+
+ /* Archive metadata */
+ if (!archive(lv->vg))
+ return_0;
+
+ log_debug_metadata("Converting %s from %s to %s",
+ display_lvname(lv),
+ SEG_TYPE_NAME_RAID0, SEG_TYPE_NAME_LINEAR);
+ if (!_convert_raid_to_linear(lv, &removal_lvs))
+ return_0;
+
+ if (!(first_seg(lv)->segtype = get_segtype_from_string(lv->vg->cmd, SEG_TYPE_NAME_STRIPED)))
+ return_0;
+
+ /* HM FIXME: overloading force argument here! */
+ if (force)
+ return 1;
+
+ return _lv_update_reload_fns_reset_eliminate_lvs(lv, &removal_lvs, NULL);
}
-/*
- * Report current number of redundant disks for @total_images and @segtype
- */
-static void _seg_get_redundancy(const struct segment_type *segtype, unsigned total_images, unsigned *nr)
+/* Helper: raid0* with one image -> mirror */
+TAKEOVER_HELPER_FN(_raid0_mirror)
{
- if (segtype_is_raid10(segtype))
- *nr = total_images / 2; /* Only if one in each stripe failing */
+ struct lv_segment *seg;
+ struct segment_type *segtype;
- else if (segtype_is_raid1(segtype))
- *nr = total_images - 1;
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, (seg = first_seg(lv)), new_segtype);
- else if (segtype_is_raid4(segtype) ||
- segtype_is_any_raid5(segtype) ||
- segtype_is_any_raid6(segtype))
- *nr = segtype->parity_devs;
+ if (seg->area_count != 1)
+ return _error(lv, new_segtype, yes, force, 0, 1 /* data_copies */, 0, 0, 0, NULL);
- else
- *nr = 0;
+ new_image_count = new_image_count > 1 ? new_image_count : 2;
+
+ if (!_check_max_mirror_devices(new_image_count))
+ return 0;
+
+ if (!_yes_no_conversion(lv, new_segtype, yes, force, 0, new_image_count, new_image_count, 0, 0))
+ return 0;
+
+ /* Archive metadata */
+ if (!archive(lv->vg))
+ return_0;
+
+ if (seg_is_raid0(first_seg(lv))) {
+ log_debug_metadata("Adding raid metadata device to %s",
+ display_lvname(lv));
+ if (!_alloc_and_add_rmeta_devs_for_lv(lv, allocate_pvs))
+ return 0;
+ }
+
+ /* First convert to raid1... */
+ if (!(segtype = get_segtype_from_flag(lv->vg->cmd, SEG_RAID1)))
+ return 0;
+
+ log_debug_metadata("Converting %s from %s to %s adding %u image component pairs",
+ display_lvname(lv),
+ lvseg_name(seg), new_segtype->name,
+ new_image_count - seg->area_count);
+ if (!_linear_raid14510(lv, segtype, 0, 0, new_image_count, new_image_count, 0 /* new_stripes */,
+ new_stripe_size, new_region_size, allocate_pvs))
+ return 0;
+
+ seg->region_size = new_region_size;
+
+ /* ...second convert to mirror */
+ log_debug_metadata("Converting %s from %s to %s",
+ display_lvname(lv),
+ segtype->name, new_segtype->name);
+ return _convert_raid1_to_mirror(lv, new_segtype, new_image_count, new_region_size,
+ allocate_pvs, 1 /* !update_and_reload */, NULL);
}
-/*
- * lv_raid_convert
- * @lv
- * @new_segtype
- *
- * Convert @lv from one RAID type (or 'mirror' segtype) to @new_segtype,
- * change RAID algorithm (e.g. left symmetric to right asymmetric),
- * add/remove LVs to/from a RAID LV or change stripe sectors
- *
- * Non dm-raid changes are factored in e.g. "mirror" and "striped" related
- * fucntions called from here.
- * All the rest of the raid <-> raid conversions go into a function
- * _convert_raid_to_raid() of their own called from here.
- *
- * Returns: 1 on success, 0 on failure
- */
-/*
- * [18:42] <lvmguy> agk: what has to be changed when getting "Performing unsafe table load while..."
- * [18:50] <agk> Ah, that depends on the context
- * [18:51] <agk> as you're doing something new, we need to look at the trace and work out what to do
- * [18:51] <agk> What it means is:
- * [18:52] <agk> if a device is suspended, i/o might get blocked and you might be unable to allocate memory
- * [18:52] <agk> doing a table load needs memory
- * [18:52] <agk> So if you have suspend + load, then you could get deadlock
- * [18:52] <agk> and it's warning about that
- * [18:52] <agk> but not every situation is like that - there are false positives
- * [18:53] <agk> So get the -vvvv trace from the command, then grep out the ioctls
- * [18:53] <agk> and look at the sequence and see what is supended at the time of the load
- * [18:54] <agk> IOW a suspend can cause a later table load to block - and it won't clear until you get a resume - but that resume depends on the load completing, which isn't going to happen
- * [18:54] <lvmguy> I thought it was trying to prevent OOM. need analyze the details...
- * [18:54] <agk> so the code normally does: load, suspend, resume in that order
- * [18:54] <agk> never suspend, load, resume
- * [18:55] <agk> but when you get complex operations all that dependency tree code tries to deal with this
- * [18:56] <lvmguy> yep, the sequences I have to do look like they fall into this latter realm ;)
- * [18:56] <agk> - it tries to sort all the operations on the various devices into a safe order in which to perform them
- * [18:58] <agk> So normally, (1) get the actual list of operations it's performing. (2) work out if there is an easy fix by performing them in a different order - if so, we work out how to change the code to do that (often needs hacks)
- * [18:59] <agk> - if not, then we look for an alternative strategy, usually by splitting operations into more than one step which can be done within the dependency rules
- * [19:02] <lvmguy> let me figure out dependency details then we can discuss
- * [19:03] <agk> - kabi is *very* familiar with fixing these sorts of problems:)
- * [19:04] <agk> - we had to go through it all for thin and cache
- * [19:04] <agk> But so far, we've not yet hit a situation we couldn't solve
- * [19:04] <lvmguy> k
- * */
-/*
- * TODO:
- * - review size calculations in raid1 <-> raid4/5
- * - review stripe size usage on conversion from/to striped/nonstriped segment types
- * - review reshape space alloc/free
- * - conversion raid0 -> raid10 only mentions redundancy = 1 instead of 1..#stripes maximum
- * - keep ti->len small on initial disk adding reshape and grow after it has finished
- * in order to avoid bio_endio in the targets map method?
- */
-int lv_raid_convert(struct logical_volume *lv,
- const struct segment_type *new_segtype,
- int yes, int force,
- unsigned new_image_count,
- const unsigned new_stripes,
- unsigned new_stripe_size,
- struct dm_list *allocate_pvs)
+/* raid0 with one image -> raid1 */
+TAKEOVER_HELPER_FN(_raid0_raid1)
 {
- int r, segtype_change, stripe_size_change, y;
- unsigned cur_redundancy, new_redundancy;
- struct lv_segment *seg = first_seg(lv);
- const struct segment_type *final_segtype = NULL;
- const struct segment_type *new_segtype_tmp = new_segtype;
- const struct segment_type *striped_segtype;
- struct lvinfo info = { 0 };
+ struct lv_segment *seg;
+
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, (seg = first_seg(lv)), new_segtype);
+ RETURN_IF_ZERO(seg->area_count == 1, "single area");
+ RETURN_IF_ZERO(seg_is_any_raid0(seg) && seg->area_count == 1,
+ "conversion of non-raid0 LV or with area count != 1");
+
+ new_image_count = new_image_count > 1 ? new_image_count : 2;
- if (!new_segtype) {
- log_error(INTERNAL_ERROR "New segtype not specified");
+ if (!_check_max_raid_devices(new_image_count))
 return 0;
+
+ if (!_yes_no_conversion(lv, new_segtype, yes, force, 0, new_image_count, new_image_count, 0, 0))
+ return 0;
+
+ /* Archive metadata */
+ if (!archive(lv->vg))
+ return_0;
+
+ if (seg_is_raid0(seg)) {
+ log_debug_metadata("Adding raid metadata device to %s",
+ display_lvname(lv));
+ if (!_alloc_and_add_rmeta_devs_for_lv(lv, allocate_pvs))
+ return 0;
 }
- if (!(striped_segtype = get_segtype_from_string(lv->vg->cmd, SEG_TYPE_NAME_STRIPED)))
+ log_debug_metadata("Converting %s from %s to %s adding %u image component pairs",
+ display_lvname(lv),
+ lvseg_name(seg), new_segtype->name,
+ new_image_count - seg->area_count);
+ seg->segtype = new_segtype;
+ seg->region_size = new_region_size;
+ if (!_lv_change_image_count(lv, new_image_count, allocate_pvs, NULL))
+ return 0;
+
+ /* Master leg is the first sub LV */
+ seg_lv(seg, 0)->status &= ~LV_REBUILD;
+
+ return _lv_update_reload_fns_reset_eliminate_lvs(lv, NULL, NULL);
+}
+
+/* Helper: mirror -> raid0* */
+TAKEOVER_HELPER_FN(_mirror_raid0)
+{
+ struct lv_segment *seg;
+ struct dm_list removal_lvs;
+
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, (seg = first_seg(lv)), new_segtype);
+
+ dm_list_init(&removal_lvs);
+
+ if (!seg_is_mirrored(seg)) {
+ log_error(INTERNAL_ERROR "Can't convert non-mirrored segment of LV %s",
+ display_lvname(lv));
+ return 0;
+ }
+
+ if (!_lv_is_synced(lv))
+ return 0;
+
+ if (!_yes_no_conversion(lv, new_segtype, yes, force, 0, new_image_count, 1, 0, 0))
+ return 0;
+
+ /* Archive metadata */
+ if (!archive(lv->vg))
return_0;
- /* Given segtype of @lv */
- if (!seg_is_striped(seg) && /* Catches linear = "overloaded striped with one area" as well */
- !seg_is_mirror(seg) &&
- !seg_is_raid(seg))
- goto err;
+ log_debug_metadata("Converting mirror LV %s to raid", display_lvname(lv));
+ if (!_convert_mirror_to_raid(lv, new_segtype, 1, new_region_size, allocate_pvs,
+ 0 /* update_and_reload */, &removal_lvs))
+ return 0;
- /* Requested segtype */
- if (!segtype_is_linear(new_segtype) &&
- !segtype_is_striped(new_segtype) &&
- !segtype_is_mirror(new_segtype) &&
- !segtype_is_raid(new_segtype))
- goto err;
+ if (segtype_is_raid0(new_segtype)) {
+ /* Remove rmeta LVs */
+ log_debug_metadata("Extracting and renaming metadata LVs from LV %s",
+ display_lvname(lv));
+ if (!_extract_image_component_list(seg, RAID_META, 0, &removal_lvs))
+ return 0;
+ }
- /* Can't convert from linear to raid6 directly! */
- if (seg_is_linear(seg) &&
- segtype_is_any_raid6(new_segtype))
- goto err;
-
- /* Define new image count if not passed in */
- new_image_count = new_image_count ?: seg->area_count;
+ seg->segtype = new_segtype;
+ seg->region_size = 0;
- /* Define new stripe size if not passed in */
- new_stripe_size = new_stripe_size ?: seg->stripe_size;
+ return _lv_update_reload_fns_reset_eliminate_lvs(lv, &removal_lvs, NULL);
+}
- segtype_change = seg->segtype != new_segtype;
- stripe_size_change = !seg->stripe_size && seg->stripe_size != new_stripe_size;
- if (segtype_change && stripe_size_change) {
- log_error("Can't change raid type and stripe size at once on %s/%s",
- lv->vg->name, lv->name);
+/* Helper: convert mirror with 2 images <-> raid4/5 */
+TAKEOVER_HELPER_FN(_mirror_r45)
+{
+ struct lv_segment *seg;
+ struct dm_list removal_lvs;
+
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, (seg = first_seg(lv)), new_segtype);
+
+ dm_list_init(&removal_lvs);
+
+ if (!seg_is_mirror(seg) ||
+ seg->area_count != 2) {
+ log_error("Can't convert %s between %s and %s/%s with != 2 images",
+ display_lvname(lv), SEG_TYPE_NAME_MIRROR,
+ SEG_TYPE_NAME_RAID4, SEG_TYPE_NAME_RAID5);
return 0;
}
- /* @lv has to be active locally */
- if (vg_is_clustered(lv->vg) && !lv_is_active_exclusive_locally(lv)) {
- log_error("%s/%s must be active exclusive locally to"
- " perform this operation.", lv->vg->name, lv->name);
+ if (!_lv_is_synced(lv))
+ return 0;
+
+ if (!_yes_no_conversion(lv, new_segtype, yes, force, 0, new_image_count, 2, 0, 0))
+ return 0;
+
+ /* Archive metadata */
+ if (!archive(lv->vg))
+ return_0;
+
+ if (segtype_is_mirror(new_segtype)) {
+ if (!_lv_free_reshape_space(lv)) {
+ log_error(INTERNAL_ERROR "Failed to free reshape space of %s",
+ display_lvname(lv));
+ return 0;
+ }
+
+ if (!(seg->segtype = get_segtype_from_flag(lv->vg->cmd, SEG_RAID1)) ||
+ !_convert_raid1_to_mirror(lv, new_segtype, 2, new_region_size, allocate_pvs,
+ 0 /* !update_and_reload */, &removal_lvs))
+ return 0;
+
+ } else if (!_convert_mirror_to_raid(lv, new_segtype, 0, new_region_size, NULL, 0 /* update_and_reload */, NULL))
+ return 0;
+
+ seg->region_size = new_region_size;
+
+ return _lv_update_reload_fns_reset_eliminate_lvs(lv, &removal_lvs, NULL);
+}
+
+/* Helper: raid1 -> raid0* */
+TAKEOVER_HELPER_FN(_raid1_raid0)
+{
+ struct lv_segment *seg;
+ struct dm_list removal_lvs;
+
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, (seg = first_seg(lv)), new_segtype);
+
+ dm_list_init(&removal_lvs);
+
+ if (!seg_is_raid1(seg)) {
+ log_error(INTERNAL_ERROR "Can't convert non-raid1 LV %s",
+ display_lvname(lv));
 return 0;
 }
- if (!_raid_in_sync(lv)) {
- log_error("Unable to convert %s/%s while it is not in-sync",
- lv->vg->name, lv->name);
+ if (!_yes_no_conversion(lv, new_segtype, yes, force, 0, new_image_count, 1, 0, 0))
+ return 0;
+
+ /* Archive metadata */
+ if (!archive(lv->vg))
+ return_0;
+
+ seg->segtype = new_segtype;
+ if (!_lv_change_image_count(lv, 1, allocate_pvs, &removal_lvs))
+ return 0;
+
+ /* Remove rmeta last LV if raid0 */
+ if (segtype_is_raid0(new_segtype)) {
+ log_debug_metadata("Extracting and renaming metadata LVs from LV %s",
+ display_lvname(lv));
+ if (!_extract_image_component_list(seg, RAID_META, 0, &removal_lvs))
+ return 0;
+ }
+
+ seg->region_size = 0;
+
+ return _lv_update_reload_fns_reset_eliminate_lvs(lv, &removal_lvs, NULL);
+}
+
+/* raid45 -> raid0* / striped */
+TAKEOVER_HELPER_FN(_r456_r0_striped)
+{
+ struct lv_segment *seg;
+ struct dm_list removal_lvs;
+
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, (seg = first_seg(lv)), new_segtype);
+
+PFLA("new_stripes=%u new_image_count=%u", new_stripes, new_image_count);
+ if (!seg_is_raid4(seg) && !seg_is_raid5_n(seg) && !seg_is_raid6_n_6(seg)) {
+ log_error("LV %s has to be of type raid4/raid5_n/raid6_n_6 to allow for this conversion",
+ display_lvname(lv));
return 0;
}
- if (!lv_info(lv->vg->cmd, lv, 0, &info, 1, 0) && driver_version(NULL, 0)) {
- log_error("Unable to retrieve logical volume information: aborting");
+ /* Necessary when converting to raid0/striped w/o redundancy? */
+ if (!_raid_in_sync(lv))
+ return 0;
+
+ if (!_yes_no_conversion(lv, new_segtype, yes, force, 0, new_image_count, 1, 0, 0))
+ return 0;
+
+ /* Archive metadata */
+ if (!archive(lv->vg))
+ return_0;
+
+ dm_list_init(&removal_lvs);
+
+ if (!_lv_free_reshape_space(lv)) {
+ log_error(INTERNAL_ERROR "Failed to free reshape space of %s",
+ display_lvname(lv));
return 0;
}
- /* Get number of redundant disk for current and new segtype */
- _seg_get_redundancy(seg->segtype, seg->area_count, &cur_redundancy);
- _seg_get_redundancy(new_segtype, new_image_count = new_image_count ?: lv_raid_image_count(lv), &new_redundancy);
+ /* Remove meta and data LVs requested */
+ if (!_lv_change_image_count(lv, new_image_count, allocate_pvs, &removal_lvs))
+ return 0;
- /* Check conversions to "linear" and adjust type including support for "-m0" */
- if (((seg_is_raid4(seg) && seg->area_count == 2) ||
- (seg_is_any_raid5(seg) && seg->area_count == 2) ||
- seg_is_raid1(seg)) &&
- new_image_count == 1)
- new_segtype_tmp = striped_segtype;
+ if (!(seg->segtype = get_segtype_from_flag(lv->vg->cmd, SEG_RAID0_META)))
+ return_0;
+
+ if (segtype_is_striped(new_segtype)) {
+PFLA("seg->area_count=%u seg->len=%u seg->area_len=%u", seg->area_count, seg->len, seg->area_len);
+ if (!_convert_raid0_to_striped(lv, 0, &removal_lvs))
+ return_0;
+
+ } else if (segtype_is_raid0(new_segtype) &&
+ !_raid0_add_or_remove_metadata_lvs(lv, 0 /* update_and_reload */, allocate_pvs, &removal_lvs))
+ return_0;
+
+ seg = first_seg(lv);
+ seg->data_copies = 1;
+ seg->region_size = 0;
+ return _lv_update_reload_fns_reset_eliminate_lvs(lv, &removal_lvs, NULL);
+}
+
+/* Helper: raid1 with N images or raid4/5* with 2 images <-> linear */
+TAKEOVER_HELPER_FN(_raid14510_linear)
+{
+ struct lv_segment *seg;
+ struct dm_list removal_lvs;
+
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, (seg = first_seg(lv)), new_segtype);
+
+ dm_list_init(&removal_lvs);
+PFL();
+ /* Only raid1 may have != 2 images when converting to linear */
+ if (!seg_is_raid1(seg) && seg->area_count > 2) {
+ log_error("Can't convert type %s LV %s with %u images",
+ lvseg_name(seg), display_lvname(lv), seg->area_count);
+ return 0;
+ }
+PFL();
+ if (!_raid_in_sync(lv))
+ return 0;
+PFL();
+ if (!_yes_no_conversion(lv, new_segtype, yes, force, 0, new_image_count, 1, 0, 0))
+ return 0;
+
+ /* Archive metadata */
+ if (!archive(lv->vg))
+ return_0;
+PFL();
/*
- * In case of any resilience related conversion -> ask the user unless "-y/--yes" on command line
+ * Have to remove any reshape space which may be at the beginning of
+ * the component data images, or linear won't be happy about the data content
*/
- /* HM FIXME: need to reorder redundany and conversion checks to avoid bogus user messages */
-PFLA("cur_redundancy=%u new_redundancy=%u", cur_redundancy, new_redundancy);
- y = yes;
- if (new_redundancy == cur_redundancy) {
- if (!new_stripes)
- log_info("INFO: Converting active%s %s/%s %s%s%s%s will keep "
- "resilience of %u disk failure%s",
- info.open_count ? " and open" : "", lv->vg->name, lv->name,
- seg->segtype != new_segtype_tmp ? "from " : "",
- seg->segtype != new_segtype_tmp ? _get_segtype_name(seg->segtype, seg->area_count) : "",
- seg->segtype != new_segtype_tmp ? " to " : "",
- seg->segtype != new_segtype_tmp ? _get_segtype_name(new_segtype_tmp, new_image_count) : "",
- cur_redundancy,
- (!cur_redundancy || cur_redundancy > 1) ? "s" : "");
+ if (!_lv_free_reshape_space(lv)) {
+ log_error(INTERNAL_ERROR "Failed to free reshape space of %s",
+ display_lvname(lv));
+ return 0;
+ }
- else
- y = 1;
+ /* Reduce image count to one */
+ if (!(seg->segtype = get_segtype_from_flag(lv->vg->cmd, SEG_RAID1)) ||
+ !_lv_change_image_count(lv, 1, allocate_pvs, &removal_lvs))
+ return 0;
- } else if (new_redundancy > cur_redundancy)
- log_info("INFO: Converting active%s %s/%s %s%s%s%s will extend "
- "resilience from %u disk failure%s to %u",
- info.open_count ? " and open" : "", lv->vg->name, lv->name,
- seg->segtype != new_segtype_tmp ? "from " : "",
- seg->segtype != new_segtype_tmp ? _get_segtype_name(seg->segtype, seg->area_count) : "",
- seg->segtype != new_segtype_tmp ? " to " : "",
- seg->segtype != new_segtype_tmp ? _get_segtype_name(new_segtype_tmp, new_image_count) : "",
- cur_redundancy,
- (!cur_redundancy || cur_redundancy > 1) ? "s" : "",
- new_redundancy);
+ if (!_convert_raid_to_linear(lv, &removal_lvs))
+ return_0;
- else if (new_redundancy &&
- new_redundancy < cur_redundancy)
- log_warn("WARNING: Converting active%s %s/%s %s%s%s%s will reduce "
- "resilience from %u disk failures to just %u",
- info.open_count ? " and open" : "", lv->vg->name, lv->name,
- seg->segtype != new_segtype_tmp ? "from " : "",
- seg->segtype != new_segtype_tmp ? _get_segtype_name(seg->segtype, seg->area_count) : "",
- seg->segtype != new_segtype_tmp ? " to " : "",
- seg->segtype != new_segtype_tmp ? _get_segtype_name(new_segtype_tmp, new_image_count) : "",
- cur_redundancy, new_redundancy);
+ first_seg(lv)->region_size = 0;
- else if (!new_redundancy && cur_redundancy)
- log_warn("WARNING: Converting active%s %s/%s from %s to %s will loose "
- "all resilience to %u disk failure%s",
- info.open_count ? " and open" : "", lv->vg->name, lv->name,
- _get_segtype_name(seg->segtype, seg->area_count),
- _get_segtype_name(new_segtype_tmp, new_image_count),
- cur_redundancy, cur_redundancy > 1 ? "s" : "");
+ return _lv_update_reload_fns_reset_eliminate_lvs(lv, &removal_lvs, NULL);
+}
- else
- y = 1;
+/* Helper: raid1 with N images to M images (N != M) and raid4/5 to raid6* */
+TAKEOVER_HELPER_FN(_raid145_raid1_raid6)
+{
+ struct lv_segment *seg;
+ struct dm_list removal_lvs;
+
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, (seg = first_seg(lv)), new_segtype);
+ RETURN_IF_NONZERO(!seg_is_raid1(seg) && !seg_is_raid4(seg) && !seg_is_any_raid5(seg),
+ "valid segment type");
+
+ dm_list_init(&removal_lvs);
+
+ if (!_raid_in_sync(lv))
+ return 0;
+
+ if (!_yes_no_conversion(lv, new_segtype, yes, force, 0, new_image_count, new_data_copies, 0, 0))
+ return 0;
+
+ /* Archive metadata */
+ if (!archive(lv->vg))
+ return_0;
+
+ if (!_lv_change_image_count(lv, new_image_count, allocate_pvs, &removal_lvs))
+ return 0;
+
+ seg->segtype = new_segtype;
+ if (segtype_is_raid1(new_segtype)) {
+ seg->stripe_size = 0;
+ seg->data_copies = new_image_count;
+ } else
+ seg->data_copies = new_data_copies;
+
+ seg->region_size = new_region_size;
+
+ return _lv_update_reload_fns_reset_eliminate_lvs(lv, &removal_lvs, NULL);
+}
+
+/* Helper: raid1/5 with 2 images <-> raid4/5/10 or raid4 <-> raid5_n with any image count (no change to count!) */
+TAKEOVER_HELPER_FN(_raid145_raid4510)
+{
+ struct lv_segment *seg;
+
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, (seg = first_seg(lv)), new_segtype);
+
+ if (!seg_is_raid1(seg) &&
+ !seg_is_raid4(seg) &&
+ !seg_is_any_raid5(seg)) {
+ log_error(INTERNAL_ERROR "Called on LV %s with wrong segment type %s",
+ display_lvname(lv), lvseg_name(seg));
+ return 0;
+ }
+
+ if (segtype_is_any_raid10(new_segtype)) {
+ if (!segtype_is_raid10_near(new_segtype)) {
+ log_error("Conversion of LV %s to raid10 has to be to raid10_near",
+ display_lvname(lv));
+ return 0;
+ }
+ seg->data_copies = seg->area_count;
+ }
+
+ if (!_raid_in_sync(lv))
+ return 0;
+
+ if (new_image_count)
+ log_error("Ignoring new image count for %s", display_lvname(lv));
+
+ /* Overwrite image count */
+ new_image_count = seg->area_count;
+
+ if (!_yes_no_conversion(lv, new_segtype, yes, force, 0, new_image_count, 2, 0, 0))
+ return 0;
+
+ /* Archive metadata */
+ if (!archive(lv->vg))
+ return_0;
+
+ /*
+ * In case I convert to a non-reshapable mapping, I have to remove
+ * any reshape space which may be at the beginning of the component
+ * data images or the data content will be mapped to an offset
+ */
+ if (!_lv_free_reshape_space(lv)) {
+ log_error(INTERNAL_ERROR "Failed to free reshape space of %s", display_lvname(lv));
+ return 0;
+ }
- /* Support "-mN" option from linear to raid1 */
- if ((seg_is_linear(seg) || (seg_is_any_raid0(seg) && seg->area_count == 1)) &&
- seg->segtype == new_segtype && new_image_count > 1)
- if (!(new_segtype_tmp = get_segtype_from_string(lv->vg->cmd, SEG_TYPE_NAME_RAID1)))
+ if (seg_is_raid4(seg) && segtype_is_any_raid5(new_segtype)) {
+ if (!(seg->segtype = get_segtype_from_flag(lv->vg->cmd, SEG_RAID5_N)))
return_0;
+ } else
+ seg->segtype = new_segtype;
- if (!y) {
- if (segtype_change &&
- yes_no_prompt("Do you really want to convert %s/%s with type %s to %s? [y/n]: ",
- lv->vg->name, lv->name,
- _get_segtype_name(seg->segtype, seg->area_count),
- _get_segtype_name(new_segtype_tmp, new_image_count)) == 'n') {
- log_error("Logical volume %s/%s NOT converted", lv->vg->name, lv->name);
+ seg->stripe_size = new_stripe_size ?: DEFAULT_STRIPESIZE;
+ seg->region_size = new_region_size;
+
+ return _lv_update_reload_fns_reset_eliminate_lvs(lv, NULL, NULL);
+}
+
+/* Helper: raid10 -> striped/raid0/raid01 */
+TAKEOVER_HELPER_FN_REMOVAL_LVS(_raid10_striped_r0)
+{
+ int raid10_far;
+ struct lv_segment *seg;
+ uint32_t data_copies;
+#if 0
+ /* Save data_copies and le_count for raid10_far conversion */
+ uint32_t le_count = lv->le_count;
+#endif
+
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, (seg = first_seg(lv)), new_segtype);
+
+ raid10_far = seg_is_raid10_far(seg);
+ data_copies = seg->data_copies;
+ RETURN_IF_ZERO(data_copies, "data copies > 0");
+
+ if (!segtype_is_striped(new_segtype) &&
+ !segtype_is_any_raid0(new_segtype)) {
+ log_error(INTERNAL_ERROR "Called for %s", new_segtype->name);
+ return 0;
+ }
+
+ if (seg_is_raid10_offset(seg)) {
+ log_error("Can't convert %s LV %s to %s",
+ lvseg_name(seg), display_lvname(lv), new_segtype->name);
+ log_error("Please use \"lvcovert --duplicate ...\"");
+ return 0;
+ }
+
+ if (!_raid_in_sync(lv))
+ return 0;
+
+ if (!_yes_no_conversion(lv, new_segtype, yes, force, 0, new_image_count, 1, 0, 0))
+ return 0;
+
+ /* Archive metadata */
+ if (!archive(lv->vg))
+ return_0;
+
+ if (!_lv_free_reshape_space(lv)) {
+ log_error(INTERNAL_ERROR "Failed to free reshape space of %s", display_lvname(lv));
+ return 0;
+ }
+
+ if (seg_is_raid10_near(seg)) {
+ /* Don't reduce seg->data_copies before reordering! */
+ log_debug_metadata("Reordering areas for %s LV %s -> %s takeover",
+ lvseg_name(seg), display_lvname(lv), new_segtype->name);
+ if (!_reorder_raid10_near_seg_areas(seg, reorder_from_raid10_near))
+ return 0;
+
+ new_image_count = seg->area_count / seg->data_copies;
+
+ /* Remove the last half of the meta and data image pairs */
+ log_debug_metadata("Removing data and metadata image LV pairs from %s", display_lvname(lv));
+ if (!_lv_change_image_count(lv, new_image_count, allocate_pvs, removal_lvs))
+ return 0;
+
+ /* Adjust raid10_near size to raid0/striped */
+ RETURN_IF_ZERO(seg_type(seg, 0) == AREA_LV, "first data sub lv");
+ seg->area_len = seg_lv(seg, 0)->le_count;
+ seg->len = seg->area_len * seg->area_count;
+ lv->le_count = seg->len;
+
+ /* raid10_far: shrink LV size to striped/raid0* */
+ } else if (raid10_far && !_lv_raid10_resize_data_copies(lv, new_segtype, 1, NULL)) {
+ log_error("Failed to reduce raid10_far LV %s to %s size",
+ display_lvname(lv), new_segtype->name);
+ return 0;
+ }
+
+ seg->data_copies = 1;
+
+PFLA("seg->len=%u seg->area_len=%u seg->area_count=%u", seg->len, seg->area_len, seg->area_count);
+
+ if (segtype_is_striped(new_segtype)) {
+ /* -> striped */
+ if (!(seg->segtype = get_segtype_from_flag(lv->vg->cmd, SEG_RAID0_META)))
+ return_0;
+
+ if (!_convert_raid0_to_striped(lv, 0, removal_lvs))
+ return 0;
+
+ seg = first_seg(lv);
+
+ /* -> raid0 (no metadata images) */
+ } else if (segtype_is_raid0(new_segtype) &&
+ !_raid0_add_or_remove_metadata_lvs(lv, 0 /* update_and_reload */, allocate_pvs, removal_lvs))
+ return 0;
+
+PFLA("seg->stripe_size=%u", seg->stripe_size);
+PFLA("seg->chunk_size=%u", seg->chunk_size);
+ seg->segtype = new_segtype;
+ seg->region_size = 0;
+
+ /* HM FIXME: overloading force argument here! */
+ return force ? 1 : _lv_update_reload_fns_reset_eliminate_lvs(lv, removal_lvs, NULL);
+}
+
+/* Helper: raid10 with 2/N (if appropriate) images <-> raid1/raid4/raid5* */
+TAKEOVER_HELPER_FN(_raid10_r1456)
+{
+ struct lv_segment *seg;
+ struct dm_list removal_lvs;
+
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, (seg = first_seg(lv)), new_segtype);
+ RETURN_IF_ZERO(seg->data_copies, "data copies > 0");
+ RETURN_IF_ZERO(new_segtype, "lv new segment type argument");
+
+ dm_list_init(&removal_lvs);
+
+ if (seg_is_any_raid10(seg)) {
+ if (!seg_is_raid10_near(seg)) {
+ log_error(INTERNAL_ERROR "Can't takeover %s LV %s",
+ lvseg_name(seg), display_lvname(lv));
return 0;
}
- if (stripe_size_change &&
- yes_no_prompt("Do you really want to convert %s/%s from stripesize %d to stripesize %d? [y/n]: ",
- lv->vg->name, lv->name,
- seg->stripe_size, new_stripe_size) == 'n') {
- log_error("Logical volume %s/%s NOT converted", lv->vg->name, lv->name);
+ if (seg->data_copies != seg->area_count) {
+ log_error(INTERNAL_ERROR "Can't takeover %s LV %s with data copies != areas!",
+ lvseg_name(seg), display_lvname(lv));
return 0;
}
+
+ } else if (seg->area_count != 2 ) {
+ log_error("Can't convert %s from %s to %s with != 2 images",
+ display_lvname(lv), lvseg_name(seg), new_segtype->name);
+ return 0;
}
- if (sigint_caught())
+
+
+ if (!_raid_in_sync(lv))
+ return 0;
+
+ if (!_yes_no_conversion(lv, new_segtype, yes, force, 0, new_image_count, 2, 0, 0))
+ return 0;
+
+ /* Archive metadata */
+ if (!archive(lv->vg))
return_0;
- /* Now archive metadata after the user has confirmed */
+ /* Free any reshape space */
+ if (!_lv_free_reshape_space(lv)) {
+ log_error(INTERNAL_ERROR "Failed to free reshape space of %s", display_lvname(lv));
+ return 0;
+ }
+
+ seg->segtype = new_segtype;
+ seg->region_size = new_region_size;
+
+ return _lv_update_reload_fns_reset_eliminate_lvs(lv, &removal_lvs, NULL);
+}
+/* End takeover helper functions */
+
+/*
+ * Begin all takeover functions referenced via the 2-dimensional _takeover_fn[][] matrix
+ */
+/* Linear -> raid0 */
+TAKEOVER_FN(_l_r0)
+{
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ return _lv_has_segments_with_n_areas(lv, 1) &&
+ _linear_raid0(lv, new_segtype, yes, force, 1, 1, 0, new_stripe_size, 0, allocate_pvs);
+}
+
+/* Linear -> raid1 */
+TAKEOVER_FN(_l_r1)
+{
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ return _lv_has_segments_with_n_areas(lv, 1) &&
+ _linear_raid14510(lv, new_segtype, yes, force,
+ new_image_count, new_image_count,
+ 0 /* new_stripes */, 0, new_region_size,
+ allocate_pvs);
+}
+
+/* Linear -> raid4/5 */
+TAKEOVER_FN(_l_r45)
+{
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ return (_lv_has_segments_with_n_areas(lv, 1) &&
+ _linear_raid14510(lv, new_segtype, yes, force,
+ 2 /* new_image_count */, 2, 0 /* new_stripes */,
+ new_stripe_size, new_region_size, allocate_pvs));
+}
+
+/* Linear -> raid10 */
+TAKEOVER_FN(_l_r10)
+{
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ return _lv_has_segments_with_n_areas(lv, 1) &&
+ _linear_raid14510(lv, new_segtype, yes, force,
+ 2 /* new_image_count */ , 2, 0 /* new_stripes */,
+ new_stripe_size, new_region_size, allocate_pvs);
+}
+
+/* HM Helper: convert @lv from striped -> raid0(_meta) */
+static int _striped_raid0(struct logical_volume *lv,
+ const struct segment_type *new_segtype,
+ int yes, int force, int alloc_metadata_devs,
+ struct dm_list *allocate_pvs)
+{
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+ if (alloc_metadata_devs)
+ RETURN_IF_ZERO(allocate_pvs && !dm_list_empty(allocate_pvs), "allocate pvs");
+
+ if (!_yes_no_conversion(lv, new_segtype, yes, force, 0, 0, 1, 0, 0))
+ return 0;
+
+ /* Archive metadata */
if (!archive(lv->vg))
return_0;
- new_segtype = new_segtype_tmp;
+ return _convert_striped_to_raid0(lv, alloc_metadata_devs, 1 /* update_and_reload */, allocate_pvs) ? 1 : 0;
+}
+
+/* Striped -> raid0 */
+TAKEOVER_FN(_s_r0)
+{
+ return _striped_raid0(lv, new_segtype, yes, force, 0, allocate_pvs);
+}
+
+/* Striped -> raid0_meta */
+TAKEOVER_FN(_s_r0m)
+{
+ return _striped_raid0(lv, new_segtype, yes, force, 1, allocate_pvs);
+}
+/* Striped -> raid4/5 */
+TAKEOVER_FN(_s_r45)
+{
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
- /****************************************************************************/
- /* raid1 with N images -> raid1 with M images (N != M ) */
- if ((seg_is_linear(seg) || (seg_is_any_raid0(seg) && seg->area_count == 1) || seg_is_raid1(seg)) &&
- segtype_is_raid1(new_segtype)) {
- new_image_count = new_image_count > 1 ? new_image_count : 2;
+ return _striped_raid0_raid45610(lv, new_segtype, yes, force, first_seg(lv)->area_count + 1,
+ 2 /* data_copies*/, 0, 0, new_region_size, allocate_pvs);
+}
+
+/* Striped -> raid6 */
+TAKEOVER_FN(_s_r6)
+{
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ return _striped_raid0_raid45610(lv, new_segtype, yes, force, first_seg(lv)->area_count + 2,
+ 3 /* data_copies*/, 0, 0, new_region_size, allocate_pvs);
+}
+
+TAKEOVER_FN(_s_r10)
+{
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+PFL();
+ return _striped_raid0_raid45610(lv, new_segtype, yes, force,
+ first_seg(lv)->area_count * new_data_copies,
+ new_data_copies, 0, 0, new_region_size, allocate_pvs);
+}
+
+/* mirror -> raid0 */
+TAKEOVER_FN(_m_r0)
+{
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ return _mirror_raid0(lv, new_segtype, yes, force, 1, 1 /* data_copies */, 0, 0, 0, allocate_pvs);
+}
+
+/* mirror -> raid0_meta */
+TAKEOVER_FN(_m_r0m)
+{
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ return _mirror_raid0(lv, new_segtype, yes, force, 1, 1 /* data_copies */, 0, 0, 0, allocate_pvs);
+}
+
+/* Mirror -> raid1 */
+TAKEOVER_FN(_m_r1)
+{
+ struct dm_list removal_lvs;
+
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ dm_list_init(&removal_lvs);
+
+ if (!_yes_no_conversion(lv, new_segtype, yes, force, 0, new_image_count, new_image_count, 0, 0))
+ return 0;
+
+ /* Archive metadata */
+ if (!archive(lv->vg))
+ return_0;
+
+ return _convert_mirror_to_raid(lv, new_segtype, new_image_count, new_region_size,
+ allocate_pvs, 1 /* update_and_reload */, &removal_lvs);
+}
+
+/* Mirror with 2 images -> raid4/5 */
+TAKEOVER_FN(_m_r45)
+{
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ return _mirror_r45(lv, new_segtype, yes, force, 0, 1 /* data_copies */, 0, 0, new_region_size, allocate_pvs);
+}
+
+/* Mirror with 2 images -> raid10 */
+TAKEOVER_FN(_m_r10)
+{
+ struct lv_segment *seg;
+
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
- /* HM FIXME: yes_no_prompt() ? */
- return _lv_raid_change_image_count(lv, new_segtype, new_image_count, allocate_pvs);
+ seg = first_seg(lv);
+ if (seg->area_count != 2) {
+ log_error("Can't convert %s from %s to %s with != 2 images",
+ display_lvname(lv), SEG_TYPE_NAME_MIRROR, new_segtype->name);
+ return 0;
}
+ if (!_lv_is_synced(lv))
+ return 0;
+
+ /* Archive metadata */
+ if (!archive(lv->vg))
+ return_0;
+
+ if (!_convert_mirror_to_raid(lv, new_segtype, 0, new_region_size, NULL, 0 /* update_and_reload */, NULL))
+ return 0;
+
+ seg->segtype = new_segtype;
+
+ return lv_update_and_reload(lv);;
+}
+
+
+/* raid0 -> linear */
+TAKEOVER_FN(_r0_l)
+{
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ return _raid0_linear(lv, new_segtype, yes, force, 0, 1 /* data_copies */, 0, 0, 0, allocate_pvs);
+}
+
+/* raid0 with one image -> mirror */
+TAKEOVER_FN(_r0_m)
+{
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ return _raid0_mirror(lv, new_segtype, yes, force, new_image_count,
+ 1 /* data_copies */, 0, 0, new_region_size, allocate_pvs);
+}
+
+/* raid0 -> raid0_meta */
+TAKEOVER_FN(_r0_r0m)
+{
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+PFL();
+ /* Archive metadata */
+ if (!archive(lv->vg))
+ return_0;
+
+ return _raid0_add_or_remove_metadata_lvs(lv, 1, allocate_pvs, NULL);
+}
+
+/* raid0 -> striped */
+TAKEOVER_FN(_r0_s)
+{
+ struct dm_list removal_lvs;
+
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ dm_list_init(&removal_lvs);
+
+ /* Archive metadata */
+ if (!archive(lv->vg))
+ return_0;
+
+ return _convert_raid0_to_striped(lv, 1, &removal_lvs);
+}
+
+/* raid0 with one image -> raid1 */
+TAKEOVER_FN(_r0_r1)
+{
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ return _raid0_raid1(lv, new_segtype, yes, force, new_image_count,
+ 1 /* data_copies */, 0, 0, new_region_size, allocate_pvs);
+}
+
+/* raid0 -> raid4/5_n */
+TAKEOVER_FN(_r0_r45)
+{
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ return _striped_raid0_raid45610(lv, new_segtype, yes, force, first_seg(lv)->area_count + 1,
+ 1 /* data_copies */, 0, 0, new_region_size, allocate_pvs);
+}
+
+/* raid0 -> raid6_n_6 */
+TAKEOVER_FN(_r0_r6)
+{
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ return _striped_raid0_raid45610(lv, new_segtype, yes, force, first_seg(lv)->area_count + 2,
+ 1 /* data_copies */, 0, 0, new_region_size, allocate_pvs);
+}
+
+/* raid0 with N images (N > 1) -> raid10 */
+TAKEOVER_FN(_r0_r10)
+{
+ uint32_t data_copies = new_data_copies;
+
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ if (segtype_is_raid10_near(new_segtype) && data_copies == 1)
+ data_copies++;
+
+ return _striped_raid0_raid45610(lv, new_segtype, yes, force, first_seg(lv)->area_count * new_data_copies,
+ new_data_copies, 0, 0, new_region_size, allocate_pvs);
+}
+
+/* raid0_meta -> linear */
+TAKEOVER_FN(_r0m_l)
+{
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ return _raid0_linear(lv, new_segtype, yes, force, 0, 1 /* data_copies */, 0, 0, 0, allocate_pvs);
+}
+
+/* raid0_meta -> mirror */
+TAKEOVER_FN(_r0m_m)
+{
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ return _raid0_mirror(lv, new_segtype, yes, force, new_image_count,
+ 1 /* data_copies */, 0, 0, new_region_size, allocate_pvs);
+}
+
+/* raid0_meta -> raid0 */
+TAKEOVER_FN(_r0m_r0)
+{
+ struct dm_list removal_lvs;
+
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+PFL();
+ dm_list_init(&removal_lvs);
+
+ /* Archive metadata */
+ if (!archive(lv->vg))
+ return_0;
+
+ return _raid0_add_or_remove_metadata_lvs(lv, 1, allocate_pvs, &removal_lvs);
+}
+
+/* raid0_meta -> striped */
+TAKEOVER_FN(_r0m_s)
+{
+ struct dm_list removal_lvs;
+
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ dm_list_init(&removal_lvs);
+
+ /* Archive metadata */
+ if (!archive(lv->vg))
+ return_0;
+
+ return _convert_raid0_to_striped(lv, 1, &removal_lvs);
+}
+
+/* raid0_meta with 1 image -> raid1 */
+TAKEOVER_FN(_r0m_r1)
+{
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ return _raid0_raid1(lv, new_segtype, yes, force, new_image_count,
+ 1 /* data_copies */, 0, 0, new_region_size, allocate_pvs);
+}
+
+/* raid0_meta -> raid4/5_n */
+TAKEOVER_FN(_r0m_r45)
+{
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ return _striped_raid0_raid45610(lv, new_segtype, yes, force, first_seg(lv)->area_count + 1,
+ 1 /* data_copies */, 0, 0, new_region_size, allocate_pvs);
+}
+
+/* raid0_meta -> raid6_n_6 */
+TAKEOVER_FN(_r0m_r6)
+{
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ return _striped_raid0_raid45610(lv, new_segtype, yes, force, first_seg(lv)->area_count + 2,
+ 1 /* data_copies */, 0, 0, new_region_size, allocate_pvs);
+}
+
+
+/* raid0_meta with 1 image -> raid10 */
+TAKEOVER_FN(_r0m_r10)
+{
+ uint32_t data_copies = new_data_copies;
+
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ if (segtype_is_raid10_near(new_segtype) && data_copies == 1)
+ data_copies++;
+
+ return _striped_raid0_raid45610(lv, new_segtype, yes, force, first_seg(lv)->area_count * data_copies,
+ data_copies, 0, 0, new_region_size, allocate_pvs);
+}
+
+
+/* raid1 with N images -> linear */
+TAKEOVER_FN(_r1_l)
+{
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+PFL();
+ return _raid14510_linear(lv, new_segtype, yes, force, 1, 1, 0, 0, 0, allocate_pvs);
+}
+
+/* raid1 with N images -> striped */
+TAKEOVER_FN(_r1_s)
+{
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+PFL();
+ return _raid14510_linear(lv, new_segtype, yes, force, 1, 1, 0, 0, 0, allocate_pvs);
+}
+
+/* raid1 -> mirror */
+TAKEOVER_FN(_r1_m)
+{
+ struct dm_list removal_lvs;
+
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ dm_list_init(&removal_lvs);
+
+ if (!_raid_in_sync(lv))
+ return 0;
- /****************************************************************************/
/*
- * Linear/raid0 <-> raid0/1/4/5 conversions
+ * FIXME: support this conversion or don't invite users to switch back to "mirror"?
+ * I find this at least valuable in case of an erroneous conversion to raid1
*/
- if (!_convert_linear_or_raid0_to_raid0145(lv, new_segtype,
- new_image_count, new_stripes, new_stripe_size,
- allocate_pvs))
+ if (!yes && yes_no_prompt("WARNING: Do you really want to convert %s to "
+ "non-recommended \"%s\" type? [y/n]: ",
+ display_lvname(lv), SEG_TYPE_NAME_MIRROR) == 'n') {
+ log_warn("Logical volume %s NOT converted to \"%s\"",
+ display_lvname(lv), SEG_TYPE_NAME_MIRROR);
return 0;
+ }
+ if (sigint_caught())
+ return_0;
+ /* Archive metadata */
+ if (!archive(lv->vg))
+ return_0;
- /****************************************************************************/
+ return _convert_raid1_to_mirror(lv, new_segtype, new_image_count, new_region_size,
+ allocate_pvs, 1, &removal_lvs);
+}
+
+
+/* raid1 -> raid0 */
+TAKEOVER_FN(_r1_r0)
+{
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ return _raid1_raid0(lv, new_segtype, yes, force, 1, 1 /* data_copies */, 0, 0, 0, allocate_pvs);
+}
+
+/* raid1 -> raid0_meta */
+TAKEOVER_FN(_r1_r0m)
+{
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ return _raid1_raid0(lv, new_segtype, yes, force, 1, 1 /* data_copies */, 0, 0, 0, allocate_pvs);
+}
+
+TAKEOVER_FN(_r1_r1)
+{
+ struct lv_segment *seg;
+
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, (seg = first_seg(lv)), new_segtype);
+ RETURN_IF_ZERO(seg_is_raid1(seg), "raid1 segment");
+ RETURN_IF_ZERO(segtype_is_raid1(new_segtype), "raid1 new segment type");
+ RETURN_IF_NONZERO(_lv_is_duplicating(lv), "duplicating LV allowed");
+
+ if (seg->area_count == new_data_copies) {
+ log_error("No change in number of mirrors in %s", display_lvname(lv));
+ return 0;
+ }
+
+ return _raid145_raid1_raid6(lv, new_segtype, yes, force, new_image_count,
+ new_image_count, 0, 0, new_region_size, allocate_pvs);
+}
+
+/* raid1 with 2 legs -> raid4/5 */
+TAKEOVER_FN(_r1_r45)
+{
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ if (first_seg(lv)->area_count != 2) {
+ log_error("Can't convert %s from %s to %s with != 2 images",
+ display_lvname(lv),
+ SEG_TYPE_NAME_RAID1, new_segtype->name);
+ return 0;
+ }
+
+ return _raid145_raid4510(lv, new_segtype, yes, force, new_image_count,
+ 1 /* data_copies */, 0, 0, new_region_size, allocate_pvs);
+}
+/****************************************************************************/
+
+/* raid1 with N legs or duplicating one -> raid10_near */
+TAKEOVER_FN(_r1_r10)
+{
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ if (!segtype_is_raid10_near(new_segtype)) {
+ log_error("Conversion of %s to %s prohibited",
+ display_lvname(lv), new_segtype->name);
+ log_error("Please use \"lvconvert --duplicate ...\"");
+ return 1;
+ }
+
+ return _raid145_raid4510(lv, new_segtype, yes, force, new_image_count,
+ 1 /* data_copies */, 0 /* stripes */, 0, new_region_size, allocate_pvs);
+}
+
+/* raid45 with 2 images -> linear */
+TAKEOVER_FN(_r45_l)
+{
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ if (first_seg(lv)->area_count != 2) {
+ log_error("Can't convert %s from %s/%s to %s with != 2 images",
+ display_lvname(lv), SEG_TYPE_NAME_RAID4,
+ SEG_TYPE_NAME_RAID5, SEG_TYPE_NAME_LINEAR);
+ return 0;
+ }
+
+ return _raid14510_linear(lv, new_segtype, yes, force, 1, 1, 0, 0, 0, allocate_pvs);
+}
+
+/* raid4/5 -> striped */
+TAKEOVER_FN(_r45_s)
+{
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+PFL();
+ return _r456_r0_striped(lv, new_segtype, yes, force, first_seg(lv)->area_count - 1,
+ 1 /* data_copies */, 0, 0, 0, allocate_pvs);
+}
+
+/* raid4/5 with 2 images -> mirror */
+TAKEOVER_FN(_r45_m)
+{
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ return _mirror_r45(lv, new_segtype, yes, force, 0, 1 /* data_copies */, 0, 0, new_region_size, allocate_pvs);
+}
+
+/* raid4/5 -> raid0 */
+TAKEOVER_FN(_r45_r0)
+{
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ return _r456_r0_striped(lv, new_segtype, yes, force, first_seg(lv)->area_count - 1,
+ 1 /* data_copies */, 0, 0, 0, allocate_pvs);
+}
+
+/* raid4/5 -> raid0_meta */
+TAKEOVER_FN(_r45_r0m)
+{
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ return _r456_r0_striped(lv, new_segtype, yes, force, first_seg(lv)->area_count - 1,
+ 1 /* data_copies */, 0, 0, 0, allocate_pvs);
+}
+
+/* raid4/5 with 2 images -> raid1 */
+TAKEOVER_FN(_r45_r1)
+{
+ struct lv_segment *seg;
+
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, (seg = first_seg(lv)), new_segtype);
+ RETURN_IF_ZERO(seg_is_raid4(seg) || seg_is_any_raid5(seg), "raid4/5");
+
+ if (seg->area_count != 2) {
+ log_error("Can't convert %s from %s to %s with != 2 images",
+ display_lvname(lv), lvseg_name(seg), SEG_TYPE_NAME_RAID1);
+ return 0;
+ }
+
+ return _raid145_raid4510(lv, new_segtype, yes, force, 2,
+ 1 /* data_copies */, 0, 0, new_region_size, allocate_pvs);
+}
+
+/* raid4 <-> raid5_n */
+TAKEOVER_FN(_r45_r54)
+{
+ struct lv_segment *seg;
+ const struct segment_type *segtype_sav = new_segtype;
+
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, (seg = first_seg(lv)), new_segtype);
+
+ if (!((seg_is_raid4(seg) && segtype_is_any_raid5(new_segtype)) ||
+ (seg_is_raid5_n(seg) && segtype_is_raid4(new_segtype)))) {
+ log_error(INTERNAL_ERROR "Called with %s -> %s on LV %s",
+ lvseg_name(seg), new_segtype->name, display_lvname(lv));
+ return 0;
+ }
+
+ if (seg_is_raid4(seg) &&
+ !(new_segtype = get_segtype_from_flag(lv->vg->cmd, SEG_RAID5_N)))
+ return_0;
+
+ if (segtype_sav != new_segtype)
+ log_warn("Adjust new segtype to %s to allow for takeover",
+ lvseg_name(seg));
+
+ seg->segtype = new_segtype;
+
+ return lv_update_and_reload(lv);
+}
+
+/* raid4/5* <-> raid6* */
+TAKEOVER_FN(_r45_r6)
+{
+ struct lv_segment *seg;
+
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, (seg = first_seg(lv)), new_segtype);
+
+ if (seg_is_raid4(seg)) {
+ const struct segment_type *segtype_sav = new_segtype;
+
+ if (segtype_is_any_raid5(new_segtype) &&
+ !(new_segtype = get_segtype_from_flag(lv->vg->cmd, SEG_RAID5_N)))
+ return_0;
+
+ else if (segtype_is_any_raid6(new_segtype) &&
+ !(new_segtype = get_segtype_from_flag(lv->vg->cmd, SEG_RAID6_N_6)))
+ return_0;
+
+ if (segtype_sav != new_segtype)
+ log_warn("Adjust new segtype to %s to allow for takeover",
+ lvseg_name(seg));
+ }
+
+ if (seg->area_count < 3) {
+ log_error("Please convert %s from 1 stripe to at least 2 with \"lvconvert --stripes 2 %s\" "
+ "first for this conversion",
+ display_lvname(lv), display_lvname(lv));
+ return 0;
+ }
+
+ if (seg_is_any_raid5(seg) &&
+ segtype_is_any_raid6(new_segtype) &&
+ !(new_segtype = get_segtype_from_flag(lv->vg->cmd, _raid_seg_flag_5_to_6(seg)))) {
+ log_error(INTERNAL_ERROR "Failed to get raid5 -> raid6 conversion type");
+ return_0;
+ }
+
+ return _raid145_raid1_raid6(lv, new_segtype, yes, force, seg->area_count + 1,
+ 3 /* data_copies */, 0, 0, new_region_size, allocate_pvs);
+}
+
+/* raid6 -> striped */
+TAKEOVER_FN(_r6_s)
+{
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ return _r456_r0_striped(lv, new_segtype, yes, force, first_seg(lv)->area_count - 2,
+ 1 /* data_copies */, 0, 0, 0, allocate_pvs);
+}
+
+/* raid6 -> raid0 */
+TAKEOVER_FN(_r6_r0)
+{
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ return _r456_r0_striped(lv, new_segtype, yes, force, first_seg(lv)->area_count - 2,
+ 1 /* data_copies */, 0, 0, 0, allocate_pvs);
+}
+
+/* raid6 -> raid0_meta */
+TAKEOVER_FN(_r6_r0m)
+{
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ return _r456_r0_striped(lv, new_segtype, yes, force, first_seg(lv)->area_count - 2,
+ 1 /* data_copies */, 0, 0, 0, allocate_pvs);
+}
+
/*
 * raid6* -> raid4/5*
 *
 * Removes one parity image (area_count - 1) after checking that the
 * source/target layout pair supports a direct MD takeover:
 * raid4 can only be reached from raid6_n_6; the rotating-parity
 * raid6 layouts (zr/nc/nr) can only go to raid6_n_6 first.
 */
TAKEOVER_FN(_r6_r45)
{
	struct lv_segment *seg;
	struct dm_list removal_lvs;

	RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, (seg = first_seg(lv)), new_segtype);

	/* Takeover removes a device -> LV must be fully synchronized */
	if (!_raid_in_sync(lv))
		return 0;

	if (segtype_is_raid4(new_segtype) &&
	    !seg_is_raid6_n_6(seg)) {
		log_error("LV %s has to be of type %s to allow for this conversion",
			  display_lvname(lv), SEG_TYPE_NAME_RAID6_N_6);
		return 0;
	}

	/* zr/nc/nr layouts have no direct raid5 takeover; demand raid6_n_6 target */
	if ((seg_is_raid6_zr(seg) ||
	     seg_is_raid6_nc(seg) ||
	     seg_is_raid6_nr(seg)) &&
	    !segtype_is_raid6_n_6(new_segtype)) {
		log_error("LV %s has to be of type %s,%s,%s,%s or %s to allow for direct conversion",
			  display_lvname(lv),
			  SEG_TYPE_NAME_RAID6_LS_6, SEG_TYPE_NAME_RAID6_LA_6,
			  SEG_TYPE_NAME_RAID6_RS_6, SEG_TYPE_NAME_RAID6_RA_6,
			  SEG_TYPE_NAME_RAID6_N_6);
		return 0;
	}

	/* NOTE(review): data_copies is mutated before the y/n prompt below;
	 * on decline the in-memory value stays changed -- confirm callers
	 * discard the metadata in that case. */
	new_image_count = seg->area_count - 1;
	seg->data_copies = 2;

	if (!_yes_no_conversion(lv, new_segtype, yes, force, 0, new_image_count, 1 /* data_copies */, 0, 0))
		return 0;

	dm_list_init(&removal_lvs);

	/* Remove meta and data LVs requested */
	log_debug_metadata("Removing one data and metadata image LV pair from %s", display_lvname(lv));
	if (!_lv_change_image_count(lv, new_image_count, allocate_pvs, &removal_lvs))
		return 0;

	seg->segtype = new_segtype;

	return _lv_update_reload_fns_reset_eliminate_lvs(lv, &removal_lvs, NULL);
}
+
+/* raid10 with 2 images -> linear */
+TAKEOVER_FN(_r10_l)
+{
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ if (first_seg(lv)->area_count != 2) {
+ log_error("Can't convert %s from %s to %s with != 2 images",
+ display_lvname(lv), SEG_TYPE_NAME_RAID10, SEG_TYPE_NAME_MIRROR);
+ return 0;
+ }
+
+ return _raid14510_linear(lv, new_segtype, yes, force, 1, 1, 0, 0, 0, allocate_pvs);
+}
+
/* raid10 -> striped (this handler sits in the "striped" target column) */
TAKEOVER_FN(_r10_s)
{
	struct dm_list removal_lvs;
PFL();
	RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);

	dm_list_init(&removal_lvs);

	/* Remove the mirrored copies, keeping one data image per stripe */
	return _raid10_striped_r0(lv, new_segtype, yes, 0, 0, 1 /* data_copies */, 0, 0, 0, allocate_pvs, &removal_lvs);
}
+
/*
 * raid10 with 2 images -> mirror
 *
 * A 2-image raid10 is a single mirrored pair; switch the segtype to
 * raid1 first (presumably an identical on-disk layout for 2 legs --
 * confirm) and then convert raid1 -> mirror.
 */
TAKEOVER_FN(_r10_m)
{
	struct lv_segment *seg;
	struct dm_list removal_lvs;

	RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, (seg = first_seg(lv)), new_segtype);

	dm_list_init(&removal_lvs);

	if (seg->area_count != 2) {
		log_error("Can't convert %s from %s to %s with != 2 images",
			  display_lvname(lv), SEG_TYPE_NAME_RAID10, SEG_TYPE_NAME_MIRROR);
		return 0;
	}

	if (!_raid_in_sync(lv))
		return 0;

	if (!_yes_no_conversion(lv, new_segtype, yes, force, 0, seg->area_count, seg->area_count, 0, 0))
		return 0;

	/* Archive metadata */
	if (!archive(lv->vg))
		return_0;

	/* HM FIXME: support -mN during this conversion */
	if (!(seg->segtype = get_segtype_from_flag(lv->vg->cmd, SEG_RAID1)) ||
	    !_convert_raid1_to_mirror(lv, new_segtype, new_image_count, new_region_size,
				      allocate_pvs, 0, &removal_lvs))
		return 0;

	return _lv_update_reload_fns_reset_eliminate_lvs(lv, &removal_lvs, NULL);
}
+
+/* raid10 -> raid0 */
+TAKEOVER_FN(_r10_r0)
+{
+ struct dm_list removal_lvs;
+
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ dm_list_init(&removal_lvs);
+
+ return _raid10_striped_r0(lv, new_segtype, yes, 0, 0, 1 /* data_copies */, 0, 0, 0, allocate_pvs, &removal_lvs);
+}
+
+/* raid10 -> raid0_meta */
+TAKEOVER_FN(_r10_r0m)
+{
+ struct dm_list removal_lvs;
+
+ RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, first_seg(lv), new_segtype);
+
+ dm_list_init(&removal_lvs);
+
+ return _raid10_striped_r0(lv, new_segtype, yes, 0, 0, 1 /* data_copies */, 0, 0, 0, allocate_pvs, &removal_lvs);
+}
+
/*
 * raid10 -> raid1
 *
 * Only possible for a raid10_near LV whose image count equals its
 * number of data copies (i.e. a single stripe worth of mirrors);
 * anything else fails via the short-circuit below.
 */
TAKEOVER_FN(_r10_r1)
{
	struct lv_segment *seg;

	RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, (seg = first_seg(lv)), new_segtype);
PFL();
	return (seg_is_raid10_near(seg) && seg->data_copies == seg->area_count &&
		_raid10_r1456(lv, new_segtype, yes, force, new_image_count, seg->data_copies,
			      seg->area_count, 0, new_region_size, allocate_pvs));
}
+
/*
 * Helper: raid10_near with N images to M images (N != M)
 *
 * Changes the number of data copies of a raid10_near LV:
 * reorder the image component LVs out of raid10_near layout,
 * add/remove image pairs to match the new copy count under an
 * interim raid0_meta segtype, then reorder back and restore the
 * raid10 segtype.  PFL()/PFLA() are development trace macros.
 */
TAKEOVER_HELPER_FN(_r10_r10)
{
	uint32_t data_copies;
	struct lv_segment *seg;
	const struct segment_type *raid10_segtype;
	struct dm_list removal_lvs;

	RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, (seg = first_seg(lv)), new_segtype);
	RETURN_IF_ZERO(new_data_copies > 1, "data copies argument");
	RETURN_IF_ZERO(seg_is_raid10_near(seg), "raid10 near LV");
	RETURN_IF_ZERO(segtype_is_raid10_near(new_segtype), "raid10 near requested");

	/* Image count must be a whole multiple of the copy count */
	if (seg->area_count % seg->data_copies) {
		log_error("Can't change data copies on raid10_near LV %s with odd number of images",
			  display_lvname(lv));
		return 0;
	}

	if (new_data_copies == seg->data_copies) {
		log_error("No change in number of data copies on raid10_near LV %s",
			  display_lvname(lv));
		return 0;
	}

	dm_list_init(&removal_lvs);

	/* Remember current segtype/copies to restore/scale after the image count change */
	raid10_segtype = seg->segtype;
	data_copies = seg->data_copies;

	if (!_raid_in_sync(lv))
		return 0;

	if (!_yes_no_conversion(lv, new_segtype, yes, force, 0, new_data_copies, new_data_copies, 0, 0))
		return 0;

	/* Archive metadata */
	if (!archive(lv->vg))
		return_0;

	log_debug_metadata("Reordering areas for %s image component LVs %s %s",
			   new_data_copies > seg->data_copies ? "adding" : "removing",
			   new_data_copies > seg->data_copies ? "to" : "from",
			   display_lvname(lv));
	if (!_reorder_raid10_near_seg_areas(seg, reorder_from_raid10_near))
		return 0;

#if 0
	if (!(seg->segtype = get_segtype_from_string(lv->vg->cmd, SEG_TYPE_NAME_RAID4)))
		return_0;

	seg->data_copies = 1;
#endif

PFLA("seg->area_count=%u, new_count=%u", seg->area_count, seg->area_count / data_copies * new_data_copies);
	/* Scale image count: stripes * new copies */
	if (!_lv_change_image_count(lv, seg->area_count / data_copies * new_data_copies,
				    allocate_pvs, &removal_lvs))
		return 0;
PFL();
	/* Interim raid0_meta segtype while areas are in non-raid10 order */
	seg->data_copies = new_data_copies;
	if (!(seg->segtype = get_segtype_from_flag(lv->vg->cmd, SEG_RAID0_META)))
		return_0;
PFL();
	log_debug_metadata("Reordering back image component LVs of %s ",
			   display_lvname(lv));
	if (!_reorder_raid10_near_seg_areas(seg, reorder_to_raid10_near))
		return 0;

	seg->segtype = raid10_segtype;
PFL();
	return _lv_update_reload_fns_reset_eliminate_lvs(lv, &removal_lvs, NULL);
}
+
+/*
+ * raid01 (mirrors on top of stripes)
+ *
+ * Creates the image LVs with @stripes and @stripe_size
+ * for @lv starting at area @start, ending at area
+ * @end-1 and commits the MDAs
+ */
+static int _lv_create_raid01_image_lvs(struct logical_volume *lv,
+ struct lv_segment *seg,
+ const struct segment_type *segtype,
+ uint32_t image_extents,
+ uint32_t stripes, uint32_t stripe_size,
+ uint32_t start, uint32_t end,
+ struct dm_list *allocate_pvs)
+{
+ uint32_t data_copies = end, s, ss;
+ char *image_name;
+ struct logical_volume **image_lvs;
+
+ RETURN_IF_LV_SEGTYPE_ZERO(lv, segtype);
+ RETURN_IF_ZERO(image_extents, "image extents");
+ RETURN_IF_ZERO(stripes, "stripes");
+ RETURN_IF_ZERO(stripe_size, "stripe_size");
+ RETURN_IF_NONZERO(start > end || data_copies < 2, "proper end/start/data_copies");
+ RETURN_IF_ZERO((image_lvs = dm_pool_zalloc(lv->vg->vgmem, data_copies * sizeof(**image_lvs))),
+ "image_lvs memory");
+
+ /* Create the #data_copies striped LVs to put under raid1 */
+ log_debug_metadata("Creating %u stripe%s for %s",
+ data_copies, data_copies > 1 ? "s": "", display_lvname(lv));
+ block_signals(0);
+ for (s = start; s < end; s++) {
+ if (!(image_name = _generate_raid_name(lv, "rimage_", s)))
+ goto err;
+ /*
+ * Prevent any PVs holding image components of the just
+ * allocated striped LV from being used for allocation
+ */
+ for (ss = 0; ss < start; ss++) {
+ RETURN_IF_ZERO(seg_type(seg, ss) == AREA_LV, "segment area sub LV");
+
+ if (!_avoid_pvs_with_other_images_of_lv(seg_lv(seg, ss), allocate_pvs))
+ goto err;
+ }
+
+ for (ss = start; ss < s; ss++)
+ if (!_avoid_pvs_with_other_images_of_lv(image_lvs[ss - start], allocate_pvs))
+ goto err;
+
+ log_debug_metadata("Creating %s in array slot %u", image_name, s - start);
+ if (!(image_lvs[s - start] = _lv_create(lv->vg, image_name, segtype,
+ 1 /* data_copies */, 0 /* region_size */,
+ stripes, stripe_size, image_extents, CHANGE_AEY,
+ 1 /* zero */, NULL, allocate_pvs))) {
+ struct lv_list *lvl;
+
+ if ((lvl = find_lv_in_vg(lv->vg, image_name)))
+ unlink_lv_from_vg(lvl->lv);
+
+ goto err;
+ }
+
+ if (sigint_caught())
+ goto err;
+ }
+
+ for (s = start; s < end; s++) {
+ ss = s - start;
+ log_debug_metadata("Setting stripe segment area %u LV %s for %s", s,
+ display_lvname(image_lvs[ss]), display_lvname(lv));
+ if (!set_lv_segment_area_lv(seg, s, image_lvs[ss], 0 /* le */, RAID_IMAGE /* additional LV status */))
+ goto err;
+
+ lv_set_hidden(image_lvs[ss]);
+ }
+
+ _pvs_allow_allocation(allocate_pvs);
+ unblock_signals();
+
+ return 1;
+
+err:
+ _pvs_allow_allocation(allocate_pvs);
+
+ /* We failed allocating -> remove any already allocated new image LVs */
+ if (s - start) {
+ struct dm_list removal_lvs;
+ struct lv_list *lvl_array;
+
+ dm_list_init(&removal_lvs);
+
+ log_debug_metadata("Removing %u new allocated image LVs", s - start);
+ RETURN_IF_ZERO((lvl_array = dm_pool_alloc(lv->vg->vgmem, (s - start) * sizeof(*lvl_array))),
+ "lvl_array memory")
+ for (ss = 0; image_lvs[ss]; ss++) {
+ lvl_array[ss].lv = image_lvs[ss];
+ dm_list_add(&removal_lvs, &lvl_array[ss].list);
+ }
+
+ RETURN_IF_NONZERO(dm_list_empty(&removal_lvs), "image lvs");
+ _eliminate_extracted_lvs_optional_write_vg(lv->vg, &removal_lvs, 1);
+ }
+
+ unblock_signals();
+
+ return 0;
+}
+
/*
 * striped with any number of images to raid01
 *
 * Layers a raid1 on top of the striped LV, then allocates
 * @new_data_copies - 1 further striped image sub LVs plus one
 * metadata image per copy.  PFL()/PFLA() are development trace macros.
 */
TAKEOVER_FN(_s_r01)
{
	struct lv_segment *seg, *striped_seg;

PFLA("new_data_copies=%u", new_data_copies);
	RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, (striped_seg = first_seg(lv)), new_segtype);
	RETURN_IF_ZERO(new_data_copies > 1, "data copies > 1 argument");
PFL();
	if (!_yes_no_conversion(lv, new_segtype, yes, force, 0, new_data_copies, new_data_copies, 0, 0))
		return 0;

	/* Archive metadata */
	if (!archive(lv->vg))
		return_0;

	/* The existing striped LV becomes hidden sub LV "_rimage_0" of the new top level */
	log_debug_metadata("Converting LV %s to raid1", display_lvname(lv));
	if (!(seg = _convert_lv_to_raid1(lv, "_rimage_0")))
		return 0;
PFL();
	if (!(seg->segtype = get_segtype_from_flag(lv->vg->cmd, SEG_RAID01)))
		return_0;

	log_debug_metadata("Reallocating segment areas of %s", display_lvname(lv));
	if (!_realloc_seg_areas(lv, new_data_copies, RAID_IMAGE))
		return 0;
PFL();
	/* Got the first striped sub LV after the raid1 conversion -> allocate the others */
	if (!_lv_create_raid01_image_lvs(lv, seg, striped_seg->segtype, striped_seg->len,
					 striped_seg->area_count, striped_seg->stripe_size,
					 1, new_data_copies, allocate_pvs))
		return 0;

	seg->area_count = seg->data_copies = new_data_copies;

PFLA("seg->len=%u seg->area_len=%u", seg->len, seg->area_len);
	/* Has to be set before calling _check_and_init_region_size() */
	lv->le_count = seg->len;
	lv->size = seg->len * lv->vg->extent_size;
PFL();
	if (!_check_and_init_region_size(lv))
		return 0;

	log_debug_metadata("Allocating %u metadata images for %s", new_data_copies, display_lvname(lv));
	seg->meta_areas = NULL; /* Reset to force rmeta device creation in raid01 segment */

	if (!_alloc_and_add_rmeta_devs_for_lv(lv, allocate_pvs))
		return 0;
PFL();
	return lv_update_and_reload(lv);
}
+
/*
 * Helper: find any one synced sub LV of @seg.
 *
 * Returns 1 and stores the area index of the first synced sub LV
 * in @*ss, or 0 when no sub LV is in sync (or on invalid arguments).
 */
static int _get_any_synced_sub_lv(struct lv_segment *seg, uint32_t *ss)
{
	uint32_t s;

	RETURN_IF_SEG_ZERO(seg);
	RETURN_IF_ZERO(ss, "segment area pointer argument");

	for (s = 0; s < seg->area_count; s++)
		if (_lv_is_synced(seg_lv(seg, s))) {
			*ss = s;
			return 1;
		}

	return 0;
}
+
/*
 * raid01 with any number of data_copies to striped
 *
 * Keeps one synced striped leg and removes all other legs plus
 * all metadata images of the raid1 top level.
 */
TAKEOVER_FN(_r01_s)
{
	uint32_t keep_idx, stripe_size;
	struct logical_volume *lv_tmp;
	struct lv_segment *seg;
	struct dm_list removal_lvs;

	RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, (seg = first_seg(lv)), new_segtype);
	RETURN_IF_SEG_AREA_COUNT_FALSE(seg, 0);

	dm_list_init(&removal_lvs);
PFL();
	if (!_yes_no_conversion(lv, new_segtype, yes, force, 0, first_seg(seg_lv(seg, 0))->area_count, 1, 0, 0))
		return 0;

	/* Archive metadata */
	if (!archive(lv->vg))
		return_0;

	/* Find any one synced mirror and return its index in @keep_idx */
	if (!_get_any_synced_sub_lv(seg, &keep_idx)) {
		log_error("No mirror in sync!");
		return 0;
	}
PFL();
	/*
	 * Extract all rmeta images of the raid1 top-level LV
	 * and all but the @keep_idx indexed striped data image.
	 */
	if (!_lv_extract_all_images_but_one(lv, keep_idx, &removal_lvs))
		return 0;

	/* The surviving leg is now area 0; preserve its stripe size across the layer removal */
	lv_tmp = seg_lv(seg, 0);
	stripe_size = first_seg(lv_tmp)->stripe_size;

	if (!_lv_reset_raid_add_to_list(lv_tmp, &removal_lvs))
		return 0;

	if (!remove_layer_from_lv(lv, lv_tmp))
		return_0;

	seg = first_seg(lv);
	seg->stripe_size = stripe_size;
PFL();
	return _lv_update_reload_fns_reset_eliminate_lvs(lv, &removal_lvs, NULL);
}
+
/*
 * Remove striped legs (mirrors) from raid01 @lv down to @new_data_copies;
 * extracted metadata and data sub LVs are put on @removal_lvs for
 * disposal by the caller.  With @new_data_copies == 1 the LV is
 * additionally converted all the way down to striped.
 */
static int _raid01_remove_images(struct logical_volume *lv, uint32_t new_data_copies,
				 struct dm_list *removal_lvs)
{
	struct lv_segment *seg;

	RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));

	/* Extract any metadata LVs and the empty data LVs for disposal by the caller */
	log_debug_metadata("Removing %u striped sub LVs from LV %s",
			   seg->data_copies - new_data_copies, display_lvname(lv));
	if ((seg->meta_areas && !_extract_image_component_list(seg, RAID_META, new_data_copies, removal_lvs)) ||
	    !_extract_image_component_list(seg, RAID_IMAGE, new_data_copies, removal_lvs))
		return_0;

	seg->area_count = seg->data_copies = new_data_copies;

	/* A single remaining data copy is plain striped, not raid01 any more */
	if (new_data_copies == 1 &&
	    !_convert_raid01_to_striped(lv, removal_lvs))
		return 0;

	return 1;
}
+
/*
 * raid01 with any number of data_copies to raid10
 *
 * Two-step conversion: reduce the raid01 LV to a single (striped)
 * data copy first, then take the striped LV over to raid10 with
 * the requested number of copies.
 */
TAKEOVER_FN(_r01_r10)
{
	uint32_t data_copies, stripes;
	struct lv_segment *seg;
	struct dm_list removal_lvs;

	RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, (seg = first_seg(lv)), new_segtype);
	RETURN_IF_ZERO(seg_is_raid01(seg), "raid01 LV");

	dm_list_init(&removal_lvs);
	/* Keep the current copy count unless a new one was explicitly requested */
	data_copies = new_data_copies > 1 ? new_data_copies : seg->data_copies;

	RETURN_IF_ZERO(data_copies > 1, "data copies");

	stripes = first_seg(seg_lv(seg, 0))->area_count * data_copies;
	if (!_yes_no_conversion(lv, new_segtype, yes, force, 0, stripes, data_copies, stripes, 0))
		return 0;

	/* Archive metadata */
	if (!archive(lv->vg))
		return_0;

	/* Step 1: drop all mirrors -> interim striped LV */
	if (!_raid01_remove_images(lv, 1, &removal_lvs))
		return 0;

	/* HM FIXME: this renders the LV to be striped, thus non-resilient; there should be a message here */
	/* Duplication avoids this interim non-resilience altogether... */
	if (!_lv_update_reload_fns_reset_eliminate_lvs(lv, &removal_lvs, NULL))
		return 0;

	/* Step 2: take the interim striped LV over to raid10 */
	return _striped_raid0_raid45610(lv, new_segtype, 1, force,
					stripes, data_copies, 0, 0, new_region_size, allocate_pvs);
}
+
/*
 * raid10_near with any number of data_copies (stripes must be
 * divisible by data_copies) to raid01
 *
 * Two-step conversion: reduce the raid10_near LV to an interim
 * striped LV first, then layer raid01 mirrors on top of it.
 */
TAKEOVER_FN(_r10_r01)
{
	uint32_t stripes;
	struct lv_segment *seg;
	struct segment_type *striped_segtype;
	struct dm_list removal_lvs;

	RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, (seg = first_seg(lv)), new_segtype);
	RETURN_IF_ZERO(seg_is_raid10_near(seg), "raid10 LV");

	if (seg->area_count % seg->data_copies) {
		log_error("Can't convert raid10_near LV %s with number of stripes not divisable by number of data copies",
			  display_lvname(lv));
		return 0;
	}

	dm_list_init(&removal_lvs);

	stripes = seg->area_count;
	if (!_yes_no_conversion(lv, new_segtype, yes, force, 0, stripes, new_data_copies, stripes, 0))
		return 0;

	/* Archive metadata */
	if (!archive(lv->vg))
		return_0;

	if (!(striped_segtype = get_segtype_from_string(lv->vg->cmd, SEG_TYPE_NAME_STRIPED)))
		return_0;
PFL();
	/* Step 1: strip the mirrored copies -> interim striped LV */
	stripes /= seg->data_copies;
	if (!_raid10_striped_r0(lv, striped_segtype, 1, 0, stripes, 1 /* data_copies */, stripes,
				seg->stripe_size, 0, allocate_pvs, &removal_lvs))
		return 0;
PFL();
	if (!_lv_update_reload_fns_reset_eliminate_lvs(lv, &removal_lvs, NULL))
		return 0;
PFL();
	/* Step 2: put raid01 mirrors on top of the interim striped LV.
	 * NOTE(review): seg still refers to the converted first segment here;
	 * confirm seg->area_count is the intended value after step 1. */
	return _s_r01(lv, new_segtype, 1, force,
		      seg->area_count, new_data_copies, 0, 0, new_region_size, allocate_pvs);
}
+
/*
 * Change number of data_copies on raid01
 *
 * Growing: reallocate segment areas, create additional striped image
 * sub LVs (flagged for rebuild) plus their metadata images.
 * Shrinking: extract surplus legs onto @removal_lvs for disposal.
 */
TAKEOVER_FN(_r01_r01)
{
	struct lv_segment *seg;
	struct dm_list removal_lvs;

	dm_list_init(&removal_lvs);

	RETURN_IF_LV_SEG_SEGTYPE_ZERO(lv, (seg = first_seg(lv)), new_segtype);
	RETURN_IF_ZERO(new_data_copies, "data copies");

	if (new_data_copies == seg->data_copies) {
		log_error("No different data copies for LV %s", display_lvname(lv));
		return 0;
	}

	if (!_yes_no_conversion(lv, new_segtype, yes, force, 0, seg->area_count, new_data_copies, seg->area_count, 0))
		return 0;

	/* Archive metadata */
	if (!archive(lv->vg))
		return_0;

	/* Add new striped sub LVs as mirrors aka data copies to raid01 */
	if (new_data_copies > seg->data_copies) {
		uint32_t s;
		struct logical_volume *striped_lv = seg_lv(seg, 0);
		struct lv_segment *striped_seg = first_seg(striped_lv);

		if (!striped_seg ||
		    !seg_is_striped(striped_seg)) {
			log_error("Bogus segment in first sub LV of LV %s", display_lvname(lv));
			return 0;
		}

		log_debug_metadata("Reallocating segment areas of %s", display_lvname(lv));
		if (!_realloc_meta_and_data_seg_areas(lv, new_data_copies))
			return 0;

		if (!_raid_in_sync(lv))
			return 0;
PFL();
		/* Allocate the new, striped image sub LVs */
		log_debug_metadata("Adding %u striped sub LVs to %s",
				   new_data_copies - seg->data_copies, display_lvname(lv));
		if (!_lv_create_raid01_image_lvs(lv, seg, striped_seg->segtype, striped_lv->le_count,
						 striped_seg->area_count, striped_seg->stripe_size,
						 seg->data_copies, new_data_copies, allocate_pvs))
			return 0;

		log_debug_metadata("Allocating metadata images for the new sub LVs of %s", display_lvname(lv));
		for (s = seg->area_count; s < new_data_copies; s++) {
			uint32_t ss;

			/* Keep new rmeta off PVs already holding other sub LVs */
			log_debug_metadata("Avoiding coallocation on PVs holding other sub LVs of %s",
					   display_lvname(lv));
			for (ss = 0; ss < s; ss++)
				if (!_avoid_pvs_with_other_images_of_lv(seg_metalv(seg, ss), allocate_pvs))
					return 0;

			if (!_alloc_rmeta_for_lv_add_set_hidden(lv, s, allocate_pvs))
				return 0;

			/* New copies start out-of-sync -> request rebuild */
			seg_lv(seg, s)->status |= LV_REBUILD;
		}

		_pvs_allow_allocation(allocate_pvs);

	/* Remove mirrors aka data copies from raid01 */
	} else if (!_raid01_remove_images(lv, new_data_copies, &removal_lvs))
		return 0;

	seg->area_count = seg->data_copies = new_data_copies;

	return _lv_update_reload_fns_reset_eliminate_lvs(lv, &removal_lvs, NULL);
}
+
/*
 * 2-dimensional takeover function matrix defining the
 * FSM of possible/impossible or noop (i.e. requested
 * conversion already given on the lv) conversions
 *
 * Rows define segtype from and columns segtype to
 *
 * _noop  = source already equals the requested target;
 * _error = no direct takeover supported for this from/to pair
 *          (an intermediate conversion may still reach the target).
 */
static takeover_fn_t _takeover_fns[][10] = {
	/* from, to ->    linear   striped  mirror   raid0    raid0_meta raid1    raid4/5   raid6    raid10    raid01 */
	/*   |                                                                                           */
	/*   v                                                                                           */
	/* linear     */ { _noop,   _error,  _error,  _l_r0,   _l_r0,    _l_r1,   _l_r45,   _error,  _l_r10  , _error },
	/* striped    */ { _error,  _noop,   _error,  _s_r0,   _s_r0m,   _l_r1,   _s_r45,   _s_r6,   _s_r10  , _s_r01 },
	/* mirror     */ { _error,  _error,  _noop,   _m_r0,   _m_r0m,   _m_r1,   _m_r45,   _error,  _m_r10  , _error },
	/* raid0      */ { _r0_l,   _r0_s,   _r0_m,   _noop,   _r0_r0m,  _r0_r1,  _r0_r45,  _r0_r6,  _r0_r10 , _error },
	/* raid0_meta */ { _r0m_l,  _r0m_s,  _r0m_m,  _r0m_r0, _noop,    _r0m_r1, _r0m_r45, _r0m_r6, _r0m_r10, _error },
	/* raid1      */ { _r1_l,   _r1_s,   _r1_m,   _r1_r0,  _r1_r0m,  _r1_r1,  _r1_r45,  _error,  _r1_r10 , _error },
	/* raid4/5    */ { _r45_l,  _r45_s,  _r45_m,  _r45_r0, _r45_r0m, _r45_r1, _r45_r54, _r45_r6, _error  , _error },
	/* raid6      */ { _error,  _r6_s,   _error,  _r6_r0,  _r6_r0m,  _error,  _r6_r45,  _error,  _error  , _error },
	/* raid10     */ { _r10_l,  _r10_s,  _r10_m,  _r10_r0, _r10_r0m, _r10_r1, _error,   _error,  _r10_r10, _r10_r01 },
	/* raid01     */ { _error,  _r01_s,  _error,  _error,  _error,   _error,  _error,   _error,  _r01_r10, _r01_r01 },
};
+
+/* End: various conversions between layers (aka MD takeover) */
+/****************************************************************************/
+
/*
 * HM Helper: log an error naming prohibited option @opt_str for the
 * requested conversion of @seg_from to @new_segtype.
 *
 * Always returns 1 ("message was emitted"); 0 is only possible via
 * the argument checks and signals an internal error to the caller.
 */
static int _log_prohibited_option(const struct lv_segment *seg_from,
				  const struct segment_type *new_segtype,
				  const char *opt_str)
{
	RETURN_IF_ZERO(seg_from, "segment from argument");
	RETURN_IF_ZERO(new_segtype, "segment type argument");

	/* Same-type "conversion" gets a shorter message without from/to */
	if (seg_from->segtype == new_segtype)
		log_error("Prohibited option %s provided to convert %s LV %s",
			  opt_str, lvseg_name(seg_from), display_lvname(seg_from->lv));
	else
		log_error("Prohibited option %s provided to convert LV %s from %s to %s",
			  opt_str, display_lvname(seg_from->lv), lvseg_name(seg_from), new_segtype->name);

	return 1;
}
+
+/* Set segtype conveniently for raid4 <-> raid5 <-> raid6 takeover */
+static int _set_convenient_raid456_segtype_to(const struct lv_segment *seg_from,
+ struct segment_type **segtype)
+{
+ uint64_t seg_flag;
+ struct cmd_context *cmd;
+ struct segment_type *requested_segtype;
+
+ RETURN_IF_ZERO(seg_from, "segment from argument");
+ RETURN_IF_ZERO(segtype || *segtype, "segment type argument");
+
+ cmd = seg_from->lv->vg->cmd;
+ requested_segtype = *segtype;
+PFL();
+ if (seg_is_striped(seg_from) ||
+ seg_is_any_raid0(seg_from) ||
+ seg_is_raid4(seg_from)) {
+PFL();
+ /* If this is any raid5 conversion request -> enforce raid5_n, because we convert from striped */
+ if (segtype_is_any_raid5(*segtype) &&
+ !segtype_is_raid5_n(*segtype) &&
+ !(*segtype = get_segtype_from_flag(cmd, SEG_RAID5_N))) {
+ log_error(INTERNAL_ERROR "Failed to get raid5_n segtype!");
+ return 0;
+
+ /* If this is any raid6 conversion request -> enforce raid6_n_6, because we convert from striped */
+ } else if (segtype_is_any_raid6(*segtype) &&
+ !segtype_is_raid6_n_6(*segtype) &&
+ !(*segtype = get_segtype_from_flag(cmd, SEG_RAID6_N_6))) {
+ log_error(INTERNAL_ERROR "Failed to get raid6_n_6 segtype!");
+ return 0;
+ }
+
+ /* Got to do check for raid5 -> raid6 ... */
+ } else if (seg_is_any_raid5(seg_from) &&
+ segtype_is_any_raid6(*segtype) &&
+ (!(seg_flag = _raid_seg_flag_5_to_6(seg_from)) ||
+ !(*segtype = get_segtype_from_flag(cmd, seg_flag)))) {
+ // log_error(INTERNAL_ERROR "Failed to get raid5 -> raid6 conversion type");
+ return 0;
+
+ /* ... and raid6 -> raid5 */
+ } else if (seg_is_any_raid6(seg_from) &&
+ segtype_is_any_raid5(*segtype) &&
+ (!(seg_flag = _raid_seg_flag_6_to_5(seg_from)) ||
+ !(*segtype = get_segtype_from_flag(cmd, seg_flag)))) {
+ // log_error(INTERNAL_ERROR "Failed to get raid6 -> raid5 conversion type");
+ return 0;
+ }
+
+ if (requested_segtype != *segtype)
+ log_print_unless_silent("Replacing requested RAID type %s with %s for LV %s to allow for takeover",
+ requested_segtype->name, (*segtype)->name, display_lvname(seg_from->lv));
+
+ return 1;
+}
+
+/* Check allowed conversion from @seg_from to @segtype_to */
+static int _conversion_options_allowed(const struct lv_segment *seg_from,
+ struct segment_type **segtype_to,
+ uint32_t new_image_count,
+ int data_copies, int region_size,
+ int stripes, int stripe_size)
+{
+ int r = 1;
+ uint32_t opts;
+
+ RETURN_IF_ZERO(seg_from, "segment from argument");
+ RETURN_IF_ZERO(segtype_to || *segtype_to, "segment type to argument");
+
+PFL();
+ if (!new_image_count &&
+ !_set_convenient_raid456_segtype_to(seg_from, segtype_to))
+ return 0;
+
+PFLA("seg_from->segtype=%s segtype_to=%s", lvseg_name(seg_from), (*segtype_to)->name);
+
+ if (!_get_allowed_conversion_options(seg_from, *segtype_to, new_image_count, &opts))
+ return 0;
+PFLA("segtype_to=%s", (*segtype_to)->name);
+
+ if (data_copies > 1 && !(opts & ALLOW_DATA_COPIES)) {
+ if (!_log_prohibited_option(seg_from, *segtype_to, "-m/--mirrors"))
+ return 0;
+
+ r = 0;
+ }
+
+ if (stripes > 1 && !(opts & ALLOW_STRIPES)) {
+ if (!_log_prohibited_option(seg_from, *segtype_to, "--stripes"))
+ return 0;
+ r = 0;
+ }
+
+ if (stripe_size && !(opts & ALLOW_STRIPE_SIZE)) {
+ if (!_log_prohibited_option(seg_from, *segtype_to, "-I/--stripesize"))
+ return 0;
+ r = 0;
+ }
+
+ return r;
+}
+
+/* HM Helper: initialize @*stripe_size */
+static int _init_stripe_size(const struct lv_segment *seg, uint32_t *stripe_size)
+{
+ RETURN_IF_SEG_ZERO(seg);
+ RETURN_IF_ZERO(stripe_size, "stripe_size pointer argument");
+
+ if (*stripe_size)
+ return 1;
+
+ if (seg->stripe_size)
+ *stripe_size = seg->stripe_size;
+ else {
+ *stripe_size = 64;
+ if (!seg_is_raid01(seg))
+ log_warn("Initializing stripe size on %s to %u sectors",
+ display_lvname(seg->lv), *stripe_size);
+ }
+
+ return 1;
+}
+
/*
 * HM Helper:
 *
 * define current conversion parameters for lv_raid_convert
 * based on those of @seg if not set
 *
 * Fills in @stripes, @stripe_size, @data_copies and @region_size
 * from @seg (or duplication defaults) where the caller passed
 * "unset" (0 / -1) values, then constrains them per target
 * @*segtype class (thin, mirror/raid1, raid10, striped/raid).
 */
static int _raid_convert_define_parms(const struct lv_segment *seg,
				      struct segment_type **segtype,
				      int duplicate,
				      int *data_copies, uint32_t *region_size,
				      uint32_t *stripes, uint32_t *stripe_size)
{
	struct cmd_context *cmd;

	RETURN_IF_SEG_ZERO(seg);
	cmd = seg->lv->vg->cmd;

	/* Default any unset parameter from @seg (GNU "?:" keeps set values) */
	*stripes = *stripes ?: ((duplicate) ? 2 : _data_rimages_count(seg, seg->area_count));
	*stripe_size = *stripe_size ?: seg->stripe_size;
	*data_copies = *data_copies > -1 ? *data_copies : (duplicate ? 1 : seg->data_copies);
	*region_size = *region_size ?: seg->region_size;

	/* Check region size */
	if (!*region_size &&
	    (segtype_is_mirror(*segtype) ||
	     segtype_is_raid1(*segtype) ||
	     segtype_is_reshapable_raid(*segtype))) {
		*region_size = 1024;
		log_warn("Initializing region size on %s to %s",
			 display_lvname(seg->lv), display_size(cmd, *region_size));
	}

	if (segtype_is_thin(*segtype) || segtype_is_thin_pool(*segtype)) {
		/* Thin has no raid layout properties at all */
		RETURN_IF_ZERO((*segtype = get_segtype_from_string(cmd, "thin")), "thin segtype");
		*data_copies = 1;
		*region_size = 0;
		*stripes = 1;
		*stripe_size = 0;

	} else if (segtype_is_mirror(*segtype) ||
		   segtype_is_raid1(*segtype)) {
		/* Mirrored types: at least 2 copies, no striping */
		*data_copies = *data_copies < 2 ? 2 : *data_copies;
		*stripes = 1;
		*stripe_size = 0;

	} else if (segtype_is_any_raid10(*segtype)) {
		*data_copies = *data_copies < 2 ? 2 : *data_copies;

		/* NOTE(review): minimum of 3 stripes enforced for non-far raid10 -- confirm */
		if (!segtype_is_raid10_far(*segtype) &&
		    *stripes < 3)
			*stripes = 3;

		if (!_init_stripe_size(seg, stripe_size))
			return 0;

	} else if (segtype_is_striped(*segtype) ||
		   segtype_is_striped_raid(*segtype)) {
		if (seg_is_raid10_near(seg) && seg->area_count == 2)
			if (*stripes) {
				log_warn("Ignoring stripes argument on %s", display_lvname(seg->lv));
				*stripes = 1;
			}

		if (!_init_stripe_size(seg, stripe_size))
			return 0;

		/* Single stripe with multiple copies -> stripe size is meaningless */
		if (*stripes == 1 &&
		    *data_copies > 1) {
			if (*stripe_size) {
				log_warn("Ignoring stripe size argument on %s", display_lvname(seg->lv));
				*stripe_size = 0;
			}
		}

	}

	/* Sanity checks for parity/striped raid (excluding raid0/raid1/raid01) */
	if (segtype_is_raid(*segtype) &&
	    !segtype_is_any_raid0(*segtype) &&
	    !segtype_is_raid1(*segtype) &&
	    !segtype_is_raid01(*segtype)) {
		if (!segtype_is_any_raid6(*segtype) && *data_copies > *stripes) {
			log_error("Number of data copies %u is larger than number of stripes %u",
				  *data_copies, *stripes);
			return 0;
		}
		if (*region_size < *stripe_size) {
			*region_size = get_default_region_size(cmd);
			log_warn("Region size too small, setting to default %s",
				 display_size(cmd, *region_size));
		}
	}

	return 1;
}
+
/* HM Helper:
 *
 * Change region size on raid @lv to @region_size if
 * different from current region_size and adjusted region size
 *
 * Returns 1 on success (including "no change needed"), 0 on error
 * or when the user declines the prompt.
 */
static int _region_size_change_requested(struct logical_volume *lv, int yes, uint32_t region_size)
{
	uint32_t old_region_size;
	const char *seg_region_size_str;
	struct lv_segment *seg;

	RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));

	if (!_raid_in_sync(lv)) {
		log_error("Unable to change region size on %s while it is not in-sync",
			  display_lvname(lv));
		return 0;
	}

	/* Nothing requested or no effective change -> done */
	if (!region_size ||
	    region_size == seg->region_size)
		return 1;

	/* NOTE(review): seg->region_size is mutated before the prompt below;
	 * on decline we return 0 without restoring old_region_size -- confirm
	 * callers discard the in-memory metadata in that case. */
	old_region_size = seg->region_size;
	seg->region_size = region_size;
	RETURN_IF_ZERO((seg_region_size_str = display_size(lv->vg->cmd, seg->region_size)),
		       "region size string");

	/* May adjust seg->region_size to constraints */
	if (!_check_and_init_region_size(lv))
		return 0;

	if (seg->region_size == old_region_size) {
		log_warn("Region size on %s did not change due to adjustment", display_lvname(lv));
		return 1;
	}

	if (!yes && yes_no_prompt("Do you really want to change the region_size %s of LV %s to %s? [y/n]: ",
				  display_size(lv->vg->cmd, old_region_size),
				  display_lvname(lv), seg_region_size_str) == 'n') {
		log_error("Logical volume %s NOT converted", display_lvname(lv));
		return 0;
	}

	/* Check for new region size causing bitmap to still fit metadata image LV */
	if (seg->meta_areas && seg_metatype(seg, 0) == AREA_LV && seg_metalv(seg, 0)->le_count <
	    _raid_rmeta_extents(lv->vg->cmd, lv->le_count, seg->region_size, lv->vg->extent_size)) {
		log_error("Region size %s on %s is too small for metadata LV size",
			  seg_region_size_str, display_lvname(lv));
		return 0;
	}

	if (!lv_update_and_reload_origin(lv))
		return 0;

	log_warn("Changed region size on RAID LV %s to %s",
		 display_lvname(lv), seg_region_size_str);
	return 1;
}
+
/*
 * HM Helper:
 *
 * check for @lv if unduplicate request is allowed to proceed based
 * on any @sub_lv_name or @layout_properties_requested provided
 *
 * On success may redirect @*lv to the top-level duplicating LV and
 * set @*sub_lv_name derived from the LV name the user passed in.
 * @what names the operation for error messages.
 */
static int _valid_name_requested(struct logical_volume **lv, const char **sub_lv_name,
				 int layout_properties_requested, const char *what)
{
	RETURN_IF_ZERO(lv && *lv, "lv argument");
	RETURN_IF_ZERO(sub_lv_name, "sub_lv_name argument");
	/*
	 * If we got a name which identifies the sub LV uniquely per se,
	 * no layout properties (stripes, ...) may be requested
	 */
	if (*sub_lv_name) {
		if (layout_properties_requested) {
			log_error("Rejecting %s request with both sub LV name and layout properties on %s",
				  what, display_lvname(*lv));
			return 0;
		}

	/*
	 * If no *sub_lv_name provided, try deriving it from the provided
	 * LV name, assuming user passed in a duplicated sub LV name.
	 */
	} else {
		struct lv_list *lvl;
		char *lv_name;

		if (!lv_is_duplicated(*lv)) {
			log_error("Rejecting %s request on %s LV %s; use sub LV",
				  what, _lv_is_duplicating(*lv) ? "duplicating" : "non-duplicated",
				  display_lvname(*lv));
			return 0;
		}

		if (!(*sub_lv_name = dm_pool_strdup((*lv)->vg->cmd->mem, (*lv)->name)))
			return_0;

		/* Derive the top-level LV name; it must differ from the sub LV name */
		if (!(lv_name = _top_level_lv_name(*lv)))
			return_0;
		if (!strcmp(lv_name, *sub_lv_name)) {
			log_error("No sub LV name of %s provided", display_lvname(*lv));
			return 0;
		}

		if (!(lvl = find_lv_in_vg((*lv)->vg, lv_name)))
			return_0;

		/* Redirect caller to the top-level LV */
		*lv = lvl->lv;
	}
	return 1;
}
+
+/*
+ * API function:
+ *
+ * lv_raid_convert
+ *
+ * Convert @lv from one RAID type (or striped/mirror segtype) to @new_segtype,
+ * change RAID algorithm (e.g. left symmetric to right asymmetric),
+ * add/remove LVs to/from a RAID LV or change stripe sectors and
+ * create/teardown duplicating LV stacks with e.g. N (raid) LVs underneath
+ * a toplevel raid1 mapping.
+ *
+ * Non dm-raid changes are factored in e.g. "mirror" and "striped" related
+ * functions called from here.
+ *
+ * Conversions fall into reshape or takeover classes being coped with in
+ * _raid_reshape() or via _takeover_fn[] jump table.
+ *
+ * Reshape is a change of the number of disks (e.g. change a raid5 set from
+ * 3-way striped to 7-way striped, thus adding capacity) or the data/metadata
+ * allocation algorithm (e.g. raid5 left-symmetric to right-asymmetric,
+ * raid10 near to offset) or the stripe size (aka MD raid chunk size)
+ * of a raid4/5/6/10 RAID LV (raid0 can't be reshaped wrt stripe size directly,
+ * a conversion to raid4/5/6/10 must be carried out initially to achieve such
+ * layout change in a second step).
+ *
+ * Takeover is defined as a switch from one raid level to another, potentially
+ * involving the addition of one or more image component pairs (i.e. data and metadata LV pair);
+ * for instance a switch from raid0 to raid6 will add 2 image component pairs and
+ * initialize their rebuild.
+ *
+ * Returns: 1 on success, 0 on failure
+ */
+/*
+ * TODO (^ done):
+ * - review size calculations in raid1 <-> raid4/5 ^
+ * - review stripe size usage on conversion from/to striped/nonstriped segment types
+ * - review reshape space alloc/free ^
+ * - conversion raid0 -> raid10 only mentions redundancy = 1 instead of 1..#stripes maximum
+ * - false --striped user entry shows wrong message
+ * - keep ti->len small on initial disk adding reshape and grow after it has finished
+ * in order to avoid bio_endio in the targets map method?
+ * - support region size changes ^
+ */
+int lv_raid_convert(struct logical_volume *lv,
+ struct lv_raid_convert_params rcp)
+{
+ int data_copies = rcp.data_copies;
+ uint32_t image_count;
+ uint32_t region_size = rcp.region_size;
+ uint32_t stripes = rcp.stripes;
+ uint32_t stripe_size = rcp.stripe_size;
+ int layout_properties_requested = (data_copies > -1 ? data_copies : 0) + stripes + stripe_size;
+ struct lv_segment *seg, *seg1;
+ struct segment_type *new_segtype = rcp.segtype;
+ struct dm_list removal_lvs;
+ takeover_fn_t tfn;
+
+PFLA("new_segtype=%s", new_segtype ? new_segtype->name : "");
+
+ /* new_segtype may be NULL */
+ RETURN_IF_ZERO(lv, "lv argument");
+ RETURN_IF_ZERO((seg = seg1 = first_seg(lv)), "lv segment");
+
+ if (rcp.duplicate && rcp.unduplicate) {
+ log_error("--duplicate and --unduplicate are mutually exclusive!");
+ return 0;
+ }
+
+ dm_list_init(&removal_lvs);
- /****************************************************************************/
/*
- * RAID0 <-> RAID10 comversion
+ * Define any missing raid parameters based on @seg of first duplicating seg
+ * and check proper duplicate/unduplicate option provided
+ */
+ if (_lv_is_duplicating(lv)) {
+ RETURN_IF_ZERO(seg_type(seg, 0) == AREA_LV &&
+ seg_lv(seg, 0) &&
+ (seg1 = first_seg(seg_lv(seg, 0))),
+ "sub LV #0");
+
+ if (!rcp.duplicate && !rcp.unduplicate && !rcp.region_size) {
+ log_error("No direct conversion on duplicating LV %s possible!",
+ display_lvname(lv));
+ log_error("Either --duplicate or --unduplicate or --regionsize option mandatory.");
+ return 0;
+ }
+
+ } else if (rcp.unduplicate && !lv_is_duplicated(lv)) {
+ log_error("LV %s is not duplicating!", display_lvname(lv));
+ return 0;
+ }
+
+ if (lv_is_duplicated(lv))
+ seg1 = first_seg(lv);
+
+PFLA("new_segtype=%s", new_segtype ? new_segtype->name : "");
+ new_segtype = new_segtype ?: (struct segment_type *) seg1->segtype;
+PFLA("new_segtype=%s", new_segtype ? new_segtype->name : "");
+PFLA("seg1->segtype=%s", seg1->segtype ? seg1->segtype->name : "");
+
+ /* Define any undefined properties from the given segment ones */
+ if (!_raid_convert_define_parms(seg1, &new_segtype, rcp.duplicate, &data_copies, &region_size, &stripes, &stripe_size))
+ return 0;
+
+PFLA("new_segtype=%s data_copies=%d region_size=%u stripes=%u stripe_size=%u #allocate_pvs=%d", new_segtype ? new_segtype->name : "", data_copies, region_size, stripes, stripe_size, rcp.allocate_pvs ? dm_list_size(rcp.allocate_pvs) : 0);
+
+ if (lv_is_duplicated(lv) && !rcp.duplicate && !rcp.unduplicate) {
+ if (!seg_is_mirrored(seg1) &&
+ stripes && stripes != _data_rimages_count(seg1, seg1->area_count)) {
+ log_error("Adding/removing stripes to/from duplicated LV %s "
+ "prohibited due to sub LV size change",
+ display_lvname(lv));
+ return 0;
+ }
+ }
+
+PFLA("new_segtype=%s data_copies=%d region_size=%u stripes=%u stripe_size=%u", new_segtype ? new_segtype->name : "", data_copies, region_size, stripes, stripe_size);
+ /* Make sure we are being called for segment types we actually support */
+ /* Given segtype of @lv */
+ if (!seg_is_striped(seg1) && /* Catches linear = "overloaded striped with one area" as well */
+ !seg_is_mirror(seg1) &&
+ !seg_is_thin(seg1) &&
+ !seg_is_raid(seg1))
+ goto err;
+
+ /* Requested segtype */
+ if (!segtype_is_linear(new_segtype) &&
+ !segtype_is_striped(new_segtype) &&
+ !segtype_is_mirror(new_segtype) &&
+ !segtype_is_thin(new_segtype) &&
+ !segtype_is_raid(new_segtype))
+ goto err;
+
+ image_count = ((int) stripes >= data_copies) ? stripes : data_copies;
+ image_count += new_segtype ? new_segtype->parity_devs : 0;
+
+PFLA("new_segtype=%s new_data_copies=%d new_stripes=%u segtype=%s, seg->area_count=%u duplicate=%d unduplicate=%d ", new_segtype ? new_segtype->name : "", rcp.data_copies, rcp.stripes, lvseg_name(seg), seg->area_count, rcp.duplicate, rcp.unduplicate);
+PFLA("new_segtype=%s segtype=%s, seg->area_count=%u", new_segtype ? new_segtype->name : "", lvseg_name(seg), seg->area_count);
+PFLA("new_segtype=%s image_count=%u data_copies=%d region_size=%u stripes=%u stripe_size=%u", new_segtype ? new_segtype->name : "", image_count, data_copies, region_size, stripes, stripe_size);
+
+ if (!_check_max_raid_devices(image_count))
+ return 0;
+
+ /*
+ * If clustered VG, @lv has to be active exclusive locally, else just has to be active
*
- * MD RAID10 "near" is a stripe on top of stripes number of 2-way mirrors
+ * HM FIXME: exclusive activation has to change once we support clustered MD raid1
+ *
+ * Check for active lv late to be able to display rejection reasons before
*/
- /* HM FIXME: move to _raid_takeover() rather than special case here */
- /* HM FIXME: support far and iffset formats */
- /* HM FIXME: adjust_segtype() needed at all? */
- if (seg_is_any_raid0(seg) && segtype_is_raid10(new_segtype))
- return _lv_raid_change_image_count(lv, new_segtype, lv_raid_image_count(lv) * 2, allocate_pvs);
+ if (!_lv_is_active((lv)))
+ return 0;
- if (seg_is_raid10(seg) && segtype_is_any_raid0(new_segtype))
- return _lv_raid_change_image_count(lv, new_segtype, lv_raid_image_count(lv) / 2, allocate_pvs);
+ /*
+ * A conversion by duplication has been requested so either:
+ * - create a new LV of the requested segtype
+ * -or-
+ * - add another LV as a sub LV to an existing duplicating one
+ */
+ if (rcp.duplicate) {
+ /* Check valid options mirrors, stripes and/or stripe_size have been provided suitable to the conversion */
+ if (!_conversion_options_allowed(seg, &new_segtype, image_count /* duplicate check for image_count > 0 */,
+ rcp.data_copies, rcp.region_size,
+ rcp.stripes, rcp.stripe_size))
+ return _log_possible_conversion_types(lv, new_segtype);
+
+ if (!_raid_duplicate(lv, new_segtype, rcp.yes, rcp.force, data_copies, region_size,
+ stripes, stripe_size, rcp.lv_name, rcp.allocate_pvs))
+ return 0;
+PFLA("new_segtype=%s image_count=%u data_copies=%d stripes=%u", new_segtype ? new_segtype->name : "", image_count, data_copies, stripes);
+ goto out;
- /****************************************************************************/
- /* Striped -> RAID0 conversion */
- if (seg_is_striped(seg) && segtype_is_striped_raid(new_segtype)) {
- /* Only allow _convert_striped_to_raid0() to update and reload metadata if the final level is raid0* */
- int update_and_reload = segtype_is_any_raid0(new_segtype);
-
- r = _convert_striped_to_raid0(lv,
- !segtype_is_raid0(new_segtype) /* -> alloc_metadata_devs */,
- update_and_reload);
- /* If error or final type was raid0 -> already finished with remapping in _covert_striped_to_raid9(). */
- if (!r || update_and_reload)
- return r;
+ /*
+ * Remove any active duplicating conversion ->
+ * this'll remove all but 1 leg and withdraw the
+ * top-level raid1 mapping
+ */
+ } else if (rcp.unduplicate) {
+PFLA("lv=%s", display_lvname(lv));
+ /* If user passed in the sub LV name to keep and no --name option, use it */
+ if (!_valid_name_requested(&lv, &rcp.lv_name, layout_properties_requested, "unduplicate"))
+ return 0;
- /* RAID0/4/5 <-> striped/linear conversion */
- } else if (segtype_is_linear(new_segtype) ||
- segtype_is_striped(new_segtype)) {
- if (seg_is_any_raid0(seg))
- return _convert_raid0_to_striped(lv, striped_segtype);
+PFLA("lv=%s lv_name=%s", display_lvname(lv), rcp.lv_name);
+ if (!_raid_unduplicate(lv, rcp.yes, rcp.lv_name)) {
+ if (!_lv_is_duplicating(lv))
+ _log_possible_conversion_types(lv, new_segtype);
- /* Memorize the final "linear"/"striped" segment type */
- final_segtype = new_segtype;
+ return 0;
+ }
- /* Let _convert_raid_to_raid() go to "raid0", thus droping metadata images */
- if (!(new_segtype = get_segtype_from_string(lv->vg->cmd, SEG_TYPE_NAME_RAID0)))
+ goto out;
+ }
+
+
+ /* Special raid1 handling for segtype not given */
+ if (seg->segtype == new_segtype) {
+ /* Converting raid1 -> linear given "lvconvert -m0 ..." w/o "--type ..." */
+ if (seg_is_raid1(seg) &&
+ image_count == 1 &&
+ !(new_segtype = get_segtype_from_string(lv->vg->cmd, SEG_TYPE_NAME_STRIPED)))
return_0;
+ /* Converting linear to raid1 given "lvconvert -mN ..." (N > 0) w/o "--type ..." */
+ else if (seg_is_linear(seg) &&
+ image_count > 1 &&
+ !(new_segtype = get_segtype_from_flag(lv->vg->cmd, SEG_RAID1)))
+ return_0;
}
- /****************************************************************************/
/*
- * All the rest of the raid conversions...
+ * If not duplicating/unduplicating request ->
+ *
+ * reshape of capable raid type requested
*/
- if (!_convert_raid_to_raid(lv, new_segtype, final_segtype, yes, force, new_stripes, new_stripe_size, allocate_pvs)) {
- lv_update_and_reload(lv);
+PFL();
+ switch (_reshape_requested(lv, new_segtype, data_copies, region_size, stripes, stripe_size)) {
+ case 0:
+ break;
+ case 1:
+#if 0
+ if ((rcp.data_copies > 1 || stripes != seg->area_count - seg->segtype->parity_devs) &&
+ !is_same_level(seg->segtype, new_segtype)) {
+ log_error("Can't reshape and takeover %s at the same time",
+ display_lvname(lv));
+ return 0;
+ }
+#endif
+ if (!_raid_reshape(lv, new_segtype, rcp.yes, rcp.force,
+ data_copies, region_size,
+ stripes, stripe_size, rcp.allocate_pvs))
+ goto err;
+
+ goto out;;
+ case 2:
+ /* Error if we got here with stripes and/or stripe size change requested */
return 0;
+ case 3:
+ /* Got request to change region size */
+ if (!_region_size_change_requested(lv, rcp.yes, region_size))
+ return 0;
+
+ goto out;
}
- /* HM FIXME: avoid update and reload in _convert_raid_to_raid when we have a final_segtype and reload here! */
- /* Do the final step to convert from "raid0" to "striped" here if requested */
- return _convert_raid0_to_striped(lv, final_segtype);
+PFLA("yes=%d new_segtype=%s data_copies=%u stripes=%u stripe_size=%u", rcp.yes, new_segtype->name, data_copies, stripes, stripe_size);
+
+ /*
+ * All non-duplicating conversion requests need to be 100% in-sync,
+ * because those'll be processed using md takeover features relying
+ * on in-sync crc/q-syndromes
+ */
+ if (!_raid_in_sync(lv)) {
+ log_error("Unable to convert %s while it is not in-sync",
+ display_lvname(lv));
+ return 0;
+ }
+
+ /* User has given -mN on a striped LV w/o asking for a segtype or providing striped -> convert to raid01 */
+ if (seg_is_striped(seg) &&
+ segtype_is_striped(new_segtype) &&
+ rcp.data_copies > 1 &&
+ !(new_segtype = get_segtype_from_flag(lv->vg->cmd, SEG_RAID01)))
+ return_0;
+
+ if (seg_is_striped(seg) &&
+ segtype_is_raid01(new_segtype) &&
+ data_copies < 2)
+ data_copies = 2;
+
+ /*
+ * Check acceptable options mirrors, region_size,
+ * stripes and/or stripe_size have been provided.
+ */
+ if (!_conversion_options_allowed(seg, &new_segtype, 0 /* Takeover */,
+ rcp.data_copies, rcp.region_size,
+ rcp.stripes, rcp.stripe_size))
+ return _log_possible_conversion_types(lv, new_segtype);
+
+PFLA("new_segtype=%s image_count=%u stripes=%u stripe_size=%u", new_segtype->name, image_count, stripes, stripe_size);
+ /*
+ * Table driven takeover, i.e. conversions from one segment type to another
+ */
+ tfn = _takeover_fns[_takeover_fn_idx(seg->segtype, seg->area_count)][_takeover_fn_idx(new_segtype, image_count)];
+ if (!tfn(lv, new_segtype, rcp.yes, rcp.force, image_count,
+ data_copies, stripes, stripe_size, region_size, rcp.allocate_pvs))
+ return 0;
+out:
+ log_print_unless_silent("Logical volume %s successfully converted.", display_lvname(lv));
+ return 1;
err:
/* FIXME: enhance message */
- log_error("Converting the segment type for %s/%s from %s to %s"
- " is not supported.", lv->vg->name, lv->name,
- lvseg_name(seg), new_segtype->name);
+ if (seg->segtype == new_segtype)
+ log_error("No change to %s LV %s requested", lvseg_name(seg), display_lvname(lv));
+ else
+ log_error("Converting the segment type for %s (directly) from %s to %s"
+ " is not supported.", display_lvname(lv),
+ lvseg_name(seg), new_segtype->name);
+
return 0;
}
-static int _remove_partial_multi_segment_image(struct logical_volume *lv,
- struct dm_list *remove_pvs)
+/* Return extents needed to replace on missing PVs */
+static uint32_t _extents_needed_to_repair(struct logical_volume *lv, struct dm_list *remove_pvs)
{
- uint32_t s, extents_needed;
- struct lv_segment *rm_seg, *raid_seg = first_seg(lv);
- struct logical_volume *rm_image = NULL;
- struct physical_volume *pv;
+ uint32_t r = 0;
+ struct lv_segment *rm_seg;
- if (!(lv->status & PARTIAL_LV))
- return_0;
+ RETURN_IF_LV_SEG_ZERO(lv, first_seg(lv));
+ RETURN_IF_ZERO(remove_pvs, "remove pvs list argument");
- for (s = 0; s < raid_seg->area_count; s++) {
- extents_needed = 0;
- if ((seg_lv(raid_seg, s)->status & PARTIAL_LV) &&
- lv_is_on_pvs(seg_lv(raid_seg, s), remove_pvs) &&
- (dm_list_size(&(seg_lv(raid_seg, s)->segments)) > 1)) {
- rm_image = seg_lv(raid_seg, s);
-
- /* First, how many damaged extents are there */
- if (seg_metalv(raid_seg, s)->status & PARTIAL_LV)
- extents_needed += seg_metalv(raid_seg, s)->le_count;
- dm_list_iterate_items(rm_seg, &rm_image->segments) {
- /*
- * segment areas are for stripe, mirror, raid,
- * etc. We only need to check the first area
- * if we are dealing with RAID image LVs.
- */
- if (seg_type(rm_seg, 0) != AREA_PV)
- continue;
- pv = seg_pv(rm_seg, 0);
- if (pv->status & MISSING_PV)
- extents_needed += rm_seg->len;
- }
- log_debug("%u extents needed to repair %s",
- extents_needed, rm_image->name);
-
- /* Second, do the other PVs have the space */
- dm_list_iterate_items(rm_seg, &rm_image->segments) {
- if (seg_type(rm_seg, 0) != AREA_PV)
- continue;
- pv = seg_pv(rm_seg, 0);
- if (pv->status & MISSING_PV)
- continue;
-
- if ((pv->pe_count - pv->pe_alloc_count) >
- extents_needed) {
+ if ((lv->status & PARTIAL_LV) && lv_is_on_pvs(lv, remove_pvs))
+ dm_list_iterate_items(rm_seg, &lv->segments)
+ /*
+ * Segment areas are for stripe, mirror, raid,
+ * etc. We only need to check the first area
+ * if we are dealing with RAID image LVs.
+ */
+ if (seg_type(rm_seg, 0) == AREA_PV &&
+ (seg_pv(rm_seg, 0)->status & MISSING_PV))
+ r += rm_seg->len;
+
+ return r;
+}
+
+/* Try to find a PV which can hold the whole @lv for replacement */
+static int _try_to_replace_whole_lv(struct logical_volume *lv, struct dm_list *remove_pvs)
+{
+ uint32_t extents_needed;
+
+ RETURN_IF_LV_SEG_ZERO(lv, first_seg(lv));
+ RETURN_IF_ZERO(remove_pvs, "remove pvs list argument");
+
+ /* First, get the extents needed to replace @lv */
+ if ((extents_needed = _extents_needed_to_repair(lv, remove_pvs))) {
+ struct lv_segment *rm_seg;
+
+ log_debug("%u extents needed to repair %s",
+ extents_needed, display_lvname(lv));
+
+ /* Second, do the other PVs have the space */
+ dm_list_iterate_items(rm_seg, &lv->segments) {
+ struct physical_volume *pv = seg_pv(rm_seg, 0);
+
+ /* HM FIXME: TEXTME: find_pv_in_pv_list correct here? */
+ if (seg_type(rm_seg, 0) == AREA_PV &&
+ !(pv->status & MISSING_PV) &&
+ !find_pv_in_pv_list(remove_pvs, pv)) {
+ if ((pv->pe_count - pv->pe_alloc_count) > extents_needed) {
log_debug("%s has enough space for %s",
- pv_dev_name(pv),
- rm_image->name);
- goto has_enough_space;
+ pv_dev_name(pv), display_lvname(lv));
+ /*
+ * Now we have a multi-segment, partial image that has enough
+ * space on just one of its PVs for the entire image to be
+ * replaced. So, we replace the image's space with an error
+ * target so that the allocator can find that space (along with
+ * the remaining free space) in order to allocate the image
+ * anew.
+ */
+ if (!_replace_lv_with_error_segment(lv))
+ return_0;
+
+ return 1;
}
+
log_debug("Not enough space on %s for %s",
- pv_dev_name(pv), rm_image->name);
+ pv_dev_name(pv), display_lvname(lv));
}
}
}
- /*
- * This is likely to be the normal case - single
- * segment images.
- */
- return_0;
-
-has_enough_space:
- /*
- * Now we have a multi-segment, partial image that has enough
- * space on just one of its PVs for the entire image to be
- * replaced. So, we replace the image's space with an error
- * target so that the allocator can find that space (along with
- * the remaining free space) in order to allocate the image
- * anew.
- */
- return _replace_lv_with_error_segment(rm_image);
+ return 0;
}
-/* HM */
-static int _avoid_pvs_of_lv(struct logical_volume *lv, void *data)
+/* Find space to replace partial @lv */
+static int _remove_partial_multi_segment_image(struct logical_volume *lv,
+ struct dm_list *remove_pvs)
{
- struct dm_list *allocate_pvs = (struct dm_list *) data;
- struct pv_list *pvl, *tmp;
+ uint32_t s;
+ struct lv_segment *raid_seg;
- dm_list_iterate_items_safe(pvl, tmp, allocate_pvs)
- if (!(lv->status & PARTIAL_LV) &&
- lv_is_on_pv(lv, pvl->pv))
- pvl->pv->status |= PV_ALLOCATION_PROHIBITED;
+ RETURN_IF_LV_SEG_ZERO(lv, (raid_seg = first_seg(lv)));
+ RETURN_IF_SEG_AREA_COUNT_FALSE(raid_seg, 0);
+ RETURN_IF_ZERO(remove_pvs, "remove pvs list argument");
- return 1;
-}
+ if (!(lv->status & PARTIAL_LV)) {
+ log_error(INTERNAL_ERROR "Called with non-partial LV %s.", display_lvname(lv));
+ return 0;
+ }
-/*
- * Prevent any PVs holding other image components of @lv from being used for allocation,
- * I.e. remove respective PVs from @allocatable_pvs
- */
-static int _avoid_pvs_with_other_images_of_lv(struct logical_volume *lv, struct dm_list *allocate_pvs)
-{
- return for_each_sub_lv(lv, _avoid_pvs_of_lv, allocate_pvs);
+ for (s = 0; s < raid_seg->area_count; s++) {
+ /* Try to replace all extents of any damaged image and meta LVs */
+ int r = _try_to_replace_whole_lv(seg_lv(raid_seg, s), remove_pvs);
+
+ if (raid_seg->meta_areas)
+ r += _try_to_replace_whole_lv(seg_metalv(raid_seg, s), remove_pvs);
+
+ if (r)
+ return !!r;
+ }
+
+ /*
+ * This is likely to be the normal case - single
+ * segment images completely allocated on a missing PV.
+ */
+ return 0;
}
-/* Helper fn to generate LV names and set segment area lv */
+/* HM Helper fn to generate LV names and set segment area LV */
static int _generate_name_and_set_segment(struct logical_volume *lv,
uint32_t s, uint32_t sd,
struct dm_list *lvs, char **tmp_names)
{
- struct lv_segment *raid_seg = first_seg(lv);
- struct lv_list *lvl = dm_list_item(dm_list_first(lvs), struct lv_list);
+ struct lv_segment *raid_seg;
+ struct lv_list *lvl;
+ const char *suffix;
+
+ RETURN_IF_LV_SEG_ZERO(lv, (raid_seg = first_seg(lv)));
+ RETURN_IF_SEG_AREA_COUNT_FALSE(raid_seg, 0);
+ RETURN_IF_ZERO(lvs, "no LVs list");
+ RETURN_IF_NONZERO(dm_list_empty(lvs), "no LVs listed");
+ lvl = dm_list_item(dm_list_first(lvs), struct lv_list);
dm_list_del(&lvl->list);
- if (!(tmp_names[sd] = _generate_raid_name(lv, s == sd ? "rmeta" : "rimage", s)))
+
+#if 1
+ suffix = (s == sd) ? "rmeta_" : "rimage_";
+ if (!(tmp_names[sd] = _generate_raid_name(lv, suffix, s)))
return_0;
+#endif
if (!set_lv_segment_area_lv(raid_seg, s, lvl->lv, 0, lvl->lv->status)) {
log_error("Failed to add %s to %s", lvl->lv->name, lv->name);
return 0;
@@ -4357,172 +9702,287 @@ static int _generate_name_and_set_segment(struct logical_volume *lv,
return 1;
}
+/* HM Helper: return 1 in case @slv has to be replaced, because it has any allocation on list @removal_pvs */
+static int __sub_lv_needs_rebuilding(struct logical_volume *slv,
+ struct dm_list *remove_pvs, uint32_t *partial_lvs)
+{
+ int r = 0;
+
+ RETURN_IF_LV_SEG_ZERO(slv, first_seg(slv));
+ RETURN_IF_ZERO(remove_pvs, "remove pvs list argument");
+ RETURN_IF_ZERO(partial_lvs, "partial LVs argument");
+
+PFLA("slv=%s", display_lvname(slv));
+ if (lv_is_on_pvs(slv, remove_pvs) ||
+ lv_is_virtual(slv)) {
+ r = 1;
+
+ if (slv->status & PARTIAL_LV)
+ (*partial_lvs)++;
+ }
+
+ return r;
+}
+
+/* HM Helper: return 1 in case seg_lv(@seg, @s) has to be replaced, because it has any allocation on list @removal_pvs */
+static int _sub_lv_needs_rebuilding(struct lv_segment *seg, uint32_t s,
+ struct dm_list *remove_pvs, uint32_t *partial_lvs)
+{
+ int r;
+
+ RETURN_IF_ZERO(seg, "segment argument");
+ RETURN_IF_ZERO(remove_pvs, "remove pvs list argument");
+ RETURN_IF_ZERO(partial_lvs, "partial LVs argument");
+
+ r = __sub_lv_needs_rebuilding(seg_lv(seg, s), remove_pvs, partial_lvs);
+
+ if (seg->meta_areas)
+ r += __sub_lv_needs_rebuilding(seg_metalv(seg, s), remove_pvs, partial_lvs);
+
+ return !!r;
+}
+
/*
+ * API function:
+ *
* lv_raid_replace
* @lv
* @remove_pvs
* @allocate_pvs
*
* Replace the specified PVs.
+ *
+ * HM FIXME: split long function further up
*/
int lv_raid_replace(struct logical_volume *lv,
+ int yes,
struct dm_list *remove_pvs,
struct dm_list *allocate_pvs)
{
- int partial_segment_removed = 0;
- uint32_t s, sd, match_count = 0;
+ int duplicating = 0, partial_segment_removed = 0;
+ uint32_t match_count = 0, partial_lvs = 0, s, sd;
+ char **tmp_names;
struct dm_list old_lvs;
struct dm_list new_meta_lvs, new_data_lvs;
- struct lv_segment *raid_seg = first_seg(lv);
+ struct logical_volume *slv;
+ struct lv_segment *raid_seg;
struct lv_list *lvl;
- char *tmp_names[raid_seg->area_count * 2];
+
+ RETURN_IF_LV_SEG_ZERO(lv, (raid_seg = first_seg(lv)));
+ RETURN_IF_ZERO(seg_is_raid(raid_seg), "raid segment to replace images in");
+ RETURN_IF_ZERO(remove_pvs, "remove pvs list argument");
dm_list_init(&old_lvs);
dm_list_init(&new_meta_lvs);
dm_list_init(&new_data_lvs);
- /* Replacement for raid0 would request data loss */
+ /* Recurse into sub LVs in case of a duplicating one */
+ if (_lv_is_duplicating(lv)) {
+ /* HM FIXME: first pass: handle mirror at all or require user to remove it? */
+ for (s = 0; s < raid_seg->area_count; s++) {
+ slv = seg_lv(raid_seg, s);
+
+ if (seg_type(raid_seg, s) == AREA_LV &&
+ seg_is_mirror(first_seg(slv)) &&
+ (slv->status & PARTIAL_LV)) {
+ log_error("LV %s is mirror and can't have its missing sub LVs replaced (yet)",
+ display_lvname(slv));
+ log_error("Yu have to split it off for the time being");
+ return 0;
+ }
+ }
+
+ /* 2nd pass: recurse into sub LVs */
+ for (s = 0; s < raid_seg->area_count; s++) {
+ slv = seg_lv(raid_seg, s);
+
+ if (seg_type(raid_seg, s) == AREA_LV &&
+ seg_is_raid(first_seg(slv)) && /* Prevent from processing unless raid sub LV */
+ !seg_is_any_raid0(first_seg(slv)) &&
+ !lv_raid_replace(slv, yes, remove_pvs, allocate_pvs))
+ return 0;
+ }
+
+ duplicating = 1;
+ }
+
+ /* Replacement for raid0 would cause data loss */
if (seg_is_any_raid0(raid_seg)) {
- log_error("Replacement of devices in %s/%s %s LV prohibited.",
- lv->vg->name, lv->name, raid_seg->segtype->name);
+ log_error("Replacement of devices in %s %s LV prohibited.",
+ display_lvname(lv), lvseg_name(raid_seg));
return 0;
}
- if (lv->status & PARTIAL_LV)
- lv->vg->cmd->partial_activation = 1;
-
- if (!lv_is_active_exclusive_locally(lv_lock_holder(lv))) {
- log_error("%s/%s must be active %sto perform this operation.",
- lv->vg->name, lv->name,
- vg_is_clustered(lv->vg) ? "exclusive locally " : "");
+ if (!_lv_is_active((lv)))
return 0;
- }
+
+ if (lv->status & PARTIAL_LV || duplicating)
+ lv->vg->cmd->partial_activation = 1;
if (!_raid_in_sync(lv)) {
- log_error("Unable to replace devices in %s/%s while it is"
- " not in-sync.", lv->vg->name, lv->name);
+ log_error("Unable to replace devices in %s while it is"
+ " not in-sync.", display_lvname(lv));
return 0;
}
+ if (!(tmp_names = dm_pool_zalloc(lv->vg->vgmem, 2 * raid_seg->area_count * sizeof(*tmp_names))))
+ return_0;
+
if (!archive(lv->vg))
return_0;
/*
- * How many sub-LVs are being removed?
+ * How many image component pairs are being removed?
*/
for (s = 0; s < raid_seg->area_count; s++) {
if ((seg_type(raid_seg, s) == AREA_UNASSIGNED) ||
- (seg_metatype(raid_seg, s) == AREA_UNASSIGNED)) {
+ (raid_seg->meta_areas && seg_metatype(raid_seg, s) == AREA_UNASSIGNED)) {
log_error("Unable to replace RAID images while the "
"array has unassigned areas");
return 0;
}
- if (lv_is_virtual(seg_lv(raid_seg, s)) ||
- lv_is_virtual(seg_metalv(raid_seg, s)) ||
- lv_is_on_pvs(seg_lv(raid_seg, s), remove_pvs) ||
- lv_is_on_pvs(seg_metalv(raid_seg, s), remove_pvs))
+ if (_sub_lv_needs_rebuilding(raid_seg, s, remove_pvs, &partial_lvs))
match_count++;
}
if (!match_count) {
- log_verbose("%s/%s does not contain devices specified"
- " for replacement", lv->vg->name, lv->name);
+ log_verbose("%s does not contain devices specified"
+ " for replacement", display_lvname(lv));
return 1;
+
} else if (match_count == raid_seg->area_count) {
- log_error("Unable to remove all PVs from %s/%s at once.",
- lv->vg->name, lv->name);
- return 0;
- } else if (raid_seg->segtype->parity_devs &&
- (match_count > raid_seg->segtype->parity_devs)) {
- log_error("Unable to replace more than %u PVs from (%s) %s/%s",
- raid_seg->segtype->parity_devs,
- lvseg_name(raid_seg),
- lv->vg->name, lv->name);
- return 0;
- } else if (seg_is_raid10(raid_seg)) {
- uint32_t i, rebuilds_per_group = 0;
- /* FIXME: We only support 2-way mirrors in RAID10 currently */
- uint32_t copies = 2;
-
- for (i = 0; i < raid_seg->area_count * copies; i++) {
- s = i % raid_seg->area_count;
- if (!(i % copies))
- rebuilds_per_group = 0;
- if (lv_is_on_pvs(seg_lv(raid_seg, s), remove_pvs) ||
- lv_is_on_pvs(seg_metalv(raid_seg, s), remove_pvs) ||
- lv_is_virtual(seg_lv(raid_seg, s)) ||
- lv_is_virtual(seg_metalv(raid_seg, s)))
- rebuilds_per_group++;
- if (rebuilds_per_group >= copies) {
- log_error("Unable to replace all the devices "
- "in a RAID10 mirror group.");
+ log_error("Unable to remove all PVs from %s at once.",
+ display_lvname(lv));
+ return 0;
+
+ } else if (raid_seg->segtype->parity_devs) {
+ if (match_count > raid_seg->segtype->parity_devs) {
+ log_error("Unable to replace more than %u PVs from (%s) %s",
+ raid_seg->segtype->parity_devs,
+ lvseg_name(raid_seg), display_lvname(lv));
+ return 0;
+
+ } else if (match_count == raid_seg->segtype->parity_devs &&
+ match_count > partial_lvs / 2) {
+ log_warn("You'll loose all resilience on %s LV %s during replacement"
+ " until resynchronization has finished!",
+ lvseg_name(raid_seg), display_lvname(lv));
+ if (!yes && yes_no_prompt("WARNING: Do you really want to replace"
+ " PVs in %s LV %s?? [y/n]: ",
+ lvseg_name(raid_seg), display_lvname(lv))) {
+ log_warn("PVs in LV %s NOT replaced!", display_lvname(lv));
return 0;
}
+ if (sigint_caught())
+ return_0;
}
- }
+ } else if (seg_is_any_raid10(raid_seg)) {
+ uint32_t copies = raid_seg->data_copies, i;
+
+ /*
+ * For raid10_{near, offset} with # devices divisible by number of
+ * data copies, we have 'mirror groups', i.e. [AABB] and can check
+ * for at least one mirror per group being available after
+ * replacement...
+ */
+ if (!seg_is_raid10_far(raid_seg) &&
+ !(raid_seg->area_count % raid_seg->data_copies)) {
+ uint32_t rebuilds_per_group;
+
+ for (i = 0; i < raid_seg->area_count * copies; i++) {
+ s = i % raid_seg->area_count;
+ if (!(i % copies))
+ rebuilds_per_group = 0;
+
+ if (_sub_lv_needs_rebuilding(raid_seg, s, remove_pvs, &partial_lvs))
+ rebuilds_per_group++;
+
+ if (rebuilds_per_group >= copies) {
+ log_error("Unable to replace all the devices "
+ "in a RAID10 mirror group.");
+ return 0;
+ }
+ }
+
+ /*
+ * ... and if not so 'mirror groups', we have to have at least
+ * one mirror for the whole raid10 set available after replacement!
+ */
+ } else {
+ uint32_t rebuilds = 0;
+
+ for (s = 0; s < raid_seg->area_count; s++)
+ if (_sub_lv_needs_rebuilding(raid_seg, s, remove_pvs, &partial_lvs))
+ rebuilds++;
+
+ if (rebuilds >= copies) {
+ log_error("Unable to replace all data copies in a RAID10 set.");
+ return 0;
+ }
+ }
+ }
+
/* Prevent any PVs holding image components from being used for allocation */
- if (!_avoid_pvs_with_other_images_of_lv(lv, allocate_pvs)) {
- log_error("Failed to prevent PVs holding image components "
- "from being used for allocation.");
+ if (!_avoid_pvs_with_other_images_of_lv(lv, allocate_pvs))
return 0;
- }
- /*
- * Allocate the new image components first
- * - This makes it easy to avoid all currently used devs
- * - We can immediately tell if there is enough space
- *
- * - We need to change the LV names when we insert them.
- */
-try_again:
- if (!_alloc_image_components(lv, 1, allocate_pvs, match_count,
- &new_meta_lvs, &new_data_lvs)) {
- if (!(lv->status & PARTIAL_LV)) {
- log_error("LV %s is not partial.", display_lvname(lv));
- return 0;
- }
+ /* If this is not the top-level duplicating raid1 LV -> allocate image component pairs */
+ if (!duplicating) {
+ /*
+ * Allocate the new image components first
+ * - This makes it easy to avoid all currently used devs
+ * - We can immediately tell if there is enough space
+ *
+ * - We need to change the LV names when we insert them.
+ */
+ while (!_alloc_image_components(lv, match_count,
+ &new_meta_lvs, &new_data_lvs,
+ allocate_pvs)) {
+ if (!(lv->status & PARTIAL_LV)) {
+ log_error("LV %s in not partial.", display_lvname(lv));
+ return 0;
+ }
- /* This is a repair, so try to do better than all-or-nothing */
- match_count--;
- if (match_count > 0) {
- log_error("Failed to replace %u devices."
- " Attempting to replace %u instead.",
- match_count, match_count+1);
/*
- * Since we are replacing some but not all of the bad
- * devices, we must set partial_activation
+ * We failed allocating all required devices so
+ * we'll try less devices; we must set partial_activation
*/
lv->vg->cmd->partial_activation = 1;
- goto try_again;
- } else if (!match_count && !partial_segment_removed) {
- /*
- * We are down to the last straw. We can only hope
- * that a failed PV is just one of several PVs in
- * the image; and if we extract the image, there may
- * be enough room on the image's other PVs for a
- * reallocation of the image.
- */
- if (!_remove_partial_multi_segment_image(lv, remove_pvs))
- return_0;
+
+ /* This is a repair, so try to do better than all-or-nothing */
+ if (match_count > 0 && !partial_segment_removed) {
+ log_error("Failed to replace %u devices.", match_count);
+ match_count--;
+ log_error("Attempting to replace %u instead.", match_count);
+
+ } else if (!partial_segment_removed) {
+ /*
+ * match_count = 0
+ *
+ * We are down to the last straw. We can only hope
+ * that a failed PV is just one of several PVs in
+ * the image; and if we extract the image, there may
+ * be enough room on the image's other PVs for a
+ * reallocation of the image.
+ */
+ if (!_remove_partial_multi_segment_image(lv, remove_pvs))
+ return_0;
- match_count = 1;
- partial_segment_removed = 1;
- lv->vg->cmd->partial_activation = 1;
- goto try_again;
- }
- log_error("Failed to allocate replacement images for %s/%s",
- lv->vg->name, lv->name);
+ match_count = 1;
+ partial_segment_removed = 1;
- return 0;
+ } else {
+
+ log_error("Failed to allocate replacement images for %s",
+ display_lvname(lv));
+ return 0;
+ }
+ }
}
- /* HM FIXME: TESTME */
- /* The new metadata LV(s) must be cleared before being added to the array */
- log_debug_metadata("Clearing newly allocated replacement metadata LV");
- if (!_clear_lvs(&new_meta_lvs))
- return 0;
+ _pvs_allow_allocation(allocate_pvs);
/*
* Remove the old images
@@ -4536,13 +9996,18 @@ try_again:
* the image with the error target. Thus, the full set of PVs is
* supplied - knowing that only the image with the error target
* will be affected.
+ *
+ * - If this is the duplicating top-level LV, only extract
+ * any failed metadata devices.
*/
+
+ /* never extract top-level raid1 images, because they are stacked LVs (e.g. raid5) */
if (!_raid_extract_images(lv, raid_seg->area_count - match_count,
- partial_segment_removed ?
+ (partial_segment_removed || dm_list_empty(remove_pvs)) ?
&lv->vg->pvs : remove_pvs, 0 /* Don't shift */,
- &old_lvs, &old_lvs)) {
- log_error("Failed to remove the specified images from %s/%s",
- lv->vg->name, lv->name);
+ &old_lvs, duplicating ? NULL : &old_lvs)) {
+ log_error("Failed to remove the specified images from %s",
+ display_lvname(lv));
return 0;
}
@@ -4550,159 +10015,454 @@ try_again:
* Now that they are extracted and visible, make the system aware
* of their new names.
*/
- dm_list_iterate_items(lvl, &old_lvs)
- if (!activate_lv_excl_local(lv->vg->cmd, lvl->lv))
- return_0;
+ if (!_activate_lv_list_excl_local(&old_lvs))
+ return_0;
+#if 1
+ /* Top-level LV needs special treatment of its metadata LVs */
+ if (duplicating) {
+ struct lv_list *lvl_array;
+
+ /* HM FIXME: if we don't need to clear the new metadata LVs, avoid lvlist altogether */
+ RETURN_IF_ZERO((lvl_array = dm_pool_alloc(lv->vg->vgmem, dm_list_size(&old_lvs) * sizeof(*lvl_array))),
+ "lvl_array memory");
+
+ dm_list_init(&new_meta_lvs);
+ sd = 0;
+
+ dm_list_iterate_items(lvl, &old_lvs) {
+ if (!_lv_name_get_string_index(lvl->lv, &s))
+ return 0;
+
+ RETURN_IF_SEG_AREA_COUNT_FALSE(raid_seg, s);
+ /* We only have to allocate the new metadata devs... */
+ if (!__alloc_rmeta_for_lv(seg_lv(raid_seg, s), &lvl_array[sd].lv, allocate_pvs))
+ return 0;
+
+ dm_list_add(&new_meta_lvs, &lvl_array[sd].list);
+ sd++;
+ }
+ }
+#endif
/*
* Skip metadata operation normally done to clear the metadata sub-LVs.
*
* The LV_REBUILD flag is set on the new sub-LVs,
* so they will be rebuilt and we don't need to clear the metadata dev.
+ *
+ * Insert new allocated image component pairs into now empty area slots.
*/
+ for (s = 0, sd = raid_seg->area_count; s < raid_seg->area_count; s++, sd++) {
+ if (seg_type(raid_seg, s) == AREA_UNASSIGNED) {
+ if (!_generate_name_and_set_segment(lv, s, sd, &new_data_lvs, tmp_names))
+ return 0;
- for (s = 0; s < raid_seg->area_count; s++) {
- sd = s + raid_seg->area_count;
+ /* Tell kernel to rebuild the image */
+ seg_lv(raid_seg, s)->status |= LV_REBUILD;
+ }
- if ((seg_type(raid_seg, s) == AREA_UNASSIGNED) &&
- (seg_metatype(raid_seg, s) == AREA_UNASSIGNED)) {
- if (!_generate_name_and_set_segment(lv, s, s, &new_meta_lvs, tmp_names) ||
- !_generate_name_and_set_segment(lv, s, sd, &new_data_lvs, tmp_names))
- return 0;
- } else
- tmp_names[s] = tmp_names[sd] = NULL;
+ if (raid_seg->meta_areas &&
+ seg_metatype(raid_seg, s) == AREA_UNASSIGNED &&
+ !_generate_name_and_set_segment(lv, s, s, &new_meta_lvs, tmp_names))
+ return 0;
}
+PFL();
+ /* This'll reset the rebuild flags passed to the kernel */
+ if (!_lv_update_reload_fns_reset_eliminate_lvs(lv, &old_lvs, NULL))
+ return_0;
+PFL();
+ /* Update new sub-LVs to correct name and clear REBUILD flag in-kernel and in metadata */
+ for (s = 0, sd = raid_seg->area_count; s < raid_seg->area_count; s++, sd++) {
+ if (tmp_names[s])
+ seg_metalv(raid_seg, s)->name = tmp_names[s];
+ if (tmp_names[sd])
+ seg_lv(raid_seg, s)->name = tmp_names[sd];
+ }
+
+ init_mirror_in_sync(0);
+#if 0
+ /* HM FIXME: LV_NOTSYNCED needed to start repair this way, but that leaves it in the metadata */
+ lv->status |= LV_NOTSYNCED;
+ return lv_update_and_reload_origin(lv);
+#else
+ /* HM FIXME: this does not touch LV_NOTSYNCED in the metadata */
if (!lv_update_and_reload_origin(lv))
return_0;
+PFL();
+ return _lv_cond_repair(lv);
+#endif
+}
+
+/* HM Helper: check for @pv listed on @failed_pvs */
+static int _pv_on_list(struct physical_volume *pv, struct dm_list *failed_pvs)
+{
+ struct pv_list *pvl;
+
+ RETURN_IF_ZERO(pv, "pv argument");
+ /* failed_pvs may be empty initially but reference must be present */
+ RETURN_IF_ZERO(failed_pvs, "failed pvs list argument");
+
+ dm_list_iterate_items(pvl, failed_pvs)
+ if (pvl->pv == pv)
+ return 1;
+
+ return 0;
+}
- if (!_deactivate_and_remove_lvs(lv->vg, &old_lvs))
+/*
+ * HM Helper
+ *
+ * Add @pv to list of @failed_pvs if not yet on
+ *
+ * Returns:
+ * 0 -> already on
+ * 1 -> put on anew
+ * -ENOMEM -> failed to allocate "struct pv_list *" var
+ *
+ */
+static int _add_pv_to_failed_pvs(struct physical_volume *pv, struct dm_list *failed_pvs)
+{
+ struct pv_list *pvl;
+
+ RETURN_IF_ZERO(pv, "pv argument");
+ RETURN_IF_ZERO(failed_pvs, "failed pvs list argument");
+	/* failed_pvs may be empty initially */
+
+ if (_pv_on_list(pv, failed_pvs))
return 0;
- /* Update new sub-LVs to correct name and clear REBUILD flag */
- for (s = 0; s < raid_seg->area_count; s++) {
- sd = s + raid_seg->area_count;
- if (tmp_names[s] && tmp_names[sd]) {
- seg_metalv(raid_seg, s)->name = tmp_names[s];
- seg_lv(raid_seg, s)->name = tmp_names[sd];
- seg_metalv(raid_seg, s)->status &= ~LV_REBUILD;
- seg_lv(raid_seg, s)->status &= ~LV_REBUILD;
+ if (!(pvl = dm_pool_alloc(pv->vg->vgmem, sizeof(*pvl))))
+ return -ENOMEM;
+
+ pvl->pv = pv;
+ dm_list_add(failed_pvs, &pvl->list);
+
+ return 1;
+}
+
+/* Iterate the segments of a sublv and check their allocations vs. missing pvs populating @failed_pvs list */
+static int _find_sub_lv_failed_pvs(struct logical_volume *sublv, uint32_t *failed, struct dm_list *failed_pvs)
+{
+ uint32_t s;
+ struct lv_segment *seg;
+
+ RETURN_IF_ZERO(sublv, "sublv argument");
+ RETURN_IF_ZERO(failed, "failed argument");
+ RETURN_IF_ZERO(failed_pvs, "failed pvs list argument");
+
+ *failed = 0;
+
+ dm_list_iterate_items(seg, &sublv->segments)
+ for (s = 0; s < seg->area_count; s++)
+ if (seg_type(seg, s) == AREA_PV &&
+ is_missing_pv(seg_pv(seg, s))) {
+ if (_add_pv_to_failed_pvs(seg_pv(seg, s), failed_pvs) < 0)
+ return 0;
+
+ (*failed)++;
+ }
+
+ return 1;
+}
+
+/* HM Helper: find number of @failed_rimage and @failed_rmeta sub LVs and populate @failed_pvs list */
+static int _find_failed_pvs_of_lv(struct logical_volume *lv,
+ struct dm_list *failed_pvs,
+ uint32_t *failed_rimage, uint32_t *failed_rmeta)
+{
+ uint32_t s;
+ struct lv_segment *seg;
+
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+ RETURN_IF_SEG_AREA_COUNT_FALSE(seg, 0);
+ RETURN_IF_ZERO(failed_pvs, "failed pvs list argument");
+ RETURN_IF_ZERO(failed_rimage, "failed rimage argument");
+ RETURN_IF_ZERO(failed_rmeta, "failed rmeta argument");
+
+ if (_lv_is_duplicating(lv)) {
+ for (s = 0; s < seg->area_count; s++)
+ if (!_find_failed_pvs_of_lv(seg_lv(seg, s), failed_pvs, failed_rimage, failed_rmeta))
+ return 0;
+
+ for (s = 0; s < seg->area_count; s++) {
+ if (seg->meta_areas &&
+ !_find_sub_lv_failed_pvs(seg_metalv(seg, s), failed_rmeta, failed_pvs))
+ return 0;
}
+
+ return 1;
}
+
+ for (s = 0; s < seg->area_count; s++) {
+ if (!_find_sub_lv_failed_pvs(seg_lv(seg, s), failed_rimage, failed_pvs))
+ return 0;
- /* FIXME: will this discontinue a running rebuild of the replaced legs? */
- /* HM: no, because md will restart based on the recovery_cp offset in the superblock */
- if (!lv_update_and_reload_origin(lv))
- return_0;
+ if (seg->meta_areas &&
+ !_find_sub_lv_failed_pvs(seg_metalv(seg, s), failed_rmeta, failed_pvs))
+ return 0;
+ }
return 1;
}
-int lv_raid_remove_missing(struct logical_volume *lv)
+/*
+ * HM Helper
+ *
+ * Replace @lv with error segment, setting @lv @status,
+ * puting failed PVs on list @failed_pvs and
+ * reporting number of uniquely failed LVs on @*replaced_lvs
+ */
+static int _replace_raid_lv_with_error_segment(struct logical_volume *lv,
+ uint64_t status,
+ struct dm_list *failed_pvs,
+ uint32_t *replaced_lvs)
+{
+ RETURN_IF_LV_SEG_ZERO(lv, first_seg(lv));
+ RETURN_IF_ZERO(failed_pvs, "failed pvs list argument");
+ RETURN_IF_ZERO(replaced_lvs, "replaced LVs argument");
+
+ if (lv_is_on_pvs(lv, failed_pvs)) {
+ log_debug("Replacing %s segments with error target",
+ display_lvname(lv));
+ lv->status |= PARTIAL_LV;
+
+ if (!_replace_lv_with_error_segment(lv))
+ return 0;
+
+ lv->status &= ~PARTIAL_LV;
+ lv->status |= status;
+ (*replaced_lvs)++;
+ }
+
+ return 1;
+}
+
+/*
+ * Replace any image or metadata LVs of @lv with allocation on @failed_pvs
+ * with error segments and return their number in @replaced_lvs
+ */
+static int _replace_lvs_on_failed_pvs_with_error_segments(struct logical_volume *lv,
+ struct dm_list *failed_pvs,
+ uint32_t *replaced_lvs)
{
uint32_t s;
- struct lv_segment *seg = first_seg(lv);
+ struct lv_segment *seg;
- if (!(lv->status & PARTIAL_LV)) {
- log_error(INTERNAL_ERROR "%s/%s is not a partial LV",
- lv->vg->name, lv->name);
- return 0;
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+ RETURN_IF_ZERO(failed_pvs, "failed pvs list argument");
+ RETURN_IF_ZERO(replaced_lvs, "replaced LVs argument");
+
+ /* Recurse to allow for duplicating LV to work */
+ if (_lv_is_duplicating(lv)) {
+ for (s = 0; s < seg->area_count; s++)
+			if (!_replace_lvs_on_failed_pvs_with_error_segments(seg_lv(seg, s), failed_pvs, replaced_lvs))
+ return 0;
+ return 1;
}
- if (!archive(lv->vg))
- return_0;
+ for (s = 0; s < seg->area_count; s++) {
+ if (!_replace_raid_lv_with_error_segment(seg_lv(seg, s), RAID_IMAGE, failed_pvs, replaced_lvs))
+ return 0;
+
+ if (seg->meta_areas &&
+ !_replace_raid_lv_with_error_segment(seg_metalv(seg, s), RAID_META, failed_pvs, replaced_lvs))
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * API function:
+ *
+ * replace any partial data and metadata LVs with error segments
+ */
+int lv_raid_remove_missing(struct logical_volume *lv)
+{
+ uint32_t replaced_lvs = 0, failed_rimage = 0, failed_rmeta = 0, max_failed;
+ struct lv_segment *seg;
+ struct dm_list failed_pvs;
+
+PFL();
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+ RETURN_IF_ZERO(seg_is_raid(seg), "raid segment to remove images from");
+ RETURN_IF_ZERO(lv->status & PARTIAL_LV, "partial LV");
+
+ dm_list_init(&failed_pvs);
log_debug("Attempting to remove missing devices from %s LV, %s",
lvseg_name(seg), lv->name);
/*
- * FIXME: Make sure # of compromised components will not affect RAID
+ * Find the amount of rimage and rmeta devices on failed PVs of @lv
+ * and put the failed pvs on failed_pvs list
*/
- for (s = 0; s < seg->area_count; s++)
- if (!_replace_lv_with_error_segment(seg_lv(seg, s)) ||
- !_replace_lv_with_error_segment(seg_metalv(seg, s)))
- return 0;
+ log_debug_metadata("Scanning all rimage and rmeta sub LVs and all their segments of %s for any failed pvs",
+ display_lvname(lv));
+ if (!_find_failed_pvs_of_lv(lv, &failed_pvs, &failed_rimage, &failed_rmeta))
+ return 0;
- if (!lv_update_and_reload(lv))
+PFL();
+ /* Exit in case LV has no allocations on any failed pvs */
+ if (dm_list_empty(&failed_pvs))
+ return 1;
+
+ log_debug_metadata("lv %s is mapped to %u failed pvs", display_lvname(lv), dm_list_size(&failed_pvs));
+
+ /* Define maximum sub LVs which are allowed to fail */
+ max_failed = (seg_is_striped_raid(seg) && !seg_is_any_raid10(seg)) ?
+ seg->segtype->parity_devs : seg->data_copies - 1;
+ if (failed_rimage > max_failed ||
+ failed_rmeta > seg->area_count - 1)
+ log_error("RAID LV %s is not operational with %u pvs missing!",
+ display_lvname(lv), dm_list_size(&failed_pvs));
+
+ if (!archive(lv->vg))
+ return_0;
+
+ /*
+ * Only error those rimage/rmeta devices which have allocations
+ * on @failed_pvs and only their failed segments in multi-segmented
+ * rimage/rmeta sub LVs rather than the whole sublv!
+ */
+ log_debug_metadata("Replacing all failed segments in LV %s with error types",
+ display_lvname(lv));
+
+ if (!_replace_lvs_on_failed_pvs_with_error_segments(lv, &failed_pvs, &replaced_lvs))
+ return 0;
+
+ if (replaced_lvs &&
+ !lv_update_and_reload(lv))
return_0;
return 1;
}
+/* Return 1 if @lv has failed */
+static int _lv_has_failed(struct logical_volume *lv)
+{
+ RETURN_IF_LV_SEG_ZERO(lv, first_seg(lv));
+
+ return (lv->status & PARTIAL_LV) ||
+ lv_is_virtual(lv);
+}
+
/* Return 1 if a partial raid LV can be activated redundantly */
static int _partial_raid_lv_is_redundant(const struct logical_volume *lv)
{
- struct lv_segment *raid_seg = first_seg(lv);
- uint32_t copies;
- uint32_t i, s, rebuilds_per_group = 0;
- uint32_t failed_components = 0;
+ struct lv_segment *raid_seg;
+ uint32_t failed_rimage = 0, failed_rmeta = 0, min_devs, s;
+
+ RETURN_IF_LV_SEG_ZERO(lv, (raid_seg = first_seg(lv)));
+
+ min_devs = raid_seg->segtype->parity_devs ?: 1;
- if (seg_is_raid10(raid_seg)) {
- /* FIXME: We only support 2-way mirrors in RAID10 currently */
- copies = 2;
- for (i = 0; i < raid_seg->area_count * copies; i++) {
+ /*
+	 * Count number of failed rimage and rmeta components separately
+	 * so that we can activate a raid set with at least one metadata
+ * dev (mandatory unless raid0) and quorum number of data devs
+ */
+ for (s = 0; s < raid_seg->area_count; s++) {
+ RETURN_IF_ZERO(seg_type(raid_seg, s) == AREA_LV, "data sub lv");
+
+ if (_lv_has_failed(seg_lv(raid_seg, s)))
+ failed_rimage++;
+
+ if (raid_seg->meta_areas) {
+ RETURN_IF_ZERO(seg_metatype(raid_seg, s) == AREA_LV, "meta sub lv");
+
+			if (_lv_has_failed(seg_metalv(raid_seg, s)))
+ failed_rmeta++;
+ }
+ }
+
+ /* No devices failed -> fully redundant */
+ if (failed_rimage + failed_rmeta == 0)
+ return 1;
+
+ /* All data devices have failed */
+ if (failed_rimage == raid_seg->area_count) {
+ log_verbose("All data components of raid LV %s have failed.",
+ display_lvname(lv));
+ return 0; /* Insufficient redundancy to activate */
+ }
+
+ /* We require at least one metadata component to retrieve raid set state */
+ if (failed_rmeta == raid_seg->area_count) {
+		log_error("All metadata devices of %s have failed! Can't retrieve raid set state!",
+ display_lvname(lv));
+ return 0;
+ }
+
+ /*
+ * raid10:
+ *
+	 * - if #devices is divisible by number of data copies,
+ * the data copies form 'mirror groups' like 'AAABBB' for 3 data copies and 6 stripes ->
+ * check that each of the mirror groups has at least 2 data copies available
+ *
+	 * - if not, we have an odd number of devices causing just _one_ mirror group ->
+ * check that at least one data copy is available
+ *
+ */
+ if (seg_is_any_raid10(raid_seg)) {
+ uint32_t i;
+ uint32_t mirror_groups = (raid_seg->area_count % raid_seg->data_copies) ?
+ 1 : raid_seg->data_copies;
+ uint32_t rebuilds_per_group = 0;
+
+ for (i = 0; i < raid_seg->area_count * mirror_groups; i++) {
s = i % raid_seg->area_count;
- if (!(i % copies))
+ if (!(i % mirror_groups))
rebuilds_per_group = 0;
- if ((seg_lv(raid_seg, s)->status & PARTIAL_LV) ||
- (seg_metalv(raid_seg, s)->status & PARTIAL_LV) ||
- lv_is_virtual(seg_lv(raid_seg, s)) ||
- lv_is_virtual(seg_metalv(raid_seg, s)))
+ if (_lv_has_failed(seg_lv(raid_seg, s)))
rebuilds_per_group++;
- if (rebuilds_per_group >= copies) {
- log_verbose("An entire mirror group has failed in %s.",
+ if (rebuilds_per_group >= raid_seg->data_copies) {
+			log_verbose(mirror_groups == 1 ? "Too many data copies have failed in %s." :
+ "An entire mirror group has failed in %s.",
display_lvname(lv));
- return 0; /* Insufficient redundancy to activate */
+ return 0; /* Insufficient redundancy to activate */
}
}
- return 1; /* Redundant */
- }
-
- for (s = 0; s < raid_seg->area_count; s++) {
- if ((seg_lv(raid_seg, s)->status & PARTIAL_LV) ||
- (seg_metalv(raid_seg, s)->status & PARTIAL_LV) ||
- lv_is_virtual(seg_lv(raid_seg, s)) ||
- lv_is_virtual(seg_metalv(raid_seg, s)))
- failed_components++;
- }
-
- if (failed_components && seg_is_any_raid0(raid_seg)) {
- log_verbose("No components of raid LV %s may fail",
- display_lvname(lv));
- return 0;
+ } else if (failed_rimage) {
+ /* Check raid0* */
+ if (seg_is_any_raid0(raid_seg)) {
+ log_verbose("No data components of %s LV %s may fail",
+ lvseg_name(raid_seg), display_lvname(lv));
+ return 0; /* Insufficient redundancy to activate */
+ }
- } else if (failed_components == raid_seg->area_count) {
- log_verbose("All components of raid LV %s have failed.",
- display_lvname(lv));
- return 0; /* Insufficient redundancy to activate */
- } else if (raid_seg->segtype->parity_devs &&
- (failed_components > raid_seg->segtype->parity_devs)) {
- log_verbose("More than %u components from %s %s have failed.",
- raid_seg->segtype->parity_devs,
- lvseg_name(raid_seg),
- display_lvname(lv));
- return 0; /* Insufficient redundancy to activate */
+ /* Check for mirrored/parity raid being redundant */
+ if (failed_rimage > min_devs) {
+ log_verbose("More than %u components from %s %s have failed.",
+ min_devs, lvseg_name(raid_seg), display_lvname(lv));
+ return 0; /* Insufficient redundancy to activate */
+ }
}
- return 1;
+ return 1; /* @lv is redundant -> user data intact */
}
-/* Sets *data to 1 if the LV cannot be activated without data loss */
+/* Sets *@data to 1 if @lv cannot be activated without data loss */
static int _lv_may_be_activated_in_degraded_mode(struct logical_volume *lv, void *data)
{
- int *not_capable = (int *)data;
+ int *not_capable;
uint32_t s;
struct lv_segment *seg;
- if (*not_capable)
- return 1; /* No further checks needed */
+ RETURN_IF_ZERO(lv, "lv argument");
+ RETURN_IF_ZERO(data, "data argument");
- if (!(lv->status & PARTIAL_LV))
- return 1;
+ not_capable = (int*) data;
+
+ if (*not_capable ||
+ !(lv->status & PARTIAL_LV))
+ return 1; /* No further checks needed */
if (lv_is_raid(lv)) {
*not_capable = !_partial_raid_lv_is_redundant(lv);
@@ -4724,10 +10484,20 @@ static int _lv_may_be_activated_in_degraded_mode(struct logical_volume *lv, void
return 1;
}
+/*
+ * API function:
+ *
+ * check if @clv supports degraded activation
+ */
int partial_raid_lv_supports_degraded_activation(const struct logical_volume *clv)
{
int not_capable = 0;
- struct logical_volume * lv = (struct logical_volume *)clv; /* drop const */
+ struct logical_volume *lv;
+
+ RETURN_IF_LV_SEG_ZERO(clv, first_seg(clv));
+ RETURN_IF_ZERO(seg_is_raid(first_seg(clv)), "raid segment passed in");
+
+ lv = (struct logical_volume*) clv; /* drop const */
if (!_lv_may_be_activated_in_degraded_mode(lv, &not_capable) || not_capable)
return_0;
@@ -4739,3 +10509,238 @@ int partial_raid_lv_supports_degraded_activation(const struct logical_volume *cl
return !not_capable;
}
+
+/* HM raid10_far helper: ensure consistent image LVs have been passed in for @seg */
+static int _raid10_seg_images_sane(struct lv_segment *seg)
+{
+ uint32_t len = 0, s;
+ struct logical_volume *slv;
+
+ RETURN_IF_SEG_ZERO(seg);
+ RETURN_IF_SEG_AREA_COUNT_FALSE(seg, 0);
+
+ for (s = 0; s < seg->area_count; s++) {
+ RETURN_IF_ZERO(seg_type(seg, s) == AREA_LV, "raid10_far image LV");
+ slv = seg_lv(seg, s);
+ if (len) {
+ RETURN_IF_ZERO((slv->le_count == len), "consistent raid10_far image LV length");
+ RETURN_IF_NONZERO((slv->le_count % seg->data_copies),
+ "raid10_far image LV length divisibility by #data_copies");
+ } else
+ RETURN_IF_ZERO((len = slv->le_count), "raid10_far image LV length");
+ }
+
+ return 1;
+}
+
+/* HM raid10_far helper: split up all data image sub LVs of @lv from @start LE to @end LE in @split_len increments */
+static int _split_lv_data_images(struct logical_volume *lv,
+ uint32_t start, uint32_t end,
+ uint32_t split_len)
+{
+ uint32_t s;
+ struct lv_segment *seg;
+
+ RETURN_IF_LV_SEG_ZERO(lv, (seg = first_seg(lv)));
+ RETURN_IF_SEG_AREA_COUNT_FALSE(seg, 0);
+	RETURN_IF_ZERO(split_len < seg->len, "suitable argument split_len");
+
+ for (s = 0; s < seg->area_count; s++) {
+ uint32_t le;
+ struct logical_volume *slv = seg_lv(seg, s);
+
+ /* Split the image up */
+ for (le = start; le < end; le += split_len)
+ if (!lv_split_segment(slv, le))
+ return_0;
+ }
+
+ return 1;
+}
+
+/*
+ * HM API function:
+ *
+ * Reorder segments for @extents length in @lv;
+ * @extend flag indicates extension/reduction request.
+ *
+ * raid10_far arranges stripe zones with differing data block rotation
+ * one after the other and does data_copies across them.
+ *
+ * In order to resize those, we have to split them up by # data copies
+ * and reorder the split segments.
+ *
+ * Called via the lv_extend()/lv_reduce() API
+ *
+ */
+int lv_raid10_far_reorder_segments(struct logical_volume *lv, uint32_t extents, int extend)
+{
+ uint32_t le, s;
+ struct logical_volume *slv;
+ struct lv_segment *seg, *raid_seg;
+
+ RETURN_IF_LV_SEG_ZERO(lv, (raid_seg = first_seg(lv)));
+ RETURN_IF_ZERO(extents, "extents to reorder");
+ /* We may only reorder in case of raid10 far */
+ RETURN_IF_ZERO(seg_is_raid10_far(raid_seg), "raid10_far segment");
+
+PFLA("extents=%u lv->le_count=%u raid_seg->area_len=%u", extents, lv->le_count, raid_seg->area_len);
+ /* If this is a new LV -> no need to reorder */
+ if (!lv->le_count && extents == lv->le_count)
+ return 1;
+
+	/* Check properties of raid10_far segment for compatibility */
+ if (!_raid10_seg_images_sane(raid_seg))
+ return 0;
+PFL();
+ if (extend) {
+ uint32_t new_split_len, prev_le_count, prev_split_len;
+
+ /*
+ * We've got new extents added to the image LVs which
+ * are in the wrong place; got to split them up to insert
+ * the split ones into the previous raid10_far ones.
+ */
+ /* Ensure proper segment boundaries so that we can move segments */
+
+		/* Calculate previous length, because the LV is already grown when we get here */
+ prev_le_count = raid_rimage_extents(raid_seg->segtype, lv->le_count - extents,
+ raid_seg->area_count, raid_seg->data_copies);
+ prev_split_len = prev_le_count / raid_seg->data_copies;
+
+ /* Split segments of all image LVs for reordering */
+ if (!_split_lv_data_images(lv, prev_split_len /* start */, prev_le_count, prev_split_len))
+ return 0;
+
+ /* Split the newly allocated part of the images up */
+ slv = seg_lv(raid_seg, 0);
+ new_split_len = (slv->le_count - prev_le_count) / raid_seg->data_copies;
+ if (!_split_lv_data_images(lv, prev_le_count /* start */, slv->le_count, new_split_len))
+ return 0;
+PFL();
+ /*
+ * Reorder segments of the image LVs so that the split off #data_copies
+ * segments of the new allocation get moved to the ends of the split off
+ * previous ones.
+ *
+ * E.g. with 3 data copies before/after reordering an image LV:
+ *
+ * P1, P2, P3, N1, N2, N3 -> P1, N1, P2, N2, P3, N3
+ */
+ for (s = 0; s < raid_seg->area_count; s++) {
+ uint32_t le2;
+ struct lv_segment *seg2;
+
+ slv = seg_lv(raid_seg, s);
+ for (le = prev_split_len, le2 = prev_le_count + new_split_len;
+ le2 < slv->le_count;
+ le += prev_split_len, le2 += new_split_len) {
+ seg = find_seg_by_le(slv, le);
+ seg2 = find_seg_by_le(slv, le2);
+ dm_list_move(seg->list.n, &seg2->list);
+ }
+ }
+
+ /*
+ * Reduce...
+ */
+ } else {
+ uint32_t reduction, split_len;
+
+ /* Only reorder in case of partial reduction; deletion does not require it */
+ if (extents >= raid_seg->len)
+ return 1;
+
+ /* Ensure proper segment boundaries so that we can move segments */
+ slv = seg_lv(raid_seg, 0);
+ reduction = extents / raid_seg->area_count;
+ split_len = slv->le_count / raid_seg->data_copies;
+
+ /* Split segments of all image LVs for reordering */
+ if (!_split_lv_data_images(lv, split_len - reduction, slv->le_count, split_len) ||
+ !_split_lv_data_images(lv, split_len, slv->le_count, split_len))
+ return 0;
+
+ /* Reorder split segments of all image LVs to have those to reduce at the end */
+ for (s = 0; s < raid_seg->area_count; s++) {
+ slv = seg_lv(raid_seg, s);
+ for (le = split_len - reduction; le < slv->le_count; le += split_len) {
+ seg = find_seg_by_le(slv, le);
+ dm_list_move(&slv->segments, &seg->list);
+ }
+ }
+PFL();
+ }
+
+ /* Correct segments start logical extents in all sub LVs of @lv */
+ return _lv_set_image_lvs_start_les(lv);
+}
+
+/*
+ * HM API function
+ *
+ * Create a raid01 (mirrors on top of stripes) LV
+ *
+ * I.e. allocate a "raid01" top-level segment and @data_copies striped LVs
+ * with @stripes each to insert as the top-level segments area LVs.
+ *
+ * Called via the lv_extend() API
+ */
+int lv_create_raid01(struct logical_volume *lv, const struct segment_type *segtype,
+ unsigned data_copies, unsigned stripes,
+ unsigned stripe_size, unsigned region_size,
+ unsigned extents, struct dm_list *allocate_pvs)
+{
+ uint64_t status = RAID_IMAGE | LVM_READ | LVM_WRITE;
+ struct lv_segment *raid01_seg;
+ struct segment_type *image_segtype;
+ struct volume_group *vg;
+
+ RETURN_IF_LV_SEGTYPE_ZERO(lv, segtype);
+ RETURN_IF_ZERO(extents, "extents");
+ RETURN_IF_ZERO(allocate_pvs, "allocate pvs argument");
+ RETURN_IF_NONZERO(dm_list_empty(allocate_pvs), "pvs to allocate on listed");
+ RETURN_IF_NONZERO(stripes < 2, "proper number of stripes");
+
+ data_copies = data_copies < 2 ? 2 : data_copies;
+ vg = lv->vg;
+
+ if (!(image_segtype = get_segtype_from_string(vg->cmd, SEG_TYPE_NAME_STRIPED)))
+ return_0;
+
+ /* Create the raid01 top-level segment */
+ if (!(raid01_seg = alloc_lv_segment(segtype, lv, 0 /* le */, extents /* len */,
+ 0 /* reshape_len */, status | RAID,
+ 0 /* stripe_size */, NULL,
+ data_copies, extents /* area_len */,
+ data_copies, 0, region_size, 0, NULL))) {
+ log_error("Failed to create %s top-level segment for LV %s",
+ segtype->name, display_lvname(lv));
+ return_0;
+ }
+
+ if (!archive(vg))
+ return_0;
+
+ /* Create the #data_copies striped sub LVs */
+ if (!_lv_create_raid01_image_lvs(lv, raid01_seg, image_segtype, extents,
+ stripes, stripe_size, 0 /* first area */,
+ data_copies, allocate_pvs))
+ return 0;
+
+ dm_list_init(&lv->segments);
+ dm_list_add(&lv->segments, &raid01_seg->list);
+
+ /* Has to be set before calling _check_and_init_region_size() */
+ lv->le_count = raid01_seg->len;
+ lv->size = raid01_seg->len * lv->vg->extent_size;
+
+ if (!_check_and_init_region_size(lv))
+ return 0;
+
+ /* Reset to force rmeta image LV creation in new raid01 segment */
+ raid01_seg->meta_areas = NULL;
+
+ /* If metadata images fail to allocate, remove the LV */
+ return _alloc_and_add_rmeta_devs_for_lv(lv, allocate_pvs) ? 1 : lv_remove(lv);
+}
diff --git a/lib/metadata/replicator_manip.c b/lib/metadata/replicator_manip.c
index 54dc75929..ed34ccab5 100644
--- a/lib/metadata/replicator_manip.c
+++ b/lib/metadata/replicator_manip.c
@@ -298,7 +298,7 @@ int check_replicator_segment(const struct lv_segment *rseg)
}
if (rsite->fall_behind_data) {
log_error("Defined fall_behind_data="
- "%" PRIu64 " for sync replicator %s/%s.",
+ FMTu64 " for sync replicator %s/%s.",
rsite->fall_behind_data, lv->name, rsite->name);
r = 0;
}
@@ -566,7 +566,7 @@ int cmd_vg_read(struct cmd_context *cmd, struct dm_list *cmd_vgs)
/* Iterate through alphabeticaly ordered cmd_vg list */
dm_list_iterate_items(cvl, cmd_vgs) {
- cvl->vg = vg_read(cmd, cvl->vg_name, cvl->vgid, cvl->flags);
+ cvl->vg = vg_read(cmd, cvl->vg_name, cvl->vgid, cvl->flags, 0);
if (vg_read_error(cvl->vg)) {
log_debug_metadata("Failed to vg_read %s", cvl->vg_name);
return 0;
@@ -644,7 +644,7 @@ int lv_read_replicator_vgs(const struct logical_volume *lv)
dm_list_iterate_items(rsite, &first_seg(lv)->replicator->rsites) {
if (!rsite->vg_name)
continue;
- vg = vg_read(lv->vg->cmd, rsite->vg_name, 0, 0); // READ_WITHOUT_LOCK
+ vg = vg_read(lv->vg->cmd, rsite->vg_name, 0, 0, 0); // READ_WITHOUT_LOCK
if (vg_read_error(vg)) {
log_error("Unable to read volume group %s",
rsite->vg_name);
diff --git a/lib/metadata/segtype.c b/lib/metadata/segtype.c
index 02dff4cbd..f6565377f 100644
--- a/lib/metadata/segtype.c
+++ b/lib/metadata/segtype.c
@@ -1,6 +1,6 @@
/*
* Copyright (C) 2003-2004 Sistina Software, Inc. All rights reserved.
- * Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2004-2015 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
@@ -22,6 +22,9 @@ struct segment_type *get_segtype_from_string(struct cmd_context *cmd,
{
struct segment_type *segtype;
+ if (!strcmp(str, "linear"))
+ str = "striped";
+
dm_list_iterate_items(segtype, &cmd->segtypes)
if (!strcmp(segtype->name, str))
return segtype;
@@ -34,3 +37,15 @@ struct segment_type *get_segtype_from_string(struct cmd_context *cmd,
return segtype;
}
+
+struct segment_type *get_segtype_from_flag(struct cmd_context *cmd,
+ uint64_t flag)
+{
+ struct segment_type *segtype;
+
+ dm_list_iterate_items(segtype, &cmd->segtypes)
+ if (flag & segtype->flags)
+ return segtype;
+
+ return get_segtype_from_string(cmd, "");
+}
diff --git a/lib/metadata/segtype.h b/lib/metadata/segtype.h
index 7c5e0c9e9..32dfc072b 100644
--- a/lib/metadata/segtype.h
+++ b/lib/metadata/segtype.h
@@ -28,46 +28,52 @@ struct dm_config_node;
struct dev_manager;
/* Feature flags */
-#define SEG_CAN_SPLIT 0x0000000000000001U
-#define SEG_AREAS_STRIPED 0x0000000000000002U
-#define SEG_AREAS_MIRRORED 0x0000000000000004U
-#define SEG_SNAPSHOT 0x0000000000000008U
-#define SEG_FORMAT1_SUPPORT 0x0000000000000010U
-#define SEG_VIRTUAL 0x0000000000000020U
-#define SEG_CANNOT_BE_ZEROED 0x0000000000000040U
-#define SEG_MONITORED 0x0000000000000080U
-#define SEG_REPLICATOR 0x0000000000000100U
-#define SEG_REPLICATOR_DEV 0x0000000000000200U
-#define SEG_RAID 0x0000000000000400U
-#define SEG_THIN_POOL 0x0000000000000800U
-#define SEG_THIN_VOLUME 0x0000000000001000U
-#define SEG_CACHE 0x0000000000002000U
-#define SEG_CACHE_POOL 0x0000000000004000U
-#define SEG_MIRROR 0x0000000000008000U
-#define SEG_ONLY_EXCLUSIVE 0x0000000000010000U /* In cluster only exlusive activation */
-
-#define SEG_RAID0 0x0000000000020000U
-#define SEG_RAID0_META 0x0000000000040000U
-#define SEG_RAID1 0x0000000000080000U
-#define SEG_RAID10 0x0000000000100000U
-#define SEG_RAID4 0x0000000000200000U
-#define SEG_RAID5_N 0x0000000000400000U
-#define SEG_RAID5_LA 0x0000000000800000U
-#define SEG_RAID5_LS 0x0000000001000000U
-#define SEG_RAID5_RA 0x0000000002000000U
-#define SEG_RAID5_RS 0x0000000004000000U
+#define SEG_CAN_SPLIT UINT64_C(0x0000000000000001)
+#define SEG_AREAS_STRIPED UINT64_C(0x0000000000000002)
+#define SEG_AREAS_MIRRORED UINT64_C(0x0000000000000004)
+#define SEG_SNAPSHOT UINT64_C(0x0000000000000008)
+#define SEG_FORMAT1_SUPPORT UINT64_C(0x0000000000000010)
+#define SEG_VIRTUAL UINT64_C(0x0000000000000020)
+#define SEG_CANNOT_BE_ZEROED UINT64_C(0x0000000000000040)
+#define SEG_MONITORED UINT64_C(0x0000000000000080)
+#define SEG_REPLICATOR UINT64_C(0x0000000000000100)
+#define SEG_REPLICATOR_DEV UINT64_C(0x0000000000000200)
+#define SEG_RAID UINT64_C(0x0000000000000400)
+#define SEG_THIN_POOL UINT64_C(0x0000000000000800)
+#define SEG_THIN_VOLUME UINT64_C(0x0000000000001000)
+#define SEG_CACHE UINT64_C(0x0000000000002000)
+#define SEG_CACHE_POOL UINT64_C(0x0000000000004000)
+#define SEG_MIRROR UINT64_C(0x0000000000008000)
+#define SEG_ONLY_EXCLUSIVE	UINT64_C(0x0000000000010000)	/* In cluster only exclusive activation */
+#define SEG_CAN_ERROR_WHEN_FULL UINT64_C(0x0000000000020000)
+
+#define SEG_RAID0 UINT64_C(0x0000000000040000)
+#define SEG_RAID0_META UINT64_C(0x0000000000080000)
+#define SEG_RAID1 UINT64_C(0x0000000000100000)
+#define SEG_RAID10_NEAR UINT64_C(0x0000000000200000)
+#define SEG_RAID4 UINT64_C(0x0000000000400000)
+#define SEG_RAID5_N UINT64_C(0x0000000000800000)
+#define SEG_RAID5_LA UINT64_C(0x0000000001000000)
+#define SEG_RAID5_LS UINT64_C(0x0000000002000000)
+#define SEG_RAID5_RA UINT64_C(0x0000000004000000)
+#define SEG_RAID5_RS UINT64_C(0x0000000008000000)
+#define SEG_RAID6_NC UINT64_C(0x0000000010000000)
+#define SEG_RAID6_NR UINT64_C(0x0000000020000000)
+#define SEG_RAID6_ZR UINT64_C(0x0000000040000000)
+#define SEG_RAID6_LA_6 UINT64_C(0x0000000080000000)
+#define SEG_RAID6_LS_6 UINT64_C(0x0000000100000000)
+#define SEG_RAID6_RA_6 UINT64_C(0x0000000200000000)
+#define SEG_RAID6_RS_6 UINT64_C(0x0000000400000000)
+#define SEG_RAID6_N_6 UINT64_C(0x0000000800000000)
+#define SEG_RAID10_FAR UINT64_C(0x0000001000000000)
+#define SEG_RAID10_OFFSET UINT64_C(0x0000002000000000)
+#define SEG_RAID01 UINT64_C(0x0000004000000000)
+
#define SEG_RAID5 SEG_RAID5_LS
-#define SEG_RAID6_NC 0x0000000008000000U
-#define SEG_RAID6_NR 0x0000000010000000U
-#define SEG_RAID6_ZR 0x0000000020000000U
-#define SEG_RAID6_LA_6 0x0000000040000000U
-#define SEG_RAID6_LS_6 0x0000000080000000U
-#define SEG_RAID6_RA_6 0x0000000100000000U
-#define SEG_RAID6_RS_6 0x0000000200000000U
-#define SEG_RAID6_N_6 0x0000000400000000U
#define SEG_RAID6 SEG_RAID6_ZR
+#define SEG_RAID10 SEG_RAID10_NEAR
-#define SEG_UNKNOWN 0x8000000000000000U
+#define SEG_UNKNOWN UINT64_C(0x8000000000000000)
#define segtype_is_cache(segtype) ((segtype)->flags & SEG_CACHE ? 1 : 0)
#define segtype_is_cache_pool(segtype) ((segtype)->flags & SEG_CACHE_POOL ? 1 : 0)
@@ -103,6 +109,7 @@ struct dev_manager;
#define seg_cannot_be_zeroed(seg) ((seg)->segtype->flags & SEG_CANNOT_BE_ZEROED ? 1 : 0)
#define seg_monitored(seg) ((seg)->segtype->flags & SEG_MONITORED ? 1 : 0)
#define seg_only_exclusive(seg) ((seg)->segtype->flags & SEG_ONLY_EXCLUSIVE ? 1 : 0)
+#define seg_can_error_when_full(seg) ((seg)->segtype->flags & SEG_CAN_ERROR_WHEN_FULL ? 1 : 0)
struct segment_type {
struct dm_list list; /* Internal */
@@ -112,6 +119,7 @@ struct segment_type {
struct segtype_handler *ops;
const char *name;
+ const char *descr;
void *library; /* lvm_register_segtype() sets this. */
void *private; /* For the segtype handler to use. */
@@ -160,6 +168,8 @@ struct segtype_handler {
struct segment_type *get_segtype_from_string(struct cmd_context *cmd,
const char *str);
+struct segment_type *get_segtype_from_flag(struct cmd_context *cmd,
+ uint64_t flag);
struct segtype_library;
int lvm_register_segtype(struct segtype_library *seglib,
@@ -180,6 +190,8 @@ struct segment_type *init_unknown_segtype(struct cmd_context *cmd,
int init_raid_segtypes(struct cmd_context *cmd, struct segtype_library *seglib);
#endif
+#define SEG_TYPE_NAME_MIRROR "mirror"
+
/* RAID specific seg and segtype checks */
#define SEG_TYPE_NAME_LINEAR "linear"
#define SEG_TYPE_NAME_STRIPED "striped"
@@ -187,7 +199,11 @@ int init_raid_segtypes(struct cmd_context *cmd, struct segtype_library *seglib);
#define SEG_TYPE_NAME_RAID0 "raid0"
#define SEG_TYPE_NAME_RAID0_META "raid0_meta"
#define SEG_TYPE_NAME_RAID1 "raid1"
+#define SEG_TYPE_NAME_RAID01 "raid01"
#define SEG_TYPE_NAME_RAID10 "raid10"
+#define SEG_TYPE_NAME_RAID10_NEAR "raid10_near"
+#define SEG_TYPE_NAME_RAID10_FAR "raid10_far"
+#define SEG_TYPE_NAME_RAID10_OFFSET "raid10_offset"
#define SEG_TYPE_NAME_RAID4 "raid4"
#define SEG_TYPE_NAME_RAID5 "raid5"
#define SEG_TYPE_NAME_RAID5_N "raid5_n"
@@ -209,7 +225,13 @@ int init_raid_segtypes(struct cmd_context *cmd, struct segtype_library *seglib);
#define segtype_is_raid0_meta(segtype) (((segtype)->flags & SEG_RAID0_META) ? 1 : 0)
#define segtype_is_any_raid0(segtype) (((segtype)->flags & (SEG_RAID0|SEG_RAID0_META)) ? 1 : 0)
#define segtype_is_raid1(segtype) (((segtype)->flags & SEG_RAID1) ? 1 : 0)
-#define segtype_is_raid10(segtype) (((segtype)->flags & SEG_RAID10) ? 1 : 0)
+#define segtype_is_raid01(segtype) (((segtype)->flags & SEG_RAID01) ? 1 : 0)
+#define segtype_is_raid10(segtype) (((segtype)->flags & SEG_RAID10_NEAR) ? 1 : 0)
+#define segtype_is_raid10_near(segtype) segtype_is_raid10(segtype)
+#define segtype_is_raid10_far(segtype) (((segtype)->flags & SEG_RAID10_FAR) ? 1 : 0)
+#define segtype_is_raid10_offset(segtype) (((segtype)->flags & SEG_RAID10_OFFSET) ? 1 : 0)
+#define segtype_is_any_raid10(segtype) (((segtype)->flags & \
+ (SEG_RAID10_NEAR|SEG_RAID10_FAR|SEG_RAID10_OFFSET)) ? 1 : 0)
#define segtype_is_raid4(segtype) (((segtype)->flags & SEG_RAID4) ? 1 : 0)
#define segtype_is_raid5_ls(segtype) (((segtype)->flags & SEG_RAID5_LS) ? 1 : 0)
#define segtype_is_raid5_rs(segtype) (((segtype)->flags & SEG_RAID5_RS) ? 1 : 0)
@@ -223,15 +245,26 @@ int init_raid_segtypes(struct cmd_context *cmd, struct segtype_library *seglib);
#define segtype_is_raid6_la_6(segtype) (((segtype)->flags & SEG_RAID6_LA_6) ? 1 : 0)
#define segtype_is_raid6_ra_6(segtype) (((segtype)->flags & SEG_RAID6_RA_6) ? 1 : 0)
#define segtype_is_raid6_n_6(segtype) (((segtype)->flags & SEG_RAID6_N_6) ? 1 : 0)
+#define segtype_is_raid6_zr(segtype) (((segtype)->flags & SEG_RAID6_ZR) ? 1 : 0)
+#define segtype_is_raid6_nc(segtype) (((segtype)->flags & SEG_RAID6_NC) ? 1 : 0)
+#define segtype_is_raid6_nr(segtype) (((segtype)->flags & SEG_RAID6_NR) ? 1 : 0)
#define segtype_is_any_raid6(segtype) (((segtype)->flags & \
- (SEG_RAID6_ZR|SEG_RAID6_NC|SEG_RAID6_NR|SEG_RAID6_N_6)) ? 1 : 0)
-#define segtype_is_striped_raid(segtype) (segtype_is_raid(segtype) && !segtype_is_raid1(segtype))
+ (SEG_RAID6_ZR|SEG_RAID6_NC|SEG_RAID6_NR| \
+ SEG_RAID6_LS_6|SEG_RAID6_LA_6|SEG_RAID6_RS_6|SEG_RAID6_RA_6|SEG_RAID6_N_6)) ? 1 : 0)
+#define segtype_is_striped_raid(segtype) (segtype_is_raid(segtype) && \
+ !(segtype_is_raid1(segtype) || segtype_is_raid01(segtype)))
+#define segtype_is_reshapable_raid(segtype) ((segtype_is_striped_raid(segtype) && !segtype_is_any_raid0(segtype)) || segtype_is_raid10_near(segtype) || segtype_is_raid10_offset(segtype))
#define seg_is_raid0(seg) segtype_is_raid0((seg)->segtype)
#define seg_is_raid0_meta(seg) segtype_is_raid0_meta((seg)->segtype)
#define seg_is_any_raid0(seg) segtype_is_any_raid0((seg)->segtype)
#define seg_is_raid1(seg) segtype_is_raid1((seg)->segtype)
-#define seg_is_raid10(seg) segtype_is_raid10((seg)->segtype)
+#define seg_is_raid01(seg) segtype_is_raid01((seg)->segtype)
+#define seg_is_raid10_near(seg) segtype_is_raid10_near((seg)->segtype)
+#define seg_is_raid10(seg) seg_is_raid10_near((seg))
+#define seg_is_raid10_far(seg) segtype_is_raid10_far((seg)->segtype)
+#define seg_is_raid10_offset(seg) segtype_is_raid10_offset((seg)->segtype)
+#define seg_is_any_raid10(seg) segtype_is_any_raid10((seg)->segtype)
#define seg_is_raid4(seg) segtype_is_raid4((seg)->segtype)
#define seg_is_any_raid5(seg) segtype_is_any_raid5((seg)->segtype)
#define seg_is_raid5_ls(seg) segtype_is_raid5_ls((seg)->segtype)
@@ -246,7 +279,11 @@ int init_raid_segtypes(struct cmd_context *cmd, struct segtype_library *seglib);
#define seg_is_raid6_la_6(seg) segtype_is_raid6_la_6((seg)->segtype)
#define seg_is_raid6_ra_6(seg) segtype_is_raid6_ra_6((seg)->segtype)
#define seg_is_raid6_n_6(seg) segtype_is_raid6_n_6((seg)->segtype)
+#define seg_is_raid6_zr(seg) segtype_is_raid6_zr((seg)->segtype)
+#define seg_is_raid6_nc(seg) segtype_is_raid6_nc((seg)->segtype)
+#define seg_is_raid6_nr(seg) segtype_is_raid6_nr((seg)->segtype)
#define seg_is_striped_raid(seg) segtype_is_striped_raid((seg)->segtype)
+#define seg_is_reshapable_raid(seg) segtype_is_reshapable_raid((seg)->segtype)
#ifdef REPLICATOR_INTERNAL
int init_replicator_segtype(struct cmd_context *cmd, struct segtype_library *seglib);
@@ -259,6 +296,7 @@ int init_replicator_segtype(struct cmd_context *cmd, struct segtype_library *seg
#define THIN_FEATURE_DISCARDS_NON_POWER_2 (1U << 4)
#define THIN_FEATURE_METADATA_RESIZE (1U << 5)
#define THIN_FEATURE_EXTERNAL_ORIGIN_EXTEND (1U << 6)
+#define THIN_FEATURE_ERROR_IF_NO_SPACE (1U << 7)
#ifdef THIN_INTERNAL
int init_thin_segtypes(struct cmd_context *cmd, struct segtype_library *seglib);
@@ -268,6 +306,9 @@ int init_thin_segtypes(struct cmd_context *cmd, struct segtype_library *seglib);
int init_cache_segtypes(struct cmd_context *cmd, struct segtype_library *seglib);
#endif
+#define CACHE_FEATURE_POLICY_MQ (1U << 0)
+#define CACHE_FEATURE_POLICY_SMQ (1U << 1)
+
#define SNAPSHOT_FEATURE_FIXED_LEAK (1U << 0) /* version 1.12 */
#ifdef SNAPSHOT_INTERNAL
diff --git a/lib/metadata/thin_manip.c b/lib/metadata/thin_manip.c
index e617b3c58..24c68a852 100644
--- a/lib/metadata/thin_manip.c
+++ b/lib/metadata/thin_manip.c
@@ -21,6 +21,7 @@
#include "defaults.h"
#include "display.h"
+/* TODO: drop unused no_update */
int attach_pool_message(struct lv_segment *pool_seg, dm_thin_message_t type,
struct logical_volume *lv, uint32_t delete_id,
int no_update)
@@ -237,18 +238,107 @@ int pool_below_threshold(const struct lv_segment *pool_seg)
}
/*
+ * Detect overprovisioning and check lvm2 is configured for auto resize.
+ *
+ * If the passed LV is a thin volume/pool, check only this one first for overprovisioning.
+ * Lots of tests combined together.
+ * Test is not detecting status of dmeventd, too complex for now...
+ */
+int pool_check_overprovisioning(const struct logical_volume *lv)
+{
+ const struct lv_list *lvl;
+ const struct seg_list *sl;
+ const struct logical_volume *pool_lv = NULL;
+ struct cmd_context *cmd = lv->vg->cmd;
+ const char *txt = "";
+ uint64_t thinsum = 0, poolsum = 0, sz = ~0;
+ int threshold, max_threshold = 0;
+ int percent, min_percent = 100;
+ int more_pools = 0;
+
+ /* When passed thin volume, check related pool first */
+ if (lv_is_thin_volume(lv))
+ pool_lv = first_seg(lv)->pool_lv;
+ else if (lv_is_thin_pool(lv))
+ pool_lv = lv;
+
+ if (pool_lv) {
+ poolsum += pool_lv->size;
+ dm_list_iterate_items(sl, &pool_lv->segs_using_this_lv)
+ thinsum += sl->seg->lv->size;
+
+ if (thinsum <= poolsum)
+ return 1; /* All thins fit into this thin pool */
+ }
+
+ /* Sum all thins and all thin pools in VG */
+ dm_list_iterate_items(lvl, &lv->vg->lvs) {
+ if (!lv_is_thin_pool(lvl->lv))
+ continue;
+
+ threshold = find_config_tree_int(cmd, activation_thin_pool_autoextend_threshold_CFG,
+ lv_config_profile(lvl->lv));
+ percent = find_config_tree_int(cmd, activation_thin_pool_autoextend_percent_CFG,
+ lv_config_profile(lvl->lv));
+ if (threshold > max_threshold)
+ max_threshold = threshold;
+ if (percent < min_percent)
+ min_percent = percent;
+
+ if (lvl->lv == pool_lv)
+ continue; /* Skip iteration for already checked thin pool */
+
+ more_pools++;
+ poolsum += lvl->lv->size;
+ dm_list_iterate_items(sl, &lvl->lv->segs_using_this_lv)
+ thinsum += sl->seg->lv->size;
+ }
+
+ if (thinsum <= poolsum)
+ return 1; /* All fits for all pools */
+
+ if ((sz = vg_size(lv->vg)) < thinsum)
+ /* Thin sum size is above VG size */
+ txt = " and the size of whole volume group";
+ else if ((sz = vg_free(lv->vg)) < thinsum)
+ /* Thin sum size is more than the free space in a VG */
+ txt = !sz ? "" : " and the amount of free space in volume group";
+ else if ((max_threshold > 99) || !min_percent)
+ /* There is some free space in VG, but it is not configured
+ * for growing - threshold is 100% or percent is 0% */
+ sz = poolsum;
+ else
+ sz = ~0; /* No warning */
+
+ if (sz != ~0) {
+ log_warn("WARNING: Sum of all thin volume sizes (%s) exceeds the "
+ "size of thin pool%s%s%s (%s)!",
+ display_size(cmd, thinsum),
+ more_pools ? "" : " ",
+ more_pools ? "s" : display_lvname(pool_lv),
+ txt,
+ (sz > 0) ? display_size(cmd, sz) : "no free space in volume group");
+ if (max_threshold > 99)
+ log_print_unless_silent("For thin pool auto extension activation/thin_pool_autoextend_threshold should be below 100.");
+ if (!min_percent)
+ log_print_unless_silent("For thin pool auto extension activation/thin_pool_autoextend_percent should be above 0.");
+ }
+
+ return 1;
+}
+
+/*
* Validate given external origin could be used with thin pool
*/
int pool_supports_external_origin(const struct lv_segment *pool_seg, const struct logical_volume *external_lv)
{
uint32_t csize = pool_seg->chunk_size;
- if ((external_lv->size < csize) || (external_lv->size % csize)) {
- /* TODO: Validate with thin feature flag once, it will be supported */
- log_error("Can't use \"%s/%s\" as external origin with \"%s/%s\" pool. "
+ if (((external_lv->size < csize) || (external_lv->size % csize)) &&
+ !thin_pool_feature_supported(pool_seg->lv, THIN_FEATURE_EXTERNAL_ORIGIN_EXTEND)) {
+ log_error("Can't use \"%s\" as external origin with \"%s\" pool. "
"Size %s is not a multiple of pool's chunk size %s.",
- external_lv->vg->name, external_lv->name,
- pool_seg->lv->vg->name, pool_seg->lv->name,
+ display_lvname(external_lv), display_lvname(pool_seg->lv),
display_size(external_lv->vg->cmd, external_lv->size),
display_size(external_lv->vg->cmd, csize));
return 0;
@@ -336,7 +426,7 @@ static int _check_pool_create(const struct logical_volume *lv)
int update_pool_lv(struct logical_volume *lv, int activate)
{
- int monitored;
+ int monitored = DMEVENTD_MONITOR_IGNORE;
int ret = 1;
if (!lv_is_thin_pool(lv)) {
@@ -362,28 +452,30 @@ int update_pool_lv(struct logical_volume *lv, int activate)
display_lvname(lv));
return 0;
}
-
- if (!(ret = _check_pool_create(lv)))
- stack;
-
+ } else
+ activate = 0; /* Was already active */
+
+ if (!(ret = _check_pool_create(lv)))
+ stack; /* Safety guard, needs local presence of thin-pool target */
+ else if (!(ret = suspend_lv_origin(lv->vg->cmd, lv)))
+ /* Send messages */
+ log_error("Failed to suspend and send message %s.", display_lvname(lv));
+ else if (!(ret = resume_lv_origin(lv->vg->cmd, lv)))
+ log_error("Failed to resume %s.", display_lvname(lv));
+
+ if (activate) {
if (!deactivate_lv(lv->vg->cmd, lv)) {
init_dmeventd_monitor(monitored);
return_0;
}
init_dmeventd_monitor(monitored);
-
- /* Unlock memory if possible */
- memlock_unlock(lv->vg->cmd);
}
- /*
- * Resume active pool to send thin messages.
- * origin_only is used to skip check for resumed state
- */
- else if (!resume_lv_origin(lv->vg->cmd, lv)) {
- log_error("Failed to resume %s.", lv->name);
- return 0;
- } else if (!(ret = _check_pool_create(lv)))
- stack;
+
+ /* Unlock memory if possible */
+ memlock_unlock(lv->vg->cmd);
+
+ if (!ret)
+ return_0;
}
dm_list_init(&(first_seg(lv)->thin_messages));
@@ -631,7 +723,7 @@ int check_new_thin_pool(const struct logical_volume *pool_lv)
/* Require pool to have same transaction_id as new */
if (first_seg(pool_lv)->transaction_id != transaction_id) {
log_error("Cannot use thin pool %s with transaction id "
- "%" PRIu64 " for thin volumes. "
+ FMTu64 " for thin volumes. "
"Expected transaction id %" PRIu64 ".",
display_lvname(pool_lv), transaction_id,
first_seg(pool_lv)->transaction_id);
diff --git a/lib/metadata/vg.c b/lib/metadata/vg.c
index fee3767f8..ca2bf9616 100644
--- a/lib/metadata/vg.c
+++ b/lib/metadata/vg.c
@@ -41,6 +41,14 @@ struct volume_group *alloc_vg(const char *pool_name, struct cmd_context *cmd,
return NULL;
}
+ if (!(vg->lvm1_system_id = dm_pool_zalloc(vgmem, NAME_LEN + 1))) {
+ log_error("Failed to allocate VG systemd id.");
+ dm_pool_destroy(vgmem);
+ return NULL;
+ }
+
+ vg->system_id = "";
+
vg->cmd = cmd;
vg->vgmem = vgmem;
vg->alloc = ALLOC_NORMAL;
@@ -53,8 +61,10 @@ struct volume_group *alloc_vg(const char *pool_name, struct cmd_context *cmd,
dm_list_init(&vg->pvs);
dm_list_init(&vg->pvs_to_create);
+ dm_list_init(&vg->pvs_outdated);
dm_list_init(&vg->lvs);
dm_list_init(&vg->tags);
+ dm_list_init(&vg->removed_lvs);
dm_list_init(&vg->removed_pvs);
log_debug_mem("Allocated VG %s at %p.", vg->name, vg);
@@ -121,7 +131,17 @@ char *vg_name_dup(const struct volume_group *vg)
char *vg_system_id_dup(const struct volume_group *vg)
{
- return dm_pool_strdup(vg->vgmem, vg->system_id);
+ return dm_pool_strdup(vg->vgmem, vg->system_id ? : vg->lvm1_system_id ? : "");
+}
+
+char *vg_lock_type_dup(const struct volume_group *vg)
+{
+ return dm_pool_strdup(vg->vgmem, vg->lock_type ? : vg->lock_type ? : "");
+}
+
+char *vg_lock_args_dup(const struct volume_group *vg)
+{
+ return dm_pool_strdup(vg->vgmem, vg->lock_args ? : vg->lock_args ? : "");
}
char *vg_uuid_dup(const struct volume_group *vg)
@@ -601,6 +621,45 @@ int vg_set_clustered(struct volume_group *vg, int clustered)
return 1;
}
+/* The input string has already been validated. */
+
+int vg_set_system_id(struct volume_group *vg, const char *system_id)
+{
+ if (!system_id || !*system_id) {
+ vg->system_id = NULL;
+ return 1;
+ }
+
+ if (systemid_on_pvs(vg)) {
+ log_error("Metadata format %s does not support this type of system ID.",
+ vg->fid->fmt->name);
+ return 0;
+ }
+
+ if (!(vg->system_id = dm_pool_strdup(vg->vgmem, system_id))) {
+ log_error("Failed to allocate memory for system_id in vg_set_system_id.");
+ return 0;
+ }
+
+ if (vg->lvm1_system_id)
+ *vg->lvm1_system_id = '\0';
+
+ return 1;
+}
+
+int vg_set_lock_type(struct volume_group *vg, const char *lock_type)
+{
+ if (!lock_type)
+ lock_type = "none";
+
+ if (!(vg->lock_type = dm_pool_strdup(vg->vgmem, lock_type))) {
+ log_error("vg_set_lock_type %s no mem", lock_type);
+ return 0;
+ }
+
+ return 1;
+}
+
char *vg_attr_dup(struct dm_pool *mem, const struct volume_group *vg)
{
char *repstr;
@@ -615,7 +674,14 @@ char *vg_attr_dup(struct dm_pool *mem, const struct volume_group *vg)
repstr[2] = (vg_is_exported(vg)) ? 'x' : '-';
repstr[3] = (vg_missing_pv_count(vg)) ? 'p' : '-';
repstr[4] = alloc_policy_char(vg->alloc);
- repstr[5] = (vg_is_clustered(vg)) ? 'c' : '-';
+
+ if (vg_is_clustered(vg))
+ repstr[5] = 'c';
+ else if (is_lockd_type(vg->lock_type))
+ repstr[5] = 's';
+ else
+ repstr[5] = '-';
+
return repstr;
}
@@ -670,7 +736,7 @@ int vgreduce_single(struct cmd_context *cmd, struct volume_group *vg,
vg->extent_count -= pv_pe_count(pv);
orphan_vg = vg_read_for_update(cmd, vg->fid->fmt->orphan_vg_name,
- NULL, 0);
+ NULL, 0, 0);
if (vg_read_error(orphan_vg))
goto bad;
diff --git a/lib/metadata/vg.h b/lib/metadata/vg.h
index fffe9a7d5..a21af8b06 100644
--- a/lib/metadata/vg.h
+++ b/lib/metadata/vg.h
@@ -44,10 +44,12 @@ struct volume_group {
struct cmd_context *cmd;
struct dm_pool *vgmem;
struct format_instance *fid;
+ const struct format_type *original_fmt; /* Set when processing backup files */
struct lvmcache_vginfo *vginfo;
struct dm_list *cmd_vgs;/* List of wanted/locked and opened VGs */
uint32_t cmd_missing_vgs;/* Flag marks missing VG */
uint32_t seqno; /* Metadata sequence number */
+ unsigned skip_validate_lock_args : 1;
/*
* The parsed on-disk copy of this VG; is NULL if this is the on-disk
@@ -67,7 +69,10 @@ struct volume_group {
struct id id;
const char *name;
const char *old_name; /* Set during vgrename and vgcfgrestore */
- char *system_id;
+ const char *system_id;
+ char *lvm1_system_id;
+ const char *lock_type;
+ const char *lock_args;
uint32_t extent_size;
uint32_t extent_count;
@@ -88,6 +93,24 @@ struct volume_group {
struct dm_list pvs_to_create;
/*
+ * List of physical volumes that carry outdated metadata that belongs
+ * to this VG. Currently only populated when lvmetad is in use. The PVs
+ * on this list could still belong to the VG (but their MDA carries an
+ * out-of-date copy of the VG metadata) or they could no longer belong
+ * to the VG. With lvmetad, this list is populated with all PVs that
+ * have a VGID matching ours, but seqno that is smaller than the
+ * current seqno for the VG. The MDAs on still-in-VG PVs are updated as
+ * part of the normal vg_write/vg_commit process. The MDAs on PVs that
+ * no longer belong to the VG are wiped during vg_read.
+ *
+ * However, even though still-in-VG PVs *may* be on the list, this is
+ * not guaranteed. The in-lvmetad list is cleared whenever out-of-VG
+ * outdated PVs are wiped during vg_read.
+ */
+
+ struct dm_list pvs_outdated;
+
+ /*
* logical volumes
* The following relationship should always hold:
* dm_list_size(lvs) = user visible lv_count + snapshot_count + other invisible LVs
@@ -110,6 +133,11 @@ struct volume_group {
*/
/*
+ * List of removed logical volumes by _lv_reduce.
+ */
+ struct dm_list removed_lvs;
+
+ /*
* List of removed physical volumes by pvreduce.
* They have to get cleared on vg_commit.
*/
@@ -125,6 +153,7 @@ struct volume_group {
struct dm_hash_table *hostnames; /* map of creation hostnames */
struct logical_volume *pool_metadata_spare_lv; /* one per VG */
+ struct logical_volume *sanlock_lv; /* one per VG */
};
struct volume_group *alloc_vg(const char *pool_name, struct cmd_context *cmd,
@@ -140,10 +169,14 @@ void free_orphan_vg(struct volume_group *vg);
char *vg_fmt_dup(const struct volume_group *vg);
char *vg_name_dup(const struct volume_group *vg);
char *vg_system_id_dup(const struct volume_group *vg);
+char *vg_lock_type_dup(const struct volume_group *vg);
+char *vg_lock_args_dup(const struct volume_group *vg);
uint32_t vg_seqno(const struct volume_group *vg);
uint64_t vg_status(const struct volume_group *vg);
int vg_set_alloc_policy(struct volume_group *vg, alloc_policy_t alloc);
int vg_set_clustered(struct volume_group *vg, int clustered);
+int vg_set_system_id(struct volume_group *vg, const char *system_id);
+int vg_set_lock_type(struct volume_group *vg, const char *lock_type);
uint64_t vg_size(const struct volume_group *vg);
uint64_t vg_free(const struct volume_group *vg);
uint64_t vg_extent_size(const struct volume_group *vg);
diff --git a/lib/mirror/mirrored.c b/lib/mirror/mirrored.c
index 7ab11c216..2d122dba5 100644
--- a/lib/mirror/mirrored.c
+++ b/lib/mirror/mirrored.c
@@ -139,7 +139,7 @@ static int _mirrored_text_export(const struct lv_segment *seg, struct formatter
if (seg->region_size)
outf(f, "region_size = %" PRIu32, seg->region_size);
- return out_areas(f, seg, "mirror");
+ return out_areas(f, seg, SEG_TYPE_NAME_MIRROR);
}
#ifdef DEVMAPPER_SUPPORT
@@ -195,7 +195,7 @@ static int _mirrored_target_percent(void **target_state,
pos += used;
}
- if (sscanf(pos, "%" PRIu64 "/%" PRIu64 "%n", &numerator, &denominator,
+ if (sscanf(pos, FMTu64 "/" FMTu64 "%n", &numerator, &denominator,
&used) != 2) {
log_error("Failure parsing mirror status fraction: %s", params);
return 0;
@@ -435,7 +435,8 @@ static int _mirrored_add_target_line(struct dev_manager *dm, struct dm_pool *mem
} else
region_size = adjusted_mirror_region_size(seg->lv->vg->extent_size,
seg->area_len,
- mirr_state->default_region_size, 1);
+ mirr_state->default_region_size, 1,
+ vg_is_clustered(seg->lv->vg));
if (!dm_tree_node_add_mirror_target(node, len))
return_0;
@@ -462,7 +463,7 @@ static int _mirrored_target_present(struct cmd_context *cmd,
if (!_mirrored_checked) {
_mirrored_checked = 1;
- _mirrored_present = target_present(cmd, "mirror", 1);
+ _mirrored_present = target_present(cmd, SEG_TYPE_NAME_MIRROR, 1);
/*
* block_on_error available as "block_on_error" log
@@ -478,7 +479,7 @@ static int _mirrored_target_present(struct cmd_context *cmd,
*/
/* FIXME Move this into libdevmapper */
- if (target_version("mirror", &maj, &min, &patchlevel) &&
+ if (target_version(SEG_TYPE_NAME_MIRROR, &maj, &min, &patchlevel) &&
maj == 1 &&
((min >= 1) ||
(min == 0 && driver_version(vsn, sizeof(vsn)) &&
@@ -572,7 +573,7 @@ static int _mirrored_modules_needed(struct dm_pool *mem,
return 0;
}
- if (!str_list_add(mem, modules, "mirror")) {
+ if (!str_list_add(mem, modules, SEG_TYPE_NAME_MIRROR)) {
log_error("mirror string list allocation failed");
return 0;
}
@@ -619,7 +620,7 @@ struct segment_type *init_segtype(struct cmd_context *cmd)
return_NULL;
segtype->ops = &_mirrored_ops;
- segtype->name = "mirror";
+ segtype->name = SEG_TYPE_NAME_MIRROR;
segtype->flags = SEG_MIRROR | SEG_AREAS_MIRRORED;
#ifdef DEVMAPPER_SUPPORT
diff --git a/lib/misc/.gitignore b/lib/misc/.gitignore
index 37a05fe2b..396884bea 100644
--- a/lib/misc/.gitignore
+++ b/lib/misc/.gitignore
@@ -1 +1,2 @@
+configure.h
lvm-version.h
diff --git a/lib/misc/configure.h.in b/lib/misc/configure.h.in
index 7c0d64c8c..0adb7b106 100644
--- a/lib/misc/configure.h.in
+++ b/lib/misc/configure.h.in
@@ -6,6 +6,10 @@
/* The path to 'cache_check', if available. */
#undef CACHE_CHECK_CMD
+/* Define to 1 if the external 'cache_check' tool requires the
+ --clear-needs-check-flag option */
+#undef CACHE_CHECK_NEEDS_CHECK
+
/* The path to 'cache_dump', if available. */
#undef CACHE_DUMP_CMD
@@ -65,6 +69,13 @@
/* Default DM run directory. */
#undef DEFAULT_DM_RUN_DIR
+/* Default system configuration directory. */
+#undef DEFAULT_ETC_DIR
+
+/* Fall back to LVM1 by default if device-mapper is missing from the kernel.
+ */
+#undef DEFAULT_FALLBACK_TO_LVM1
+
/* Name of default locking directory. */
#undef DEFAULT_LOCK_DIR
@@ -92,6 +103,18 @@
/* Path to LVM system directory. */
#undef DEFAULT_SYS_DIR
+/* Use blkid wiping by default. */
+#undef DEFAULT_USE_BLKID_WIPING
+
+/* Use lvmetad by default. */
+#undef DEFAULT_USE_LVMETAD
+
+/* Use lvmlockd by default. */
+#undef DEFAULT_USE_LVMLOCKD
+
+/* Use lvmpolld by default. */
+#undef DEFAULT_USE_LVMPOLLD
+
/* Define to 1 to enable LVM2 device-mapper interaction. */
#undef DEVMAPPER_SUPPORT
@@ -104,9 +127,27 @@
/* Path to dmeventd pidfile. */
#undef DMEVENTD_PIDFILE
+/* Define to enable compat protocol */
+#undef DM_COMPAT
+
+/* Define default group for device node */
+#undef DM_DEVICE_GID
+
+/* Define default mode for device node */
+#undef DM_DEVICE_MODE
+
+/* Define default owner for device node */
+#undef DM_DEVICE_UID
+
+/* Define to enable ioctls calls to kernel */
+#undef DM_IOCTLS
+
/* Library version */
#undef DM_LIB_VERSION
+/* Define to 1 if you have the `alarm' function. */
+#undef HAVE_ALARM
+
/* Define to 1 if you have `alloca', as a function or macro. */
#undef HAVE_ALLOCA
@@ -123,12 +164,18 @@
/* Define to 1 if you have the <assert.h> header file. */
#undef HAVE_ASSERT_H
+/* Define to 1 if you have the `atexit' function. */
+#undef HAVE_ATEXIT
+
/* Define to 1 if canonicalize_file_name is available. */
#undef HAVE_CANONICALIZE_FILE_NAME
/* Define to 1 if your system has a working `chown' function. */
#undef HAVE_CHOWN
+/* Define to 1 if you have the `clock_gettime' function. */
+#undef HAVE_CLOCK_GETTIME
+
/* Define to 1 if you have the <corosync/cmap.h> header file. */
#undef HAVE_COROSYNC_CMAP_H
@@ -138,6 +185,10 @@
/* Define to 1 if you have the <ctype.h> header file. */
#undef HAVE_CTYPE_H
+/* Define to 1 if you have the declaration of `strerror_r', and to 0 if you
+ don't. */
+#undef HAVE_DECL_STRERROR_R
+
/* Define to 1 if you have the <dirent.h> header file. */
#undef HAVE_DIRENT_H
@@ -156,6 +207,9 @@
/* Define to 1 if you have the <fcntl.h> header file. */
#undef HAVE_FCNTL_H
+/* Define to 1 if you have the <float.h> header file. */
+#undef HAVE_FLOAT_H
+
/* Define to 1 if you have the `fork' function. */
#undef HAVE_FORK
@@ -213,6 +267,9 @@
/* Define to 1 if you have the <locale.h> header file. */
#undef HAVE_LOCALE_H
+/* Define to 1 if you have the `localtime_r' function. */
+#undef HAVE_LOCALTIME_R
+
/* Define to 1 if `lstat' has the bug that it succeeds when given the
zero-length file name argument. */
#undef HAVE_LSTAT_EMPTY_STRING_BUG
@@ -227,6 +284,9 @@
/* Define to 1 if you have the <malloc.h> header file. */
#undef HAVE_MALLOC_H
+/* Define to 1 if you have the `memchr' function. */
+#undef HAVE_MEMCHR
+
/* Define to 1 if you have the `memmove' function. */
#undef HAVE_MEMMOVE
@@ -263,9 +323,15 @@
/* Define to 1 if you have the `nl_langinfo' function. */
#undef HAVE_NL_LANGINFO
+/* Define to 1 if you have the <paths.h> header file. */
+#undef HAVE_PATHS_H
+
/* Define to 1 if you have the <pthread.h> header file. */
#undef HAVE_PTHREAD_H
+/* Define to 1 if the system has the type `ptrdiff_t'. */
+#undef HAVE_PTRDIFF_T
+
/* Define to 1 if you have the <readline/history.h> header file. */
#undef HAVE_READLINE_HISTORY_H
@@ -276,6 +342,9 @@
and to 0 otherwise. */
#undef HAVE_REALLOC
+/* Define to 1 if you have the `realpath' function. */
+#undef HAVE_REALPATH
+
/* Define to 1 to include support for realtime clock. */
#undef HAVE_REALTIME
@@ -319,9 +388,15 @@
zero-length file name argument. */
#undef HAVE_STAT_EMPTY_STRING_BUG
+/* Define if struct stat has a field st_ctim with timespec for ctime */
+#undef HAVE_STAT_ST_CTIM
+
/* Define to 1 if you have the <stdarg.h> header file. */
#undef HAVE_STDARG_H
+/* Define to 1 if stdbool.h conforms to C99. */
+#undef HAVE_STDBOOL_H
+
/* Define to 1 if you have the <stddef.h> header file. */
#undef HAVE_STDDEF_H
@@ -349,6 +424,9 @@
/* Define to 1 if you have the `strerror' function. */
#undef HAVE_STRERROR
+/* Define to 1 if you have the `strerror_r' function. */
+#undef HAVE_STRERROR_R
+
/* Define to 1 if you have the <strings.h> header file. */
#undef HAVE_STRINGS_H
@@ -358,6 +436,12 @@
/* Define to 1 if you have the `strncasecmp' function. */
#undef HAVE_STRNCASECMP
+/* Define to 1 if you have the `strndup' function. */
+#undef HAVE_STRNDUP
+
+/* Define to 1 if you have the `strpbrk' function. */
+#undef HAVE_STRPBRK
+
/* Define to 1 if you have the `strrchr' function. */
#undef HAVE_STRRCHR
@@ -373,7 +457,10 @@
/* Define to 1 if you have the `strtoul' function. */
#undef HAVE_STRTOUL
-/* Define to 1 if `st_rdev' is a member of `struct stat'. */
+/* Define to 1 if you have the `strtoull' function. */
+#undef HAVE_STRTOULL
+
+/* Define to 1 if `st_rdev' is member of `struct stat'. */
#undef HAVE_STRUCT_STAT_ST_RDEV
/* Define to 1 if you have the <syslog.h> header file. */
@@ -429,6 +516,9 @@
/* Define to 1 if you have the <sys/time.h> header file. */
#undef HAVE_SYS_TIME_H
+/* Define to 1 if you have the <sys/timerfd.h> header file. */
+#undef HAVE_SYS_TIMERFD_H
+
/* Define to 1 if you have the <sys/types.h> header file. */
#undef HAVE_SYS_TYPES_H
@@ -459,6 +549,9 @@
/* Define to 1 if you have the <utmpx.h> header file. */
#undef HAVE_UTMPX_H
+/* valgrind.h found */
+#undef HAVE_VALGRIND
+
/* Define to 1 if you have the `vfork' function. */
#undef HAVE_VFORK
@@ -474,6 +567,21 @@
/* Define to 1 if `vfork' works. */
#undef HAVE_WORKING_VFORK
+/* Define to 1 if the system has the type `_Bool'. */
+#undef HAVE__BOOL
+
+/* Internationalization package */
+#undef INTL_PACKAGE
+
+/* Locale-dependent data */
+#undef LOCALEDIR
+
+/* Define to 1 to include code that uses lvmlockd dlm option. */
+#undef LOCKDDLM_SUPPORT
+
+/* Define to 1 to include code that uses lvmlockd sanlock option. */
+#undef LOCKDSANLOCK_SUPPORT
+
/* Define to 1 if `lstat' dereferences a symlink specified with a trailing
slash. */
#undef LSTAT_FOLLOWS_SLASHED_SYMLINK
@@ -491,6 +599,18 @@
/* Define to 1 to include code that uses lvmetad. */
#undef LVMETAD_SUPPORT
+/* Path to lvmlockd pidfile. */
+#undef LVMLOCKD_PIDFILE
+
+/* Define to 1 to include code that uses lvmlockd. */
+#undef LVMLOCKD_SUPPORT
+
+/* Path to lvmpolld pidfile. */
+#undef LVMPOLLD_PIDFILE
+
+/* Define to 1 to include code that uses lvmpolld. */
+#undef LVMPOLLD_SUPPORT
+
/* Path to lvm binary. */
#undef LVM_PATH
@@ -523,9 +643,6 @@
/* Define to the one symbol short name of this package. */
#undef PACKAGE_TARNAME
-/* Define to the home page for this package. */
-#undef PACKAGE_URL
-
/* Define to the version of this package. */
#undef PACKAGE_VERSION
@@ -567,6 +684,12 @@
/* Define to 1 if you have the ANSI C header files. */
#undef STDC_HEADERS
+/* Define to 1 if strerror_r returns char *. */
+#undef STRERROR_R_CHAR_P
+
+/* Path to testsuite data */
+#undef TESTSUITE_DATA
+
/* The path to 'thin_check', if available. */
#undef THIN_CHECK_CMD
diff --git a/lib/misc/lib.h b/lib/misc/lib.h
index c879b23ef..b354f50b0 100644
--- a/lib/misc/lib.h
+++ b/lib/misc/lib.h
@@ -20,20 +20,36 @@
#define _LVM_LIB_H
/* HM FIXME: REMOVEME: devel output */
-#if 1
-#define USE_PFL
+#if 0
+#include "dump.h"
#endif
+/* HM FIXME: REMOVEME: devel output */
+#if 0
+#define PFL() printf("%s %u\n", __func__, __LINE__);
+#define PFLA(format, arg...) printf("%s %u " format "\n", __func__, __LINE__, arg);
+#else
+#define PFL() ;
+#define PFLA(format, arg...) ;
+#endif
+
+
#include "configure.h"
#define _REENTRANT
#define _GNU_SOURCE
#define _FILE_OFFSET_BITS 64
-/* Define some portable printing types */
-#define PRIsize_t "zu"
-#define PRIptrdiff_t "td"
-#define PRIpid_t PRId32
+#if defined(__GNUC__)
+#define DM_EXPORTED_SYMBOL(func, ver) \
+ __asm__(".symver " #func "_v" #ver ", " #func "@@DM_" #ver )
+#define DM_EXPORTED_SYMBOL_BASE(func) \
+ __asm__(".symver " #func "_base, " #func "@Base" )
+#else
+#define DM_EXPORTED_SYMBOL(func, ver)
+#define DM_EXPORTED_SYMBOL_BASE(func)
+#endif
+
#include "intl.h"
#include "libdevmapper.h"
diff --git a/lib/misc/lvm-exec.c b/lib/misc/lvm-exec.c
index 273e7f90c..e4145242b 100644
--- a/lib/misc/lvm-exec.c
+++ b/lib/misc/lvm-exec.c
@@ -62,8 +62,11 @@ int exec_cmd(struct cmd_context *cmd, const char *const argv[],
*rstatus = -1;
if (sync_needed)
- if (!sync_local_dev_names(cmd)) /* Flush ops and reset dm cookie */
- return_0;
+ /* Flush ops and reset dm cookie */
+ if (!sync_local_dev_names(cmd)) {
+ log_error("Failed to sync local device names before forking.");
+ return 0;
+ }
log_verbose("Executing:%s", _verbose_args(argv, buf, sizeof(buf)));
@@ -148,8 +151,11 @@ FILE *pipe_open(struct cmd_context *cmd, const char *const argv[],
char buf[PATH_MAX * 2];
if (sync_needed)
- if (!sync_local_dev_names(cmd)) /* Flush ops and reset dm cookie */
- return_0;
+ /* Flush ops and reset dm cookie */
+ if (!sync_local_dev_names(cmd)) {
+ log_error("Failed to sync local device names before forking.");
+ return 0;
+ }
if (pipe(pipefd)) {
log_sys_error("pipe", "");
diff --git a/lib/misc/lvm-file.c b/lib/misc/lvm-file.c
index f0e284120..6fc31cd21 100644
--- a/lib/misc/lvm-file.c
+++ b/lib/misc/lvm-file.c
@@ -273,3 +273,13 @@ int lvm_fclose(FILE *fp, const char *filename)
return EOF;
}
+
+void lvm_stat_ctim(struct timespec *ctim, const struct stat *buf)
+{
+#ifdef HAVE_STAT_ST_CTIM
+ *ctim = buf->st_ctim;
+#else
+ ctim->tv_sec = buf->st_ctime;
+ ctim->tv_nsec = 0;
+#endif
+}
diff --git a/lib/misc/lvm-file.h b/lib/misc/lvm-file.h
index c23d8ad65..a4f332c58 100644
--- a/lib/misc/lvm-file.h
+++ b/lib/misc/lvm-file.h
@@ -62,4 +62,15 @@ void fcntl_unlock_file(int lockfd);
*/
int lvm_fclose(FILE *fp, const char *filename);
+/*
+ * Copy stat->st_ctim (time of last status change, nanosecond resolution)
+ * into *ctim; falls back to st_ctime when st_ctim is not available.
+ */
+void lvm_stat_ctim(struct timespec *ts, const struct stat *buf);
+
+/* Inspired by <sys/time.h> timercmp() macro for timeval */
+#define timespeccmp(tsp, usp, cmp)\
+ (((tsp)->tv_sec == (usp)->tv_sec) ?\
+ ((tsp)->tv_nsec cmp (usp)->tv_nsec) :\
+ ((tsp)->tv_sec cmp (usp)->tv_sec))
#endif
diff --git a/lib/misc/lvm-globals.c b/lib/misc/lvm-globals.c
index ee192b026..60397e55e 100644
--- a/lib/misc/lvm-globals.c
+++ b/lib/misc/lvm-globals.c
@@ -26,12 +26,14 @@ static int _verbose_level = VERBOSE_BASE_LEVEL;
static int _silent = 0;
static int _test = 0;
static int _md_filtering = 0;
+static int _fwraid_filtering = 0;
static int _pvmove = 0;
static int _full_scan_done = 0; /* Restrict to one full scan during each cmd */
static int _obtain_device_list_from_udev = DEFAULT_OBTAIN_DEVICE_LIST_FROM_UDEV;
+static unsigned _external_device_info_source = DEV_EXT_NONE;
static int _trust_cache = 0; /* Don't scan when incomplete VGs encountered */
static int _debug_level = 0;
-static int _debug_classes_logged = DEFAULT_LOGGED_DEBUG_CLASSES;
+static int _debug_classes_logged = 0;
static int _log_cmd_name = 0;
static int _ignorelockingfailure = 0;
static int _security_level = SECURITY_LEVEL;
@@ -74,6 +76,11 @@ void init_md_filtering(int level)
_md_filtering = level;
}
+void init_fwraid_filtering(int level)
+{
+ _fwraid_filtering = level;
+}
+
void init_pvmove(int level)
{
_pvmove = level;
@@ -89,6 +96,11 @@ void init_obtain_device_list_from_udev(int device_list_from_udev)
_obtain_device_list_from_udev = device_list_from_udev;
}
+void init_external_device_info_source(unsigned src)
+{
+ _external_device_info_source = src;
+}
+
void init_trust_cache(int trustcache)
{
_trust_cache = trustcache;
@@ -181,6 +193,11 @@ void set_cmd_name(const char *cmd)
_cmd_name[sizeof(_cmd_name) - 1] = '\0';
}
+const char *get_cmd_name(void)
+{
+ return _cmd_name;
+}
+
void set_sysfs_dir_path(const char *path)
{
strncpy(_sysfs_dir_path, path, sizeof(_sysfs_dir_path) - 1);
@@ -215,6 +232,11 @@ int md_filtering(void)
return _md_filtering;
}
+int fwraid_filtering(void)
+{
+ return _fwraid_filtering;
+}
+
int pvmove_mode(void)
{
return _pvmove;
@@ -230,6 +252,11 @@ int obtain_device_list_from_udev(void)
return _obtain_device_list_from_udev;
}
+unsigned external_device_info_source(void)
+{
+ return _external_device_info_source;
+}
+
int trust_cache(void)
{
return _trust_cache;
diff --git a/lib/misc/lvm-globals.h b/lib/misc/lvm-globals.h
index 4af713bd3..8628a890a 100644
--- a/lib/misc/lvm-globals.h
+++ b/lib/misc/lvm-globals.h
@@ -24,8 +24,10 @@ void init_verbose(int level);
void init_silent(int silent);
void init_test(int level);
void init_md_filtering(int level);
+void init_fwraid_filtering(int level);
void init_pvmove(int level);
void init_full_scan_done(int level);
+void init_external_device_info_source(unsigned src);
void init_obtain_device_list_from_udev(int device_list_from_udev);
void init_trust_cache(int trustcache);
void init_debug(int level);
@@ -49,13 +51,16 @@ void init_detect_internal_vg_cache_corruption(int detect);
void init_retry_deactivation(int retry);
void set_cmd_name(const char *cmd_name);
+const char *get_cmd_name(void);
void set_sysfs_dir_path(const char *path);
int test_mode(void);
int md_filtering(void);
+int fwraid_filtering(void);
int pvmove_mode(void);
int full_scan_done(void);
int obtain_device_list_from_udev(void);
+unsigned external_device_info_source(void);
int trust_cache(void);
int verbose_level(void);
int silent_mode(void);
diff --git a/lib/misc/lvm-signal.c b/lib/misc/lvm-signal.c
index d5dac6f77..0d61e73d5 100644
--- a/lib/misc/lvm-signal.c
+++ b/lib/misc/lvm-signal.c
@@ -95,7 +95,7 @@ void sigint_restore(void)
--_handler_installed >= MAX_SIGINTS)
return;
- /* Nesting count went bellow MAX_SIGINTS. */
+ /* Nesting count went below MAX_SIGINTS. */
if (_oldmasked[_handler_installed]) {
sigset_t sigs;
sigprocmask(0, NULL, &sigs);
diff --git a/lib/misc/lvm-string.c b/lib/misc/lvm-string.c
index 4ed1b28dc..5b675ccdc 100644
--- a/lib/misc/lvm-string.c
+++ b/lib/misc/lvm-string.c
@@ -75,7 +75,7 @@ static name_error_t _validate_name(const char *n)
/* Hyphen used as VG-LV separator - ambiguity if LV starts with it */
if (*n == '-')
- return NAME_INVALID_HYPEN;
+ return NAME_INVALID_HYPHEN;
if ((*n == '.') && (!n[1] || (n[1] == '.' && !n[2]))) /* ".", ".." */
return NAME_INVALID_DOTS;
@@ -101,6 +101,39 @@ int validate_name(const char *n)
return (_validate_name(n) == NAME_VALID) ? 1 : 0;
}
+/*
+ * Copy valid systemid characters from source to destination.
+ * Invalid characters are skipped. Copying is stopped
+ * when NAME_LEN characters have been copied.
+ * A terminating NUL is appended.
+ */
+void copy_systemid_chars(const char *src, char *dst)
+{
+ const char *s = src;
+ char *d = dst;
+ int len = 0;
+ char c;
+
+ if (!s || !*s)
+ return;
+
+ /* Skip non-alphanumeric starting characters */
+ while (*s && !isalnum(*s))
+ s++;
+
+ while ((c = *s++)) {
+ if (!isalnum(c) && c != '.' && c != '_' && c != '-' && c != '+')
+ continue;
+
+ *d++ = c;
+
+ if (++len >= NAME_LEN)
+ break;
+ }
+
+ *d = '\0';
+}
+
static const char *_lvname_has_reserved_prefix(const char *lvname)
{
static const char _prefixes[][12] = {
@@ -127,6 +160,8 @@ static const char *_lvname_has_reserved_string(const char *lvname)
"_pmspare",
"_rimage",
"_rmeta",
+ "_rdimage",
+ "_rdmeta",
"_tdata",
"_tmeta",
"_vorigin"
diff --git a/lib/misc/lvm-string.h b/lib/misc/lvm-string.h
index 6a13e9ea4..251aab821 100644
--- a/lib/misc/lvm-string.h
+++ b/lib/misc/lvm-string.h
@@ -28,7 +28,7 @@ struct logical_volume;
typedef enum name_error {
NAME_VALID = 0,
NAME_INVALID_EMPTY = -1,
- NAME_INVALID_HYPEN = -2,
+ NAME_INVALID_HYPHEN = -2,
NAME_INVALID_DOTS = -3,
NAME_INVALID_CHARSET = -4,
NAME_INVALID_LENGTH = -5
@@ -44,6 +44,8 @@ int validate_name(const char *n);
name_error_t validate_name_detailed(const char *n);
int validate_tag(const char *n);
+void copy_systemid_chars(const char *src, char *dst);
+
int apply_lvname_restrictions(const char *name);
int is_reserved_lvname(const char *name);
diff --git a/lib/misc/timestamp.c b/lib/misc/timestamp.c
deleted file mode 100644
index 47b5586ad..000000000
--- a/lib/misc/timestamp.c
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Copyright (C) 2006 Rackable Systems All rights reserved.
- *
- * This file is part of LVM2.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU Lesser General Public License v.2.1.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-/*
- * Abstract out the time methods used so they can be adjusted later -
- * the results of these routines should stay in-core. This implementation
- * requires librt.
- */
-
-#include "lib.h"
-#include <stdlib.h>
-
-#include "timestamp.h"
-
-/*
- * The realtime section uses clock_gettime with the CLOCK_MONOTONIC
- * parameter to prevent issues with time warps
- */
-#ifdef HAVE_REALTIME
-
-#include <time.h>
-#include <bits/time.h>
-
-struct timestamp {
- struct timespec t;
-};
-
-struct timestamp *get_timestamp(void)
-{
- struct timestamp *ts = NULL;
-
- if (!(ts = dm_malloc(sizeof(*ts))))
- return_NULL;
-
- if (clock_gettime(CLOCK_MONOTONIC, &ts->t)) {
- log_sys_error("clock_gettime", "get_timestamp");
- return NULL;
- }
-
- return ts;
-}
-
-/* cmp_timestamp: Compare two timestamps
- *
- * Return: -1 if t1 is less than t2
- * 0 if t1 is equal to t2
- * 1 if t1 is greater than t2
- */
-int cmp_timestamp(struct timestamp *t1, struct timestamp *t2)
-{
- if(t1->t.tv_sec < t2->t.tv_sec)
- return -1;
- if(t1->t.tv_sec > t2->t.tv_sec)
- return 1;
-
- if(t1->t.tv_nsec < t2->t.tv_nsec)
- return -1;
- if(t1->t.tv_nsec > t2->t.tv_nsec)
- return 1;
-
- return 0;
-}
-
-#else /* ! HAVE_REALTIME */
-
-/*
- * The !realtime section just uses gettimeofday and is therefore subject
- * to ntp-type time warps - not sure if should allow that.
- */
-
-#include <sys/time.h>
-
-struct timestamp {
- struct timeval t;
-};
-
-struct timestamp *get_timestamp(void)
-{
- struct timestamp *ts = NULL;
-
- if (!(ts = dm_malloc(sizeof(*ts))))
- return_NULL;
-
- if (gettimeofday(&ts->t, NULL)) {
- log_sys_error("gettimeofday", "get_timestamp");
- return NULL;
- }
-
- return ts;
-}
-
-/* cmp_timestamp: Compare two timestamps
- *
- * Return: -1 if t1 is less than t2
- * 0 if t1 is equal to t2
- * 1 if t1 is greater than t2
- */
-int cmp_timestamp(struct timestamp *t1, struct timestamp *t2)
-{
- if(t1->t.tv_sec < t2->t.tv_sec)
- return -1;
- if(t1->t.tv_sec > t2->t.tv_sec)
- return 1;
-
- if(t1->t.tv_usec < t2->t.tv_usec)
- return -1;
- if(t1->t.tv_usec > t2->t.tv_usec)
- return 1;
-
- return 0;
-}
-
-#endif /* HAVE_REALTIME */
-
-void destroy_timestamp(struct timestamp *t)
-{
- dm_free(t);
-}
diff --git a/lib/misc/util.h b/lib/misc/util.h
index a4534697c..52d9f2bb8 100644
--- a/lib/misc/util.h
+++ b/lib/misc/util.h
@@ -15,6 +15,8 @@
#ifndef _LVM_UTIL_H
#define _LVM_UTIL_H
+#include <inttypes.h>
+
#define min(a, b) ({ typeof(a) _a = (a); \
typeof(b) _b = (b); \
(void) (&_a == &_b); \
@@ -33,4 +35,41 @@
#define KERNEL_VERSION(major, minor, release) (((major) << 16) + ((minor) << 8) + (release))
+/* Define some portable printing types */
+#define PRIsize_t "zu"
+#define PRIssize_t "zd"
+#define PRIptrdiff_t "td"
+#define PRIpid_t PRId32
+
+/* For convenience */
+#define FMTsize_t "%" PRIsize_t
+#define FMTssize_t "%" PRIssize_t
+#define FMTptrdiff_t "%" PRIptrdiff_t
+#define FMTpid_t "%" PRIpid_t
+
+#define FMTd8 "%" PRId8
+#define FMTd16 "%" PRId16
+#define FMTd32 "%" PRId32
+#define FMTd64 "%" PRId64
+
+#define FMTi8 "%" PRIi8
+#define FMTi16 "%" PRIi16
+#define FMTi32 "%" PRIi32
+#define FMTi64 "%" PRIi64
+
+#define FMTo8 "%" PRIo8
+#define FMTo16 "%" PRIo16
+#define FMTo32 "%" PRIo32
+#define FMTo64 "%" PRIo64
+
+#define FMTu8 "%" PRIu8
+#define FMTu16 "%" PRIu16
+#define FMTu32 "%" PRIu32
+#define FMTu64 "%" PRIu64
+
+#define FMTx8 "%" PRIx8
+#define FMTx16 "%" PRIx16
+#define FMTx32 "%" PRIx32
+#define FMTx64 "%" PRIx64
+
#endif
diff --git a/lib/mm/memlock.c b/lib/mm/memlock.c
index c5358dc1f..60686ea5f 100644
--- a/lib/mm/memlock.c
+++ b/lib/mm/memlock.c
@@ -27,6 +27,10 @@
#include <sys/resource.h>
#include <malloc.h>
+#ifdef HAVE_VALGRIND
+#include <valgrind.h>
+#endif
+
#ifndef DEVMAPPER_SUPPORT
void memlock_inc_daemon(struct cmd_context *cmd)
@@ -254,12 +258,15 @@ static int _maps_line(const struct dm_config_node *cn, lvmlock_t lock,
}
}
-#ifdef VALGRIND_POOL
+#ifdef HAVE_VALGRIND
/*
* Valgrind is continually eating memory while executing code
* so we need to deactivate check of locked memory size
- */
- sz -= sz; /* = 0, but avoids getting warning about dead assigment */
+ */
+#ifndef VALGRIND_POOL
+ if (RUNNING_ON_VALGRIND)
+#endif
+ sz -= sz; /* = 0, but avoids getting warning about dead assignment */
#endif
*mstats += sz;
@@ -340,7 +347,7 @@ static int _memlock_maps(struct cmd_context *cmd, lvmlock_t lock, size_t *mstats
}
line = _maps_buffer;
- cn = find_config_tree_node(cmd, activation_mlock_filter_CFG, NULL);
+ cn = find_config_tree_array(cmd, activation_mlock_filter_CFG, NULL);
while ((line_end = strchr(line, '\n'))) {
*line_end = '\0'; /* remove \n */
diff --git a/lib/mm/xlate.h b/lib/mm/xlate.h
index 0d243c4c1..efb9ee3fd 100644
--- a/lib/mm/xlate.h
+++ b/lib/mm/xlate.h
@@ -38,27 +38,71 @@
#endif
#if BYTE_ORDER == LITTLE_ENDIAN
-# define xlate16(x) (x)
-# define xlate32(x) (x)
-# define xlate64(x) (x)
-# define xlate16_be(x) bswap_16(x)
-# define xlate32_be(x) bswap_32(x)
-# define xlate64_be(x) bswap_64(x)
+/* New clearer variants. */
+#define le16_to_cpu(x) (x)
+#define le32_to_cpu(x) (x)
+#define le64_to_cpu(x) (x)
+#define cpu_to_le16(x) (x)
+#define cpu_to_le32(x) (x)
+#define cpu_to_le64(x) (x)
+#define be16_to_cpu(x) bswap_16(x)
+#define be32_to_cpu(x) bswap_32(x)
+#define be64_to_cpu(x) bswap_64(x)
+#define cpu_to_be16(x) bswap_16(x)
+#define cpu_to_be32(x) bswap_32(x)
+#define cpu_to_be64(x) bswap_64(x)
+/* Old alternative variants. */
+#define xlate16(x) (x)
+#define xlate32(x) (x)
+#define xlate64(x) (x)
+#define xlate16_be(x) bswap_16(x)
+#define xlate32_be(x) bswap_32(x)
+#define xlate64_be(x) bswap_64(x)
+
#elif BYTE_ORDER == BIG_ENDIAN
-# define xlate16(x) bswap_16(x)
-# define xlate32(x) bswap_32(x)
-# define xlate64(x) bswap_64(x)
-# define xlate16_be(x) (x)
-# define xlate32_be(x) (x)
-# define xlate64_be(x) (x)
+/* New clearer variants. */
+#define le16_to_cpu(x) bswap_16(x)
+#define le32_to_cpu(x) bswap_32(x)
+#define le64_to_cpu(x) bswap_64(x)
+#define cpu_to_le16(x) bswap_16(x)
+#define cpu_to_le32(x) bswap_32(x)
+#define cpu_to_le64(x) bswap_64(x)
+#define be16_to_cpu(x) (x)
+#define be32_to_cpu(x) (x)
+#define be64_to_cpu(x) (x)
+#define cpu_to_be16(x) (x)
+#define cpu_to_be32(x) (x)
+#define cpu_to_be64(x) (x)
+/* Old alternative variants. */
+#define xlate16(x) bswap_16(x)
+#define xlate32(x) bswap_32(x)
+#define xlate64(x) bswap_64(x)
+#define xlate16_be(x) (x)
+#define xlate32_be(x) (x)
+#define xlate64_be(x) (x)
+
#else
-# include <asm/byteorder.h>
-# define xlate16(x) __cpu_to_le16((x))
-# define xlate32(x) __cpu_to_le32((x))
-# define xlate64(x) __cpu_to_le64((x))
-# define xlate16_be(x) __cpu_to_be16((x))
-# define xlate32_be(x) __cpu_to_be32((x))
-# define xlate64_be(x) __cpu_to_be64((x))
+#include <asm/byteorder.h>
+/* New clearer variants. */
+#define le16_to_cpu(x) __le16_to_cpu(x)
+#define le32_to_cpu(x) __le32_to_cpu(x)
+#define le64_to_cpu(x) __le64_to_cpu(x)
+#define cpu_to_le16(x) __cpu_to_le16(x)
+#define cpu_to_le32(x) __cpu_to_le32(x)
+#define cpu_to_le64(x) __cpu_to_le64(x)
+#define be16_to_cpu(x) __be16_to_cpu(x)
+#define be32_to_cpu(x) __be32_to_cpu(x)
+#define be64_to_cpu(x) __be64_to_cpu(x)
+#define cpu_to_be16(x) __cpu_to_be16(x)
+#define cpu_to_be32(x) __cpu_to_be32(x)
+#define cpu_to_be64(x) __cpu_to_be64(x)
+/* Old alternative variants. */
+#define xlate16(x) __cpu_to_le16(x)
+#define xlate32(x) __cpu_to_le32(x)
+#define xlate64(x) __cpu_to_le64(x)
+#define xlate16_be(x) __cpu_to_be16(x)
+#define xlate32_be(x) __cpu_to_be32(x)
+#define xlate64_be(x) __cpu_to_be64(x)
#endif
#endif
diff --git a/lib/properties/prop_common.h b/lib/properties/prop_common.h
index d7d01af35..0bf05d9c7 100644
--- a/lib/properties/prop_common.h
+++ b/lib/properties/prop_common.h
@@ -15,6 +15,7 @@
#define _LVM_PROP_COMMON_H
#include <stdint.h>
+#include "lib.h" /* HM FIXME: REMOVEME: */
/*
* Common code for getting and setting properties.
@@ -26,9 +27,11 @@ struct lvm_property_type {
unsigned is_settable:1;
unsigned is_string:1;
unsigned is_integer:1;
+ unsigned is_signed:1;
union {
const char *string;
uint64_t integer;
+ int64_t signed_integer;
} value;
int (*get) (const void *obj, struct lvm_property_type *prop);
int (*set) (void *obj, struct lvm_property_type *prop);
@@ -49,6 +52,7 @@ static int _ ## NAME ## _get (const void *obj, struct lvm_property_type *prop) \
{ \
const struct TYPE *VAR = (const struct TYPE *)obj; \
\
+PFLA("%s", "Huhu!!!") \
prop->value.integer = VALUE; \
return 1; \
}
@@ -125,10 +129,12 @@ static int _ ## NAME ## _get (const void *obj, struct lvm_property_type *prop) \
#define BIN 3
#define SIZ 4
#define PCT 5
-#define STR_LIST 6
+#define TIM 6
+#define SNUM 7 /* Signed Number */
+#define STR_LIST 8
#define FIELD_MODIFIABLE 0x00000001
#define FIELD(type, strct, field_type, head, field, width, fn, id, desc, settable) \
- { type, #id, settable, field_type == STR, ((field_type == NUM) || (field_type == BIN) || (field_type == SIZ) || (field_type == PCT)), { .integer = 0 }, _ ## id ## _get, _ ## id ## _set },
+ { type, #id, settable, (field_type == STR || field_type == STR_LIST), ((field_type == NUM) || (field_type == BIN) || (field_type == SIZ) || (field_type == PCT) || (field_type == SNUM)), ((field_type == SNUM) || (field_type == PCT)), { .integer = 0 }, _ ## id ## _get, _ ## id ## _set },
#endif
diff --git a/lib/raid/raid.c b/lib/raid/raid.c
index e3bf3cdb9..5fc0db2a0 100644
--- a/lib/raid/raid.c
+++ b/lib/raid/raid.c
@@ -16,7 +16,6 @@
#include "segtype.h"
#include "display.h"
#include "text_export.h"
-#include "text_import.h"
#include "config.h"
#include "str_list.h"
#include "targets.h"
@@ -25,15 +24,6 @@
#include "metadata.h"
#include "lv_alloc.h"
-/* HM FIXME: REMOVEME: devel output */
-#ifdef USE_PFL
-#define PFL() printf("%s %u\n", __func__, __LINE__);
-#define PFLA(format, arg...) printf("%s %u " format "\n", __func__, __LINE__, arg);
-#else
-#define PFL()
-#define PFLA(format, arg...)
-#endif
-
static void _raid_display(const struct lv_segment *seg)
{
unsigned s;
@@ -44,7 +34,7 @@ static void _raid_display(const struct lv_segment *seg)
}
if (seg->meta_areas)
- for (s = 0; seg->meta_areas && s < seg->area_count; ++s)
+ for (s = 0; s < seg->area_count; ++s)
log_print(" Raid Metadata LV%2d\t%s", s, seg_metalv(seg, s)->name);
log_print(" ");
@@ -55,7 +45,7 @@ static int _raid_text_import_area_count(const struct dm_config_node *sn,
{
if (!dm_config_get_uint32(sn, "device_count", area_count) &&
!dm_config_get_uint32(sn, "stripe_count", area_count)) {
- log_error("Couldn't read '{device|stripe}_count' for "
+ log_error("Couldn't read '(device|stripe)_count' for "
"segment '%s'.", dm_config_parent_name(sn));
return 0;
}
@@ -135,6 +125,7 @@ static int _raid_text_import(struct lv_segment *seg,
} attr_import[] = {
{ "region_size", &seg->region_size },
{ "stripe_size", &seg->stripe_size },
+ { "data_copies", &seg->data_copies },
{ "writebehind", &seg->writebehind },
{ "min_recovery_rate", &seg->min_recovery_rate },
{ "max_recovery_rate", &seg->max_recovery_rate },
@@ -143,11 +134,16 @@ static int _raid_text_import(struct lv_segment *seg,
for (i = 0; i < DM_ARRAY_SIZE(attr_import); i++, aip++) {
if (dm_config_has_node(sn, aip->name)) {
if (!dm_config_get_uint32(sn, aip->name, aip->var)) {
- log_error("Couldn't read '%s' for segment %s of logical volume %s.",
- aip->name, dm_config_parent_name(sn), seg->lv->name);
- return 0;
- }
- }
+ if (!strcmp(aip->name, "data_copies")) {
+ log_error("Couldn't read '%s' for segment %s of logical volume %s.",
+ aip->name, dm_config_parent_name(sn), seg->lv->name);
+ return 0;
+ }
+ }
+
+ /* FIXME: only necessary for given metadata w/o new data_copies */
+ } else if (!strcmp(aip->name, "data_copies"))
+ seg->data_copies = seg_is_raid1(seg) ? seg->area_count : 1;
}
if (!dm_config_get_list(sn, "raids", &cv)) {
@@ -163,13 +159,14 @@ static int _raid_text_import(struct lv_segment *seg,
}
seg->status |= RAID;
-
+ seg->area_len = raid_rimage_extents(seg->segtype, seg->len, seg->area_count - seg->segtype->parity_devs,
+ seg->segtype->parity_devs ? 1 : seg->data_copies);
return 1;
}
static int _raid_text_export(const struct lv_segment *seg, struct formatter *f)
{
- int raid0 = (seg_is_raid0(seg) || seg_is_raid0_meta(seg));
+ int raid0 = seg_is_any_raid0(seg);
if (raid0)
outfc(f, (seg->area_count == 1) ? "# linear" : NULL,
@@ -177,6 +174,8 @@ static int _raid_text_export(const struct lv_segment *seg, struct formatter *f)
else {
outf(f, "device_count = %u", seg->area_count);
+ if (seg->data_copies > 0)
+ outf(f, "data_copies = %" PRIu32, seg->data_copies);
if (seg->region_size)
outf(f, "region_size = %" PRIu32, seg->region_size);
}
@@ -205,7 +204,7 @@ static int _raid_add_target_line(struct dev_manager *dm __attribute__((unused)),
struct dm_tree_node *node, uint64_t len,
uint32_t *pvmove_mirror_count __attribute__((unused)))
{
- int r, delta_disks = 0, data_offset = 0;
+ int delta_disks = 0, data_offset = 0;
uint32_t s;
uint64_t flags = 0;
uint64_t rebuilds[4];
@@ -278,21 +277,25 @@ PFL();
/* RAID 4/5/6 */
params.mirrors = 1;
params.stripes = seg->area_count - seg->segtype->parity_devs;
+PFLA("mirrors=%u stripes=%u", params.mirrors, params.stripes);
} else if (seg_is_any_raid0(seg)) {
params.mirrors = 1;
params.stripes = seg->area_count;
PFLA("mirrors=%u stripes=%u", params.mirrors, params.stripes);
- } else if (seg_is_raid10(seg)) {
- /* RAID 10 only supports 2 mirrors now */
- /* FIXME: HM: is this actually a constraint still? */
- params.mirrors = 2;
- params.stripes = seg->area_count / 2;
+ } else if (seg_is_any_raid10(seg)) {
+ if (!seg->data_copies)
+ seg->data_copies = 2;
+
+ params.data_copies = seg->data_copies;
+ params.stripes = seg->area_count;
+PFLA("mirrors=%u stripes=%u", params.mirrors, params.stripes);
} else {
/* RAID 1 */
params.mirrors = seg->area_count;
params.stripes = 1;
params.writebehind = seg->writebehind;
memcpy(params.writemostly, writemostly, sizeof(params.writemostly));
+PFLA("mirrors=%u stripes=%u", params.mirrors, params.stripes);
}
/* RAID 0 doesn't have a bitmap, thus no region_size, rebuilds etc. */
@@ -307,14 +310,10 @@ PFLA("mirrors=%u stripes=%u", params.mirrors, params.stripes);
params.stripe_size = seg->stripe_size;
params.flags = flags;
-
PFL();
if (!dm_tree_node_add_raid_target_with_params(node, len, &params))
return_0;
PFL();
- r = add_areas_line(dm, seg, node, 0u, seg->area_count);
-PFLA("r=%d", r);
- return r;
return add_areas_line(dm, seg, node, 0u, seg->area_count);
}
@@ -353,8 +352,11 @@ static int _raid_target_percent(void **target_state,
else
break;
}
- if (!pos || (sscanf(pos, "%" PRIu64 "/%" PRIu64 "%n",
- &numerator, &denominator, &i) != 2)) {
+
+ if (!pos ||
+ (sscanf(pos, "%" PRIu64 "/%" PRIu64 "%n",
+ &numerator, &denominator, &i) != 2) ||
+ !denominator) {
log_error("Failed to parse %s status fraction: %s",
(seg) ? seg->segtype->name : "segment", params);
return 0;
@@ -486,30 +488,35 @@ static struct segtype_handler _raid_ops = {
};
static const struct raid_type {
- const char name[12];
+ const char name[19];
unsigned parity;
uint64_t extra_flags;
+ const char *descr; /* HM FIXME: use segtype flags instead and display based on them */
} _raid_types[] = {
- { SEG_TYPE_NAME_RAID0, 0, SEG_RAID0 },
- { SEG_TYPE_NAME_RAID0_META, 0, SEG_RAID0_META },
- { SEG_TYPE_NAME_RAID1, 0, SEG_RAID1 | SEG_AREAS_MIRRORED },
- { SEG_TYPE_NAME_RAID10, 0, SEG_RAID10 | SEG_AREAS_MIRRORED },
- { SEG_TYPE_NAME_RAID4, 1, SEG_RAID4 },
- { SEG_TYPE_NAME_RAID5, 1, SEG_RAID5 }, /* is raid5_ls */
- { SEG_TYPE_NAME_RAID5_N, 1, SEG_RAID5_N },
- { SEG_TYPE_NAME_RAID5_LA, 1, SEG_RAID5_LA },
- { SEG_TYPE_NAME_RAID5_LS, 1, SEG_RAID5_LS },
- { SEG_TYPE_NAME_RAID5_RA, 1, SEG_RAID5_RA },
- { SEG_TYPE_NAME_RAID5_RS, 1, SEG_RAID5_RS },
- { SEG_TYPE_NAME_RAID6, 2, SEG_RAID6 }, /* is raid6_zr */
- { SEG_TYPE_NAME_RAID6_NC, 2, SEG_RAID6_NC },
- { SEG_TYPE_NAME_RAID6_NR, 2, SEG_RAID6_NR },
- { SEG_TYPE_NAME_RAID6_ZR, 2, SEG_RAID6_ZR },
- { SEG_TYPE_NAME_RAID6_LA_6, 2, SEG_RAID6_LA_6 },
- { SEG_TYPE_NAME_RAID6_LS_6, 2, SEG_RAID6_LS_6 },
- { SEG_TYPE_NAME_RAID6_RA_6, 2, SEG_RAID6_RA_6 },
- { SEG_TYPE_NAME_RAID6_RS_6, 2, SEG_RAID6_RS_6 },
- { SEG_TYPE_NAME_RAID6_N_6, 2, SEG_RAID6_N_6 },
+ { SEG_TYPE_NAME_RAID0, 0, SEG_RAID0, "striped/raid4/raid5/raid6/raid10" },
+ { SEG_TYPE_NAME_RAID0_META, 0, SEG_RAID0_META, "striped/raid4/raid5/raid6/raid10" },
+ { SEG_TYPE_NAME_RAID1, 0, SEG_RAID1 | SEG_AREAS_MIRRORED, "linear/raid4(2)/raid5(2)/raid10" },
+ { SEG_TYPE_NAME_RAID01, 0, SEG_RAID01 | SEG_AREAS_MIRRORED, "striped/raid10" },
+ { SEG_TYPE_NAME_RAID10_NEAR, 0, SEG_RAID10_NEAR | SEG_AREAS_MIRRORED, "raid0/1(!(stripes%mirrors)" },
+ { SEG_TYPE_NAME_RAID10_FAR, 0, SEG_RAID10_FAR | SEG_AREAS_MIRRORED | SEG_CAN_SPLIT, "striped/raid0" },
+ { SEG_TYPE_NAME_RAID10_OFFSET, 0, SEG_RAID10_OFFSET | SEG_AREAS_MIRRORED, "raid10_near" },
+ { SEG_TYPE_NAME_RAID10, 0, SEG_RAID10 | SEG_AREAS_MIRRORED /* is raid10_near */, "raid01(!(stripes%mirrors)" },
+ { SEG_TYPE_NAME_RAID4, 1, SEG_RAID4, "striped/raid0/raid1(2)/raid5/raid6" },
+ { SEG_TYPE_NAME_RAID5_N, 1, SEG_RAID5_N, "raid0/striped/raid1(2)/raid4/raid6" },
+ { SEG_TYPE_NAME_RAID5_LA, 1, SEG_RAID5_LA, "raid5*/raid6" },
+ { SEG_TYPE_NAME_RAID5_LS, 1, SEG_RAID5_LS, "raid5*/raid6" },
+ { SEG_TYPE_NAME_RAID5_RA, 1, SEG_RAID5_RA, "raid5*/raid6" },
+ { SEG_TYPE_NAME_RAID5_RS, 1, SEG_RAID5_RS, "raid5*/raid6" },
+ { SEG_TYPE_NAME_RAID5, 1, SEG_RAID5 /* is raid5_ls */, "raid5*/raid6" },
+ { SEG_TYPE_NAME_RAID6_NC, 2, SEG_RAID6_NC, "raid6*" },
+ { SEG_TYPE_NAME_RAID6_NR, 2, SEG_RAID6_NR, "raid6*" },
+ { SEG_TYPE_NAME_RAID6_ZR, 2, SEG_RAID6_ZR, "raid6*" },
+ { SEG_TYPE_NAME_RAID6_LA_6, 2, SEG_RAID6_LA_6, "raid5/raid6*" },
+ { SEG_TYPE_NAME_RAID6_LS_6, 2, SEG_RAID6_LS_6, "raid5/raid6*" },
+ { SEG_TYPE_NAME_RAID6_RA_6, 2, SEG_RAID6_RA_6, "raid5/raid6*" },
+ { SEG_TYPE_NAME_RAID6_RS_6, 2, SEG_RAID6_RS_6, "raid5/raid6*" },
+ { SEG_TYPE_NAME_RAID6_N_6, 2, SEG_RAID6_N_6, "striped/raid0*/raid5/raid6*" },
+ { SEG_TYPE_NAME_RAID6, 2, SEG_RAID6 /* is raid6_zr */, "raid6*" },
};
static struct segment_type *_init_raid_segtype(struct cmd_context *cmd,
@@ -526,6 +533,7 @@ static struct segment_type *_init_raid_segtype(struct cmd_context *cmd,
segtype->ops = &_raid_ops;
segtype->name = rt->name;
+ segtype->descr = rt->descr ?: "";
segtype->flags = SEG_RAID | SEG_ONLY_EXCLUSIVE | rt->extra_flags | monitored;
segtype->parity_devs = rt->parity;
diff --git a/lib/replicator/replicator.c b/lib/replicator/replicator.c
index ae87d7f95..7a8c3a83e 100644
--- a/lib/replicator/replicator.c
+++ b/lib/replicator/replicator.c
@@ -672,7 +672,7 @@ static int _replicator_dev_add_target_line(struct dev_manager *dm,
if (!(slog_dlid = build_dm_uuid(mem, rdev->slog, NULL)))
return_0;
} else if (rdev->slog_name &&
- sscanf(rdev->slog_name, "%" PRIu32, &slog_size) == 1) {
+ sscanf(rdev->slog_name, FMTu32, &slog_size) == 1) {
slog_flags = DM_CORELOG | DM_FORCESYNC;
if (slog_size == 0) {
log_error("Failed to use empty corelog size "
diff --git a/lib/report/columns.h b/lib/report/columns.h
index 1dd2ae0dc..5a250ed12 100644
--- a/lib/report/columns.h
+++ b/lib/report/columns.h
@@ -38,7 +38,7 @@ FIELD(LVS, lv, STR, "LV", lvid, 4, lvfullname, lv_full_name, "Full name of LV in
FIELD(LVS, lv, STR, "Path", lvid, 4, lvpath, lv_path, "Full pathname for LV. Blank for internal LVs.", 0)
FIELD(LVS, lv, STR, "DMPath", lvid, 6, lvdmpath, lv_dm_path, "Internal device-mapper pathname for LV (in /dev/mapper directory).", 0)
FIELD(LVS, lv, STR, "Parent", lvid, 6, lvparent, lv_parent, "For LVs that are components of another LV, the parent LV.", 0)
-FIELD(LVS, lv, STR, "Attr", lvid, 4, lvstatus, lv_attr, "Various attributes - see man page.", 0)
+FIELD(LVSINFOSTATUS, lv, STR, "Attr", lvid, 4, lvstatus, lv_attr, "Various attributes - see man page.", 0)
FIELD(LVS, lv, STR_LIST, "Layout", lvid, 10, lvlayout, lv_layout, "LV layout.", 0)
FIELD(LVS, lv, STR_LIST, "Role", lvid, 10, lvrole, lv_role, "LV role.", 0)
FIELD(LVS, lv, BIN, "InitImgSync", lvid, 10, lvinitialimagesync, lv_initial_image_sync, "Set if mirror/RAID images underwent initial resynchronization.", 0)
@@ -50,20 +50,22 @@ FIELD(LVS, lv, BIN, "AllocLock", lvid, 10, lvallocationlocked, lv_allocation_loc
FIELD(LVS, lv, BIN, "FixMin", lvid, 10, lvfixedminor, lv_fixed_minor, "Set if LV has fixed minor number assigned.", 0)
FIELD(LVS, lv, BIN, "MergeFailed", lvid, 15, lvmergefailed, lv_merge_failed, "Set if snapshot merge failed.", 0)
FIELD(LVS, lv, BIN, "SnapInvalid", lvid, 15, lvsnapshotinvalid, lv_snapshot_invalid, "Set if snapshot LV is invalid.", 0)
-FIELD(LVS, lv, STR, "Health", lvid, 15, lvhealthstatus, lv_health_status, "LV health status.", 0)
FIELD(LVS, lv, BIN, "SkipAct", lvid, 15, lvskipactivation, lv_skip_activation, "Set if LV is skipped on activation.", 0)
+FIELD(LVS, lv, STR, "WhenFull", lvid, 15, lvwhenfull, lv_when_full, "For thin pools, behavior when full.", 0)
FIELD(LVS, lv, STR, "Active", lvid, 6, lvactive, lv_active, "Active state of the LV.", 0)
FIELD(LVS, lv, BIN, "ActLocal", lvid, 10, lvactivelocally, lv_active_locally, "Set if the LV is active locally.", 0)
FIELD(LVS, lv, BIN, "ActRemote", lvid, 10, lvactiveremotely, lv_active_remotely, "Set if the LV is active remotely.", 0)
FIELD(LVS, lv, BIN, "ActExcl", lvid, 10, lvactiveexclusively, lv_active_exclusively, "Set if the LV is active exclusively.", 0)
-FIELD(LVS, lv, NUM, "Maj", major, 3, int32, lv_major, "Persistent major number or -1 if not persistent.", 0)
-FIELD(LVS, lv, NUM, "Min", minor, 3, int32, lv_minor, "Persistent minor number or -1 if not persistent.", 0)
+FIELD(LVS, lv, SNUM, "Maj", major, 3, int32, lv_major, "Persistent major number or -1 if not persistent.", 0)
+FIELD(LVS, lv, SNUM, "Min", minor, 3, int32, lv_minor, "Persistent minor number or -1 if not persistent.", 0)
FIELD(LVS, lv, SIZ, "Rahead", lvid, 6, lvreadahead, lv_read_ahead, "Read ahead setting in current units.", 0)
-FIELD(LVS, lv, SIZ, "LSize", size, 5, size64, lv_size, "Size of LV in current units.", 0)
-FIELD(LVS, lv, SIZ, "MSize", lvid, 6, lvmetadatasize, lv_metadata_size, "For thin pools, the size of the LV that holds the metadata.", 0)
+FIELD(LVS, lv, SIZ, "LSize", lvid, 5, lvsize, lv_size, "Size of LV in current units.", 0)
+FIELD(LVS, lv, SIZ, "MSize", lvid, 6, lvmetadatasize, lv_metadata_size, "For thin and cache pools, the size of the LV that holds the metadata.", 0)
FIELD(LVS, lv, NUM, "#Seg", lvid, 4, lvsegcount, seg_count, "Number of segments in LV.", 0)
FIELD(LVS, lv, STR, "Origin", lvid, 6, origin, origin, "For snapshots, the origin device of this LV.", 0)
FIELD(LVS, lv, SIZ, "OSize", lvid, 5, originsize, origin_size, "For snapshots, the size of the origin device of this LV.", 0)
+FIELD(LVS, lv, STR_LIST, "Ancestors", lvid, 12, lvancestors, lv_ancestors, "Ancestors of this LV.", 0)
+FIELD(LVS, lv, STR_LIST, "Descendants", lvid, 12, lvdescendants, lv_descendants, "Descendants of this LV.", 0)
FIELD(LVS, lv, PCT, "Data%", lvid, 6, datapercent, data_percent, "For snapshot and thin pools and volumes, the percentage full if LV is active.", 0)
FIELD(LVS, lv, PCT, "Snap%", lvid, 6, snpercent, snap_percent, "For snapshots, the percentage full if LV is active.", 0)
FIELD(LVS, lv, PCT, "Meta%", lvid, 6, metadatapercent, metadata_percent, "For thin pools, the percentage of metadata full if LV is active.", 0)
@@ -77,17 +79,18 @@ FIELD(LVS, lv, NUM, "MaxSync", lvid, 7, raidmaxrecoveryrate, raid_max_recovery_r
FIELD(LVS, lv, STR, "Move", lvid, 4, movepv, move_pv, "For pvmove, Source PV of temporary LV created by pvmove.", 0)
FIELD(LVS, lv, STR, "Convert", lvid, 7, convertlv, convert_lv, "For lvconvert, Name of temporary LV created by lvconvert.", 0)
FIELD(LVS, lv, STR, "Log", lvid, 3, loglv, mirror_log, "For mirrors, the LV holding the synchronisation log.", 0)
-FIELD(LVS, lv, STR, "Data", lvid, 4, datalv, data_lv, "For thin pools, the LV holding the associated data.", 0)
-FIELD(LVS, lv, STR, "Meta", lvid, 4, metadatalv, metadata_lv, "For thin pools, the LV holding the associated metadata.", 0)
+FIELD(LVS, lv, STR, "Data", lvid, 4, datalv, data_lv, "For thin and cache pools, the LV holding the associated data.", 0)
+FIELD(LVS, lv, STR, "Meta", lvid, 4, metadatalv, metadata_lv, "For thin and cache pools, the LV holding the associated metadata.", 0)
FIELD(LVS, lv, STR, "Pool", lvid, 4, poollv, pool_lv, "For thin volumes, the thin pool LV for this volume.", 0)
FIELD(LVS, lv, STR_LIST, "LV Tags", tags, 7, tags, lv_tags, "Tags, if any.", 0)
FIELD(LVS, lv, STR, "LProfile", lvid, 8, lvprofile, lv_profile, "Configuration profile attached to this LV.", 0)
-FIELD(LVS, lv, STR, "Time", lvid, 26, lvtime, lv_time, "Creation time of the LV, if known", 0)
+FIELD(LVS, lv, STR, "Lock Args", lvid, 9, lvlockargs, lv_lockargs, "Lock args of the LV used by lvmlockd.", 0)
+FIELD(LVS, lv, TIM, "Time", lvid, 26, lvtime, lv_time, "Creation time of the LV, if known", 0)
FIELD(LVS, lv, STR, "Host", lvid, 10, lvhost, lv_host, "Creation host of the LV, if known.", 0)
FIELD(LVS, lv, STR_LIST, "Modules", lvid, 7, modules, lv_modules, "Kernel device-mapper modules required for this LV.", 0)
-FIELD(LVSINFO, lv, NUM, "KMaj", lvid, 4, lvkmaj, lv_kernel_major, "Currently assigned major number or -1 if LV is not active.", 0)
-FIELD(LVSINFO, lv, NUM, "KMin", lvid, 4, lvkmin, lv_kernel_minor, "Currently assigned minor number or -1 if LV is not active.", 0)
+FIELD(LVSINFO, lv, SNUM, "KMaj", lvid, 4, lvkmaj, lv_kernel_major, "Currently assigned major number or -1 if LV is not active.", 0)
+FIELD(LVSINFO, lv, SNUM, "KMin", lvid, 4, lvkmin, lv_kernel_minor, "Currently assigned minor number or -1 if LV is not active.", 0)
FIELD(LVSINFO, lv, SIZ, "KRahead", lvid, 7, lvkreadahead, lv_kernel_read_ahead, "Currently-in-use read ahead setting in current units.", 0)
FIELD(LVSINFO, lv, STR, "LPerms", lvid, 8, lvpermissions, lv_permissions, "LV permissions.", 0)
FIELD(LVSINFO, lv, BIN, "Suspended", lvid, 10, lvsuspended, lv_suspended, "Set if LV is suspended.", 0)
@@ -102,6 +105,7 @@ FIELD(LVSSTATUS, lv, NUM, "CacheReadHits", lvid, 16, cache_read_hits, cache_read
FIELD(LVSSTATUS, lv, NUM, "CacheReadMisses", lvid, 16, cache_read_misses, cache_read_misses, "Cache read misses.", 0)
FIELD(LVSSTATUS, lv, NUM, "CacheWriteHits", lvid, 16, cache_write_hits, cache_write_hits, "Cache write hits.", 0)
FIELD(LVSSTATUS, lv, NUM, "CacheWriteMisses", lvid, 16, cache_write_misses, cache_write_misses, "Cache write misses.", 0)
+FIELD(LVSSTATUS, lv, STR, "Health", lvid, 15, lvhealthstatus, lv_health_status, "LV health status.", 0)
FIELD(LABEL, label, STR, "Fmt", type, 3, pvfmt, pv_fmt, "Type of metadata.", 0)
FIELD(LABEL, label, STR, "PV UUID", type, 38, pvuuid, pv_uuid, "Unique identifier.", 0)
@@ -123,7 +127,7 @@ FIELD(PVS, pv, NUM, "Alloc", pe_alloc_count, 5, uint32, pv_pe_alloc_count, "Tota
FIELD(PVS, pv, STR_LIST, "PV Tags", tags, 7, tags, pv_tags, "Tags, if any.", 0)
FIELD(PVS, pv, NUM, "#PMda", id, 5, pvmdas, pv_mda_count, "Number of metadata areas on this device.", 0)
FIELD(PVS, pv, NUM, "#PMdaUse", id, 8, pvmdasused, pv_mda_used_count, "Number of metadata areas in use on this device.", 0)
-FIELD(PVS, pv, NUM, "BA start", ba_start, 8, size64, pv_ba_start, "Offset to the start of PV Bootloader Area on the underlying device in current units.", 0)
+FIELD(PVS, pv, SIZ, "BA start", ba_start, 8, size64, pv_ba_start, "Offset to the start of PV Bootloader Area on the underlying device in current units.", 0)
FIELD(PVS, pv, SIZ, "BA size", ba_size, 7, size64, pv_ba_size, "Size of PV Bootloader Area in current units.", 0)
FIELD(VGS, vg, STR, "Fmt", cmd, 3, vgfmt, vg_fmt, "Type of metadata.", 0)
@@ -137,8 +141,11 @@ FIELD(VGS, vg, BIN, "Partial", cmd, 10, vgpartial, vg_partial, "Set if VG is par
FIELD(VGS, vg, STR, "AllocPol", cmd, 10, vgallocationpolicy, vg_allocation_policy, "VG allocation policy.", 0)
FIELD(VGS, vg, BIN, "Clustered", cmd, 10, vgclustered, vg_clustered, "Set if VG is clustered.", 0)
FIELD(VGS, vg, SIZ, "VSize", cmd, 5, vgsize, vg_size, "Total size of VG in current units.", 0)
-FIELD(VGS, vg, NUM, "VFree", cmd, 5, vgfree, vg_free, "Total amount of free space in current units.", 0)
-FIELD(VGS, vg, STR, "SYS ID", system_id, 6, string, vg_sysid, "System ID indicating when and where it was created.", 0)
+FIELD(VGS, vg, SIZ, "VFree", cmd, 5, vgfree, vg_free, "Total amount of free space in current units.", 0)
+FIELD(VGS, vg, STR, "SYS ID", cmd, 6, vgsystemid, vg_sysid, "System ID of the VG indicating which host owns it.", 0)
+FIELD(VGS, vg, STR, "System ID", cmd, 9, vgsystemid, vg_systemid, "System ID of the VG indicating which host owns it.", 0)
+FIELD(VGS, vg, STR, "Lock Type", cmd, 9, vglocktype, vg_locktype, "Lock type of the VG used by lvmlockd.", 0)
+FIELD(VGS, vg, STR, "Lock Args", cmd, 9, vglockargs, vg_lockargs, "Lock args of the VG used by lvmlockd.", 0)
FIELD(VGS, vg, SIZ, "Ext", extent_size, 3, size32, vg_extent_size, "Size of Physical Extents in current units.", 0)
FIELD(VGS, vg, NUM, "#Ext", extent_count, 4, uint32, vg_extent_count, "Total number of Physical Extents.", 0)
FIELD(VGS, vg, NUM, "Free", free_count, 4, uint32, vg_free_count, "Total number of unallocated Physical Extents.", 0)
@@ -157,7 +164,21 @@ FIELD(VGS, vg, SIZ, "VMdaSize", cmd, 9, vgmdasize, vg_mda_size, "Size of smalles
FIELD(VGS, vg, NUM, "#VMdaCps", cmd, 8, vgmdacopies, vg_mda_copies, "Target number of in use metadata areas in the VG.", 1)
FIELD(SEGS, seg, STR, "Type", list, 4, segtype, segtype, "Type of LV segment.", 0)
-FIELD(SEGS, seg, NUM, "#Str", area_count, 4, uint32, stripes, "Number of stripes or mirror legs.", 0)
+// FIELD(SEGS, seg, NUM, "#Str", area_count, 4, uint32, stripes, "Number of total stripes or mirror/raid1 legs.", 0)
+FIELD(SEGS, seg, NUM, "#Str", list, 5, seg_stripes, stripes, "Number of data stripes or mirror/raid1 legs.", 0)
+FIELD(SEGS, seg, NUM, "#DStr", list, 5, segdata_stripes, datastripes, "Number of data stripes or mirror/raid1 legs.", 0)
+FIELD(SEGS, seg, NUM, "#DStr", list, 5, segdata_stripes, data_stripes, "Number of data stripes or mirror/raid1 legs.", 0)
+FIELD(SEGS, seg, NUM, "SRes", list, 4, segreshape_len, reshape_len, "Number of reshape extents.", 0)
+FIELD(SEGS, seg, NUM, "SRes", list, 4, segreshape_len, reshapelen, "Number of reshape extents.", 0)
+FIELD(SEGS, seg, NUM, "#Cpy", list, 4, segdata_copies, datacopies, "Number of data copies.", 0)
+FIELD(SEGS, seg, NUM, "#Cpy", list, 4, segdata_copies, data_copies, "Number of data copies.", 0)
+FIELD(SEGS, seg, NUM, "data", list, 4, segdata_offset, dataoffset, "Data offset on each image device.", 0)
+FIELD(SEGS, seg, NUM, "data", list, 4, segdata_offset, data_offset, "Data offset on each image device.", 0)
+FIELD(SEGS, seg, NUM, "ndata", list, 4, segnewdata_offset, newdataoffset, "New data offset after any reshape on each image device.", 0)
+FIELD(SEGS, seg, NUM, "ndata", list, 4, segnewdata_offset, new_data_offset, "New data offset after any reshape on each image device.", 0)
+FIELD(SEGS, seg, NUM, "#Par", list, 4, seg_parity_chunks, parity, "Number of (rotating) parity chunks.", 0)
+FIELD(SEGS, seg, NUM, "#Par", list, 4, seg_parity_chunks, parity_chunks, "Number of (rotating) parity chunks.", 0)
+FIELD(SEGS, seg, NUM, "#Par", list, 4, seg_parity_chunks, parity_devs, "Number of (rotating) parity chunks.", 0)
FIELD(SEGS, seg, SIZ, "Stripe", stripe_size, 6, size32, stripesize, "For stripes, amount of data placed on one device before switching to the next.", 0)
FIELD(SEGS, seg, SIZ, "Stripe", stripe_size, 6, size32, stripe_size, "For stripes, amount of data placed on one device before switching to the next.", 0)
FIELD(SEGS, seg, SIZ, "Region", region_size, 6, size32, regionsize, "For mirrors, the unit of data copied when synchronising devices.", 0)
@@ -170,7 +191,7 @@ FIELD(SEGS, seg, STR, "Cachemode", list, 9, cachemode, cachemode, "For cache poo
FIELD(SEGS, seg, BIN, "Zero", list, 4, thinzero, zero, "For thin pools, if zeroing is enabled.", 0)
FIELD(SEGS, seg, NUM, "TransId", list, 4, transactionid, transaction_id, "For thin pools, the transaction id.", 0)
FIELD(SEGS, seg, NUM, "ThId", list, 4, thinid, thin_id, "For thin volume, the thin device id.", 0)
-FIELD(SEGS, seg, NUM, "Start", list, 5, segstart, seg_start, "Offset within the LV to the start of the segment in current units.", 0)
+FIELD(SEGS, seg, SIZ, "Start", list, 5, segstart, seg_start, "Offset within the LV to the start of the segment in current units.", 0)
FIELD(SEGS, seg, NUM, "Start", list, 5, segstartpe, seg_start_pe, "Offset within the LV to the start of the segment in physical extents.", 0)
FIELD(SEGS, seg, SIZ, "SSize", list, 5, segsize, seg_size, "Size of segment in current units.", 0)
FIELD(SEGS, seg, SIZ, "SSize", list, 5, segsizepe, seg_size_pe, "Size of segment in physical extents.", 0)
diff --git a/lib/report/properties.c b/lib/report/properties.c
index 4796000d4..59233d296 100644
--- a/lib/report/properties.c
+++ b/lib/report/properties.c
@@ -16,6 +16,7 @@
#include "properties.h"
#include "activate.h"
#include "metadata.h"
+#include "segtype.h"
#define GET_VG_NUM_PROPERTY_FN(NAME, VALUE) \
@@ -281,6 +282,8 @@ GET_LV_STR_PROPERTY_FN(lv_attr, lv_attr_dup(lv->vg->vgmem, lv))
GET_LV_NUM_PROPERTY_FN(lv_major, lv->major)
#define _lv_major_set prop_not_implemented_set
GET_LV_NUM_PROPERTY_FN(lv_minor, lv->minor)
+#define _lv_when_full_get prop_not_implemented_get
+#define _lv_when_full_set prop_not_implemented_set
#define _lv_minor_set prop_not_implemented_set
GET_LV_NUM_PROPERTY_FN(lv_read_ahead, lv->read_ahead * SECTOR_SIZE)
#define _lv_read_ahead_set prop_not_implemented_set
@@ -290,7 +293,7 @@ GET_LV_NUM_PROPERTY_FN(lv_kernel_minor, lv_kernel_minor(lv))
#define _lv_kernel_minor_set prop_not_implemented_set
GET_LV_NUM_PROPERTY_FN(lv_kernel_read_ahead, lv_kernel_read_ahead(lv) * SECTOR_SIZE)
#define _lv_kernel_read_ahead_set prop_not_implemented_set
-GET_LV_NUM_PROPERTY_FN(lv_size, lv->size * SECTOR_SIZE)
+GET_LV_NUM_PROPERTY_FN(lv_size, (lv->le_count - first_seg(lv)->reshape_len) * lv->vg->extent_size) // lv->size * SECTOR_SIZE)
#define _lv_size_set prop_not_implemented_set
GET_LV_NUM_PROPERTY_FN(seg_count, dm_list_size(&lv->segments))
#define _seg_count_set prop_not_implemented_set
@@ -298,6 +301,10 @@ GET_LV_STR_PROPERTY_FN(origin, lv_origin_dup(lv->vg->vgmem, lv))
#define _origin_set prop_not_implemented_set
GET_LV_NUM_PROPERTY_FN(origin_size, (SECTOR_SIZE * lv_origin_size(lv)))
#define _origin_size_set prop_not_implemented_set
+#define _lv_ancestors_set prop_not_implemented_set
+#define _lv_ancestors_get prop_not_implemented_get
+#define _lv_descendants_set prop_not_implemented_set
+#define _lv_descendants_get prop_not_implemented_get
GET_LV_NUM_PROPERTY_FN(snap_percent, _snap_percent(lv))
#define _snap_percent_set prop_not_implemented_set
GET_LV_NUM_PROPERTY_FN(copy_percent, _copy_percent(lv))
@@ -336,7 +343,7 @@ GET_LV_NUM_PROPERTY_FN(metadata_percent, _metadata_percent(lv))
#define _metadata_percent_set prop_not_implemented_set
GET_LV_NUM_PROPERTY_FN(lv_metadata_size, lv_metadata_size(lv) * SECTOR_SIZE)
#define _lv_metadata_size_set prop_not_implemented_set
-GET_LV_STR_PROPERTY_FN(lv_time, lv_time_dup(lv->vg->vgmem, lv))
+GET_LV_STR_PROPERTY_FN(lv_time, lv_time_dup(lv->vg->vgmem, lv, 0))
#define _lv_time_set prop_not_implemented_set
GET_LV_STR_PROPERTY_FN(lv_host, lv_host_dup(lv->vg->vgmem, lv))
#define _lv_host_set prop_not_implemented_set
@@ -344,6 +351,8 @@ GET_LV_STR_PROPERTY_FN(lv_active, lv_active_dup(lv->vg->vgmem, lv))
#define _lv_active_set prop_not_implemented_set
GET_LV_STR_PROPERTY_FN(lv_profile, lv_profile_dup(lv->vg->vgmem, lv))
#define _lv_profile_set prop_not_implemented_set
+GET_LV_STR_PROPERTY_FN(lv_lockargs, lv_lock_args_dup(lv->vg->vgmem, lv))
+#define _lv_lockargs_set prop_not_implemented_set
/* VG */
GET_VG_STR_PROPERTY_FN(vg_fmt, vg_fmt_dup(vg))
@@ -360,6 +369,12 @@ GET_VG_NUM_PROPERTY_FN(vg_free, (SECTOR_SIZE * vg_free(vg)))
#define _vg_free_set prop_not_implemented_set
GET_VG_STR_PROPERTY_FN(vg_sysid, vg_system_id_dup(vg))
#define _vg_sysid_set prop_not_implemented_set
+GET_VG_STR_PROPERTY_FN(vg_systemid, vg_system_id_dup(vg))
+#define _vg_systemid_set prop_not_implemented_set
+GET_VG_STR_PROPERTY_FN(vg_locktype, vg_lock_type_dup(vg))
+#define _vg_locktype_set prop_not_implemented_set
+GET_VG_STR_PROPERTY_FN(vg_lockargs, vg_lock_args_dup(vg))
+#define _vg_lockargs_set prop_not_implemented_set
GET_VG_NUM_PROPERTY_FN(vg_extent_size, (SECTOR_SIZE * vg->extent_size))
#define _vg_extent_size_set prop_not_implemented_set
GET_VG_NUM_PROPERTY_FN(vg_extent_count, vg->extent_count)
@@ -394,10 +409,36 @@ GET_VG_STR_PROPERTY_FN(vg_profile, vg_profile_dup(vg))
#define _vg_profile_set prop_not_implemented_set
/* LVSEG */
-GET_LVSEG_STR_PROPERTY_FN(segtype, lvseg_segtype_dup(lvseg->lv->vg->vgmem, lvseg))
#define _segtype_set prop_not_implemented_set
+GET_LVSEG_STR_PROPERTY_FN(segtype, lvseg_segtype_dup(lvseg->lv->vg->vgmem, lvseg))
+#define _datacopies_set prop_not_implemented_set
+GET_LVSEG_NUM_PROPERTY_FN(datacopies, lvseg->segtype->parity_devs ? lvseg->segtype->parity_devs + 1 : lvseg->data_copies)
+#define _data_copies_set prop_not_implemented_set
+GET_LVSEG_NUM_PROPERTY_FN(data_copies, lvseg->segtype->parity_devs ? lvseg->segtype->parity_devs + 1: lvseg->data_copies)
+#define _reshapelen_set prop_not_implemented_set
+GET_LVSEG_NUM_PROPERTY_FN(reshapelen, (lvseg->reshape_len *lvseg->area_count / (lvseg->area_count - lvseg->segtype->parity_devs)))
+#define _reshape_len_set prop_not_implemented_set
+GET_LVSEG_NUM_PROPERTY_FN(reshape_len, (lvseg->reshape_len *lvseg->area_count / (lvseg->area_count - lvseg->segtype->parity_devs)))
+GET_LVSEG_NUM_PROPERTY_FN(dataoffset, lvseg->data_offset)
+#define _dataoffset_set prop_not_implemented_set
+GET_LVSEG_NUM_PROPERTY_FN(data_offset, lvseg->data_offset)
+#define _data_offset_set prop_not_implemented_set
+GET_LVSEG_NUM_PROPERTY_FN(newdataoffset, lvseg->data_offset)
+#define _newdataoffset_set prop_not_implemented_set
+GET_LVSEG_NUM_PROPERTY_FN(new_data_offset, lvseg->data_offset)
+#define _new_data_offset_set prop_not_implemented_set
+GET_LVSEG_NUM_PROPERTY_FN(parity, lvseg->data_offset)
+#define _parity_set prop_not_implemented_set
+GET_LVSEG_NUM_PROPERTY_FN(parity_chunks, lvseg->data_offset)
+#define _parity_chunks_set prop_not_implemented_set
+GET_LVSEG_NUM_PROPERTY_FN(parity_devs, lvseg->data_offset)
+#define _parity_devs_set prop_not_implemented_set
GET_LVSEG_NUM_PROPERTY_FN(stripes, lvseg->area_count)
#define _stripes_set prop_not_implemented_set
+GET_LVSEG_NUM_PROPERTY_FN(datastripes, lvseg->area_count - lvseg->segtype->parity_devs)
+#define _datastripes_set prop_not_implemented_set
+GET_LVSEG_NUM_PROPERTY_FN(data_stripes, lvseg->area_count - lvseg->segtype->parity_devs)
+#define _data_stripes_set prop_not_implemented_set
GET_LVSEG_NUM_PROPERTY_FN(stripesize, (SECTOR_SIZE * lvseg->stripe_size))
#define _stripesize_set prop_not_implemented_set
GET_LVSEG_NUM_PROPERTY_FN(stripe_size, (SECTOR_SIZE * lvseg->stripe_size))
@@ -454,7 +495,7 @@ GET_PVSEG_NUM_PROPERTY_FN(pvseg_size, (SECTOR_SIZE * pvseg->len))
struct lvm_property_type _properties[] = {
#include "columns.h"
- { 0, "", 0, 0, 0, { .integer = 0 }, prop_not_implemented_get, prop_not_implemented_set },
+ { 0, "", 0, 0, 0, 0, { .integer = 0 }, prop_not_implemented_get, prop_not_implemented_set },
};
#undef STR
@@ -463,6 +504,7 @@ struct lvm_property_type _properties[] = {
#undef SIZ
#undef PCT
#undef STR_LIST
+#undef SNUM
#undef FIELD
int lvseg_get_property(const struct lv_segment *lvseg,
@@ -474,7 +516,7 @@ int lvseg_get_property(const struct lv_segment *lvseg,
int lv_get_property(const struct logical_volume *lv,
struct lvm_property_type *prop)
{
- return prop_get_property(_properties, lv, prop, LVS);
+ return prop_get_property(_properties, lv, prop, LVS | LVSINFO | LVSSTATUS | LVSINFOSTATUS);
}
int vg_get_property(const struct volume_group *vg,
@@ -498,7 +540,7 @@ int pv_get_property(const struct physical_volume *pv,
int lv_set_property(struct logical_volume *lv,
struct lvm_property_type *prop)
{
- return prop_set_property(_properties, lv, prop, LVS);
+ return prop_set_property(_properties, lv, prop, LVS | LVSINFO | LVSSTATUS | LVSINFOSTATUS);
}
int vg_set_property(struct volume_group *vg,
diff --git a/lib/report/report.c b/lib/report/report.c
index 818e3fd2b..0b5c02692 100644
--- a/lib/report/report.c
+++ b/lib/report/report.c
@@ -1,6 +1,6 @@
/*
* Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
- * Copyright (C) 2004-2014 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2004-2015 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
@@ -26,6 +26,8 @@
#include "str_list.h"
#include <stddef.h> /* offsetof() */
+#include <float.h> /* DBL_MAX */
+#include <time.h>
struct lvm_report_object {
struct volume_group *vg;
@@ -54,6 +56,7 @@ static const char _str_one[] = "1";
static const char _str_no[] = "no";
static const char _str_yes[] = "yes";
static const char _str_unknown[] = "unknown";
+static const double _siz_max = DBL_MAX;
/*
* 32 bit signed is casted to 64 bit unsigned in dm_report_field internally!
@@ -62,6 +65,1060 @@ static const char _str_unknown[] = "unknown";
*/
static const int32_t _reserved_num_undef_32 = INT32_C(-1);
+typedef enum {
+ /* top-level identification */
+ TIME_NULL,
+ TIME_NUM,
+ TIME_STR,
+
+ /* direct numeric value */
+ TIME_NUM__START,
+ TIME_NUM_MULTIPLIER,
+ TIME_NUM_MULTIPLIER_NEGATIVE,
+ TIME_NUM_DAY,
+ TIME_NUM_YEAR,
+ TIME_NUM__END,
+
+ /* direct string value */
+ TIME_STR_TIMEZONE,
+
+ /* time frame strings */
+ TIME_FRAME__START,
+ TIME_FRAME_AGO,
+ TIME_FRAME__END,
+
+ /* labels for dates */
+ TIME_LABEL_DATE__START,
+
+ TIME_LABEL_DATE_TODAY,
+ TIME_LABEL_DATE_YESTERDAY,
+
+ /* weekday name strings */
+ TIME_WEEKDAY__START,
+ TIME_WEEKDAY_SUNDAY,
+ TIME_WEEKDAY_MONDAY,
+ TIME_WEEKDAY_TUESDAY,
+ TIME_WEEKDAY_WEDNESDAY,
+ TIME_WEEKDAY_THURSDAY,
+ TIME_WEEKDAY_FRIDAY,
+ TIME_WEEKDAY_SATURDAY,
+ TIME_WEEKDAY__END,
+
+ TIME_LABEL_DATE__END,
+
+ /* labels for times */
+ TIME_LABEL_TIME__START,
+ TIME_LABEL_TIME_NOON,
+ TIME_LABEL_TIME_MIDNIGHT,
+ TIME_LABEL_TIME__END,
+
+ /* time unit strings */
+ TIME_UNIT__START,
+ TIME_UNIT_SECOND,
+ TIME_UNIT_SECOND_REL,
+ TIME_UNIT_MINUTE,
+ TIME_UNIT_MINUTE_REL,
+ TIME_UNIT_HOUR,
+ TIME_UNIT_HOUR_REL,
+ TIME_UNIT_AM,
+ TIME_UNIT_PM,
+ TIME_UNIT_DAY,
+ TIME_UNIT_WEEK,
+ TIME_UNIT_MONTH,
+ TIME_UNIT_YEAR,
+ TIME_UNIT_TZ_MINUTE,
+ TIME_UNIT_TZ_HOUR,
+ TIME_UNIT__END,
+
+ /* month name strings */
+ TIME_MONTH__START,
+ TIME_MONTH_JANUARY,
+ TIME_MONTH_FEBRUARY,
+ TIME_MONTH_MARCH,
+ TIME_MONTH_APRIL,
+ TIME_MONTH_MAY,
+ TIME_MONTH_JUNE,
+ TIME_MONTH_JULY,
+ TIME_MONTH_AUGUST,
+ TIME_MONTH_SEPTEMBER,
+ TIME_MONTH_OCTOBER,
+ TIME_MONTH_NOVEMBER,
+ TIME_MONTH_DECEMBER,
+ TIME_MONTH__END,
+} time_id_t;
+
+#define TIME_PROP_DATE 0x00000001 /* date-related */
+#define TIME_PROP_TIME 0x00000002 /* time-related */
+#define TIME_PROP_ABS 0x00000004 /* absolute value */
+#define TIME_PROP_REL 0x00000008 /* relative value */
+
+struct time_prop {
+ time_id_t id;
+ uint32_t prop_flags;
+ time_id_t granularity;
+};
+
+#define ADD_TIME_PROP(id, flags, granularity) [id] = {id, flags, granularity},
+
+static const struct time_prop _time_props[] = {
+ ADD_TIME_PROP(TIME_NULL, 0, TIME_NULL)
+ ADD_TIME_PROP(TIME_NUM, 0, TIME_NULL)
+ ADD_TIME_PROP(TIME_STR, 0, TIME_NULL)
+
+ ADD_TIME_PROP(TIME_NUM_MULTIPLIER, 0, TIME_NULL)
+ ADD_TIME_PROP(TIME_NUM_MULTIPLIER_NEGATIVE, 0, TIME_NULL)
+ ADD_TIME_PROP(TIME_NUM_DAY, TIME_PROP_DATE | TIME_PROP_ABS, TIME_UNIT_DAY)
+ ADD_TIME_PROP(TIME_NUM_YEAR, TIME_PROP_DATE | TIME_PROP_ABS, TIME_UNIT_YEAR)
+
+ ADD_TIME_PROP(TIME_STR_TIMEZONE, TIME_PROP_TIME | TIME_PROP_ABS, TIME_NULL)
+
+ ADD_TIME_PROP(TIME_FRAME_AGO, TIME_PROP_DATE | TIME_PROP_TIME | TIME_PROP_REL, TIME_NULL)
+
+ ADD_TIME_PROP(TIME_LABEL_DATE_TODAY, TIME_PROP_DATE | TIME_PROP_ABS, TIME_UNIT_DAY)
+ ADD_TIME_PROP(TIME_LABEL_DATE_YESTERDAY, TIME_PROP_DATE | TIME_PROP_ABS, TIME_UNIT_DAY)
+ ADD_TIME_PROP(TIME_WEEKDAY_SUNDAY, TIME_PROP_DATE | TIME_PROP_ABS, TIME_UNIT_DAY)
+ ADD_TIME_PROP(TIME_WEEKDAY_MONDAY, TIME_PROP_DATE | TIME_PROP_ABS, TIME_UNIT_DAY)
+ ADD_TIME_PROP(TIME_WEEKDAY_TUESDAY, TIME_PROP_DATE | TIME_PROP_ABS, TIME_UNIT_DAY)
+ ADD_TIME_PROP(TIME_WEEKDAY_WEDNESDAY, TIME_PROP_DATE | TIME_PROP_ABS, TIME_UNIT_DAY)
+ ADD_TIME_PROP(TIME_WEEKDAY_THURSDAY, TIME_PROP_DATE | TIME_PROP_ABS, TIME_UNIT_DAY)
+ ADD_TIME_PROP(TIME_WEEKDAY_FRIDAY, TIME_PROP_DATE | TIME_PROP_ABS, TIME_UNIT_DAY)
+ ADD_TIME_PROP(TIME_WEEKDAY_SATURDAY, TIME_PROP_DATE | TIME_PROP_ABS, TIME_UNIT_DAY)
+
+ ADD_TIME_PROP(TIME_LABEL_TIME_NOON, TIME_PROP_TIME | TIME_PROP_ABS, TIME_UNIT_SECOND)
+ ADD_TIME_PROP(TIME_LABEL_TIME_MIDNIGHT, TIME_PROP_TIME | TIME_PROP_ABS, TIME_UNIT_SECOND)
+
+ ADD_TIME_PROP(TIME_UNIT_SECOND, TIME_PROP_TIME | TIME_PROP_ABS, TIME_UNIT_SECOND)
+ ADD_TIME_PROP(TIME_UNIT_SECOND_REL, TIME_PROP_TIME | TIME_PROP_REL, TIME_UNIT_SECOND)
+ ADD_TIME_PROP(TIME_UNIT_MINUTE, TIME_PROP_TIME | TIME_PROP_ABS, TIME_UNIT_MINUTE)
+ ADD_TIME_PROP(TIME_UNIT_MINUTE_REL, TIME_PROP_TIME | TIME_PROP_REL, TIME_UNIT_MINUTE)
+ ADD_TIME_PROP(TIME_UNIT_HOUR, TIME_PROP_TIME | TIME_PROP_ABS, TIME_UNIT_HOUR)
+ ADD_TIME_PROP(TIME_UNIT_HOUR_REL, TIME_PROP_TIME | TIME_PROP_REL, TIME_UNIT_HOUR)
+ ADD_TIME_PROP(TIME_UNIT_AM, TIME_PROP_TIME | TIME_PROP_ABS, TIME_UNIT_HOUR)
+ ADD_TIME_PROP(TIME_UNIT_PM, TIME_PROP_TIME | TIME_PROP_ABS, TIME_UNIT_HOUR)
+ ADD_TIME_PROP(TIME_UNIT_DAY, TIME_PROP_DATE | TIME_PROP_REL, TIME_UNIT_DAY)
+ ADD_TIME_PROP(TIME_UNIT_WEEK, TIME_PROP_DATE | TIME_PROP_REL, TIME_UNIT_WEEK)
+ ADD_TIME_PROP(TIME_UNIT_MONTH, TIME_PROP_DATE | TIME_PROP_REL, TIME_UNIT_MONTH)
+ ADD_TIME_PROP(TIME_UNIT_YEAR, TIME_PROP_DATE | TIME_PROP_REL, TIME_UNIT_YEAR)
+ ADD_TIME_PROP(TIME_UNIT_TZ_MINUTE, TIME_PROP_TIME | TIME_PROP_ABS, TIME_NULL)
+ ADD_TIME_PROP(TIME_UNIT_TZ_HOUR, TIME_PROP_TIME | TIME_PROP_ABS, TIME_NULL)
+
+ ADD_TIME_PROP(TIME_MONTH_JANUARY, TIME_PROP_DATE | TIME_PROP_ABS, TIME_UNIT_MONTH)
+ ADD_TIME_PROP(TIME_MONTH_FEBRUARY, TIME_PROP_DATE | TIME_PROP_ABS, TIME_UNIT_MONTH)
+ ADD_TIME_PROP(TIME_MONTH_MARCH, TIME_PROP_DATE | TIME_PROP_ABS, TIME_UNIT_MONTH)
+ ADD_TIME_PROP(TIME_MONTH_APRIL, TIME_PROP_DATE | TIME_PROP_ABS, TIME_UNIT_MONTH)
+ ADD_TIME_PROP(TIME_MONTH_MAY, TIME_PROP_DATE | TIME_PROP_ABS, TIME_UNIT_MONTH)
+ ADD_TIME_PROP(TIME_MONTH_JUNE, TIME_PROP_DATE | TIME_PROP_ABS, TIME_UNIT_MONTH)
+ ADD_TIME_PROP(TIME_MONTH_JULY, TIME_PROP_DATE | TIME_PROP_ABS, TIME_UNIT_MONTH)
+ ADD_TIME_PROP(TIME_MONTH_AUGUST, TIME_PROP_DATE | TIME_PROP_ABS, TIME_UNIT_MONTH)
+ ADD_TIME_PROP(TIME_MONTH_SEPTEMBER, TIME_PROP_DATE | TIME_PROP_ABS, TIME_UNIT_MONTH)
+ ADD_TIME_PROP(TIME_MONTH_OCTOBER, TIME_PROP_DATE | TIME_PROP_ABS, TIME_UNIT_MONTH)
+ ADD_TIME_PROP(TIME_MONTH_NOVEMBER, TIME_PROP_DATE | TIME_PROP_ABS, TIME_UNIT_MONTH)
+ ADD_TIME_PROP(TIME_MONTH_DECEMBER, TIME_PROP_DATE | TIME_PROP_ABS, TIME_UNIT_MONTH)
+};
+
+#define TIME_REG_PLURAL_S 0x00000001 /* also recognize plural form with "s" suffix */
+
+struct time_reg {
+ const char *name;
+ const struct time_prop *prop;
+ uint32_t reg_flags;
+};
+
+#define TIME_PROP(id) (_time_props + id)
+
+static const struct time_reg _time_reg[] = {
+ /*
+ * Group of tokens representing time frame and used
+ * with relative date/time to specify different flavours
+ * of relativity.
+ */
+ {"ago", TIME_PROP(TIME_FRAME_AGO), 0},
+
+ /*
+ * Group of tokens labeling some date and used
+ * instead of direct absolute specification.
+ */
+ {"today", TIME_PROP(TIME_LABEL_DATE_TODAY), 0}, /* 0:00 - 23:59:59 for current date */
+ {"yesterday", TIME_PROP(TIME_LABEL_DATE_YESTERDAY), 0}, /* 0:00 - 23:59:59 for current date minus 1 day*/
+
+ /*
+ * Group of tokens labeling some date - weekday
+ * names used to build up date.
+ */
+ {"Sunday", TIME_PROP(TIME_WEEKDAY_SUNDAY), TIME_REG_PLURAL_S},
+ {"Sun", TIME_PROP(TIME_WEEKDAY_SUNDAY), 0},
+ {"Monday", TIME_PROP(TIME_WEEKDAY_MONDAY), TIME_REG_PLURAL_S},
+ {"Mon", TIME_PROP(TIME_WEEKDAY_MONDAY), 0},
+ {"Tuesday", TIME_PROP(TIME_WEEKDAY_TUESDAY), TIME_REG_PLURAL_S},
+ {"Tue", TIME_PROP(TIME_WEEKDAY_TUESDAY), 0},
+ {"Wednesday", TIME_PROP(TIME_WEEKDAY_WEDNESDAY), TIME_REG_PLURAL_S},
+ {"Wed", TIME_PROP(TIME_WEEKDAY_WEDNESDAY), 0},
+ {"Thursday", TIME_PROP(TIME_WEEKDAY_THURSDAY), TIME_REG_PLURAL_S},
+ {"Thu", TIME_PROP(TIME_WEEKDAY_THURSDAY), 0},
+ {"Friday", TIME_PROP(TIME_WEEKDAY_FRIDAY), TIME_REG_PLURAL_S},
+ {"Fri", TIME_PROP(TIME_WEEKDAY_FRIDAY), 0},
+ {"Saturday", TIME_PROP(TIME_WEEKDAY_SATURDAY), TIME_REG_PLURAL_S},
+ {"Sat", TIME_PROP(TIME_WEEKDAY_SATURDAY), 0},
+
+ /*
+ * Group of tokens labeling some time and used
+ * instead of direct absolute specification.
+ */
+ {"noon", TIME_PROP(TIME_LABEL_TIME_NOON), TIME_REG_PLURAL_S}, /* 12:00:00 */
+ {"midnight", TIME_PROP(TIME_LABEL_TIME_MIDNIGHT), TIME_REG_PLURAL_S}, /* 00:00:00 */
+
+ /*
+ * Group of tokens used to build up time. Most of these
+ * are used either as relative or absolute time units.
+ * The absolute ones are always used with TIME_FRAME_*
+ * token, otherwise the unit is relative.
+ */
+ {"second", TIME_PROP(TIME_UNIT_SECOND), TIME_REG_PLURAL_S},
+ {"sec", TIME_PROP(TIME_UNIT_SECOND), TIME_REG_PLURAL_S},
+ {"s", TIME_PROP(TIME_UNIT_SECOND), 0},
+ {"minute", TIME_PROP(TIME_UNIT_MINUTE), TIME_REG_PLURAL_S},
+ {"min", TIME_PROP(TIME_UNIT_MINUTE), TIME_REG_PLURAL_S},
+ {"m", TIME_PROP(TIME_UNIT_MINUTE), 0},
+ {"hour", TIME_PROP(TIME_UNIT_HOUR), TIME_REG_PLURAL_S},
+ {"hr", TIME_PROP(TIME_UNIT_HOUR), TIME_REG_PLURAL_S},
+ {"h", TIME_PROP(TIME_UNIT_HOUR), 0},
+ {"AM", TIME_PROP(TIME_UNIT_AM), 0},
+ {"PM", TIME_PROP(TIME_UNIT_PM), 0},
+
+ /*
+ * Group of tokens used to build up date.
+ * These are all relative ones.
+ */
+ {"day", TIME_PROP(TIME_UNIT_DAY), TIME_REG_PLURAL_S},
+ {"week", TIME_PROP(TIME_UNIT_WEEK), TIME_REG_PLURAL_S},
+ {"month", TIME_PROP(TIME_UNIT_MONTH), TIME_REG_PLURAL_S},
+ {"year", TIME_PROP(TIME_UNIT_YEAR), TIME_REG_PLURAL_S},
+ {"yr", TIME_PROP(TIME_UNIT_YEAR), TIME_REG_PLURAL_S},
+
+ /*
+ * Group of tokes used to build up date.
+ * These are all absolute.
+ */
+ {"January", TIME_PROP(TIME_MONTH_JANUARY), 0},
+ {"Jan", TIME_PROP(TIME_MONTH_JANUARY), 0},
+ {"February", TIME_PROP(TIME_MONTH_FEBRUARY), 0},
+ {"Feb", TIME_PROP(TIME_MONTH_FEBRUARY), 0},
+ {"March", TIME_PROP(TIME_MONTH_MARCH), 0},
+ {"Mar", TIME_PROP(TIME_MONTH_MARCH), 0},
+ {"April", TIME_PROP(TIME_MONTH_APRIL), 0},
+ {"Apr", TIME_PROP(TIME_MONTH_APRIL), 0},
+ {"May", TIME_PROP(TIME_MONTH_MAY), 0},
+ {"June", TIME_PROP(TIME_MONTH_JUNE), 0},
+ {"Jun", TIME_PROP(TIME_MONTH_JUNE), 0},
+ {"July", TIME_PROP(TIME_MONTH_JULY), 0},
+ {"Jul", TIME_PROP(TIME_MONTH_JULY), 0},
+ {"August", TIME_PROP(TIME_MONTH_AUGUST), 0},
+ {"Aug", TIME_PROP(TIME_MONTH_AUGUST), 0},
+ {"September", TIME_PROP(TIME_MONTH_SEPTEMBER), 0},
+ {"Sep", TIME_PROP(TIME_MONTH_SEPTEMBER), 0},
+ {"October", TIME_PROP(TIME_MONTH_OCTOBER), 0},
+ {"Oct", TIME_PROP(TIME_MONTH_OCTOBER), 0},
+ {"November", TIME_PROP(TIME_MONTH_NOVEMBER), 0},
+ {"Nov", TIME_PROP(TIME_MONTH_NOVEMBER), 0},
+ {"December", TIME_PROP(TIME_MONTH_DECEMBER), 0},
+ {"Dec", TIME_PROP(TIME_MONTH_DECEMBER), 0},
+ {NULL, TIME_PROP(TIME_NULL), 0},
+};
+
+struct time_item {
+ struct dm_list list;
+ const struct time_prop *prop;
+ const char *s;
+ size_t len;
+};
+
+struct time_info {
+ struct dm_pool *mem;
+ struct dm_list *ti_list;
+ time_t *now;
+ time_id_t min_abs_date_granularity;
+ time_id_t max_abs_date_granularity;
+ time_id_t min_abs_time_granularity;
+ time_id_t min_rel_time_granularity;
+};
+
+static int _is_time_num(time_id_t id)
+{
+ return ((id > TIME_NUM__START) && (id < TIME_NUM__END));
+};
+
+/*
+static int _is_time_frame(time_id_t id)
+{
+ return ((id > TIME_FRAME__START) && (id < TIME_FRAME__END));
+};
+*/
+
+static int _is_time_label_date(time_id_t id)
+{
+ return ((id > TIME_LABEL_DATE__START) && (id < TIME_LABEL_DATE__END));
+};
+
+static int _is_time_label_time(time_id_t id)
+{
+ return ((id > TIME_LABEL_TIME__START) && (id < TIME_LABEL_TIME__END));
+};
+
+static int _is_time_unit(time_id_t id)
+{
+ return ((id > TIME_UNIT__START) && (id < TIME_UNIT__END));
+};
+
+static int _is_time_weekday(time_id_t id)
+{
+ return ((id > TIME_WEEKDAY__START) && (id < TIME_WEEKDAY__END));
+};
+
+static int _is_time_month(time_id_t id)
+{
+ return ((id > TIME_MONTH__START) && (id < TIME_MONTH__END));
+};
+
+static const char *_skip_space(const char *s)
+{
+ while (*s && isspace(*s))
+ s++;
+ return s;
+}
+
+/* Move till delim or space */
+static const char *_move_till_item_end(const char *s)
+{
+ char c = *s;
+ int is_num = isdigit(c);
+
+ /*
+ * Allow numbers to be attached to next token, for example
+ * it's correct to write "12 hours" as well as "12hours".
+ */
+ while (c && !isspace(c) && (is_num ? (is_num = isdigit(c)) : 1))
+ c = *++s;
+
+ return s;
+}
+
+static struct time_item *_alloc_time_item(struct dm_pool *mem, time_id_t id,
+ const char *s, size_t len)
+{
+ struct time_item *ti;
+
+ if (!(ti = dm_pool_zalloc(mem, sizeof(struct time_item)))) {
+ log_error("alloc_time_item: dm_pool_zalloc failed");
+ return NULL;
+ }
+
+ ti->prop = &_time_props[id];
+ ti->s = s;
+ ti->len = len;
+
+ return ti;
+}
+
+static int _add_time_part_to_list(struct dm_pool *mem, struct dm_list *list,
+ time_id_t id, int minus, const char *s, size_t len)
+{
+ struct time_item *ti1, *ti2;
+
+ if (!(ti1 = _alloc_time_item(mem, minus ? TIME_NUM_MULTIPLIER_NEGATIVE
+ : TIME_NUM_MULTIPLIER, s, len)) ||
+ !(ti2 = _alloc_time_item(mem, id, s + len, 0)))
+ return 0;
+ dm_list_add(list, &ti1->list);
+ dm_list_add(list, &ti2->list);
+
+ return 1;
+}
+
+static int _get_time(struct dm_pool *mem, const char **str,
+ struct dm_list *list, int tz)
+{
+ const char *end, *s = *str;
+ int r = 0;
+
+ /* hour */
+ end = _move_till_item_end(s);
+ if (!_add_time_part_to_list(mem, list, tz ? TIME_UNIT_TZ_HOUR : TIME_UNIT_HOUR,
+ tz == -1, s, end - s))
+ goto out;
+
+ /* minute */
+ if (*end != ':')
+ /* minute required */
+ goto out;
+ s = end + 1;
+ end = _move_till_item_end(s);
+ if (!_add_time_part_to_list(mem, list, tz ? TIME_UNIT_TZ_MINUTE : TIME_UNIT_MINUTE,
+ tz == -1, s, end - s))
+ goto out;
+
+ /* second */
+ if (*end != ':') {
+ /* second not required */
+ s = end + 1;
+ r = 1;
+ goto out;
+ } else if (tz)
+ /* timezone does not have seconds */
+ goto out;
+
+ s = end + 1;
+ end = _move_till_item_end(s);
+ if (!_add_time_part_to_list(mem, list, TIME_UNIT_SECOND, 0, s, end - s))
+ goto out;
+
+ s = end + 1;
+ r = 1;
+out:
+ *str = s;
+ return r;
+}
+
+/*
+ * Split the input string into top-level items (strings, numbers, times,
+ * timezones) stored as a time_item list in info->ti_list.
+ *
+ * Returns 1 only if the string parsed cleanly AND contains at least one
+ * "fuzzy" element (anything beyond digits, '-', ':', '+', space).  A plain
+ * "YYYY-MM-DD HH:MM:SS +-HH:MM" string returns 0 so that libdm can parse
+ * it directly.
+ */
+static int _preparse_fuzzy_time(const char *s, struct time_info *info)
+{
+	struct dm_list *list;
+	struct time_item *ti;
+	const char *end;
+	int fuzzy = 0;
+	time_id_t id;
+	size_t len;
+	int r = 0;
+	char c;
+
+	if (!(list = dm_pool_alloc(info->mem, sizeof(struct dm_list)))) {
+		log_error("_preparse_fuzzy_time: dm_pool_alloc failed");
+		goto out;
+	}
+	dm_list_init(list);
+	s = _skip_space(s);
+
+	while ((c = *s)) {
+		/*
+		 * If the string consists of -:+, digits or spaces,
+		 * it's not worth looking for fuzzy names here -
+		 * it's standard YYYY-MM-DD HH:MM:SS +-HH:MM format
+		 * and that is parseable by libdm directly.
+		 */
+		/* Cast to unsigned char: ctype functions on a plain (possibly
+		 * negative) char are undefined behaviour. */
+		if (!(isdigit((unsigned char) c) || (c == '-') || (c == ':') || (c == '+')))
+			fuzzy = 1;
+
+		end = _move_till_item_end(s);
+
+		if (isalpha((unsigned char) c))
+			id = TIME_STR;
+		else if (isdigit((unsigned char) c)) {
+			if (*end == ':') {
+				/* we have time */
+				if (!_get_time(info->mem, &s, list, 0))
+					goto out;
+				continue;
+			}
+			/* we have some other number */
+			id = TIME_NUM;
+		} else if ((c == '-') || (c == '+')) {
+			s++;
+			/* we have timezone */
+			if (!_get_time(info->mem, &s, list, (c == '-') ? -1 : 1))
+				goto out;
+			continue;
+		} else
+			goto out;
+
+		len = end - s;
+		if (!(ti = _alloc_time_item(info->mem, id, s, len)))
+			goto out;
+		dm_list_add(list, &ti->list);
+		s += len;
+		s = _skip_space(s);
+	}
+
+	info->ti_list = list;
+	r = 1;
+out:
+	if (!(r && fuzzy)) {
+		/*
+		 * Failure or non-fuzzy input: release the list and make sure
+		 * info->ti_list does not keep a dangling reference, otherwise
+		 * the caller's cleanup would free the pool chunk twice.
+		 */
+		info->ti_list = NULL;
+		if (list)
+			dm_pool_free(info->mem, list);
+		return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Try to recognize the string item 'ti' against the _time_reg name table
+ * (month names, weekday names, units, labels...).  A trailing 's' is
+ * accepted for entries flagged TIME_REG_PLURAL_S ("weeks", "hours").
+ * If the match is a time unit and the previous list item is a plain
+ * number, that number is promoted to a multiplier ("3 weeks").
+ * Returns the matched property id, or TIME_NULL (0) if unrecognized.
+ */
+static int _match_time_str(struct dm_list *ti_list, struct time_item *ti)
+{
+	/* Item preceding 'ti' in the list; NULL if 'ti' is first. */
+	struct time_item *ti_context_p = (struct time_item *) dm_list_prev(ti_list, &ti->list);
+	size_t reg_len;
+	int i;
+
+	ti->prop = TIME_PROP(TIME_NULL);
+
+	for (i = 0; _time_reg[i].name; i++) {
+		reg_len = strlen(_time_reg[i].name);
+		if ((ti->len != reg_len) &&
+		    !((_time_reg[i].reg_flags & TIME_REG_PLURAL_S) &&
+		      (ti->len == reg_len+1) && (ti->s[reg_len] == 's')))
+			continue;
+
+		if (!strncasecmp(ti->s, _time_reg[i].name, reg_len)) {
+			ti->prop = _time_reg[i].prop;
+			if ((ti->prop->id > TIME_UNIT__START) && (ti->prop->id < TIME_UNIT__END) &&
+			    ti_context_p && (ti_context_p->prop->id == TIME_NUM))
+				ti_context_p->prop = TIME_PROP(TIME_NUM_MULTIPLIER);
+			break;
+		}
+	}
+
+	return ti->prop->id;
+}
+
+/*
+ * Disambiguate a bare number by looking at the neighbouring items.
+ * Numbers adjacent to a month name are classified as day-of-month or
+ * year using their digit count (<= 2 digits => day), covering the
+ * "24 Feb 2015", "2015 Feb 24", "24 Feb", "Feb 2015" orderings.
+ * A number with no month context defaults to a year.
+ * Returns the assigned property id (never TIME_NULL here).
+ */
+static int _match_time_num(struct dm_list *ti_list, struct time_item *ti)
+{
+	/* previous item, next item, and the one after next (any may be NULL) */
+	struct time_item *ti_context_p = (struct time_item *) dm_list_prev(ti_list, &ti->list);
+	struct time_item *ti_context_n = (struct time_item *) dm_list_next(ti_list, &ti->list);
+	struct time_item *ti_context_nn = ti_context_n ? (struct time_item *) dm_list_next(ti_list, &ti_context_n->list) : NULL;
+
+	if (ti_context_n &&
+	    (ti_context_n->prop->id > TIME_MONTH__START) &&
+	    (ti_context_n->prop->id < TIME_MONTH__END)) {
+		if (ti_context_nn && ti_context_nn->prop->id == TIME_NUM) {
+			if (ti->len < ti_context_nn->len) {
+				/* 24 Feb 2015 */
+				ti->prop = TIME_PROP(TIME_NUM_DAY);
+				ti_context_nn->prop = TIME_PROP(TIME_NUM_YEAR);
+			} else {
+				/* 2015 Feb 24 */
+				ti->prop = TIME_PROP(TIME_NUM_YEAR);
+				ti_context_nn->prop = TIME_PROP(TIME_NUM_DAY);
+			}
+		} else {
+			if (ti->len <= 2)
+				/* 24 Feb */
+				ti->prop = TIME_PROP(TIME_NUM_DAY);
+			else
+				/* 2015 Feb */
+				ti->prop = TIME_PROP(TIME_NUM_YEAR);
+		}
+	} else if (ti_context_p &&
+		   (ti_context_p->prop->id > TIME_MONTH__START) &&
+		   (ti_context_p->prop->id < TIME_MONTH__END)) {
+		if (ti->len <= 2)
+			/* Feb 24 */
+			ti->prop = TIME_PROP(TIME_NUM_DAY);
+		else
+			/* Feb 2015 */
+			ti->prop = TIME_PROP(TIME_NUM_YEAR);
+	} else
+		/* no month context at all - treat as a year */
+		ti->prop = TIME_PROP(TIME_NUM_YEAR);
+
+	return ti->prop->id;
+}
+
+/*
+ * Track the finest/coarsest granularity mentioned in the specification.
+ * Absolute date items update both min and max date granularity; absolute
+ * and relative time items track their own minimum.  These bounds are
+ * later used to widen the result into a [start, end] time range.
+ */
+static void _detect_time_granularity(struct time_info *info, struct time_item *ti)
+{
+	time_id_t gran = ti->prop->granularity;
+	int is_date, is_abs, is_rel;
+
+	if (gran == TIME_NULL)
+		return; /* item carries no granularity information */
+
+	is_date = ti->prop->prop_flags & TIME_PROP_DATE;
+	is_abs = ti->prop->prop_flags & TIME_PROP_ABS;
+	is_rel = ti->prop->prop_flags & TIME_PROP_REL;
+
+	if (is_date && is_abs) {
+		if (gran > info->max_abs_date_granularity)
+			info->max_abs_date_granularity = gran;
+		if (gran < info->min_abs_date_granularity)
+			info->min_abs_date_granularity = gran;
+	} else {
+		if (is_abs && (gran < info->min_abs_time_granularity))
+			info->min_abs_time_granularity = gran;
+		else if (is_rel && (gran < info->min_rel_time_granularity))
+			info->min_rel_time_granularity = gran;
+	}
+}
+
+/*
+ * 'ti' is a TIME_FRAME_AGO item ("... ago").  Walk backwards from it and
+ * reclassify the preceding second/minute/hour units as relative ones,
+ * stopping at any earlier TIME_FRAME_AGO (that frame was already handled).
+ */
+static void _change_to_relative(struct time_info *info, struct time_item *ti)
+{
+	struct time_item *ti2;
+
+	ti2 = ti;
+	while ((ti2 = (struct time_item *) dm_list_prev(info->ti_list, &ti2->list))) {
+		if (ti2->prop->id == TIME_FRAME_AGO)
+			break;
+
+		switch (ti2->prop->id) {
+		case TIME_UNIT_SECOND:
+			ti2->prop = TIME_PROP(TIME_UNIT_SECOND_REL);
+			break;
+		case TIME_UNIT_MINUTE:
+			ti2->prop = TIME_PROP(TIME_UNIT_MINUTE_REL);
+			break;
+		case TIME_UNIT_HOUR:
+			ti2->prop = TIME_PROP(TIME_UNIT_HOUR_REL);
+			break;
+		default:
+			/* other units are unambiguous - leave them as-is */
+			break;
+		}
+	}
+}
+
+/*
+ * Second parsing pass: resolve each pre-parsed item to a concrete
+ * property.  Strings first (they provide the context), then numbers,
+ * collecting granularity information along the way.
+ * Returns 0 (with an error logged) on any unrecognized item.
+ */
+static int _recognize_time_items(struct time_info *info)
+{
+	struct time_item *ti;
+
+	/*
+	 * At first, try to recognize strings.
+	 * Also, if there are any items which may be absolute or
+	 * relative and we have "TIME_FRAME_AGO", change them to relative.
+	 */
+	dm_list_iterate_items(ti, info->ti_list) {
+		if ((ti->prop->id == TIME_STR) && !_match_time_str(info->ti_list, ti)) {
+			log_error("Unrecognized string in date/time "
+				  "specification at \"%s\".", ti->s);
+			return 0;
+		}
+		if (ti->prop->id == TIME_FRAME_AGO)
+			_change_to_relative(info, ti);
+	}
+
+	/*
+	 * Now, recognize any numbers and be sensitive to the context
+	 * given by strings we recognized before. Also, detect time
+	 * granularity used (both for absolute and/or relative parts).
+	 */
+	dm_list_iterate_items(ti, info->ti_list) {
+		if ((ti->prop->id == TIME_NUM) && !_match_time_num(info->ti_list, ti)) {
+			log_error("Unrecognized number in date/time "
+				  "specification at \"%s\".", ti->s);
+			return 0;
+		}
+		_detect_time_granularity(info, ti);
+	}
+
+	return 1;
+}
+
+/*
+ * Sanity-check the combination of recognized items: reject mixing of
+ * absolute and relative date/time parts and reject a date or time label
+ * ("today", "noon", ...) combined with any other date/time specification.
+ * Returns 1 if the combination is acceptable, 0 otherwise.
+ */
+static int _check_time_items(struct time_info *info)
+{
+	struct time_item *ti;
+	uint32_t flags;
+	int rel;
+	/* -1 = not seen yet; then 0/nonzero records the first item's kind */
+	int date_is_relative = -1, time_is_relative = -1;
+	int label_time = 0, label_date = 0;
+
+	dm_list_iterate_items(ti, info->ti_list) {
+		flags = ti->prop->prop_flags;
+		rel = flags & TIME_PROP_REL;
+
+		if (flags & TIME_PROP_DATE) {
+			if (date_is_relative < 0)
+				date_is_relative = rel;
+			else if ((date_is_relative ^ rel) &&
+				 (info->max_abs_date_granularity >= info->min_rel_time_granularity)) {
+				log_error("Mixed absolute and relative date "
+					  "specification found at \"%s\".", ti->s);
+				return 0;
+			}
+
+			/* Date label can be used only once and not mixed with other date spec. */
+			if (label_date) {
+				log_error("Ambiguous date specification found at \"%s\".", ti->s);
+				return 0;
+			} else if (_is_time_label_date(ti->prop->id))
+				label_date = 1;
+		}
+
+		else if (flags & TIME_PROP_TIME) {
+			if (time_is_relative < 0)
+				time_is_relative = rel;
+			else if ((time_is_relative ^ rel)) {
+				log_error("Mixed absolute and relative time "
+					  "specification found at \"%s\".", ti->s);
+				return 0;
+			}
+
+			/* Time label can be used only once and not mixed with other time spec. */
+			if (label_time) {
+				log_error("Ambiguous time specification found at \"%s\".", ti->s);
+				return 0;
+			} else if (_is_time_label_time(ti->prop->id))
+				label_time = 1;
+		}
+	}
+
+	return 1;
+}
+
+#define CACHE_ID_TIME_NOW "time_now"
+
+/*
+ * Return the current time, cached per report handle under
+ * CACHE_ID_TIME_NOW so that every field evaluated within one report run
+ * sees the same "now".  Allocated from 'mem'; returns NULL on failure.
+ */
+static time_t *_get_now(struct dm_report *rh, struct dm_pool *mem)
+{
+	const void *cached_obj;
+	time_t *now;
+
+	if (!(cached_obj = dm_report_value_cache_get(rh, CACHE_ID_TIME_NOW))) {
+		if (!(now = dm_pool_zalloc(mem, sizeof(time_t)))) {
+			log_error("_get_now: dm_pool_zalloc failed");
+			return NULL;
+		}
+		time(now);
+		if (!dm_report_value_cache_set(rh, CACHE_ID_TIME_NOW, now)) {
+			log_error("_get_now: failed to cache current time");
+			return NULL;
+		}
+	} else
+		now = (time_t *) cached_obj;
+
+	return now;
+}
+
+/*
+ * Round the assembled time down to the coarsest field the user actually
+ * specified, e.g. "2015" zeroes month/day/time, "14:00" zeroes seconds.
+ * TIME_UNIT__END as a granularity means "nothing of that kind was given".
+ */
+static void _adjust_time_for_granularity(struct time_info *info, struct tm *tm, time_t *t)
+{
+	switch (info->min_abs_date_granularity) {
+	case TIME_UNIT_YEAR:
+		tm->tm_mon = 0;
+		/* fall through */
+	case TIME_UNIT_MONTH:
+		tm->tm_mday = 1;
+		break;
+	default:
+		break;
+	}
+
+	switch (info->min_abs_time_granularity) {
+	case TIME_UNIT_HOUR:
+		tm->tm_min = 0;
+		/* fall through */
+	case TIME_UNIT_MINUTE:
+		tm->tm_sec = 0;
+		break;
+	case TIME_UNIT__END:
+		/* no absolute time given at all - start at midnight */
+		if (info->min_rel_time_granularity == TIME_UNIT__END)
+			tm->tm_hour = tm->tm_min = tm->tm_sec = 0;
+		break;
+	default:
+		break;
+	}
+
+	/* relative day/week/month/year spec also snaps to midnight */
+	if ((info->min_abs_time_granularity == TIME_UNIT__END) &&
+	    (info->min_rel_time_granularity >= TIME_UNIT_DAY) &&
+	    (info->min_rel_time_granularity <= TIME_UNIT_YEAR))
+		tm->tm_hour = tm->tm_min = tm->tm_sec = 0;
+}
+
+#define SECS_PER_MINUTE 60
+#define SECS_PER_HOUR 3600
+#define SECS_PER_DAY 86400
+
+static int _days_in_month[12] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
+
+/* Gregorian leap-year test; 'year' must be the full year (e.g. 2016),
+ * not a struct tm tm_year offset from 1900. */
+static int _is_leap_year(long year)
+{
+	return (((year % 4==0) && (year % 100 != 0)) || (year % 400 == 0));
+}
+
+/* Days in 'month' (1..12) of full Gregorian 'year';
+ * accounts for February in leap years. */
+static int _get_days_in_month(long month, long year)
+{
+	return (month == 2 && _is_leap_year(year)) ? _days_in_month[month-1] + 1
+						   : _days_in_month[month-1];
+}
+
+/*
+ * Turn the assembled absolute time 'tm' minus relative offset 't' into a
+ * closed range [*t_result1, *t_result2], widening the upper (or lower)
+ * bound according to the coarsest granularity the user specified, then
+ * compensate for DST.
+ */
+static void _get_resulting_time_span(struct time_info *info,
+				     struct tm *tm, time_t t,
+				     time_t *t_result1, time_t *t_result2)
+{
+	time_t t1 = mktime(tm) - t;
+	time_t t2 = t1;
+	struct tm tmp;
+
+	if (info->min_abs_time_granularity != TIME_UNIT__END) {
+		if (info->min_abs_time_granularity == TIME_UNIT_MINUTE)
+			t2 += (SECS_PER_MINUTE - 1);
+		else if (info->min_abs_time_granularity == TIME_UNIT_HOUR)
+			t2 += (SECS_PER_HOUR - 1);
+	} else if (info->min_rel_time_granularity != TIME_UNIT__END) {
+		if (info->min_rel_time_granularity == TIME_UNIT_MINUTE)
+			t1 -= (SECS_PER_MINUTE + 1);
+		else if (info->min_rel_time_granularity == TIME_UNIT_HOUR)
+			t1 -= (SECS_PER_HOUR + 1);
+		else if ((info->min_rel_time_granularity >= TIME_UNIT_DAY) &&
+			 (info->min_rel_time_granularity <= TIME_UNIT_YEAR))
+			t2 += (SECS_PER_DAY - 1);
+	} else {
+		if (info->min_abs_date_granularity == TIME_UNIT_MONTH)
+			/* tm_year counts from 1900 - pass the full Gregorian
+			 * year so the leap-year test works (e.g. Feb 2000). */
+			t2 += (SECS_PER_DAY * _get_days_in_month(tm->tm_mon + 1, tm->tm_year + 1900) - 1);
+		else if (info->min_abs_date_granularity != TIME_UNIT__END)
+			t2 += (SECS_PER_DAY - 1);
+	}
+
+	/* Adjust for DST if needed. */
+	localtime_r(&t1, &tmp);
+	if (tmp.tm_isdst)
+		t1 -= SECS_PER_HOUR;
+	localtime_r(&t2, &tmp);
+	if (tmp.tm_isdst)
+		t2 -= SECS_PER_HOUR;
+
+	*t_result1 = t1;
+	*t_result2 = t2;
+}
+
+/*
+ * Final pass: fold the recognized items into a concrete time range.
+ * Plain numbers act as multipliers for the unit that follows them;
+ * absolute items fill in 'tm' while relative items accumulate an offset
+ * 't' into the past.  The resulting range is rendered into *data_out as
+ * "@<start>:@<end>" (seconds since the epoch) for later consumption by
+ * _lv_time_handler_get_dynamic_value.  Returns 1 on success.
+ */
+static int _translate_time_items(struct dm_report *rh, struct time_info *info,
+				 const char **data_out)
+{
+	struct time_item *ti, *ti_p = NULL;
+	long multiplier = 1;
+	struct tm tm_now;
+	time_id_t id;
+	char *end;
+	long num;
+	struct tm tm; /* absolute time */
+	time_t t = 0; /* offset into past before absolute time */
+	time_t t1, t2;
+	char buf[32];
+
+	localtime_r(info->now, &tm_now);
+	tm = tm_now;
+	tm.tm_isdst = 0; /* we'll adjust for dst later */
+	tm.tm_wday = tm.tm_yday = -1;
+
+	dm_list_iterate_items(ti, info->ti_list) {
+		id = ti->prop->id;
+
+		if (_is_time_num(id)) {
+			num = strtol(ti->s, &end, 10);
+			switch (id) {
+			case TIME_NUM_MULTIPLIER_NEGATIVE:
+				multiplier = -num;
+				break;
+			case TIME_NUM_MULTIPLIER:
+				multiplier = num;
+				break;
+			case TIME_NUM_DAY:
+				tm.tm_mday = num;
+				break;
+			case TIME_NUM_YEAR:
+				tm.tm_year = num - 1900; /* struct tm counts from 1900 */
+				break;
+			default:
+				break;
+			}
+		} else if (_is_time_month(id)) {
+			tm.tm_mon = id - TIME_MONTH__START - 1;
+		} else if (_is_time_label_date(id)) {
+			if (_is_time_weekday(id)) {
+				/* most recent occurrence of that weekday */
+				num = id - TIME_WEEKDAY__START - 1;
+				if (tm_now.tm_wday < num)
+					num = 7 - num + tm_now.tm_wday;
+				else
+					num = tm_now.tm_wday - num;
+				t += num * SECS_PER_DAY;
+			} else switch (id) {
+				case TIME_LABEL_DATE_YESTERDAY:
+					t += SECS_PER_DAY;
+					break;
+				case TIME_LABEL_DATE_TODAY:
+					/* Nothing to do here - we started with today. */
+					break;
+				default:
+					break;
+			}
+		} else if (_is_time_label_time(id)) {
+			switch (id) {
+			case TIME_LABEL_TIME_NOON:
+				tm.tm_hour = 12;
+				tm.tm_min = tm.tm_sec = 0;
+				break;
+			case TIME_LABEL_TIME_MIDNIGHT:
+				tm.tm_hour = tm.tm_min = tm.tm_sec = 0;
+				break;
+			default:
+				break;
+			}
+		} else if (_is_time_unit(id)) {
+			switch (id) {
+			case TIME_UNIT_SECOND:
+				tm.tm_sec = multiplier;
+				break;
+			case TIME_UNIT_SECOND_REL:
+				t += multiplier;
+				break;
+			case TIME_UNIT_MINUTE:
+				tm.tm_min = multiplier;
+				break;
+			case TIME_UNIT_MINUTE_REL:
+				t += (multiplier * SECS_PER_MINUTE);
+				break;
+			case TIME_UNIT_HOUR:
+				tm.tm_hour = multiplier;
+				break;
+			case TIME_UNIT_HOUR_REL:
+				t += (multiplier * SECS_PER_HOUR);
+				break;
+			case TIME_UNIT_AM:
+				/* ti_p is NULL when "am"/"pm" is the very
+				 * first item - guard before dereferencing. */
+				if (ti_p && (ti_p->prop->id == TIME_NUM_MULTIPLIER))
+					tm.tm_hour = multiplier;
+				break;
+			case TIME_UNIT_PM:
+				if (ti_p && _is_time_unit(ti_p->prop->id))
+					t -= 12 * SECS_PER_HOUR;
+				else if (ti_p && (ti_p->prop->id == TIME_NUM_MULTIPLIER))
+					tm.tm_hour = multiplier + 12;
+				break;
+			case TIME_UNIT_DAY:
+				t += multiplier * SECS_PER_DAY;
+				break;
+			case TIME_UNIT_WEEK:
+				t += multiplier * 7 * SECS_PER_DAY;
+				break;
+			case TIME_UNIT_MONTH:
+				/* if months > 12, convert to years first */
+				num = multiplier / 12;
+				tm.tm_year -= num;
+
+				num = multiplier % 12;
+				if (num > (tm.tm_mon + 1)) {
+					tm.tm_year--;
+					tm.tm_mon = 12 - num + tm.tm_mon;
+				} else
+					tm.tm_mon -= num;
+				break;
+			case TIME_UNIT_YEAR:
+				tm.tm_year -= multiplier;
+				break;
+			default:
+				break;
+			}
+		}
+
+		ti_p = ti;
+	}
+
+	_adjust_time_for_granularity(info, &tm, &t);
+	_get_resulting_time_span(info, &tm, t, &t1, &t2);
+
+	dm_pool_free(info->mem, info->ti_list);
+	info->ti_list = NULL;
+
+	/* cast: time_t is not guaranteed to be 'long' on all platforms */
+	if (dm_snprintf(buf, sizeof(buf), "@%ld:@%ld",
+			(long) t1, (long) t2) == -1) {
+		log_error("_translate_time_items: dm_snprintf failed");
+		return 0;
+	}
+
+	if (!(*data_out = dm_pool_strdup(info->mem, buf))) {
+		log_error("_translate_time_items: dm_pool_strdup failed");
+		return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Entry point for fuzzy date/time parsing: run the pre-parse, recognize,
+ * check and translate passes over 'data_in' and return the canonical
+ * "@<start>:@<end>" range string (pool-allocated), or NULL if the input
+ * is not a fuzzy time specification or parsing failed.
+ */
+static const char *_lv_time_handler_parse_fuzzy_name(struct dm_report *rh,
+						     struct dm_pool *mem,
+						     const char *data_in)
+{
+	const char *s = data_in;
+	const char *data_out = NULL;
+	/* granularity bounds start at their "unset" extremes */
+	struct time_info info = {.mem = mem,
+				 .ti_list = NULL,
+				 .now = _get_now(rh, mem),
+				 .min_abs_date_granularity = TIME_UNIT__END,
+				 .max_abs_date_granularity = TIME_UNIT__START,
+				 .min_abs_time_granularity = TIME_UNIT__END,
+				 .min_rel_time_granularity = TIME_UNIT__END};
+
+	if (!info.now)
+		goto_out;
+
+	/* recognize top-level parts - string/number/time/timezone? */
+	if (!_preparse_fuzzy_time(s, &info))
+		goto out;
+
+	/* recognize each part in more detail, also look at the context around if needed */
+	if (!_recognize_time_items(&info))
+		goto out;
+
+	/* check if the combination of items is allowed or whether it makes sense at all */
+	if (!_check_time_items(&info))
+		goto out;
+
+	/* translate items into final time range */
+	if (!_translate_time_items(rh, &info, &data_out))
+		goto out;
+out:
+	if (info.ti_list)
+		dm_pool_free(info.mem, info.ti_list);
+	return data_out;
+}
+
+/*
+ * Parse the canonical "@<start>:@<end>" string produced by
+ * _translate_time_items back into a pool-allocated time_t[2] range.
+ * Returns NULL on parse or allocation failure.
+ */
+static void *_lv_time_handler_get_dynamic_value(struct dm_report *rh,
+						struct dm_pool *mem,
+						const char *data_in)
+{
+	/* scan into longs: "%ld" with a time_t* is undefined behaviour on
+	 * platforms where time_t is not 'long' */
+	long t1, t2;
+	time_t *result;
+
+	if (sscanf(data_in, "@%ld:@%ld", &t1, &t2) != 2) {
+		log_error("Failed to get value for parsed time specification.");
+		return NULL;
+	}
+
+	if (!(result = dm_pool_alloc(mem, 2 * sizeof(time_t)))) {
+		log_error("Failed to allocate space to store time range.");
+		return NULL;
+	}
+
+	result[0] = (time_t) t1;
+	result[1] = (time_t) t2;
+
+	return result;
+}
+
+/*
+ * dm_report reserved-value handler for LV time fields.  Dispatches the
+ * requested action: fuzzy-name parsing or retrieving the dynamic time
+ * range value.  Returns 1 on success, 0 on failure, -1 for an
+ * unsupported action.
+ */
+static int lv_time_handler(struct dm_report *rh, struct dm_pool *mem,
+			   uint32_t field_num,
+			   dm_report_reserved_action_t action,
+			   const void *data_in, const void **data_out)
+{
+	*data_out = NULL;
+	if (!data_in)
+		return 1;
+
+	switch (action) {
+	case DM_REPORT_RESERVED_PARSE_FUZZY_NAME:
+		/* NULL *data_out simply means "not a fuzzy name" - not an error */
+		*data_out = _lv_time_handler_parse_fuzzy_name(rh, mem, data_in);
+		break;
+	case DM_REPORT_RESERVED_GET_DYNAMIC_VALUE:
+		if (!(*data_out = _lv_time_handler_get_dynamic_value(rh, mem, data_in)))
+			return 0;
+		break;
+	default:
+		return -1;
+	}
+
+	return 1;
+}
+
+/*
+ * Get type reserved value - the value returned is the direct value of that type.
+ */
+#define GET_TYPE_RESERVED_VALUE(id) _reserved_ ## id
+
+/*
+ * Get field reserved value - the value returned is always a pointer (const void *).
+ */
+#define GET_FIELD_RESERVED_VALUE(id) _reserved_ ## id.value
+
+/*
+ * Get first name assigned to the reserved value - this is the one that
+ * should be reported/displayed. All the other names assigned for the reserved
+ * value are synonyms recognized in selection criteria.
+ */
+#define GET_FIRST_RESERVED_NAME(id) _reserved_ ## id ## _names[0]
+
/*
* Reserved values and their assigned names.
* The first name is the one that is also used for reporting.
@@ -76,30 +1133,40 @@ static const int32_t _reserved_num_undef_32 = INT32_C(-1);
* - 'reserved_value_id_y' (for 1)
* - 'reserved_value_id_n' (for 0)
*/
-#define GET_TYPE_RESERVED_VALUE(id) _reserved_ ## id
-#define GET_FIELD_RESERVED_VALUE(id) _reserved_ ## id.value
-#define GET_FIRST_RESERVED_NAME(id) _reserved_ ## id ## _names[0]
-
#define NUM uint64_t
-
-#define TYPE_RESERVED_VALUE(type, id, desc, value, ...) \
+#define NUM_HND dm_report_reserved_handler
+#define HND (dm_report_reserved_handler)
+#define NOFLAG 0
+#define NAMED DM_REPORT_FIELD_RESERVED_VALUE_NAMED
+#define RANGE DM_REPORT_FIELD_RESERVED_VALUE_RANGE
+#define FUZZY DM_REPORT_FIELD_RESERVED_VALUE_FUZZY_NAMES
+#define DYNAMIC DM_REPORT_FIELD_RESERVED_VALUE_DYNAMIC_VALUE
+
+#define TYPE_RESERVED_VALUE(type, flags, id, desc, value, ...) \
static const char *_reserved_ ## id ## _names[] = { __VA_ARGS__, NULL}; \
static const type _reserved_ ## id = value;
-#define FIELD_RESERVED_VALUE(field_id, id, desc, value, ...) \
+#define FIELD_RESERVED_VALUE(flags, field_id, id, desc, value, ...) \
static const char *_reserved_ ## id ## _names[] = { __VA_ARGS__ , NULL}; \
static const struct dm_report_field_reserved_value _reserved_ ## id = {field_ ## field_id, value};
#define FIELD_RESERVED_BINARY_VALUE(field_id, id, desc, ...) \
- FIELD_RESERVED_VALUE(field_id, id ## _y, desc, &_one64, __VA_ARGS__, _str_yes) \
- FIELD_RESERVED_VALUE(field_id, id ## _n, desc, &_zero64, __VA_ARGS__, _str_no)
+ FIELD_RESERVED_VALUE(NAMED, field_id, id ## _y, desc, &_one64, __VA_ARGS__, _str_yes) \
+ FIELD_RESERVED_VALUE(NAMED, field_id, id ## _n, desc, &_zero64, __VA_ARGS__, _str_no)
#include "values.h"
#undef NUM
+#undef NUM_HND
+#undef HND
+#undef NOFLAG
+#undef NAMED
+#undef RANGE
#undef TYPE_RESERVED_VALUE
#undef FIELD_RESERVED_VALUE
#undef FIELD_RESERVED_BINARY_VALUE
+#undef FUZZY
+#undef DYNAMIC
/*
* Create array of reserved values to be registered with reporting code via
@@ -109,21 +1176,35 @@ static const int32_t _reserved_num_undef_32 = INT32_C(-1);
*/
#define NUM DM_REPORT_FIELD_TYPE_NUMBER
+#define NUM_HND DM_REPORT_FIELD_TYPE_NUMBER
+#define HND 0
+#define NOFLAG 0
+#define NAMED DM_REPORT_FIELD_RESERVED_VALUE_NAMED
+#define RANGE DM_REPORT_FIELD_RESERVED_VALUE_RANGE
+#define FUZZY DM_REPORT_FIELD_RESERVED_VALUE_FUZZY_NAMES
+#define DYNAMIC DM_REPORT_FIELD_RESERVED_VALUE_DYNAMIC_VALUE
-#define TYPE_RESERVED_VALUE(type, id, desc, value, ...) {type, &_reserved_ ## id, _reserved_ ## id ## _names, desc},
+#define TYPE_RESERVED_VALUE(type, flags, id, desc, value, ...) {type | flags, &_reserved_ ## id, _reserved_ ## id ## _names, desc},
-#define FIELD_RESERVED_VALUE(field_id, id, desc, value, ...) {DM_REPORT_FIELD_TYPE_NONE, &_reserved_ ## id, _reserved_ ## id ## _names, desc},
+#define FIELD_RESERVED_VALUE(flags, field_id, id, desc, value, ...) {DM_REPORT_FIELD_TYPE_NONE | flags, &_reserved_ ## id, _reserved_ ## id ## _names, desc},
#define FIELD_RESERVED_BINARY_VALUE(field_id, id, desc, ...) \
- FIELD_RESERVED_VALUE(field_id, id ## _y, desc, &_one64, __VA_ARGS__) \
- FIELD_RESERVED_VALUE(field_id, id ## _n, desc, &_zero64, __VA_ARGS__)
+ FIELD_RESERVED_VALUE(NAMED, field_id, id ## _y, desc, &_one64, __VA_ARGS__) \
+ FIELD_RESERVED_VALUE(NAMED, field_id, id ## _n, desc, &_zero64, __VA_ARGS__)
static const struct dm_report_reserved_value _report_reserved_values[] = {
#include "values.h"
- {0, NULL, NULL}
+ {0, NULL, NULL, NULL}
};
#undef NUM
+#undef NUM_HND
+#undef HND
+#undef NOFLAG
+#undef NAMED
+#undef RANGE
+#undef FUZZY
+#undef DYNAMIC
#undef TYPE_RESERVED_VALUE
#undef FIELD_RESERVED_VALUE
#undef FIELD_RESERVED_BINARY_VALUE
@@ -240,11 +1321,14 @@ struct _str_list_append_baton {
static int _str_list_append(const char *line, void *baton)
{
struct _str_list_append_baton *b = baton;
- const char *dup = dm_pool_strdup(b->mem, line);
- if (!dup)
+ const char *line2 = dm_pool_strdup(b->mem, line);
+
+ if (!line2)
return_0;
- if (!str_list_add(b->mem, b->result, dup))
+
+ if (!str_list_add(b->mem, b->result, line2))
return_0;
+
return 1;
}
@@ -297,7 +1381,6 @@ static int _cache_policy_disp(struct dm_report *rh, struct dm_pool *mem,
const void *data, void *private)
{
const struct lv_segment *seg = (const struct lv_segment *) data;
- const char *cache_policy_name;
if (seg_is_cache(seg))
seg = first_seg(seg->pool_lv);
@@ -305,16 +1388,12 @@ static int _cache_policy_disp(struct dm_report *rh, struct dm_pool *mem,
return _field_set_value(field, GET_FIRST_RESERVED_NAME(cache_policy_undef),
GET_FIELD_RESERVED_VALUE(cache_policy_undef));
- if (seg->policy_name) {
- if (!(cache_policy_name = dm_pool_strdup(mem, seg->policy_name))) {
- log_error("dm_pool_strdup failed");
- return 0;
- }
- return _field_set_value(field, cache_policy_name, NULL);
- } else {
- log_error(INTERNAL_ERROR "unexpected NULL policy name");
- return_0;
+ if (!seg->policy_name) {
+ log_error(INTERNAL_ERROR "Unexpected NULL policy name.");
+ return 0;
}
+
+ return _field_set_value(field, seg->policy_name, NULL);
}
static int _modules_disp(struct dm_report *rh, struct dm_pool *mem,
@@ -347,6 +1426,16 @@ static int _lvprofile_disp(struct dm_report *rh, struct dm_pool *mem,
return _field_set_value(field, "", NULL);
}
+/* Report the LV's lock_args string; empty string when the LV has none. */
+static int _lvlockargs_disp(struct dm_report *rh, struct dm_pool *mem,
+			    struct dm_report_field *field,
+			    const void *data, void *private)
+{
+	const struct logical_volume *lv = (const struct logical_volume *) data;
+	const char *repstr = lv->lock_args ? lv->lock_args : "";
+
+	return _string_disp(rh, mem, field, &repstr, private);
+}
+
static int _vgfmt_disp(struct dm_report *rh, struct dm_pool *mem,
struct dm_report_field *field,
const void *data, void *private)
@@ -380,8 +1469,8 @@ static int _lvkmaj_disp(struct dm_report *rh, struct dm_pool *mem __attribute__(
{
const struct lv_with_info_and_seg_status *lvdm = (const struct lv_with_info_and_seg_status *) data;
- if (lvdm->info && lvdm->info->exists && lvdm->info->major >= 0)
- return dm_report_field_int(rh, field, &lvdm->info->major);
+ if (lvdm->info.exists && lvdm->info.major >= 0)
+ return dm_report_field_int(rh, field, &lvdm->info.major);
return dm_report_field_int32(rh, field, &GET_TYPE_RESERVED_VALUE(num_undef_32));
}
@@ -392,8 +1481,8 @@ static int _lvkmin_disp(struct dm_report *rh, struct dm_pool *mem __attribute__(
{
const struct lv_with_info_and_seg_status *lvdm = (const struct lv_with_info_and_seg_status *) data;
- if (lvdm->info && lvdm->info->exists && lvdm->info->minor >= 0)
- return dm_report_field_int(rh, field, &lvdm->info->minor);
+ if (lvdm->info.exists && lvdm->info.minor >= 0)
+ return dm_report_field_int(rh, field, &lvdm->info.minor);
return dm_report_field_int32(rh, field, &GET_TYPE_RESERVED_VALUE(num_undef_32));
}
@@ -402,10 +1491,10 @@ static int _lvstatus_disp(struct dm_report *rh __attribute__((unused)), struct d
struct dm_report_field *field,
const void *data, void *private __attribute__((unused)))
{
- const struct logical_volume *lv = (const struct logical_volume *) data;
+ const struct lv_with_info_and_seg_status *lvdm = (const struct lv_with_info_and_seg_status *) data;
char *repstr;
- if (!(repstr = lv_attr_dup(mem, lv)))
+ if (!(repstr = lv_attr_dup_with_info_and_seg_status(mem, lvdm)))
return_0;
return _field_set_value(field, repstr, NULL);
@@ -611,6 +1700,106 @@ static int _origin_disp(struct dm_report *rh, struct dm_pool *mem,
return _field_set_value(field, "", NULL);
}
+/*
+ * Recursively collect the names of all ancestors of 'lv' into the baton's
+ * string list.  An ancestor is the origin of a snapshot COW LV, or the
+ * origin / external origin of a thin volume.  Returns 1 on success.
+ */
+static int _find_ancestors(struct _str_list_append_baton *ancestors,
+			   struct logical_volume *lv)
+{
+	struct logical_volume *ancestor_lv = NULL;
+	struct lv_segment *seg;
+
+	if (lv_is_cow(lv)) {
+		ancestor_lv = origin_from_cow(lv);
+	} else if (lv_is_thin_volume(lv)) {
+		seg = first_seg(lv);
+		if (seg->origin)
+			ancestor_lv = seg->origin;
+		else if (seg->external_lv)
+			ancestor_lv = seg->external_lv;
+	}
+
+	if (ancestor_lv) {
+		if (!_str_list_append(ancestor_lv->name, ancestors))
+			return_0;
+		/* walk up the chain until an LV with no ancestor is hit */
+		if (!_find_ancestors(ancestors, ancestor_lv))
+			return_0;
+	}
+
+	return 1;
+}
+
+/* Report field: string list of all ancestor LV names of 'data'. */
+static int _lvancestors_disp(struct dm_report *rh, struct dm_pool *mem,
+			     struct dm_report_field *field,
+			     const void *data, void *private)
+{
+	struct logical_volume *lv = (struct logical_volume *) data;
+	struct _str_list_append_baton ancestors;
+
+	ancestors.mem = mem;
+	if (!(ancestors.result = str_list_create(mem)))
+		return_0;
+
+	if (!_find_ancestors(&ancestors, lv)) {
+		/* release the partially built list before failing */
+		dm_pool_free(ancestors.mem, ancestors.result);
+		return_0;
+	}
+
+	return _field_set_string_list(rh, field, ancestors.result, private, 0);
+}
+
+/*
+ * Recursively collect the names of all descendants of 'lv' into the
+ * baton's string list.  Descendants are snapshot COWs of an origin LV,
+ * and thin volumes using 'lv' as their (external) origin.
+ * Returns 1 on success.
+ */
+static int _find_descendants(struct _str_list_append_baton *descendants,
+			     struct logical_volume *lv)
+{
+	struct logical_volume *descendant_lv;
+	const struct seg_list *sl;
+	struct lv_segment *seg;
+
+	if (lv_is_origin(lv)) {
+		dm_list_iterate_items_gen(seg, &lv->snapshot_segs, origin_list) {
+			if ((descendant_lv = seg->cow)) {
+				if (!_str_list_append(descendant_lv->name, descendants))
+					return_0;
+				if (!_find_descendants(descendants, descendant_lv))
+					return_0;
+			}
+		}
+	} else {
+		dm_list_iterate_items(sl, &lv->segs_using_this_lv) {
+			/* Reset each iteration - a stale pointer from a
+			 * previous match would be appended again for every
+			 * non-matching segment that follows. */
+			descendant_lv = NULL;
+			if (lv_is_thin_volume(sl->seg->lv)) {
+				seg = first_seg(sl->seg->lv);
+				if ((seg->origin == lv) || (seg->external_lv == lv))
+					descendant_lv = sl->seg->lv;
+			}
+
+			if (descendant_lv) {
+				if (!_str_list_append(descendant_lv->name, descendants))
+					return_0;
+				if (!_find_descendants(descendants, descendant_lv))
+					return_0;
+			}
+		}
+	}
+
+	return 1;
+}
+
+/* Report field: string list of all descendant LV names of 'data'. */
+static int _lvdescendants_disp(struct dm_report *rh, struct dm_pool *mem,
+			       struct dm_report_field *field,
+			       const void *data, void *private)
+{
+	struct logical_volume *lv = (struct logical_volume *) data;
+	struct _str_list_append_baton descendants;
+
+	descendants.mem = mem;
+	if (!(descendants.result = str_list_create(mem)))
+		return_0;
+
+	if (!_find_descendants(&descendants, lv)) {
+		/* release the partially built list before failing */
+		dm_pool_free(descendants.mem, descendants.result);
+		return_0;
+	}
+
+	return _field_set_string_list(rh, field, descendants.result, private, 0);
+}
+
static int _movepv_disp(struct dm_report *rh, struct dm_pool *mem __attribute__((unused)),
struct dm_report_field *field,
const void *data, void *private __attribute__((unused)))
@@ -643,7 +1832,7 @@ static int _size32_disp(struct dm_report *rh __attribute__((unused)), struct dm_
{
const uint32_t size = *(const uint32_t *) data;
const char *disp, *repstr;
- uint64_t *sortval;
+ double *sortval;
if (!*(disp = display_size_units(private, (uint64_t) size)))
return_0;
@@ -658,7 +1847,7 @@ static int _size32_disp(struct dm_report *rh __attribute__((unused)), struct dm_
return 0;
}
- *sortval = (uint64_t) size;
+ *sortval = (double) size;
return _field_set_value(field, repstr, sortval);
}
@@ -670,7 +1859,7 @@ static int _size64_disp(struct dm_report *rh __attribute__((unused)),
{
const uint64_t size = *(const uint64_t *) data;
const char *disp, *repstr;
- uint64_t *sortval;
+ double *sortval;
if (!*(disp = display_size_units(private, size)))
return_0;
@@ -680,12 +1869,12 @@ static int _size64_disp(struct dm_report *rh __attribute__((unused)),
return 0;
}
- if (!(sortval = dm_pool_alloc(mem, sizeof(uint64_t)))) {
+ if (!(sortval = dm_pool_alloc(mem, sizeof(double)))) {
log_error("dm_pool_alloc failed");
return 0;
}
- *sortval = size;
+ *sortval = (double) size;
return _field_set_value(field, repstr, sortval);
}
@@ -713,6 +1902,25 @@ static int _int32_disp(struct dm_report *rh, struct dm_pool *mem __attribute__((
return dm_report_field_int32(rh, field, data);
}
+/*
+ * Report field: thin pool "when full" policy - "error" or "queue" for
+ * thin pools (from the LV_ERROR_WHEN_FULL status bit), undefined for
+ * any other LV type.
+ */
+static int _lvwhenfull_disp(struct dm_report *rh, struct dm_pool *mem,
+			    struct dm_report_field *field,
+			    const void *data, void *private __attribute__((unused)))
+{
+	const struct logical_volume *lv = (const struct logical_volume *) data;
+
+	if (lv_is_thin_pool(lv)) {
+		if (lv->status & LV_ERROR_WHEN_FULL)
+			return _field_set_value(field, GET_FIRST_RESERVED_NAME(lv_when_full_error),
+						GET_FIELD_RESERVED_VALUE(lv_when_full_error));
+		else
+			return _field_set_value(field, GET_FIRST_RESERVED_NAME(lv_when_full_queue),
+						GET_FIELD_RESERVED_VALUE(lv_when_full_queue));
+	}
+
+	return _field_set_value(field, GET_FIRST_RESERVED_NAME(lv_when_full_undef),
+				GET_FIELD_RESERVED_VALUE(lv_when_full_undef));
+}
+
static int _lvreadahead_disp(struct dm_report *rh, struct dm_pool *mem,
struct dm_report_field *field,
const void *data, void *private __attribute__((unused)))
@@ -721,7 +1929,7 @@ static int _lvreadahead_disp(struct dm_report *rh, struct dm_pool *mem,
if (lv->read_ahead == DM_READ_AHEAD_AUTO)
return _field_set_value(field, GET_FIRST_RESERVED_NAME(lv_read_ahead_auto),
- &GET_FIELD_RESERVED_VALUE(lv_read_ahead_auto));
+ GET_FIELD_RESERVED_VALUE(lv_read_ahead_auto));
return _size32_disp(rh, mem, field, &lv->read_ahead, private);
}
@@ -733,10 +1941,10 @@ static int _lvkreadahead_disp(struct dm_report *rh, struct dm_pool *mem,
{
const struct lv_with_info_and_seg_status *lvdm = (const struct lv_with_info_and_seg_status *) data;
- if (!lvdm->info || !lvdm->info->exists)
+ if (!lvdm->info.exists)
return dm_report_field_int32(rh, field, &GET_TYPE_RESERVED_VALUE(num_undef_32));
- return _size32_disp(rh, mem, field, &lvdm->info->read_ahead, private);
+ return _size32_disp(rh, mem, field, &lvdm->info.read_ahead, private);
}
static int _vgsize_disp(struct dm_report *rh, struct dm_pool *mem,
@@ -749,16 +1957,207 @@ static int _vgsize_disp(struct dm_report *rh, struct dm_pool *mem,
return _size64_disp(rh, mem, field, &size, private);
}
+/*
+ * Segment length in extents minus any space occupied by reshape areas.
+ * NOTE(review): returns int while seg->len and the operands are 32-bit
+ * unsigned - confirm very large segments cannot overflow/truncate here.
+ */
+static int _seg_len(const struct lv_segment *seg)
+{
+	return seg->len - (((seg->area_count - seg->segtype->parity_devs) * seg->reshape_len) /
+			   (seg_is_any_raid10(seg) ? seg->data_copies : 1));
+}
+
+/*
+ * Report field: LV size.  For multi-area LV-backed segments the size is
+ * recomputed from the reshape-adjusted segment length so that hidden
+ * reshape space is not reported as usable size.
+ */
+static int _lvsize_disp(struct dm_report *rh, struct dm_pool *mem,
+			struct dm_report_field *field,
+			const void *data, void *private)
+{
+	const struct logical_volume *lv = (const struct logical_volume *) data;
+	const struct lv_segment *seg = first_seg(lv);
+	uint64_t size = lv->size;
+
+	if (seg_type(seg, 0) == AREA_LV &&
+	    seg->area_count > 1)
+		size = (uint64_t) _seg_len(seg) * lv->vg->extent_size;
+
+	return _size64_disp(rh, mem, field, &size, private);
+}
+
static int _segmonitor_disp(struct dm_report *rh, struct dm_pool *mem,
struct dm_report_field *field,
const void *data, void *private)
{
+ const struct lv_segment *seg = (const struct lv_segment *)data;
char *str;
- if (!(str = lvseg_monitor_dup(mem, (const struct lv_segment *)data)))
+ if (!(str = lvseg_monitor_dup(mem, seg)))
return_0;
- return _field_set_value(field, str, NULL);
+ if (*str)
+ return _field_set_value(field, str, NULL);
+
+ return _field_set_value(field, GET_FIRST_RESERVED_NAME(seg_monitor_undef),
+ GET_FIELD_RESERVED_VALUE(seg_monitor_undef));
+}
+
+static int _get_seg_used_stripes(const struct lv_segment *seg)
+{
+ uint32_t s;
+ uint32_t stripes = seg->area_count;
+
+ for (s = seg->area_count - 1; stripes && s; s--) {
+ if (seg_type(seg, s) == AREA_LV &&
+ seg_lv(seg, s)->status & LV_RESHAPE_REMOVED)
+ stripes--;
+ else
+ break;
+ }
+
+ return stripes;
+}
+
+static int _seg_stripes_disp(struct dm_report *rh, struct dm_pool *mem,
+ struct dm_report_field *field,
+ const void *data, void *private)
+{
+ uint32_t stripes;
+ const struct lv_segment *seg = (const struct lv_segment *)data;
+
+ if (seg_is_raid01(seg))
+ stripes = first_seg(seg_lv(seg, 0))->area_count;
+ else
+ stripes = _get_seg_used_stripes((const struct lv_segment *) data);
+
+ return dm_report_field_uint32(rh, field, &stripes);
+}
+
+static int _segdata_stripes_disp(struct dm_report *rh, struct dm_pool *mem,
+ struct dm_report_field *field,
+ const void *data, void *private)
+{
+ const struct lv_segment *seg = (const struct lv_segment *) data;
+ uint32_t stripes;
+
+ if (seg_is_raid01(seg))
+ stripes = first_seg(seg_lv(seg, 0))->area_count;
+ else
+ stripes = _get_seg_used_stripes(seg) - seg->segtype->parity_devs;
+
+ return dm_report_field_uint32(rh, field, &stripes);
+}
+
+static int _segreshape_len_disp(struct dm_report *rh, struct dm_pool *mem,
+ struct dm_report_field *field,
+ const void *data, void *private)
+{
+ const struct lv_segment *seg = (const struct lv_segment *) data;
+ uint32_t reshape_len = seg->reshape_len;
+
+ if (seg_is_raid(seg))
+ reshape_len *= seg->area_count;
+
+ if (reshape_len)
+ return dm_report_field_uint32(rh, field, &reshape_len);
+
+ return _field_set_value(field, "", &GET_TYPE_RESERVED_VALUE(num_undef_32));
+}
+
+static int _segdata_copies_disp(struct dm_report *rh, struct dm_pool *mem,
+ struct dm_report_field *field,
+ const void *data, void *private)
+{
+ const struct lv_segment *seg = (const struct lv_segment *) data;
+
+ if (seg->data_copies > 1)
+ return dm_report_field_uint32(rh, field, &seg->data_copies);
+
+ return _field_set_value(field, "", &GET_TYPE_RESERVED_VALUE(num_undef_32));
+}
+
+static int __segdata_offset_disp(struct dm_report *rh, struct dm_pool *mem,
+ struct dm_report_field *field,
+ const void *data, void *private, int new_data_offset)
+{
+ const struct lv_segment *seg = (const struct lv_segment *) data;
+ const char *what = "";
+
+ if (lv_is_raid_image(seg->lv) &&
+ !seg->le &&
+ (seg->reshape_len || !new_data_offset)) {
+ struct lv_list *lvl;
+ char *lv_name;
+
+ if ((lv_name = strdup(seg->lv->name))) {
+ char *p = strchr(lv_name, '_');
+
+ if (p) {
+ /* Handle duplicated sub LVs */
+ if (strstr(p, "_dup_"))
+ p = strchr(p + 5, '_');
+
+ if (p) {
+ *p = '\0';
+
+ if ((lvl = find_lv_in_vg(seg->lv->vg, lv_name))) {
+ if (seg_is_reshapable_raid(first_seg(lvl->lv))) {
+ uint64_t data_offset;
+
+ if (lv_raid_offset_and_sectors(lvl->lv, &data_offset, NULL)) {
+ if (new_data_offset && !lv_raid_image_in_sync(seg->lv))
+ data_offset = data_offset ? 0 :
+ seg->reshape_len * seg->lv->vg->extent_size;
+
+ return dm_report_field_uint64(rh, field, &data_offset);
+ }
+
+ what = _str_unknown;
+ }
+ }
+ }
+ }
+ }
+
+ }
+
+ return _field_set_value(field, what, &GET_TYPE_RESERVED_VALUE(num_undef_64));
+}
+
+static int _segdata_offset_disp(struct dm_report *rh, struct dm_pool *mem,
+ struct dm_report_field *field,
+ const void *data, void *private)
+{
+ return __segdata_offset_disp(rh, mem, field, data, private, 0);
+}
+
+static int _segnewdata_offset_disp(struct dm_report *rh, struct dm_pool *mem,
+ struct dm_report_field *field,
+ const void *data, void *private)
+{
+ return __segdata_offset_disp(rh, mem, field, data, private, 1);
+}
+
+static int _seg_parity_chunks_disp(struct dm_report *rh, struct dm_pool *mem,
+ struct dm_report_field *field,
+ const void *data, void *private)
+{
+ const struct lv_segment *seg = (const struct lv_segment *) data;
+ uint32_t parity_chunks = seg->segtype->parity_devs ?: seg->data_copies - 1;
+
+ if (parity_chunks) {
+ uint32_t s, resilient_sub_lvs = 0;
+
+ for (s = 0; s < seg->area_count; s++) {
+ if (seg_type(seg, s) == AREA_LV) {
+ struct lv_segment *seg1 = first_seg(seg_lv(seg, s));
+
+ if (seg1->segtype->parity_devs ||
+ seg1->data_copies > 1)
+ resilient_sub_lvs++;
+ }
+ }
+
+ if (resilient_sub_lvs && resilient_sub_lvs == seg->area_count)
+ parity_chunks++;
+
+ return dm_report_field_uint32(rh, field, &parity_chunks);
+ }
+
+ return _field_set_value(field, "", &GET_TYPE_RESERVED_VALUE(num_undef_32));
}
static int _segstart_disp(struct dm_report *rh, struct dm_pool *mem,
@@ -800,7 +2199,13 @@ static int _segsizepe_disp(struct dm_report *rh,
{
const struct lv_segment *seg = (const struct lv_segment *) data;
- return dm_report_field_uint32(rh, field, &seg->len);
+ if (seg) {
+ uint32_t len = (seg->area_count > 1) ? _seg_len(seg) : seg->len;
+
+ return dm_report_field_uint32(rh, field, &len);
+ }
+
+ return _field_set_value(field, "", &GET_TYPE_RESERVED_VALUE(num_undef_32));
}
static int _chunksize_disp(struct dm_report *rh, struct dm_pool *mem,
@@ -866,7 +2271,7 @@ static int _cachemode_disp(struct dm_report *rh, struct dm_pool *mem,
seg = first_seg(seg->pool_lv);
if (seg_is_cache_pool(seg)) {
- if (!(cachemode_str = get_cache_pool_cachemode_name(seg)))
+ if (!(cachemode_str = get_cache_mode_name(seg)))
return_0;
return dm_report_field_string(rh, field, &cachemode_str);
@@ -925,7 +2330,7 @@ static int _devsize_disp(struct dm_report *rh, struct dm_pool *mem,
struct dm_report_field *field,
const void *data, void *private)
{
- const struct device *dev = *(const struct device * const *) data;
+ struct device *dev = *(struct device * const *) data;
uint64_t size;
if (!dev || !dev->dev || !dev_get_size(dev, &size))
@@ -944,6 +2349,36 @@ static int _vgfree_disp(struct dm_report *rh, struct dm_pool *mem,
return _size64_disp(rh, mem, field, &freespace, private);
}
+static int _vgsystemid_disp(struct dm_report *rh, struct dm_pool *mem,
+ struct dm_report_field *field,
+ const void *data, void *private)
+{
+ const struct volume_group *vg = (const struct volume_group *) data;
+ const char *repstr = (vg->system_id && *vg->system_id) ? vg->system_id : vg->lvm1_system_id ? : "";
+
+ return _string_disp(rh, mem, field, &repstr, private);
+}
+
+static int _vglocktype_disp(struct dm_report *rh, struct dm_pool *mem,
+ struct dm_report_field *field,
+ const void *data, void *private)
+{
+ const struct volume_group *vg = (const struct volume_group *) data;
+ const char *repstr = vg->lock_type ? vg->lock_type : "";
+
+ return _string_disp(rh, mem, field, &repstr, private);
+}
+
+static int _vglockargs_disp(struct dm_report *rh, struct dm_pool *mem,
+ struct dm_report_field *field,
+ const void *data, void *private)
+{
+ const struct volume_group *vg = (const struct volume_group *) data;
+ const char *repstr = vg->lock_args ? vg->lock_args : "";
+
+ return _string_disp(rh, mem, field, &repstr, private);
+}
+
static int _uuid_disp(struct dm_report *rh __attribute__((unused)), struct dm_pool *mem,
struct dm_report_field *field,
const void *data, void *private __attribute__((unused)))
@@ -1021,7 +2456,7 @@ static int _vgmdacopies_disp(struct dm_report *rh, struct dm_pool *mem,
if (count == VGMETADATACOPIES_UNMANAGED)
return _field_set_value(field, GET_FIRST_RESERVED_NAME(vg_mda_copies_unmanaged),
- &GET_FIELD_RESERVED_VALUE(vg_mda_copies_unmanaged));
+ GET_FIELD_RESERVED_VALUE(vg_mda_copies_unmanaged));
return _uint32_disp(rh, mem, field, &count, private);
}
@@ -1318,7 +2753,7 @@ static int _lvtime_disp(struct dm_report *rh, struct dm_pool *mem,
char *repstr;
uint64_t *sortval;
- if (!(repstr = lv_time_dup(mem, lv)) ||
+ if (!(repstr = lv_time_dup(mem, lv, 0)) ||
!(sortval = dm_pool_alloc(mem, sizeof(uint64_t)))) {
log_error("Failed to allocate buffer for time.");
return 0;
@@ -1520,9 +2955,9 @@ static int _lvpermissions_disp(struct dm_report *rh, struct dm_pool *mem,
if (!lv_is_pvmove(lvdm->lv)) {
if (lvdm->lv->status & LVM_WRITE) {
- if (!lvdm->info->exists)
+ if (!lvdm->info.exists)
perms = _str_unknown;
- else if (lvdm->info->read_only)
+ else if (lvdm->info.read_only)
perms = GET_FIRST_RESERVED_NAME(lv_permissions_r_override);
else
perms = GET_FIRST_RESERVED_NAME(lv_permissions_rw);
@@ -1689,8 +3124,8 @@ static int _lvsuspended_disp(struct dm_report *rh, struct dm_pool *mem,
{
const struct lv_with_info_and_seg_status *lvdm = (const struct lv_with_info_and_seg_status *) data;
- if (lvdm->info->exists)
- return _binary_disp(rh, mem, field, lvdm->info->suspended, GET_FIRST_RESERVED_NAME(lv_suspended_y), private);
+ if (lvdm->info.exists)
+ return _binary_disp(rh, mem, field, lvdm->info.suspended, GET_FIRST_RESERVED_NAME(lv_suspended_y), private);
return _binary_undef_disp(rh, mem, field, private);
}
@@ -1701,8 +3136,8 @@ static int _lvlivetable_disp(struct dm_report *rh, struct dm_pool *mem,
{
const struct lv_with_info_and_seg_status *lvdm = (const struct lv_with_info_and_seg_status *) data;
- if (lvdm->info->exists)
- return _binary_disp(rh, mem, field, lvdm->info->live_table, GET_FIRST_RESERVED_NAME(lv_live_table_y), private);
+ if (lvdm->info.exists)
+ return _binary_disp(rh, mem, field, lvdm->info.live_table, GET_FIRST_RESERVED_NAME(lv_live_table_y), private);
return _binary_undef_disp(rh, mem, field, private);
}
@@ -1713,8 +3148,8 @@ static int _lvinactivetable_disp(struct dm_report *rh, struct dm_pool *mem,
{
const struct lv_with_info_and_seg_status *lvdm = (const struct lv_with_info_and_seg_status *) data;
- if (lvdm->info->exists)
- return _binary_disp(rh, mem, field, lvdm->info->inactive_table, GET_FIRST_RESERVED_NAME(lv_inactive_table_y), private);
+ if (lvdm->info.exists)
+ return _binary_disp(rh, mem, field, lvdm->info.inactive_table, GET_FIRST_RESERVED_NAME(lv_inactive_table_y), private);
return _binary_undef_disp(rh, mem, field, private);
}
@@ -1725,8 +3160,8 @@ static int _lvdeviceopen_disp(struct dm_report *rh, struct dm_pool *mem,
{
const struct lv_with_info_and_seg_status *lvdm = (const struct lv_with_info_and_seg_status *) data;
- if (lvdm->info->exists)
- return _binary_disp(rh, mem, field, lvdm->info->open_count, GET_FIRST_RESERVED_NAME(lv_device_open_y), private);
+ if (lvdm->info.exists)
+ return _binary_disp(rh, mem, field, lvdm->info.open_count, GET_FIRST_RESERVED_NAME(lv_device_open_y), private);
return _binary_undef_disp(rh, mem, field, private);
}
@@ -1747,7 +3182,8 @@ static int _lvhealthstatus_disp(struct dm_report *rh, struct dm_pool *mem,
struct dm_report_field *field,
const void *data, void *private)
{
- const struct logical_volume *lv = (const struct logical_volume *) data;
+ const struct lv_with_info_and_seg_status *lvdm = (const struct lv_with_info_and_seg_status *) data;
+ const struct logical_volume *lv = lvdm->lv;
const char *health = "";
uint64_t n;
@@ -1763,6 +3199,18 @@ static int _lvhealthstatus_disp(struct dm_report *rh, struct dm_pool *mem,
health = "mismatches exist";
} else if (lv->status & LV_WRITEMOSTLY)
health = "writemostly";
+ else if (lv->status & LV_RESHAPE_REMOVED)
+ health = "reshape removed";
+ } else if (lv_is_thin_pool(lv) && (lvdm->seg_status.type != SEG_STATUS_NONE)) {
+ if (lvdm->seg_status.type != SEG_STATUS_THIN_POOL)
+ return _field_set_value(field, GET_FIRST_RESERVED_NAME(health_undef),
+ GET_FIELD_RESERVED_VALUE(health_undef));
+ else if (lvdm->seg_status.thin_pool->fail)
+ health = "failed";
+ else if (lvdm->seg_status.thin_pool->out_of_data_space)
+ health = "out_of_data";
+ else if (lvdm->seg_status.thin_pool->read_only)
+ health = "metadata_read_only";
}
return _string_disp(rh, mem, field, &health, private);
@@ -1788,9 +3236,9 @@ static int _cache_ ## cache_status_field_name ## _disp (struct dm_report *rh, \
void *private) \
{ \
const struct lv_with_info_and_seg_status *lvdm = (const struct lv_with_info_and_seg_status *) data; \
- if (lvdm->seg_status->type != SEG_STATUS_CACHE) \
+ if (lvdm->seg_status.type != SEG_STATUS_CACHE) \
return _field_set_value(field, "", &GET_TYPE_RESERVED_VALUE(num_undef_64)); \
- return dm_report_field_uint64(rh, field, (void *) ((char *) lvdm->seg_status->status + offsetof(struct dm_status_cache, cache_status_field_name))); \
+ return dm_report_field_uint64(rh, field, &lvdm->seg_status.cache->cache_status_field_name); \
}
GENERATE_CACHE_STATUS_DISP_FN(total_blocks)
@@ -1813,6 +3261,7 @@ static struct volume_group _dummy_vg = {
.fid = &_dummy_fid,
.name = "",
.system_id = (char *) "",
+ .lvm1_system_id = (char *) "",
.pvs = DM_LIST_HEAD_INIT(_dummy_vg.pvs),
.lvs = DM_LIST_HEAD_INIT(_dummy_vg.lvs),
.tags = DM_LIST_HEAD_INIT(_dummy_vg.tags),
@@ -1827,7 +3276,7 @@ static void *_obj_get_vg(void *obj)
static void *_obj_get_lv(void *obj)
{
- return ((struct lvm_report_object *)obj)->lvdm->lv;
+ return (struct logical_volume *)((struct lvm_report_object *)obj)->lvdm->lv;
}
static void *_obj_get_lv_with_info_and_seg_status(void *obj)
@@ -1865,6 +3314,7 @@ static const struct dm_report_object_type _report_types[] = {
{ LVS, "Logical Volume", "lv_", _obj_get_lv },
{ LVSINFO, "Logical Volume Device Info", "lv_", _obj_get_lv_with_info_and_seg_status },
{ LVSSTATUS, "Logical Volume Device Status", "lv_", _obj_get_lv_with_info_and_seg_status },
+ { LVSINFOSTATUS, "Logical Volume Device Info and Status Combined", "lv_", _obj_get_lv_with_info_and_seg_status },
{ PVS, "Physical Volume", "pv_", _obj_get_pv },
{ LABEL, "Physical Volume Label", "pv_", _obj_get_label },
{ SEGS, "Logical Volume Segment", "seg_", _obj_get_seg },
@@ -1887,7 +3337,9 @@ static const struct dm_report_object_type _devtypes_report_types[] = {
#define BIN DM_REPORT_FIELD_TYPE_NUMBER
#define SIZ DM_REPORT_FIELD_TYPE_SIZE
#define PCT DM_REPORT_FIELD_TYPE_PERCENT
+#define TIM DM_REPORT_FIELD_TYPE_TIME
#define STR_LIST DM_REPORT_FIELD_TYPE_STRING_LIST
+#define SNUM DM_REPORT_FIELD_TYPE_NUMBER
#define FIELD(type, strct, sorttype, head, field, width, func, id, desc, writeable) \
{type, sorttype, offsetof(type_ ## strct, field), width, \
#id, head, &_ ## func ## _disp, desc},
@@ -1916,6 +3368,7 @@ static const struct dm_report_field_type _devtypes_fields[] = {
#undef BIN
#undef SIZ
#undef STR_LIST
+#undef SNUM
#undef FIELD
void *report_init(struct cmd_context *cmd, const char *format, const char *keys,
@@ -1957,25 +3410,37 @@ void *report_init(struct cmd_context *cmd, const char *format, const char *keys,
return rh;
}
+void *report_init_for_selection(struct cmd_context *cmd,
+ report_type_t *report_type,
+ const char *selection_criteria)
+{
+ return dm_report_init_with_selection(report_type, _report_types, _fields,
+ "", DEFAULT_REP_SEPARATOR,
+ DM_REPORT_OUTPUT_FIELD_UNQUOTED,
+ "", selection_criteria,
+ _report_reserved_values,
+ cmd);
+}
+
/*
* Create a row of data for an object
*/
-int report_object(void *handle, struct volume_group *vg,
- struct logical_volume *lv, struct physical_volume *pv,
- struct lv_segment *seg, struct pv_segment *pvseg,
- struct lvinfo *lvinfo, struct lv_seg_status *lv_seg_status,
- struct label *label)
+int report_object(void *handle, int selection_only, const struct volume_group *vg,
+ const struct logical_volume *lv, const struct physical_volume *pv,
+ const struct lv_segment *seg, const struct pv_segment *pvseg,
+ const struct lv_with_info_and_seg_status *lvdm,
+ const struct label *label)
{
+ struct selection_handle *sh = selection_only ? (struct selection_handle *) handle : NULL;
struct device dummy_device = { .dev = 0 };
struct label dummy_label = { .dev = &dummy_device };
- struct lv_with_info_and_seg_status lvdm = { .lv = lv, .info = lvinfo, .seg_status = lv_seg_status};
struct lvm_report_object obj = {
- .vg = vg,
- .lvdm = &lvdm,
- .pv = pv,
- .seg = seg,
- .pvseg = pvseg,
- .label = label ? : (pv ? pv_label(pv) : NULL)
+ .vg = (struct volume_group *) vg,
+ .lvdm = (struct lv_with_info_and_seg_status *) lvdm,
+ .pv = (struct physical_volume *) pv,
+ .seg = (struct lv_segment *) seg,
+ .pvseg = (struct pv_segment *) pvseg,
+ .label = (struct label *) (label ? : (pv ? pv_label(pv) : NULL))
};
/* FIXME workaround for pv_label going through cache; remove once struct
@@ -2000,7 +3465,8 @@ int report_object(void *handle, struct volume_group *vg,
if (!obj.vg && pv)
_dummy_fid.fmt = pv->fmt;
- return dm_report_object(handle, &obj);
+ return sh ? dm_report_object_is_selected(sh->selection_rh, &obj, 0, &sh->selected)
+ : dm_report_object(handle, &obj);
}
static int _report_devtype_single(void *handle, const dev_known_type_t *devtype)
diff --git a/lib/report/report.h b/lib/report/report.h
index 20fb6bd87..84c5f91d1 100644
--- a/lib/report/report.h
+++ b/lib/report/report.h
@@ -24,15 +24,41 @@ typedef enum {
LVS = 1,
LVSINFO = 2,
LVSSTATUS = 4,
- PVS = 8,
- VGS = 16,
- SEGS = 32,
- SEGSSTATUS = 64,
- PVSEGS = 128,
- LABEL = 256,
- DEVTYPES = 512
+ LVSINFOSTATUS = 8,
+ PVS = 16,
+ VGS = 32,
+ SEGS = 64,
+ SEGSSTATUS = 128,
+ PVSEGS = 256,
+ LABEL = 512,
+ DEVTYPES = 1024
} report_type_t;
+/*
+ * The "struct selection_handle" is used only for selection
+ * of items that should be processed further (not for display!).
+ *
+ * It consists of the selection reporting handle "selection_rh"
+ * used for the selection itself (not for display on output!).
+ * The items are reported directly in memory to a buffer and
+ * then compared against selection criteria. Once we know the
+ * result of the selection, the buffer is dropped!
+ *
+ * The "orig_report_type" is the original requested report type.
+ * The "report_type" is the reporting type actually used, which
+ * also takes into account the report types of the fields used
+ * in the selection criteria.
+ *
+ * The "selected" variable is used for propagating the result
+ * of the selection.
+ */
+struct selection_handle {
+ struct dm_report *selection_rh;
+ report_type_t orig_report_type;
+ report_type_t report_type;
+ int selected;
+};
+
struct field;
struct report_handle;
@@ -43,12 +69,19 @@ void *report_init(struct cmd_context *cmd, const char *format, const char *keys,
report_type_t *report_type, const char *separator,
int aligned, int buffered, int headings, int field_prefixes,
int quoted, int columns_as_rows, const char *selection);
+void *report_init_for_selection(struct cmd_context *cmd, report_type_t *report_type,
+ const char *selection);
+int report_for_selection(struct cmd_context *cmd,
+ struct selection_handle *sh,
+ struct physical_volume *pv,
+ struct volume_group *vg,
+ struct logical_volume *lv);
void report_free(void *handle);
-int report_object(void *handle, struct volume_group *vg,
- struct logical_volume *lv, struct physical_volume *pv,
- struct lv_segment *seg, struct pv_segment *pvseg,
- struct lvinfo *lvinfo, struct lv_seg_status *lv_seg_status,
- struct label *label);
+int report_object(void *handle, int selection_only, const struct volume_group *vg,
+ const struct logical_volume *lv, const struct physical_volume *pv,
+ const struct lv_segment *seg, const struct pv_segment *pvseg,
+ const struct lv_with_info_and_seg_status *lvdm,
+ const struct label *label);
int report_devtypes(void *handle);
int report_output(void *handle);
diff --git a/lib/report/values.h b/lib/report/values.h
index 4ea92bdd8..728760c5b 100644
--- a/lib/report/values.h
+++ b/lib/report/values.h
@@ -38,15 +38,15 @@
*/
/*
- * TYPE_RESERVED_VALUE(type, reserved_value_id, description, value, reserved_name, ...)
- * FIELD_RESERVED_VALUE(field_id, reserved_value_id, description, value, reserved_name, ...)
+ * TYPE_RESERVED_VALUE(type, flags, reserved_value_id, description, value, reserved_name, ...)
+ * FIELD_RESERVED_VALUE(field_id, flags, reserved_value_id, description, value, reserved_name, ...)
* FIELD_BINARY_RESERVED_VALUE(field_id, reserved_value_id, description, reserved_name for 1, ...)
*/
/* *INDENT-OFF* */
/* Per-type reserved values usable for all fields of certain type. */
-TYPE_RESERVED_VALUE(NUM, num_undef_64, "Reserved value for undefined numeric value.", UINT64_C(-1), "-1", "unknown", "undefined", "undef")
+TYPE_RESERVED_VALUE(NUM, NOFLAG, num_undef_64, "Reserved value for undefined numeric value.", UINT64_C(-1), "-1", "unknown", "undefined", "undef")
/* Reserved values for PV fields */
FIELD_RESERVED_BINARY_VALUE(pv_allocatable, pv_allocatable, "", "allocatable")
@@ -58,9 +58,9 @@ FIELD_RESERVED_BINARY_VALUE(vg_extendable, vg_extendable, "", "extendable")
FIELD_RESERVED_BINARY_VALUE(vg_exported, vg_exported, "", "exported")
FIELD_RESERVED_BINARY_VALUE(vg_partial, vg_partial, "", "partial")
FIELD_RESERVED_BINARY_VALUE(vg_clustered, vg_clustered, "", "clustered")
-FIELD_RESERVED_VALUE(vg_permissions, vg_permissions_rw, "", "writeable", "writeable", "rw", "read-write")
-FIELD_RESERVED_VALUE(vg_permissions, vg_permissions_r, "", "read-only", "read-only", "r", "ro")
-FIELD_RESERVED_VALUE(vg_mda_copies, vg_mda_copies_unmanaged, "", &GET_TYPE_RESERVED_VALUE(num_undef_64), "unmanaged")
+FIELD_RESERVED_VALUE(NAMED, vg_permissions, vg_permissions_rw, "", "writeable", "writeable", "rw", "read-write")
+FIELD_RESERVED_VALUE(NAMED, vg_permissions, vg_permissions_r, "", "read-only", "read-only", "r", "ro")
+FIELD_RESERVED_VALUE(NOFLAG, vg_mda_copies, vg_mda_copies_unmanaged, "", &GET_TYPE_RESERVED_VALUE(num_undef_64), "unmanaged")
/* Reserved values for LV fields */
FIELD_RESERVED_BINARY_VALUE(lv_initial_image_sync, lv_initial_image_sync, "", "initial image sync", "sync")
@@ -80,13 +80,19 @@ FIELD_RESERVED_BINARY_VALUE(lv_inactive_table, lv_inactive_table, "", "inactive
FIELD_RESERVED_BINARY_VALUE(lv_device_open, lv_device_open, "", "open")
FIELD_RESERVED_BINARY_VALUE(lv_skip_activation, lv_skip_activation, "", "skip activation", "skip")
FIELD_RESERVED_BINARY_VALUE(zero, zero, "", "zero")
-FIELD_RESERVED_VALUE(lv_permissions, lv_permissions_rw, "", "writeable", "writeable", "rw", "read-write")
-FIELD_RESERVED_VALUE(lv_permissions, lv_permissions_r, "", "read-only", "read-only", "r", "ro")
-FIELD_RESERVED_VALUE(lv_permissions, lv_permissions_r_override, "", "read-only-override", "read-only-override", "ro-override", "r-override", "R")
-FIELD_RESERVED_VALUE(lv_read_ahead, lv_read_ahead_auto, "", &GET_TYPE_RESERVED_VALUE(num_undef_64), "auto")
+FIELD_RESERVED_VALUE(NAMED, lv_permissions, lv_permissions_rw, "", "writeable", "writeable", "rw", "read-write")
+FIELD_RESERVED_VALUE(NAMED, lv_permissions, lv_permissions_r, "", "read-only", "read-only", "r", "ro")
+FIELD_RESERVED_VALUE(NAMED, lv_permissions, lv_permissions_r_override, "", "read-only-override", "read-only-override", "ro-override", "r-override", "R")
+FIELD_RESERVED_VALUE(NOFLAG, lv_read_ahead, lv_read_ahead_auto, "", &_siz_max, "auto")
+FIELD_RESERVED_VALUE(NAMED, lv_when_full, lv_when_full_error, "", "error", "error", "error when full", "error if no space")
+FIELD_RESERVED_VALUE(NAMED, lv_when_full, lv_when_full_queue, "", "queue", "queue", "queue when full", "queue if no space")
+FIELD_RESERVED_VALUE(NOFLAG, lv_when_full, lv_when_full_undef, "", "", "", "undefined")
+FIELD_RESERVED_VALUE(NAMED | RANGE | FUZZY | DYNAMIC, lv_time, lv_time_fuzzy, "", lv_time_handler, NULL)
/* Reserved values for SEG fields */
-FIELD_RESERVED_VALUE(cache_policy, cache_policy_undef, "", "", "", "undefined")
+FIELD_RESERVED_VALUE(NOFLAG, cache_policy, cache_policy_undef, "", "", "", "undefined")
+FIELD_RESERVED_VALUE(NOFLAG, seg_monitor, seg_monitor_undef, "", "", "", "undefined")
+FIELD_RESERVED_VALUE(NOFLAG, lv_health_status, health_undef, "", "", "", "undefined")
/* TODO the following 2 need STR_LIST support for reserved values
FIELD_RESERVED_VALUE(cache_settings, cache_settings_default, "", "default", "default")
FIELD_RESERVED_VALUE(cache_settings, cache_settings_undef, "", "undefined", "undefined") */
diff --git a/lib/thin/thin.c b/lib/thin/thin.c
index f27f24007..c5f04f37f 100644
--- a/lib/thin/thin.c
+++ b/lib/thin/thin.c
@@ -259,6 +259,7 @@ static int _thin_pool_add_target_line(struct dev_manager *dm,
uint32_t *pvmove_mirror_count __attribute__((unused)))
{
static int _no_discards = 0;
+ static int _no_error_if_no_space = 0;
char *metadata_dlid, *pool_dlid;
const struct lv_thin_message *lmsg;
const struct logical_volume *origin;
@@ -293,7 +294,8 @@ static int _thin_pool_add_target_line(struct dev_manager *dm,
return 0;
}
- if (!dm_tree_node_add_thin_pool_target(node, len, seg->transaction_id,
+ if (!dm_tree_node_add_thin_pool_target(node, len,
+ seg->transaction_id,
metadata_dlid, pool_dlid,
seg->chunk_size, seg->low_water_mark,
seg->zero_new_blocks ? 0 : 1))
@@ -314,6 +316,12 @@ static int _thin_pool_add_target_line(struct dev_manager *dm,
log_warn_suppress(_no_discards++, "WARNING: Thin pool target does "
"not support discards (needs kernel >= 3.4).");
+ if (attr & THIN_FEATURE_ERROR_IF_NO_SPACE)
+ dm_tree_node_set_thin_pool_error_if_no_space(node, lv_is_error_when_full(seg->lv));
+ else if (lv_is_error_when_full(seg->lv))
+ log_warn_suppress(_no_error_if_no_space++, "WARNING: Thin pool target does "
+ "not support error if no space (needs version >= 1.10).");
+
/*
* Add messages only for activation tree.
* Otherwise avoid checking for existence of suspended origin.
@@ -337,7 +345,7 @@ static int _thin_pool_add_target_line(struct dev_manager *dm,
*/
if (!lv_thin_pool_transaction_id(seg->lv, &transaction_id))
return_0; /* Thin pool should exist and work */
- if (transaction_id != seg->transaction_id) {
+ if ((transaction_id + 1) != seg->transaction_id) {
log_error("Can't create snapshot %s as origin %s is not suspended.",
lmsg->u.lv->name, origin->name);
return 0;
@@ -602,14 +610,18 @@ static int _thin_target_percent(void **target_state __attribute__((unused)),
uint64_t *total_denominator)
{
struct dm_status_thin *s;
+ uint64_t csize;
/* Status for thin device is in sectors */
if (!dm_get_status_thin(mem, params, &s))
return_0;
if (seg) {
- *percent = dm_make_percent(s->mapped_sectors, seg->lv->size);
- *total_denominator += seg->lv->size;
+ /* Pool allocates whole chunk so round-up to nearest one */
+ csize = first_seg(seg->pool_lv)->chunk_size;
+ csize = ((seg->lv->size + csize - 1) / csize) * csize;
+ *percent = dm_make_percent(s->mapped_sectors, csize);
+ *total_denominator += csize;
} else {
/* No lv_segment info here */
*percent = DM_PERCENT_INVALID;
@@ -638,7 +650,8 @@ static int _thin_target_present(struct cmd_context *cmd,
{ 1, 4, THIN_FEATURE_BLOCK_SIZE, "block_size" },
{ 1, 5, THIN_FEATURE_DISCARDS_NON_POWER_2, "discards_non_power_2" },
{ 1, 10, THIN_FEATURE_METADATA_RESIZE, "metadata_resize" },
- { 9, 11, THIN_FEATURE_EXTERNAL_ORIGIN_EXTEND, "external_origin_extend" },
+ { 1, 10, THIN_FEATURE_ERROR_IF_NO_SPACE, "error_if_no_space" },
+ { 1, 13, THIN_FEATURE_EXTERNAL_ORIGIN_EXTEND, "external_origin_extend" },
};
static const char _lvmconf[] = "global/thin_disabled_features";
@@ -674,7 +687,7 @@ static int _thin_target_present(struct cmd_context *cmd,
if (attributes) {
if (!_feature_mask) {
/* Support runtime lvm.conf changes, N.B. avoid 32 feature */
- if ((cn = find_config_tree_node(cmd, global_thin_disabled_features_CFG, NULL))) {
+ if ((cn = find_config_tree_array(cmd, global_thin_disabled_features_CFG, NULL))) {
for (cv = cn->v; cv; cv = cv->next) {
if (cv->type != DM_CFG_STRING) {
log_error("Ignoring invalid string in config file %s.",
@@ -753,7 +766,8 @@ int init_multiple_segtypes(struct cmd_context *cmd, struct segtype_library *segl
const char name[16];
uint32_t flags;
} reg_segtypes[] = {
- { &_thin_pool_ops, "thin-pool", SEG_THIN_POOL | SEG_CANNOT_BE_ZEROED | SEG_ONLY_EXCLUSIVE },
+ { &_thin_pool_ops, "thin-pool", SEG_THIN_POOL | SEG_CANNOT_BE_ZEROED |
+ SEG_ONLY_EXCLUSIVE | SEG_CAN_ERROR_WHEN_FULL },
/* FIXME Maybe use SEG_THIN_VOLUME instead of SEG_VIRTUAL */
{ &_thin_ops, "thin", SEG_THIN_VOLUME | SEG_VIRTUAL | SEG_ONLY_EXCLUSIVE }
};
diff --git a/lib/uuid/uuid.h b/lib/uuid/uuid.h
index d39ad8824..d87de7374 100644
--- a/lib/uuid/uuid.h
+++ b/lib/uuid/uuid.h
@@ -18,7 +18,6 @@
#define ID_LEN 32
-#include <inttypes.h>
#include <sys/types.h>
struct dm_pool;
diff --git a/libdaemon/Makefile.in b/libdaemon/Makefile.in
index dbe651680..0a085db9f 100644
--- a/libdaemon/Makefile.in
+++ b/libdaemon/Makefile.in
@@ -1,5 +1,5 @@
#
-# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
+# Copyright (C) 2004-2015 Red Hat, Inc. All rights reserved.
#
# This file is part of LVM2.
#
@@ -19,7 +19,10 @@ top_builddir = @top_builddir@
SUBDIRS += client
-ifeq ("@BUILD_LVMETAD@", "yes")
+ifeq (@BUILD_LVMETAD@,yes)
+ SUBDIRS += server
+server: client
+else ifeq (@BUILD_LVMPOLLD@,yes)
SUBDIRS += server
server: client
endif
diff --git a/libdaemon/client/config-util.c b/libdaemon/client/config-util.c
index e5f4e2205..023257e2f 100644
--- a/libdaemon/client/config-util.c
+++ b/libdaemon/client/config-util.c
@@ -12,13 +12,15 @@
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#define _REENTRANT
+
+#include "tool.h"
+
#include "daemon-io.h"
#include "dm-logging.h"
-#include <errno.h>
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
+#include <math.h> /* fabs() */
+#include <float.h> /* DBL_EPSILON */
int buffer_append_vf(struct buffer *buf, va_list ap)
{
@@ -36,7 +38,7 @@ int buffer_append_vf(struct buffer *buf, va_list ap)
goto fail;
}
keylen = strchr(next, '=') - next;
- if (strstr(next, "%d") || strstr(next, "%" PRId64)) {
+ if (strstr(next, "%d") || strstr(next, FMTd64)) {
value = va_arg(ap, int64_t);
if (dm_asprintf(&append, "%.*s= %" PRId64 "\n", keylen, next, value) < 0)
goto fail;
@@ -132,9 +134,9 @@ int set_flag(struct dm_config_tree *cft, struct dm_config_node *parent,
return 1;
}
-static void chain_node(struct dm_config_node *cn,
- struct dm_config_node *parent,
- struct dm_config_node *pre_sib)
+void chain_node(struct dm_config_node *cn,
+ struct dm_config_node *parent,
+ struct dm_config_node *pre_sib)
{
cn->parent = parent;
cn->sib = NULL;
@@ -206,6 +208,10 @@ struct dm_config_node *make_int_node(struct dm_config_tree *cft,
return cn;
}
+/*
+ * FIXME: return 1 even if VA list is empty and return the
+ * dm_config_node* result as output parameter
+ */
struct dm_config_node *config_make_nodes_v(struct dm_config_tree *cft,
struct dm_config_node *parent,
struct dm_config_node *pre_sib,
@@ -234,7 +240,7 @@ struct dm_config_node *config_make_nodes_v(struct dm_config_tree *cft,
key[fmt - next] = '\0';
fmt += 2;
- if (!strcmp(fmt, "%d") || !strcmp(fmt, "%" PRId64)) {
+ if (!strcmp(fmt, "%d") || !strcmp(fmt, FMTd64)) {
int64_t value = va_arg(ap, int64_t);
if (!(cn = make_int_node(cft, key, value, parent, pre_sib)))
return 0;
@@ -277,6 +283,60 @@ struct dm_config_node *config_make_nodes(struct dm_config_tree *cft,
return res;
}
+/* Test if the doubles are close enough to be considered equal */
+static int close_enough(double d1, double d2)
+{
+ return fabs(d1 - d2) < DBL_EPSILON;
+}
+
+static int compare_value(struct dm_config_value *a, struct dm_config_value *b)
+{
+ int r = 0;
+
+ if (a->type > b->type)
+ return 1;
+ if (a->type < b->type)
+ return -1;
+
+ switch (a->type) {
+ case DM_CFG_STRING: r = strcmp(a->v.str, b->v.str); break;
+ case DM_CFG_FLOAT: r = close_enough(a->v.f, b->v.f) ? 0 : (a->v.f > b->v.f) ? 1 : -1; break;
+ case DM_CFG_INT: r = (a->v.i == b->v.i) ? 0 : (a->v.i > b->v.i) ? 1 : -1; break;
+ case DM_CFG_EMPTY_ARRAY: return 0;
+ }
+
+ if (r == 0 && a->next && b->next)
+ r = compare_value(a->next, b->next);
+ return r;
+}
+
+int compare_config(struct dm_config_node *a, struct dm_config_node *b)
+{
+ int result = 0;
+ if (a->v && b->v)
+ result = compare_value(a->v, b->v);
+ if (a->v && !b->v)
+ result = 1;
+ if (!a->v && b->v)
+ result = -1;
+ if (a->child && b->child)
+ result = compare_config(a->child, b->child);
+
+ if (result) {
+ // DEBUGLOG("config inequality at %s / %s", a->key, b->key);
+ return result;
+ }
+
+ if (a->sib && b->sib)
+ result = compare_config(a->sib, b->sib);
+ if (a->sib && !b->sib)
+ result = 1;
+ if (!a->sib && b->sib)
+ result = -1;
+
+ return result;
+}
+
int buffer_realloc(struct buffer *buf, int needed)
{
char *new;
diff --git a/libdaemon/client/config-util.h b/libdaemon/client/config-util.h
index 47fab6bcb..0e7de7ead 100644
--- a/libdaemon/client/config-util.h
+++ b/libdaemon/client/config-util.h
@@ -15,10 +15,6 @@
#ifndef _LVM_DAEMON_CONFIG_UTIL_H
#define _LVM_DAEMON_CONFIG_UTIL_H
-#include "libdevmapper.h"
-
-#include <stdarg.h>
-
struct buffer {
int allocated;
int used;
@@ -37,11 +33,17 @@ int buffer_line(const char *line, void *baton);
int set_flag(struct dm_config_tree *cft, struct dm_config_node *parent,
const char *field, const char *flag, int want);
+void chain_node(struct dm_config_node *cn,
+ struct dm_config_node *parent,
+ struct dm_config_node *pre_sib);
+
struct dm_config_node *make_config_node(struct dm_config_tree *cft,
const char *key,
struct dm_config_node *parent,
struct dm_config_node *pre_sib);
+int compare_config(struct dm_config_node *a, struct dm_config_node *b);
+
struct dm_config_node *make_text_node(struct dm_config_tree *cft,
const char *key,
const char *value,
diff --git a/libdaemon/client/daemon-client.c b/libdaemon/client/daemon-client.c
index d37b96658..39a640108 100644
--- a/libdaemon/client/daemon-client.c
+++ b/libdaemon/client/daemon-client.c
@@ -12,17 +12,16 @@
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#define _REENTRANT
+
+#include "tool.h"
+
#include "daemon-io.h"
#include "daemon-client.h"
#include "dm-logging.h"
#include <sys/un.h>
#include <sys/socket.h>
-#include <string.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <assert.h>
-#include <errno.h> // ENOMEM
daemon_handle daemon_open(daemon_info i)
{
@@ -100,7 +99,13 @@ daemon_reply daemon_send(daemon_handle h, daemon_request rq)
{
struct buffer buffer;
daemon_reply reply = { 0 };
- assert(h.socket_fd >= 0);
+
+ if (h.socket_fd < 0) {
+ log_error(INTERNAL_ERROR "Daemon send: socket fd cannot be negative %d", h.socket_fd);
+ reply.error = EINVAL;
+ return reply;
+ }
+
buffer = rq.buffer;
if (!buffer.mem)
@@ -109,7 +114,12 @@ daemon_reply daemon_send(daemon_handle h, daemon_request rq)
return reply;
}
- assert(buffer.mem);
+ if (!buffer.mem) {
+ log_error(INTERNAL_ERROR "Daemon send: no memory available");
+ reply.error = ENOMEM;
+ return reply;
+ }
+
if (!buffer_write(h.socket_fd, &buffer))
reply.error = errno;
diff --git a/libdaemon/client/daemon-client.h b/libdaemon/client/daemon-client.h
index e1445c11b..7f8d2d4d7 100644
--- a/libdaemon/client/daemon-client.h
+++ b/libdaemon/client/daemon-client.h
@@ -15,7 +15,6 @@
#ifndef _LVM_DAEMON_CLIENT_H
#define _LVM_DAEMON_CLIENT_H
-#include "libdevmapper.h"
#include "config-util.h"
typedef struct {
diff --git a/libdaemon/client/daemon-io.c b/libdaemon/client/daemon-io.c
index 6d81ed86e..a9da68f42 100644
--- a/libdaemon/client/daemon-io.c
+++ b/libdaemon/client/daemon-io.c
@@ -12,13 +12,13 @@
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#define _REENTRANT
+
+#include "tool.h"
+
#include "daemon-io.h"
-#include "libdevmapper.h"
#include <errno.h>
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
/*
* Read a single message from a (socket) filedescriptor. Messages are delimited
@@ -49,9 +49,15 @@ int buffer_read(int fd, struct buffer *buffer) {
} else if (result == 0) {
errno = ECONNRESET;
return 0; /* we should never encounter EOF here */
- } else if (result < 0 && errno != EAGAIN && errno != EWOULDBLOCK && errno != EINTR)
+ } else if (result < 0 && ( errno == EAGAIN || errno == EWOULDBLOCK ||
+ errno == EINTR || errno == EIO)) {
+ fd_set in;
+ FD_ZERO(&in);
+ FD_SET(fd, &in);
+ /* ignore the result, this is just a glorified sleep */
+ select(FD_SETSIZE, &in, NULL, NULL, NULL);
+ } else if (result < 0)
return 0;
- /* TODO call select here if we encountered EAGAIN/EWOULDBLOCK/EINTR */
}
return 1;
@@ -60,8 +66,6 @@ int buffer_read(int fd, struct buffer *buffer) {
/*
* Write a buffer to a filedescriptor. Keep trying. Blocks (even on
* SOCK_NONBLOCK) until all of the write went through.
- *
- * TODO use select on EWOULDBLOCK/EAGAIN/EINTR to avoid useless spinning
*/
int buffer_write(int fd, const struct buffer *buffer) {
static const struct buffer _terminate = { .mem = (char *) "\n##\n", .used = 4 };
@@ -74,7 +78,14 @@ int buffer_write(int fd, const struct buffer *buffer) {
result = write(fd, use->mem + written, use->used - written);
if (result > 0)
written += result;
- else if (result < 0 && errno != EWOULDBLOCK && errno != EAGAIN && errno != EINTR)
+ else if (result < 0 && ( errno == EAGAIN || errno == EWOULDBLOCK ||
+ errno == EINTR || errno == EIO)) {
+ fd_set out;
+ FD_ZERO(&out);
+ FD_SET(fd, &out);
+ /* ignore the result, this is just a glorified sleep */
+ select(FD_SETSIZE, NULL, &out, NULL, NULL);
+ } else if (result < 0)
return 0; /* too bad */
}
}
diff --git a/libdaemon/client/daemon-io.h b/libdaemon/client/daemon-io.h
index b48accac0..482f03388 100644
--- a/libdaemon/client/daemon-io.h
+++ b/libdaemon/client/daemon-io.h
@@ -15,12 +15,6 @@
#ifndef _LVM_DAEMON_IO_H
#define _LVM_DAEMON_IO_H
-#include "configure.h"
-
-#define _REENTRANT
-#define _GNU_SOURCE
-#define _FILE_OFFSET_BITS 64
-
#include "config-util.h"
/* TODO function names */
diff --git a/libdaemon/server/daemon-log.c b/libdaemon/server/daemon-log.c
index 91900af1d..5ea5cdc76 100644
--- a/libdaemon/server/daemon-log.c
+++ b/libdaemon/server/daemon-log.c
@@ -1,7 +1,23 @@
+/*
+ * Copyright (C) 2011-2012 Red Hat, Inc.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#define _REENTRANT
+
+#include "tool.h"
+
#include "daemon-server.h"
#include "daemon-log.h"
+
#include <syslog.h>
-#include <assert.h>
struct backend {
int id;
@@ -20,6 +36,7 @@ static void log_syslog(log_state *s, void **state, int type, const char *message
switch (type) {
case DAEMON_LOG_INFO: prio = LOG_INFO; break;
case DAEMON_LOG_WARN: prio = LOG_WARNING; break;
+ case DAEMON_LOG_ERROR: prio = LOG_ERR; break;
case DAEMON_LOG_FATAL: prio = LOG_CRIT; break;
default: prio = LOG_DEBUG; break;
}
@@ -128,7 +145,9 @@ void daemon_log_multi(log_state *s, int type, const char *prefix, const char *ms
void daemon_log_enable(log_state *s, int outlet, int type, int enable)
{
- assert(type < 32);
+ if (type >= 32)
+ return;
+
if (enable)
s->log_config[type] |= outlet;
else
diff --git a/libdaemon/server/daemon-server.c b/libdaemon/server/daemon-server.c
index a8afcc1de..d9d60d1aa 100644
--- a/libdaemon/server/daemon-server.c
+++ b/libdaemon/server/daemon-server.c
@@ -10,6 +10,10 @@
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#define _REENTRANT
+
+#include "tool.h"
+
#include "daemon-io.h"
#include "daemon-server.h"
#include "daemon-log.h"
@@ -78,7 +82,28 @@ static void _exit_handler(int sig __attribute__((unused)))
# define SD_LISTEN_FDS_START 3
# define SD_FD_SOCKET_SERVER SD_LISTEN_FDS_START
-# include <stdio.h>
+static int _is_idle(daemon_state s)
+{
+ return s.idle && s.idle->is_idle && !s.threads->next;
+}
+
+static struct timeval *_get_timeout(daemon_state s)
+{
+ return s.idle ? s.idle->ptimeout : NULL;
+}
+
+static void _reset_timeout(daemon_state s)
+{
+ if (s.idle) {
+ s.idle->ptimeout->tv_sec = 1;
+ s.idle->ptimeout->tv_usec = 0;
+ }
+}
+
+static unsigned _get_max_timeouts(daemon_state s)
+{
+ return s.idle ? s.idle->max_timeouts : 0;
+}
static int _set_oom_adj(const char *oom_adj_path, int val)
{
@@ -221,9 +246,7 @@ static int _open_socket(daemon_state s)
goto error;
}
- /* Set Close-on-exec & non-blocking */
- if (fcntl(fd, F_SETFD, 1))
- fprintf(stderr, "setting CLOEXEC on socket fd %d failed: %s\n", fd, strerror(errno));
+ /* Set non-blocking */
if (fcntl(fd, F_SETFL, fcntl(fd, F_GETFL, 0) | O_NONBLOCK))
fprintf(stderr, "setting O_NONBLOCK on socket fd %d failed: %s\n", fd, strerror(errno));
@@ -240,12 +263,12 @@ static int _open_socket(daemon_state s)
}
/* Socket already exists. If it's stale, remove it. */
- if (stat(sockaddr.sun_path, &buf)) {
+ if (lstat(sockaddr.sun_path, &buf)) {
perror("stat failed");
goto error;
}
- if (S_ISSOCK(buf.st_mode)) {
+ if (!S_ISSOCK(buf.st_mode)) {
fprintf(stderr, "%s: not a socket\n", sockaddr.sun_path);
goto error;
}
@@ -470,6 +493,9 @@ static int handle_connect(daemon_state s)
if (client.socket_fd < 0)
return 0;
+ if (fcntl(client.socket_fd, F_SETFD, FD_CLOEXEC))
+ WARN(&s, "setting CLOEXEC on client socket fd %d failed", client.socket_fd);
+
if (!(ts = dm_malloc(sizeof(thread_state)))) {
if (close(client.socket_fd))
perror("close");
@@ -512,6 +538,8 @@ void daemon_start(daemon_state s)
int failed = 0;
log_state _log = { { 0 } };
thread_state _threads = { .next = NULL };
+ unsigned timeout_count = 0;
+ fd_set in;
/*
* Switch to C locale to avoid reading large locale-archive file used by
@@ -543,8 +571,10 @@ void daemon_start(daemon_state s)
* NB. Take care to not keep stale locks around. Best not exit(...)
* after this point.
*/
- if (dm_create_lockfile(s.pidfile) == 0)
+ if (dm_create_lockfile(s.pidfile) == 0) {
+ ERROR(&s, "Failed to acquire lock on %s. Already running?\n", s.pidfile);
exit(EXIT_ALREADYRUNNING);
+ }
(void) dm_prepare_selinux_context(NULL, 0);
}
@@ -569,29 +599,56 @@ void daemon_start(daemon_state s)
failed = 1;
}
+ /* Set Close-on-exec */
+ if (!failed && fcntl(s.socket_fd, F_SETFD, 1))
+ ERROR(&s, "setting CLOEXEC on socket fd %d failed: %s\n", s.socket_fd, strerror(errno));
+
/* Signal parent, letting them know we are ready to go. */
if (!s.foreground)
kill(getppid(), SIGTERM);
+ /*
+ * Use daemon_main for daemon-specific init and polling, or
+ * use daemon_init for daemon-specific init and generic lib polling.
+ */
+
+ if (s.daemon_main) {
+ if (!s.daemon_main(&s))
+ failed = 1;
+ goto out;
+ }
+
if (s.daemon_init)
if (!s.daemon_init(&s))
failed = 1;
while (!_shutdown_requested && !failed) {
- fd_set in;
+ _reset_timeout(s);
FD_ZERO(&in);
FD_SET(s.socket_fd, &in);
- if (select(FD_SETSIZE, &in, NULL, NULL, NULL) < 0 && errno != EINTR)
+ if (select(FD_SETSIZE, &in, NULL, NULL, _get_timeout(s)) < 0 && errno != EINTR)
perror("select error");
- if (FD_ISSET(s.socket_fd, &in))
+ if (FD_ISSET(s.socket_fd, &in)) {
+ timeout_count = 0;
if (!_shutdown_requested && !handle_connect(s))
ERROR(&s, "Failed to handle a client connection.");
+ }
+
reap(s, 0);
+
+ /* s.idle == NULL equals no shutdown on timeout */
+ if (_is_idle(s)) {
+ DEBUGLOG(&s, "timeout occured");
+ if (++timeout_count >= _get_max_timeouts(s)) {
+ INFO(&s, "Inactive for %d seconds. Exiting.", timeout_count);
+ break;
+ }
+ }
}
INFO(&s, "%s waiting for client threads to finish", s.name);
reap(s, 1);
-
+out:
/* If activated by systemd, do not unlink the socket - systemd takes care of that! */
if (!_systemd_activation && s.socket_fd >= 0)
if (unlink(s.socket_path))
diff --git a/libdaemon/server/daemon-server.h b/libdaemon/server/daemon-server.h
index a7673d455..b235edce6 100644
--- a/libdaemon/server/daemon-server.h
+++ b/libdaemon/server/daemon-server.h
@@ -35,6 +35,21 @@ typedef struct {
struct buffer buffer;
} response;
+struct timeval;
+
+/*
+ * is_idle: daemon implementation sets it to true when no background task
+ * is running
+ * max_timeouts: how many seconds do daemon allow to be idle before it shutdowns
+ * ptimeout: internal variable passed to select(). has to be reset to 1 second
+ * before each select
+ */
+typedef struct {
+ volatile unsigned is_idle;
+ unsigned max_timeouts;
+ struct timeval *ptimeout;
+} daemon_idle;
+
struct daemon_state;
/*
@@ -92,12 +107,17 @@ typedef struct daemon_state {
handle_request handler;
int (*daemon_init)(struct daemon_state *st);
int (*daemon_fini)(struct daemon_state *st);
+ int (*daemon_main)(struct daemon_state *st);
/* Global runtime info maintained by the framework. */
int socket_fd;
log_state *log;
struct thread_state *threads;
+
+ /* suport for shutdown on idle */
+ daemon_idle *idle;
+
void *private; /* the global daemon state */
} daemon_state;
diff --git a/libdm/.exported_symbols b/libdm/.exported_symbols
index 5bacc26e2..6000686ff 100644
--- a/libdm/.exported_symbols
+++ b/libdm/.exported_symbols
@@ -1,3 +1,12 @@
+dm_bounds_check_debug
+dm_dump_memory_debug
+dm_free_aux
dm_log
dm_log_with_errno
-dm_task_get_info
+dm_malloc_aux
+dm_malloc_aux_debug
+dm_realloc_aux
+dm_strdup_aux
+dm_task_get_info_with_deferred_remove
+dm_zalloc_aux
+dm_zalloc_aux_debug
diff --git a/libdm/.exported_symbols.Base b/libdm/.exported_symbols.Base
new file mode 100644
index 000000000..865a6a8c6
--- /dev/null
+++ b/libdm/.exported_symbols.Base
@@ -0,0 +1,287 @@
+dm_asprintf
+dm_basename
+dm_bit_and
+dm_bit_get_first
+dm_bit_get_next
+dm_bitset_create
+dm_bitset_destroy
+dm_bitset_equal
+dm_bit_union
+dm_bounds_check_debug
+dm_build_dm_name
+dm_build_dm_uuid
+dm_config_clone_node
+dm_config_clone_node_with_mem
+dm_config_create
+dm_config_create_node
+dm_config_create_value
+dm_config_destroy
+dm_config_find_bool
+dm_config_find_float
+dm_config_find_int
+dm_config_find_int64
+dm_config_find_node
+dm_config_find_str
+dm_config_find_str_allow_empty
+dm_config_flatten
+dm_config_from_string
+dm_config_get_custom
+dm_config_get_list
+dm_config_get_section
+dm_config_get_str
+dm_config_get_uint32
+dm_config_get_uint64
+dm_config_has_node
+dm_config_insert_cascaded_tree
+dm_config_maybe_section
+dm_config_memory
+dm_config_parent_name
+dm_config_parse
+dm_config_remove_cascaded_tree
+dm_config_remove_node
+dm_config_set_custom
+dm_config_tree_find_bool
+dm_config_tree_find_float
+dm_config_tree_find_int
+dm_config_tree_find_int64
+dm_config_tree_find_node
+dm_config_tree_find_str
+dm_config_tree_find_str_allow_empty
+dm_config_value_is_bool
+dm_config_write_node
+dm_config_write_node_out
+dm_config_write_one_node
+dm_config_write_one_node_out
+dm_cookie_supported
+dm_count_chars
+dm_create_dir
+dm_create_lockfile
+dm_daemon_is_running
+dm_device_get_name
+dm_device_has_holders
+dm_device_has_mounted_fs
+dm_dir
+dm_driver_version
+dm_dump_memory_debug
+dm_escaped_len
+dm_escape_double_quotes
+dm_fclose
+dm_format_dev
+dm_free_aux
+dm_get_library_version
+dm_get_name_mangling_mode
+dm_get_next_target
+dm_get_status_cache
+dm_get_status_raid
+dm_get_status_snapshot
+dm_get_status_thin
+dm_get_status_thin_pool
+dm_get_suspended_counter
+dm_hash_create
+dm_hash_destroy
+dm_hash_get_data
+dm_hash_get_first
+dm_hash_get_key
+dm_hash_get_next
+dm_hash_get_num_entries
+dm_hash_insert
+dm_hash_insert_binary
+dm_hash_iter
+dm_hash_lookup
+dm_hash_lookup_binary
+dm_hash_remove
+dm_hash_remove_binary
+dm_hash_wipe
+dm_is_dm_major
+dm_is_empty_dir
+dm_lib_exit
+dm_lib_init
+dm_lib_release
+dm_list_add
+dm_list_add_h
+dm_list_del
+dm_list_empty
+dm_list_end
+dm_list_first
+dm_list_init
+dm_list_last
+dm_list_move
+dm_list_next
+dm_list_prev
+dm_list_size
+dm_list_splice
+dm_list_start
+dm_log
+dm_log_init
+dm_log_init_verbose
+dm_log_is_non_default
+dm_log_with_errno
+dm_log_with_errno_init
+dm_make_percent
+dm_malloc_aux
+dm_malloc_aux_debug
+dm_mknodes
+dm_mountinfo_read
+dm_percent_to_float
+dm_pool_abandon_object
+dm_pool_alloc
+dm_pool_alloc_aligned
+dm_pool_begin_object
+dm_pool_create
+dm_pool_destroy
+dm_pool_empty
+dm_pool_end_object
+dm_pool_free
+dm_pool_grow_object
+dm_pool_lock
+dm_pool_locked
+dm_pool_strdup
+dm_pool_strndup
+dm_pool_unlock
+dm_pool_zalloc
+dm_prepare_selinux_context
+dm_realloc_aux
+dm_regex_create
+dm_regex_fingerprint
+dm_regex_match
+dm_report_compact_fields
+dm_report_field_int
+dm_report_field_int32
+dm_report_field_percent
+dm_report_field_set_value
+dm_report_field_string
+dm_report_field_string_list
+dm_report_field_string_list_unsorted
+dm_report_field_uint32
+dm_report_field_uint64
+dm_report_free
+dm_report_init
+dm_report_init_with_selection
+dm_report_object
+dm_report_object_is_selected
+dm_report_output
+dm_report_set_output_field_name_prefix
+dm_set_dev_dir
+dm_set_name_mangling_mode
+dm_set_selinux_context
+dm_set_sysfs_dir
+dm_set_uuid_prefix
+dm_snprintf
+dm_split_lvm_name
+dm_split_words
+dm_strdup_aux
+dm_strncpy
+dm_sysfs_dir
+dm_task_add_target
+dm_task_create
+dm_task_deferred_remove
+dm_task_destroy
+dm_task_enable_checks
+dm_task_get_deps
+dm_task_get_driver_version
+dm_task_get_info
+dm_task_get_info_with_deferred_remove
+dm_task_get_message_response
+dm_task_get_name
+dm_task_get_name_mangled
+dm_task_get_names
+dm_task_get_name_unmangled
+dm_task_get_read_ahead
+dm_task_get_uuid
+dm_task_get_uuid_mangled
+dm_task_get_uuid_unmangled
+dm_task_get_versions
+dm_task_no_flush
+dm_task_no_open_count
+dm_task_query_inactive_table
+dm_task_retry_remove
+dm_task_run
+dm_task_secure_data
+dm_task_set_add_node
+dm_task_set_cookie
+dm_task_set_event_nr
+dm_task_set_geometry
+dm_task_set_gid
+dm_task_set_major
+dm_task_set_major_minor
+dm_task_set_message
+dm_task_set_minor
+dm_task_set_mode
+dm_task_set_name
+dm_task_set_newname
+dm_task_set_newuuid
+dm_task_set_read_ahead
+dm_task_set_ro
+dm_task_set_sector
+dm_task_set_uid
+dm_task_set_uuid
+dm_task_skip_lockfs
+dm_task_suppress_identical_reload
+dm_task_update_nodes
+dm_tree_activate_children
+dm_tree_add_dev
+dm_tree_add_dev_with_udev_flags
+dm_tree_add_new_dev
+dm_tree_add_new_dev_with_udev_flags
+dm_tree_create
+dm_tree_deactivate_children
+dm_tree_find_node
+dm_tree_find_node_by_uuid
+dm_tree_free
+dm_tree_get_cookie
+dm_tree_children_use_uuid
+dm_tree_next_child
+dm_tree_node_add_cache_target
+dm_tree_node_add_crypt_target
+dm_tree_node_add_error_target
+dm_tree_node_add_linear_target
+dm_tree_node_add_mirror_target
+dm_tree_node_add_mirror_target_log
+dm_tree_node_add_null_area
+dm_tree_node_add_raid_target
+dm_tree_node_add_raid_target_with_params
+dm_tree_node_add_replicator_dev_target
+dm_tree_node_add_replicator_target
+dm_tree_node_add_snapshot_merge_target
+dm_tree_node_add_snapshot_origin_target
+dm_tree_node_add_snapshot_target
+dm_tree_node_add_striped_target
+dm_tree_node_add_target_area
+dm_tree_node_add_thin_pool_message
+dm_tree_node_add_thin_pool_target
+dm_tree_node_add_thin_target
+dm_tree_node_add_zero_target
+dm_tree_node_get_context
+dm_tree_node_get_info
+dm_tree_node_get_name
+dm_tree_node_get_uuid
+dm_tree_node_num_children
+dm_tree_node_set_callback
+dm_tree_node_set_presuspend_node
+dm_tree_node_set_read_ahead
+dm_tree_node_set_thin_external_origin
+dm_tree_node_set_thin_pool_discard
+dm_tree_node_set_thin_pool_error_if_no_space
+dm_tree_node_set_udev_flags
+dm_tree_node_size_changed
+dm_tree_preload_children
+dm_tree_retry_remove
+dm_tree_set_cookie
+dm_tree_set_optional_uuid_suffixes
+dm_tree_skip_lockfs
+dm_tree_suspend_children
+dm_tree_use_no_flush_suspend
+dm_udev_complete
+dm_udev_create_cookie
+dm_udev_get_checking
+dm_udev_get_sync_support
+dm_udev_set_checking
+dm_udev_set_sync_support
+dm_udev_wait
+dm_unescape_colons_and_at_signs
+dm_unescape_double_quotes
+dm_units_to_factor
+dm_uuid_prefix
+dm_vasprintf
+dm_zalloc_aux
+dm_zalloc_aux_debug
diff --git a/libdm/.exported_symbols.DM_1_02_100 b/libdm/.exported_symbols.DM_1_02_100
new file mode 100644
index 000000000..00d7d5b10
--- /dev/null
+++ b/libdm/.exported_symbols.DM_1_02_100
@@ -0,0 +1,2 @@
+dm_config_value_set_format_flags
+dm_config_value_get_format_flags
diff --git a/libdm/.exported_symbols.DM_1_02_101 b/libdm/.exported_symbols.DM_1_02_101
new file mode 100644
index 000000000..75089ba8b
--- /dev/null
+++ b/libdm/.exported_symbols.DM_1_02_101
@@ -0,0 +1,2 @@
+dm_report_value_cache_set
+dm_report_value_cache_get
diff --git a/libdm/.exported_symbols.DM_1_02_103 b/libdm/.exported_symbols.DM_1_02_103
new file mode 100644
index 000000000..5dea5ea94
--- /dev/null
+++ b/libdm/.exported_symbols.DM_1_02_103
@@ -0,0 +1,7 @@
+dm_bounds_check_wrapper
+dm_dump_memory_wrapper
+dm_free_wrapper
+dm_malloc_wrapper
+dm_realloc_wrapper
+dm_strdup_wrapper
+dm_zalloc_wrapper
diff --git a/libdm/.exported_symbols.DM_1_02_104 b/libdm/.exported_symbols.DM_1_02_104
new file mode 100644
index 000000000..712fcf2d5
--- /dev/null
+++ b/libdm/.exported_symbols.DM_1_02_104
@@ -0,0 +1,77 @@
+dm_report_column_headings
+dm_size_to_string
+dm_stats_bind_devno
+dm_stats_bind_name
+dm_stats_bind_uuid
+dm_stats_buffer_destroy
+dm_stats_clear_region
+dm_stats_create
+dm_stats_create_region
+dm_stats_delete_region
+dm_stats_destroy
+dm_stats_get_area_start
+dm_stats_get_average_queue_size
+dm_stats_get_average_rd_wait_time
+dm_stats_get_average_request_size
+dm_stats_get_average_wait_time
+dm_stats_get_average_wr_wait_time
+dm_stats_get_current_area
+dm_stats_get_current_area_len
+dm_stats_get_current_area_start
+dm_stats_get_current_nr_areas
+dm_stats_get_current_region
+dm_stats_get_current_region_area_len
+dm_stats_get_current_region_aux_data
+dm_stats_get_current_region_len
+dm_stats_get_current_region_program_id
+dm_stats_get_current_region_start
+dm_stats_get_io_in_progress
+dm_stats_get_io_nsecs
+dm_stats_get_nr_areas
+dm_stats_get_nr_regions
+dm_stats_get_rd_merges_per_sec
+dm_stats_get_read_nsecs
+dm_stats_get_reads
+dm_stats_get_read_sectors
+dm_stats_get_read_sectors_per_sec
+dm_stats_get_reads_merged
+dm_stats_get_reads_per_sec
+dm_stats_get_region_area_len
+dm_stats_get_region_aux_data
+dm_stats_get_region_len
+dm_stats_get_region_nr_areas
+dm_stats_get_region_program_id
+dm_stats_get_region_start
+dm_stats_get_sampling_interval_ms
+dm_stats_get_sampling_interval_ns
+dm_stats_get_service_time
+dm_stats_get_throughput
+dm_stats_get_total_read_nsecs
+dm_stats_get_total_write_nsecs
+dm_stats_get_utilization
+dm_stats_get_weighted_io_nsecs
+dm_stats_get_write_nsecs
+dm_stats_get_writes
+dm_stats_get_write_sectors
+dm_stats_get_write_sectors_per_sec
+dm_stats_get_writes_merged
+dm_stats_get_writes_per_sec
+dm_stats_get_wr_merges_per_sec
+dm_stats_list
+dm_stats_populate
+dm_stats_print_region
+dm_stats_region_present
+dm_stats_set_program_id
+dm_stats_set_sampling_interval_ms
+dm_stats_set_sampling_interval_ns
+dm_stats_walk_end
+dm_stats_walk_next
+dm_stats_walk_next_region
+dm_stats_walk_start
+dm_task_get_ioctl_timestamp
+dm_task_set_record_timestamp
+dm_timestamp_alloc
+dm_timestamp_compare
+dm_timestamp_delta
+dm_timestamp_destroy
+dm_timestamp_get
diff --git a/libdm/.exported_symbols.DM_1_02_105 b/libdm/.exported_symbols.DM_1_02_105
new file mode 100644
index 000000000..b1556fa77
--- /dev/null
+++ b/libdm/.exported_symbols.DM_1_02_105
@@ -0,0 +1,4 @@
+dm_report_is_empty
+dm_stats_get_area_offset
+dm_stats_get_current_area_offset
+dm_timestamp_copy
diff --git a/libdm/.exported_symbols.DM_1_02_106 b/libdm/.exported_symbols.DM_1_02_106
new file mode 100644
index 000000000..0ce8924c9
--- /dev/null
+++ b/libdm/.exported_symbols.DM_1_02_106
@@ -0,0 +1 @@
+dm_message_supports_precise_timestamps
diff --git a/libdm/.exported_symbols.DM_1_02_97 b/libdm/.exported_symbols.DM_1_02_97
new file mode 100644
index 000000000..dcc513acb
--- /dev/null
+++ b/libdm/.exported_symbols.DM_1_02_97
@@ -0,0 +1 @@
+dm_task_get_info
diff --git a/libdm/.exported_symbols.DM_1_02_98 b/libdm/.exported_symbols.DM_1_02_98
new file mode 100644
index 000000000..f90bcef4d
--- /dev/null
+++ b/libdm/.exported_symbols.DM_1_02_98
@@ -0,0 +1 @@
+dm_task_get_errno
diff --git a/libdm/.exported_symbols.DM_1_02_99 b/libdm/.exported_symbols.DM_1_02_99
new file mode 100644
index 000000000..e586a5c52
--- /dev/null
+++ b/libdm/.exported_symbols.DM_1_02_99
@@ -0,0 +1 @@
+dm_tree_node_set_thin_pool_read_only
diff --git a/libdm/Makefile.in b/libdm/Makefile.in
index e50b174af..1b924cef3 100644
--- a/libdm/Makefile.in
+++ b/libdm/Makefile.in
@@ -25,6 +25,8 @@ SOURCES =\
libdm-deptree.c \
libdm-string.c \
libdm-report.c \
+ libdm-timestamp.c \
+ libdm-stats.c \
libdm-config.c \
mm/dbg_malloc.c \
mm/pool.c \
@@ -33,11 +35,7 @@ SOURCES =\
regex/ttree.c \
$(interface)/libdm-iface.c
-INCLUDES = -I$(srcdir)/$(interface) -I$(srcdir)
-ifeq ("@VALGRIND_POOL@", "yes")
-INCLUDES += @VALGRIND_CFLAGS@
-endif
-CFLAGS += $(UDEV_CFLAGS)
+INCLUDES = -I$(srcdir)/$(interface)
ifeq ("@STATIC_LINK@", "yes")
LIB_STATIC = $(interface)/libdevmapper.a
@@ -45,7 +43,7 @@ endif
LIB_SHARED = $(interface)/libdevmapper.$(LIB_SUFFIX)
LIB_VERSION = $(LIB_VERSION_DM)
-TARGETS += libdevmapper.$(LIB_SUFFIX) libdevmapper.$(LIB_SUFFIX).$(LIB_VERSION)
+TARGETS = libdevmapper.$(LIB_SUFFIX) libdevmapper.$(LIB_SUFFIX).$(LIB_VERSION)
CFLOW_LIST = $(SOURCES)
CFLOW_LIST_TARGET = libdevmapper.cflow
@@ -55,10 +53,9 @@ EXPORTED_FN_PREFIX = dm
include $(top_builddir)/make.tmpl
-DEFS += -DDM_DEVICE_UID=@DM_DEVICE_UID@ -DDM_DEVICE_GID=@DM_DEVICE_GID@ \
- -DDM_DEVICE_MODE=@DM_DEVICE_MODE@
+CFLAGS += $(UDEV_CFLAGS) $(VALGRIND_CFLAGS)
-LIBS += $(SELINUX_LIBS) $(UDEV_LIBS) $(PTHREAD_LIBS)
+LIBS += $(SELINUX_LIBS) $(UDEV_LIBS) $(PTHREAD_LIBS) $(M_LIBS)
device-mapper: all
@@ -99,4 +96,4 @@ install_ioctl_static: $(LIB_STATIC)
$(INSTALL_DATA) -D $< $(usrlibdir)/$(<F)
CLEAN_TARGETS += ioctl/libdevmapper.a
-DISTCLEAN_TARGETS += libdevmapper.pc .exported_symbols_generated
+DISTCLEAN_TARGETS += libdevmapper.pc
diff --git a/libdm/ioctl/libdm-iface.c b/libdm/ioctl/libdm-iface.c
index 4c37a539b..fe0bffacd 100644
--- a/libdm/ioctl/libdm-iface.c
+++ b/libdm/ioctl/libdm-iface.c
@@ -68,6 +68,7 @@ static unsigned _dm_version = DM_VERSION_MAJOR;
static unsigned _dm_version_minor = 0;
static unsigned _dm_version_patchlevel = 0;
static int _log_suppress = 0;
+static struct dm_timestamp *_dm_ioctl_timestamp = NULL;
/*
* If the kernel dm driver only supports one major number
@@ -245,7 +246,7 @@ static int _control_exists(const char *control, uint32_t major, uint32_t minor)
return -1;
}
- if (major && buf.st_rdev != MKDEV((dev_t)major, minor)) {
+ if (major && buf.st_rdev != MKDEV((dev_t)major, (dev_t)minor)) {
log_verbose("%s: Wrong device number: (%u, %u) instead of "
"(%u, %u)", control,
MAJOR(buf.st_mode), MINOR(buf.st_mode),
@@ -288,7 +289,7 @@ static int _create_control(const char *control, uint32_t major, uint32_t minor)
(void) dm_prepare_selinux_context(control, S_IFCHR);
old_umask = umask(DM_CONTROL_NODE_UMASK);
if (mknod(control, S_IFCHR | S_IRUSR | S_IWUSR,
- MKDEV((dev_t)major, minor)) < 0) {
+ MKDEV((dev_t)major, (dev_t)minor)) < 0) {
log_sys_error("mknod", control);
(void) dm_prepare_selinux_context(NULL, 0);
return 0;
@@ -571,8 +572,9 @@ int dm_check_version(void)
dm_get_library_version(libversion, sizeof(libversion));
bad:
- log_error("Incompatible libdevmapper %s%s and kernel driver %s",
- libversion, compat, dmversion);
+ log_error("Incompatible libdevmapper %s%s and kernel driver %s.",
+ *libversion ? libversion : "(unknown version)", compat,
+ *dmversion ? dmversion : "(unknown version)");
_version_ok = 0;
return 0;
@@ -602,6 +604,20 @@ static int dm_inactive_supported(void)
return inactive_supported;
}
+int dm_message_supports_precise_timestamps(void)
+{
+ /*
+ * 4.32.0 supports "precise_timestamps" and "histogram:" options
+ * to @stats_create messages but lacks the ability to report
+ * these properties via a subsequent @stats_list: require at
+ * least 4.33.0 in order to use these features.
+ */
+ if (dm_check_version() && _dm_version >= 4)
+ if (_dm_version_minor >= 33)
+ return 1;
+ return 0;
+}
+
void *dm_get_next_target(struct dm_task *dmt, void *next,
uint64_t *start, uint64_t *length,
char **target_type, char **params)
@@ -665,7 +681,13 @@ int dm_format_dev(char *buf, int bufsize, uint32_t dev_major,
return 1;
}
+#if defined(__GNUC__)
+int dm_task_get_info_v1_02_97(struct dm_task *dmt, struct dm_info *info);
+DM_EXPORTED_SYMBOL(dm_task_get_info, 1_02_97);
+int dm_task_get_info_v1_02_97(struct dm_task *dmt, struct dm_info *info)
+#else
int dm_task_get_info(struct dm_task *dmt, struct dm_info *info)
+#endif
{
if (!dmt->dmi.v4)
return 0;
@@ -682,6 +704,7 @@ int dm_task_get_info(struct dm_task *dmt, struct dm_info *info)
info->inactive_table = dmt->dmi.v4->flags & DM_INACTIVE_PRESENT_FLAG ?
1 : 0;
info->deferred_remove = dmt->dmi.v4->flags & DM_DEFERRED_REMOVE;
+ info->internal_suspend = (dmt->dmi.v4->flags & DM_INTERNAL_SUSPEND_FLAG) ? 1 : 0;
info->target_count = dmt->dmi.v4->target_count;
info->open_count = dmt->dmi.v4->open_count;
info->event_nr = dmt->dmi.v4->event_nr;
@@ -911,6 +934,24 @@ int dm_task_set_event_nr(struct dm_task *dmt, uint32_t event_nr)
return 1;
}
+int dm_task_set_record_timestamp(struct dm_task *dmt)
+{
+ if (!_dm_ioctl_timestamp)
+ _dm_ioctl_timestamp = dm_timestamp_alloc();
+
+ if (!_dm_ioctl_timestamp)
+ return_0;
+
+ dmt->record_timestamp = 1;
+
+ return 1;
+}
+
+struct dm_timestamp *dm_task_get_ioctl_timestamp(struct dm_task *dmt)
+{
+ return dmt->record_timestamp ? _dm_ioctl_timestamp : NULL;
+}
+
struct target *create_target(uint64_t start, uint64_t len, const char *type,
const char *params)
{
@@ -1141,7 +1182,7 @@ static struct dm_ioctl *_flatten(struct dm_task *dmt, unsigned repeat_count)
}
dmi->flags |= DM_PERSISTENT_DEV_FLAG;
- dmi->dev = MKDEV((dev_t)dmt->major, dmt->minor);
+ dmi->dev = MKDEV((dev_t)dmt->major, (dev_t)dmt->minor);
}
/* Does driver support device number referencing? */
@@ -1708,6 +1749,9 @@ static struct dm_ioctl *_do_dm_ioctl(struct dm_task *dmt, unsigned command,
{
struct dm_ioctl *dmi;
int ioctl_with_uevent;
+ int r;
+
+ dmt->ioctl_errno = 0;
dmi = _flatten(dmt, buffer_repeat_count);
if (!dmi) {
@@ -1793,30 +1837,50 @@ static struct dm_ioctl *_do_dm_ioctl(struct dm_task *dmt, unsigned command,
dmt->sector, _sanitise_message(dmt->message),
dmi->data_size, retry_repeat_count);
#ifdef DM_IOCTLS
- if (ioctl(_control_fd, command, dmi) < 0 &&
- dmt->expected_errno != errno) {
- if (errno == ENXIO && ((dmt->type == DM_DEVICE_INFO) ||
- (dmt->type == DM_DEVICE_MKNODES) ||
- (dmt->type == DM_DEVICE_STATUS)))
+ r = ioctl(_control_fd, command, dmi);
+
+ if (dmt->record_timestamp)
+ if (!dm_timestamp_get(_dm_ioctl_timestamp))
+ stack;
+
+ if (r < 0 && dmt->expected_errno != errno) {
+ dmt->ioctl_errno = errno;
+ if (dmt->ioctl_errno == ENXIO && ((dmt->type == DM_DEVICE_INFO) ||
+ (dmt->type == DM_DEVICE_MKNODES) ||
+ (dmt->type == DM_DEVICE_STATUS)))
dmi->flags &= ~DM_EXISTS_FLAG; /* FIXME */
else {
- if (_log_suppress)
- log_verbose("device-mapper: %s ioctl "
+ if (_log_suppress || dmt->ioctl_errno == EINTR)
+ log_verbose("device-mapper: %s ioctl on %s%s%s%.0d%s%.0d%s%s "
"failed: %s",
_cmd_data_v4[dmt->type].name,
- strerror(errno));
+ dmi->name, dmi->uuid,
+ dmt->major > 0 ? "(" : "",
+ dmt->major > 0 ? dmt->major : 0,
+ dmt->major > 0 ? ":" : "",
+ dmt->minor > 0 ? dmt->minor : 0,
+ dmt->major > 0 && dmt->minor == 0 ? "0" : "",
+ dmt->major > 0 ? ")" : "",
+ strerror(dmt->ioctl_errno));
else
- log_error("device-mapper: %s ioctl on %s "
+ log_error("device-mapper: %s ioctl on %s%s%s%.0d%s%.0d%s%s "
"failed: %s",
_cmd_data_v4[dmt->type].name,
- dmi->name, strerror(errno));
+ dmi->name, dmi->uuid,
+ dmt->major > 0 ? "(" : "",
+ dmt->major > 0 ? dmt->major : 0,
+ dmt->major > 0 ? ":" : "",
+ dmt->minor > 0 ? dmt->minor : 0,
+ dmt->major > 0 && dmt->minor == 0 ? "0" : "",
+ dmt->major > 0 ? ")" : "",
+ strerror(dmt->ioctl_errno));
/*
* It's sometimes worth retrying after EBUSY in case
* it's a transient failure caused by an asynchronous
* process quickly scanning the device.
*/
- *retryable = errno == EBUSY;
+ *retryable = dmt->ioctl_errno == EBUSY;
goto error;
}
@@ -1854,6 +1918,11 @@ void dm_task_update_nodes(void)
#define DM_IOCTL_RETRIES 25
#define DM_RETRY_USLEEP_DELAY 200000
+int dm_task_get_errno(struct dm_task *dmt)
+{
+ return dmt->ioctl_errno;
+}
+
int dm_task_run(struct dm_task *dmt)
{
struct dm_ioctl *dmi;
@@ -2019,6 +2088,8 @@ repeat_ioctl:
void dm_lib_release(void)
{
_close_control_fd();
+ dm_timestamp_destroy(_dm_ioctl_timestamp);
+ _dm_ioctl_timestamp = NULL;
update_devs();
}
@@ -2046,6 +2117,12 @@ void dm_lib_exit(void)
_version_checked = 0;
}
+#if defined(__GNUC__)
+/*
+ * Maintain binary backward compatibility.
+ * Version script mechanism works with 'gcc' compatible compilers only.
+ */
+
/*
* This following code is here to retain ABI compatibility after adding
* the field deferred_remove to struct dm_info in version 1.02.89.
@@ -2061,16 +2138,31 @@ void dm_lib_exit(void)
* N.B. Keep this function at the end of the file to make sure that
* no code in this file accidentally calls it.
*/
-#undef dm_task_get_info
-int dm_task_get_info(struct dm_task *dmt, struct dm_info *info);
-int dm_task_get_info(struct dm_task *dmt, struct dm_info *info)
+
+int dm_task_get_info_base(struct dm_task *dmt, struct dm_info *info);
+DM_EXPORTED_SYMBOL_BASE(dm_task_get_info);
+int dm_task_get_info_base(struct dm_task *dmt, struct dm_info *info)
{
struct dm_info new_info;
- if (!dm_task_get_info_with_deferred_remove(dmt, &new_info))
+ if (!dm_task_get_info_v1_02_97(dmt, &new_info))
return 0;
memcpy(info, &new_info, offsetof(struct dm_info, deferred_remove));
return 1;
}
+
+int dm_task_get_info_with_deferred_remove(struct dm_task *dmt, struct dm_info *info);
+int dm_task_get_info_with_deferred_remove(struct dm_task *dmt, struct dm_info *info)
+{
+ struct dm_info new_info;
+
+ if (!dm_task_get_info_v1_02_97(dmt, &new_info))
+ return 0;
+
+ memcpy(info, &new_info, offsetof(struct dm_info, internal_suspend));
+
+ return 1;
+}
+#endif
diff --git a/libdm/ioctl/libdm-targets.h b/libdm/ioctl/libdm-targets.h
index 100681fec..a3c1cb73d 100644
--- a/libdm/ioctl/libdm-targets.h
+++ b/libdm/ioctl/libdm-targets.h
@@ -68,6 +68,9 @@ struct dm_task {
int deferred_remove;
int enable_checks;
int expected_errno;
+ int ioctl_errno;
+
+ int record_timestamp;
char *uuid;
char *mangled_uuid;
diff --git a/libdm/libdevmapper.h b/libdm/libdevmapper.h
index 646747b4a..393f961af 100644
--- a/libdm/libdevmapper.h
+++ b/libdm/libdevmapper.h
@@ -1,6 +1,7 @@
/*
* Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
- * Copyright (C) 2004-2014 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2004-2015 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2006 Rackable Systems All rights reserved.
*
* This file is part of the device-mapper userspace tools.
*
@@ -127,6 +128,7 @@ enum {
*/
struct dm_task;
+struct dm_timestamp;
struct dm_task *dm_task_create(int type);
void dm_task_destroy(struct dm_task *dmt);
@@ -151,6 +153,7 @@ struct dm_info {
int32_t target_count;
int deferred_remove;
+ int internal_suspend;
};
struct dm_deps {
@@ -174,8 +177,6 @@ struct dm_versions {
int dm_get_library_version(char *version, size_t size);
int dm_task_get_driver_version(struct dm_task *dmt, char *version, size_t size);
-
-#define dm_task_get_info dm_task_get_info_with_deferred_remove
int dm_task_get_info(struct dm_task *dmt, struct dm_info *dmi);
/*
@@ -229,6 +230,12 @@ int dm_task_retry_remove(struct dm_task *dmt);
int dm_task_deferred_remove(struct dm_task *dmt);
/*
+ * Record timestamp immediately after the ioctl returns.
+ */
+int dm_task_set_record_timestamp(struct dm_task *dmt);
+struct dm_timestamp *dm_task_get_ioctl_timestamp(struct dm_task *dmt);
+
+/*
* Enable checks for common mistakes such as issuing ioctls in an unsafe order.
*/
int dm_task_enable_checks(struct dm_task *dmt);
@@ -367,8 +374,12 @@ struct dm_status_thin_pool {
uint64_t used_data_blocks;
uint64_t total_data_blocks;
uint64_t held_metadata_root;
- uint32_t read_only;
+ uint32_t read_only; /* metadata may not be changed */
dm_thin_discards_t discards;
+ uint32_t fail : 1; /* all I/O fails */
+ uint32_t error_if_no_space : 1; /* otherwise queue_if_no_space */
+ uint32_t out_of_data_space : 1; /* metadata may be changed, but data may not be allocated */
+ uint32_t reserved : 29;
};
int dm_get_status_thin_pool(struct dm_pool *mem, const char *params,
@@ -386,11 +397,534 @@ int dm_get_status_thin(struct dm_pool *mem, const char *params,
struct dm_status_thin **status);
/*
+ * device-mapper statistics support
+ */
+
+/*
+ * Statistics handle.
+ *
+ * Operations on dm_stats objects include managing statistics regions
+ * and obtaining and manipulating current counter values from the
+ * kernel. Methods are provided to return basic count values and to
+ * derive time-based metrics when a suitable interval estimate is
+ * provided.
+ *
+ * Internally the dm_stats handle contains a pointer to a table of one
+ * or more dm_stats_region objects representing the regions registered
+ * with the dm_stats_create_region() method. These in turn point to a
+ * table of one or more dm_stats_counters objects containing the
+ * counter sets for each defined area within the region:
+ *
+ * dm_stats->dm_stats_region[nr_regions]->dm_stats_counters[nr_areas]
+ *
+ * This structure is private to the library and may change in future
+ * versions: all users should make use of the public interface and treat
+ * the dm_stats type as an opaque handle.
+ *
+ * Regions and counter sets are stored in order of increasing region_id.
+ * Depending on region specifications and the sequence of create and
+ * delete operations this may not correspond to increasing sector
+ * number: users of the library should not assume that this is the case
+ * unless region creation is deliberately managed to ensure this (by
+ * always creating regions in strict order of ascending sector address).
+ *
+ * Regions may also overlap so the same sector range may be included in
+ * more than one region or area: applications should be prepared to deal
+ * with this or manage regions such that it does not occur.
+ */
+struct dm_stats;
+
+/*
+ * Allocate a dm_stats handle to use for subsequent device-mapper
+ * statistics operations. A program_id may be specified and will be
+ * used by default for subsequent operations on this handle.
+ *
+ * If program_id is NULL or the empty string a program_id will be
+ * automatically set to the value contained in /proc/self/comm.
+ */
+struct dm_stats *dm_stats_create(const char *program_id);
+
+/*
+ * Bind a dm_stats handle to the specified device major and minor
+ * values. Any previous binding is cleared and any preexisting counter
+ * data contained in the handle is released.
+ */
+int dm_stats_bind_devno(struct dm_stats *dms, int major, int minor);
+
+/*
+ * Bind a dm_stats handle to the specified device name.
+ * Any previous binding is cleared and any preexisting counter
+ * data contained in the handle is released.
+ */
+int dm_stats_bind_name(struct dm_stats *dms, const char *name);
+
+/*
+ * Bind a dm_stats handle to the specified device UUID.
+ * Any previous binding is cleared and any preexisting counter
+ * data contained in the handle is released.
+ */
+int dm_stats_bind_uuid(struct dm_stats *dms, const char *uuid);
+
+/*
+ * Test whether the running kernel supports the precise_timestamps
+ * feature. Presence of this feature also implies histogram support.
+ * The library will check this call internally and fails any attempt
+ * to use nanosecond counters or histograms on kernels that fail to
+ * meet this check.
+ */
+int dm_message_supports_precise_timestamps(void);
+
+#define DM_STATS_ALL_PROGRAMS ""
+/*
+ * Parse the response from a @stats_list message. dm_stats_list will
+ * allocate the necessary dm_stats and dm_stats region structures from
+ * the embedded dm_pool. No counter data will be obtained (the counters
+ * members of dm_stats_region objects are set to NULL).
+ *
+ * A program_id may optionally be supplied; if the argument is non-NULL
+ * only regions with a matching program_id value will be considered. If
+ * the argument is NULL then the default program_id associated with the
+ * dm_stats handle will be used. Passing the special value
+ * DM_STATS_ALL_PROGRAMS will cause all regions to be queried
+ * regardless of region program_id.
+ */
+int dm_stats_list(struct dm_stats *dms, const char *program_id);
+
+#define DM_STATS_REGIONS_ALL UINT64_MAX
+/*
+ * Populate a dm_stats object with statistics for one or more regions of
+ * the specified device.
+ *
+ * A program_id may optionally be supplied; if the argument is non-NULL
+ * only regions with a matching program_id value will be considered. If
+ * the argument is NULL then the default program_id associated with the
+ * dm_stats handle will be used. Passing the special value
+ * DM_STATS_ALL_PROGRAMS will cause all regions to be queried
+ * regardless of region program_id.
+ *
+ * Passing the special value DM_STATS_REGIONS_ALL as the region_id
+ * argument will attempt to retrieve all regions selected by the
+ * program_id argument.
+ *
+ * If region_id is used to request a single region_id to be populated
+ * the program_id is ignored.
+ */
+int dm_stats_populate(struct dm_stats *dms, const char *program_id,
+ uint64_t region_id);
+
+/*
+ * Create a new statistics region on the device bound to dms.
+ *
+ * start and len specify the region start and length in 512b sectors.
+ * Passing zero for both start and len will create a region spanning
+ * the entire device.
+ *
+ * Step determines how to subdivide the region into discrete counter
+ * sets: a positive value specifies the size of areas into which the
+ * region should be split while a negative value will split the region
+ * into a number of areas equal to the absolute value of step:
+ *
+ * - a region with one area spanning the entire device:
+ *
+ * dm_stats_create_region(dms, 0, 0, -1, p, a);
+ *
+ * - a region with areas of 1MiB:
+ *
+ * dm_stats_create_region(dms, 0, 0, 1 << 11, p, a);
+ *
+ * - one 1MiB region starting at 1024 sectors with two areas:
+ *
+ * dm_stats_create_region(dms, 1024, 1 << 11, -2, p, a);
+ *
+ * program_id is an optional string argument that identifies the
+ * program creating the region. If program_id is NULL or the empty
+ * string the default program_id stored in the handle will be used.
+ *
+ * aux_data is an optional string argument passed to the kernel that is
+ * stored with the statistics region. It is not currently accessed by
+ * the library or kernel and may be used to store arbitrary user data.
+ *
+ * The region_id of the newly-created region is returned in *region_id
+ * if it is non-NULL.
+ */
+int dm_stats_create_region(struct dm_stats *dms, uint64_t *region_id,
+ uint64_t start, uint64_t len, int64_t step,
+ const char *program_id, const char *aux_data);
+
+/*
+ * Delete the specified statistics region. This will also mark the
+ * region as not-present and discard any existing statistics data.
+ */
+int dm_stats_delete_region(struct dm_stats *dms, uint64_t region_id);
+
+/*
+ * Clear the specified statistics region. This requests the kernel to
+ * zero all counter values (except in-flight I/O). Note that this
+ * operation is not atomic with respect to reads of the counters; any IO
+ * events occurring between the last print operation and the clear will
+ * be lost. This can be avoided by using the atomic print-and-clear
+ * function of the dm_stats_print_region() call or by using the higher
+ * level dm_stats_populate() interface.
+ */
+int dm_stats_clear_region(struct dm_stats *dms, uint64_t region_id);
+
+/*
+ * Print the current counter values for the specified statistics region
+ * and return them as a string. The memory for the string buffer will
+ * be allocated from the dm_stats handle's private pool and should be
+ * returned by calling dm_stats_buffer_destroy() when no longer
+ * required. The pointer will become invalid following any call that
+ * clears or reinitializes the handle (destroy, list, populate, bind).
+ *
+ * This allows applications that wish to access the raw message response
+ * to obtain it via a dm_stats handle; no parsing of the textual counter
+ * data is carried out by this function.
+ *
+ * Most users are recommended to use the dm_stats_populate() call
+ * instead since this will automatically parse the statistics data into
+ * numeric form accessible via the dm_stats_get_*() counter access
+ * methods.
+ *
+ * A subset of the data lines may be requested by setting the
+ * start_line and num_lines parameters. If both are zero all data
+ * lines are returned.
+ *
+ * If the clear parameter is non-zero the operation will also
+ * atomically reset all counter values to zero (except in-flight IO).
+ */
+char *dm_stats_print_region(struct dm_stats *dms, uint64_t region_id,
+ unsigned start_line, unsigned num_lines,
+ unsigned clear);
+
+/*
+ * Destroy a statistics response buffer obtained from a call to
+ * dm_stats_print_region().
+ */
+void dm_stats_buffer_destroy(struct dm_stats *dms, char *buffer);
+
+/*
+ * Determine the number of regions contained in a dm_stats handle
+ * following a dm_stats_list() or dm_stats_populate() call.
+ *
+ * The value returned is the number of registered regions visible with the
+ * program_id value used for the list or populate operation and may not be
+ * equal to the highest present region_id (either due to program_id
+ * filtering or gaps in the sequence of region_id values).
+ *
+ * Always returns zero on an empty handle.
+ */
+uint64_t dm_stats_get_nr_regions(const struct dm_stats *dms);
+
+/*
+ * Test whether region_id is present in this dm_stats handle.
+ */
+int dm_stats_region_present(const struct dm_stats *dms, uint64_t region_id);
+
+/*
+ * Returns the number of areas (counter sets) contained in the specified
+ * region_id of the supplied dm_stats handle.
+ */
+uint64_t dm_stats_get_region_nr_areas(const struct dm_stats *dms,
+ uint64_t region_id);
+
+/*
+ * Returns the total number of areas (counter sets) in all regions of the
+ * given dm_stats object.
+ */
+uint64_t dm_stats_get_nr_areas(const struct dm_stats *dms);
+
+/*
+ * Destroy a dm_stats object and all associated regions and counter
+ * sets.
+ */
+void dm_stats_destroy(struct dm_stats *dms);
+
+/*
+ * Counter sampling interval
+ */
+
+/*
+ * Set the sampling interval for counter data to the specified value in
+ * either nanoseconds or milliseconds.
+ *
+ * The interval is used to calculate time-based metrics from the basic
+ * counter data: an interval must be set before calling any of the
+ * metric methods.
+ *
+ * For best accuracy the duration should be measured and updated at the
+ * end of each interval.
+ *
+ * All values are stored internally with nanosecond precision and are
+ * converted to or from ms when the millisecond interfaces are used.
+ */
+void dm_stats_set_sampling_interval_ns(struct dm_stats *dms,
+ uint64_t interval_ns);
+
+void dm_stats_set_sampling_interval_ms(struct dm_stats *dms,
+ uint64_t interval_ms);
+
+/*
+ * Retrieve the configured sampling interval in either nanoseconds or
+ * milliseconds.
+ */
+uint64_t dm_stats_get_sampling_interval_ns(const struct dm_stats *dms);
+uint64_t dm_stats_get_sampling_interval_ms(const struct dm_stats *dms);
+
+/*
+ * Override program_id. This may be used to change the default
+ * program_id value for an existing handle. If the allow_empty argument
+ * is non-zero a NULL or empty program_id is permitted.
+ *
+ * Use with caution! Most users of the library should set a valid,
+ * non-NULL program_id for every statistics region created. Failing to
+ * do so may result in confusing state when multiple programs are
+ * creating and managing statistics regions.
+ *
+ * All users of the library are encouraged to choose an unambiguous,
+ * unique program_id: this could be based on PID (for programs that
+ * create, report, and delete regions in a single process), session id,
+ * executable name, or some other distinguishing string.
+ *
+ * Use of the empty string as a program_id does not simplify use of the
+ * library or the command line tools and use of this value is strongly
+ * discouraged.
+ */
+int dm_stats_set_program_id(struct dm_stats *dms, int allow_empty,
+ const char *program_id);
+
+/*
+ * Region properties: size, length & area_len.
+ *
+ * Region start and length are returned in units of 512b as specified
+ * at region creation time. The area_len value gives the size of areas
+ * into which the region has been subdivided. For regions with a single
+ * area spanning the range this value is equal to the region length.
+ *
+ * For regions created with a specified number of areas the value
+ * represents the size of the areas into which the kernel divided the
+ * region excluding any rounding of the last area size. The number of
+ * areas may be obtained using the dm_stats_nr_areas_region() call.
+ *
+ * All values are returned in units of 512b sectors.
+ */
+int dm_stats_get_region_start(const struct dm_stats *dms, uint64_t *start,
+ uint64_t region_id);
+
+int dm_stats_get_region_len(const struct dm_stats *dms, uint64_t *len,
+ uint64_t region_id);
+
+int dm_stats_get_region_area_len(const struct dm_stats *dms,
+ uint64_t *len, uint64_t region_id);
+
+/*
+ * Area properties: start, offset and length.
+ *
+ * The area length is always equal to the area length of the region
+ * that contains it and is obtained from dm_stats_get_region_area_len().
+ *
+ * The start of an area is a function of the area_id and the containing
+ * region's start and area length: it gives the absolute offset into the
+ * containing device of the beginning of the area.
+ *
+ * The offset expresses the area's relative offset into the current
+ * region. I.e. the area start minus the start offset of the containing
+ * region.
+ *
+ * All values are returned in units of 512b sectors.
+ */
+int dm_stats_get_area_start(const struct dm_stats *dms, uint64_t *start,
+ uint64_t region_id, uint64_t area_id);
+
+int dm_stats_get_area_offset(const struct dm_stats *dms, uint64_t *offset,
+ uint64_t region_id, uint64_t area_id);
+
+/*
+ * Retrieve program_id and aux_data for a specific region. Only valid
+ * following a call to dm_stats_list(). The returned pointer does not
+ * need to be freed separately from the dm_stats handle but will become
+ * invalid after a dm_stats_destroy(), dm_stats_list(),
+ * dm_stats_populate(), or dm_stats_bind*() of the handle from which it
+ * was obtained.
+ */
+const char *dm_stats_get_region_program_id(const struct dm_stats *dms,
+ uint64_t region_id);
+
+const char *dm_stats_get_region_aux_data(const struct dm_stats *dms,
+ uint64_t region_id);
+
+/*
+ * Statistics cursor
+ *
+ * A dm_stats handle maintains an optional cursor into the statistics
+ * regions and areas that it stores. Iterators are provided to visit
+ * each region, or each area in a handle and accessor methods are
+ * provided to obtain properties and values for the region or area
+ * at the current cursor position.
+ *
+ * Using the cursor simplifies walking all regions or areas when the
+ * region table is sparse (i.e. contains some present and some
+ * non-present region_id values either due to program_id filtering
+ * or the ordering of region creation and deletion).
+ */
+
+/*
+ * Initialise the cursor of a dm_stats handle to address the first
+ * present region. It is valid to attempt to walk a NULL stats handle
+ * or a handle containing no present regions; in this case any call to
+ * dm_stats_walk_next() becomes a no-op and all calls to
+ * dm_stats_walk_end() return true.
+ */
+void dm_stats_walk_start(struct dm_stats *dms);
+
+/*
+ * Advance the statistics cursor to the next area, or to the next
+ * present region if at the end of the current region.
+ */
+void dm_stats_walk_next(struct dm_stats *dms);
+
+/*
+ * Advance the statistics cursor to the next region.
+ */
+void dm_stats_walk_next_region(struct dm_stats *dms);
+
+/*
+ * Test whether the end of a statistics walk has been reached.
+ */
+int dm_stats_walk_end(struct dm_stats *dms);
+
+/*
+ * Stats iterators
+ *
+ * C 'for' and 'do'/'while' style iterators for dm_stats data.
+ *
+ * It is not safe to call any function that modifies the region table
+ * within the loop body (i.e. dm_stats_list(), dm_stats_populate(),
+ * dm_stats_init(), or dm_stats_destroy()).
+ *
+ * All counter and property (dm_stats_get_*) access methods, as well as
+ * dm_stats_populate_region() can be safely called from loops.
+ *
+ */
+
+/*
+ * Iterate over the regions table visiting each region.
+ *
+ * If the region table is empty or unpopulated the loop body will not be
+ * executed.
+ */
+#define dm_stats_foreach_region(dms) \
+for (dm_stats_walk_start((dms)); \
+ !dm_stats_walk_end((dms)); dm_stats_walk_next_region((dms)))
+
+/*
+ * Iterate over the regions table visiting each area.
+ *
+ * If the region table is empty or unpopulated the loop body will not
+ * be executed.
+ */
+#define dm_stats_foreach_area(dms) \
+for (dm_stats_walk_start((dms)); \
+ !dm_stats_walk_end((dms)); dm_stats_walk_next((dms)))
+
+/*
+ * Start a walk iterating over the regions contained in dm_stats handle
+ * 'dms'.
+ *
+ * The body of the loop should call dm_stats_walk_next() or
+ * dm_stats_walk_next_region() to advance to the next element.
+ *
+ * The loop body is executed at least once even if the stats handle is
+ * empty.
+ */
+#define dm_stats_walk_do(dms) \
+dm_stats_walk_start((dms)); \
+do
+
+/*
+ * Start a 'while' style loop or end a 'do..while' loop iterating over the
+ * regions contained in dm_stats handle 'dms'.
+ */
+#define dm_stats_walk_while(dms) \
+while(!dm_stats_walk_end((dms)))
+
+/*
+ * Cursor relative property methods
+ *
+ * Calls with the prefix dm_stats_get_current_* operate relative to the
+ * current cursor location, returning properties for the current region
+ * or area of the supplied dm_stats handle.
+ *
+ */
+
+/*
+ * Returns the number of areas (counter sets) contained in the current
+ * region of the supplied dm_stats handle.
+ */
+uint64_t dm_stats_get_current_nr_areas(const struct dm_stats *dms);
+
+/*
+ * Retrieve the current values of the stats cursor.
+ */
+uint64_t dm_stats_get_current_region(const struct dm_stats *dms);
+uint64_t dm_stats_get_current_area(const struct dm_stats *dms);
+
+/*
+ * Current region properties: size, length & area_len.
+ *
+ * See the comments for the equivalent dm_stats_get_* versions for a
+ * complete description of these methods.
+ *
+ * All values are returned in units of 512b sectors.
+ */
+int dm_stats_get_current_region_start(const struct dm_stats *dms,
+ uint64_t *start);
+
+int dm_stats_get_current_region_len(const struct dm_stats *dms,
+ uint64_t *len);
+
+int dm_stats_get_current_region_area_len(const struct dm_stats *dms,
+ uint64_t *area_len);
+
+/*
+ * Current area properties: start and length.
+ *
+ * See the comments for the equivalent dm_stats_get_* versions for a
+ * complete description of these methods.
+ *
+ * All values are returned in units of 512b sectors.
+ */
+int dm_stats_get_current_area_start(const struct dm_stats *dms,
+ uint64_t *start);
+
+int dm_stats_get_current_area_offset(const struct dm_stats *dms,
+ uint64_t *offset);
+
+int dm_stats_get_current_area_len(const struct dm_stats *dms,
+ uint64_t *start);
+
+/*
+ * Return a pointer to the program_id string for region at the current
+ * cursor location.
+ */
+const char *dm_stats_get_current_region_program_id(const struct dm_stats *dms);
+
+/*
+ * Return a pointer to the aux_data string for the region at the current
+ * cursor location.
+ */
+const char *dm_stats_get_current_region_aux_data(const struct dm_stats *dms);
+
+/*
* Call this to actually run the ioctl.
*/
int dm_task_run(struct dm_task *dmt);
/*
+ * The errno from the last device-mapper ioctl performed by dm_task_run.
+ */
+int dm_task_get_errno(struct dm_task *dmt);
+
+/*
* Call this to make or remove the device nodes associated with previously
* issued commands.
*/
@@ -520,12 +1054,12 @@ struct dm_tree_node;
/*
* Initialise an empty dependency tree.
*
- * The tree consists of a root node together with one node for each mapped
+ * The tree consists of a root node together with one node for each mapped
* device which has child nodes for each device referenced in its table.
*
* Every node in the tree has one or more children and one or more parents.
*
- * The root node is the parent/child of every node that doesn't have other
+ * The root node is the parent/child of every node that doesn't have other
* parents/children.
*/
struct dm_tree *dm_tree_create(void);
@@ -633,7 +1167,7 @@ int dm_tree_suspend_children(struct dm_tree_node *dnode,
* Skip the filesystem sync when suspending.
* Does nothing with other functions.
* Use this when no snapshots are involved.
- */
+ */
void dm_tree_skip_lockfs(struct dm_tree_node *dnode);
/*
@@ -704,7 +1238,7 @@ int dm_tree_node_add_crypt_target(struct dm_tree_node *node,
const char *key);
int dm_tree_node_add_mirror_target(struct dm_tree_node *node,
uint64_t size);
-
+
/* Mirror log flags */
#define DM_NOSYNC 0x00000001 /* Known already in sync */
#define DM_FORCESYNC 0x00000002 /* Force resync */
@@ -727,7 +1261,7 @@ int dm_tree_node_add_raid_target(struct dm_tree_node *node,
uint64_t flags);
/*
- * Defines bellow are based on kernel's dm-cache.c defines
+ * Defines below are based on kernel's dm-cache.c defines
* DM_CACHE_MIN_DATA_BLOCK_SIZE (32 * 1024 >> SECTOR_SHIFT)
* DM_CACHE_MAX_DATA_BLOCK_SIZE (1024 * 1024 * 1024 >> SECTOR_SHIFT)
*/
@@ -765,6 +1299,7 @@ struct dm_tree_node_raid_params {
uint32_t sync_daemon_sleep; /* ms (kernel default = 5sec) */
uint32_t max_recovery_rate; /* kB/sec/disk */
uint32_t min_recovery_rate; /* kB/sec/disk */
+ uint32_t data_copies; /* raid10 data copies */
uint32_t stripe_cache; /* sectors */
uint64_t flags; /* [no]sync */
@@ -800,7 +1335,7 @@ int dm_tree_node_add_cache_target(struct dm_tree_node *node,
const char *origin_uuid,
const char *policy_name,
const struct dm_config_node *policy_settings,
- uint32_t chunk_size);
+ uint32_t data_block_size);
/*
* FIXME Add individual cache policy pairs <key> = value, like:
@@ -890,7 +1425,14 @@ int dm_tree_node_add_thin_pool_message(struct dm_tree_node *node,
int dm_tree_node_set_thin_pool_discard(struct dm_tree_node *node,
unsigned ignore,
unsigned no_passdown);
-
+/*
+ * Set error if no space, instead of queueing for thin pool.
+ */
+int dm_tree_node_set_thin_pool_error_if_no_space(struct dm_tree_node *node,
+ unsigned error_if_no_space);
+/* Start thin pool with metadata in read-only mode */
+int dm_tree_node_set_thin_pool_read_only(struct dm_tree_node *node,
+ unsigned read_only);
/*
* FIXME: Defines bellow are based on kernel's dm-thin.c defines
* MAX_DEV_ID ((1 << 24) - 1)
@@ -951,44 +1493,28 @@ uint32_t dm_tree_get_cookie(struct dm_tree_node *node);
* Memory management
*******************/
-void *dm_malloc_aux(size_t s, const char *file, int line)
+/*
+ * Never use these functions directly - use the macros following instead.
+ */
+void *dm_malloc_wrapper(size_t s, const char *file, int line)
__attribute__((__malloc__)) __attribute__((__warn_unused_result__));
-void *dm_malloc_aux_debug(size_t s, const char *file, int line)
- __attribute__((__warn_unused_result__));
-void *dm_zalloc_aux(size_t s, const char *file, int line)
+void *dm_zalloc_wrapper(size_t s, const char *file, int line)
__attribute__((__malloc__)) __attribute__((__warn_unused_result__));
-void *dm_zalloc_aux_debug(size_t s, const char *file, int line)
+void *dm_realloc_wrapper(void *p, unsigned int s, const char *file, int line)
__attribute__((__warn_unused_result__));
-char *dm_strdup_aux(const char *str, const char *file, int line)
- __attribute__((__malloc__)) __attribute__((__warn_unused_result__));
-void dm_free_aux(void *p);
-void *dm_realloc_aux(void *p, unsigned int s, const char *file, int line)
+void dm_free_wrapper(void *ptr);
+char *dm_strdup_wrapper(const char *s, const char *file, int line)
__attribute__((__warn_unused_result__));
-int dm_dump_memory_debug(void);
-void dm_bounds_check_debug(void);
-
-#ifdef DEBUG_MEM
-
-# define dm_malloc(s) dm_malloc_aux_debug((s), __FILE__, __LINE__)
-# define dm_zalloc(s) dm_zalloc_aux_debug((s), __FILE__, __LINE__)
-# define dm_strdup(s) dm_strdup_aux((s), __FILE__, __LINE__)
-# define dm_free(p) dm_free_aux(p)
-# define dm_realloc(p, s) dm_realloc_aux(p, s, __FILE__, __LINE__)
-# define dm_dump_memory() dm_dump_memory_debug()
-# define dm_bounds_check() dm_bounds_check_debug()
-
-#else
-
-# define dm_malloc(s) dm_malloc_aux((s), __FILE__, __LINE__)
-# define dm_zalloc(s) dm_zalloc_aux((s), __FILE__, __LINE__)
-# define dm_strdup(s) strdup(s)
-# define dm_free(p) free(p)
-# define dm_realloc(p, s) realloc(p, s)
-# define dm_dump_memory() {}
-# define dm_bounds_check() {}
-
-#endif
+int dm_dump_memory_wrapper(void);
+void dm_bounds_check_wrapper(void);
+#define dm_malloc(s) dm_malloc_wrapper((s), __FILE__, __LINE__)
+#define dm_zalloc(s) dm_zalloc_wrapper((s), __FILE__, __LINE__)
+#define dm_strdup(s) dm_strdup_wrapper((s), __FILE__, __LINE__)
+#define dm_free(p) dm_free_wrapper(p)
+#define dm_realloc(p, s) dm_realloc_wrapper((p), (s), __FILE__, __LINE__)
+#define dm_dump_memory() dm_dump_memory_wrapper()
+#define dm_bounds_check() dm_bounds_check_wrapper()
/*
* The pool allocator is useful when you are going to allocate
@@ -1289,7 +1815,7 @@ struct dm_list *dm_list_prev(const struct dm_list *head, const struct dm_list *e
struct dm_list *dm_list_next(const struct dm_list *head, const struct dm_list *elem);
/*
- * Given the address v of an instance of 'struct dm_list' called 'head'
+ * Given the address v of an instance of 'struct dm_list' called 'head'
* contained in a structure of type t, return the containing structure.
*/
#define dm_list_struct_base(v, t, head) \
@@ -1321,7 +1847,7 @@ struct dm_list *dm_list_next(const struct dm_list *head, const struct dm_list *e
for (v = (head)->n; v != head; v = v->n)
/*
- * Set v to each element in a list in turn, starting from the element
+ * Set v to each element in a list in turn, starting from the element
* in front of 'start'.
* You can use this to 'unwind' a list_iterate and back out actions on
* already-processed elements.
@@ -1376,7 +1902,7 @@ struct dm_list *dm_list_next(const struct dm_list *head, const struct dm_list *e
dm_list_iterate_items_gen_safe(v, t, (head), list)
/*
- * Walk a list backwards, setting 'v' in turn to the containing structure
+ * Walk a list backwards, setting 'v' in turn to the containing structure
* of each item.
* The containing structure should be the same type as 'v'.
* The 'struct dm_list' variable within the containing structure is 'field'.
@@ -1387,7 +1913,7 @@ struct dm_list *dm_list_next(const struct dm_list *head, const struct dm_list *e
v = dm_list_struct_base(v->field.p, __typeof__(*v), field))
/*
- * Walk a list backwards, setting 'v' in turn to the containing structure
+ * Walk a list backwards, setting 'v' in turn to the containing structure
* of each item.
* The containing structure should be the same type as 'v'.
* The list should be 'struct dm_list list' within the containing structure.
@@ -1436,7 +1962,7 @@ int dm_split_words(char *buffer, unsigned max,
unsigned ignore_comments, /* Not implemented */
char **argv);
-/*
+/*
* Returns -1 if buffer too small
*/
int dm_snprintf(char *buf, size_t bufsize, const char *format, ...)
@@ -1537,6 +2063,32 @@ int dm_strncpy(char *dest, const char *src, size_t n);
uint64_t dm_units_to_factor(const char *units, char *unit_type,
int strict, const char **endptr);
+/*
+ * Type of unit specifier used by dm_size_to_string().
+ */
+typedef enum {
+ DM_SIZE_LONG = 0, /* Megabyte */
+ DM_SIZE_SHORT = 1, /* MB or MiB */
+ DM_SIZE_UNIT = 2 /* M or m */
+} dm_size_suffix_t;
+
+/*
+ * Convert a size (in 512-byte sectors) into a printable string using units of unit_type.
+ * An upper-case unit_type indicates output units based on powers of 1000 are
+ * required; a lower-case unit_type indicates powers of 1024.
+ * For correct operation, unit_factor must be one of:
+ * 0 - the correct value will be calculated internally;
+ * or the output from dm_units_to_factor() corresponding to unit_type;
+ * or 'u' or 'U', an arbitrary number of bytes to use as the power base.
+ * Set include_suffix to 1 to include a suffix of suffix_type.
+ * Set use_si_units to 0 for suffixes that don't distinguish between 1000 and 1024.
+ * Set use_si_units to 1 for a suffix that does distinguish.
+ */
+const char *dm_size_to_string(struct dm_pool *mem, uint64_t size,
+ char unit_type, int use_si_units,
+ uint64_t unit_factor, int include_suffix,
+ dm_size_suffix_t suffix_type);
+
/**************************
* file/stream manipulation
**************************/
@@ -1643,6 +2195,48 @@ typedef int32_t dm_percent_t;
float dm_percent_to_float(dm_percent_t percent);
dm_percent_t dm_make_percent(uint64_t numerator, uint64_t denominator);
+/********************
+ * timestamp handling
+ ********************/
+
+/*
+ * Create a dm_timestamp object to use with dm_timestamp_get.
+ */
+struct dm_timestamp *dm_timestamp_alloc(void);
+
+/*
+ * Update dm_timestamp object to represent the current time.
+ */
+int dm_timestamp_get(struct dm_timestamp *ts);
+
+/*
+ * Copy a timestamp from ts_old to ts_new.
+ */
+void dm_timestamp_copy(struct dm_timestamp *ts_new, struct dm_timestamp *ts_old);
+
+/*
+ * Compare two timestamps.
+ *
+ * Return: -1 if ts1 is less than ts2
+ * 0 if ts1 is equal to ts2
+ * 1 if ts1 is greater than ts2
+ */
+int dm_timestamp_compare(struct dm_timestamp *ts1, struct dm_timestamp *ts2);
+
+/*
+ * Return the absolute difference in nanoseconds between
+ * the dm_timestamp objects ts1 and ts2.
+ *
+ * Callers that need to know whether ts1 is before, equal to, or after ts2
+ * in addition to the magnitude should use dm_timestamp_compare.
+ */
+uint64_t dm_timestamp_delta(struct dm_timestamp *ts1, struct dm_timestamp *ts2);
+
+/*
+ * Destroy a dm_timestamp object.
+ */
+void dm_timestamp_destroy(struct dm_timestamp *ts);
+
/*********************
* reporting functions
*********************/
@@ -1651,6 +2245,7 @@ struct dm_report_object_type {
uint32_t id; /* Powers of 2 */
const char *desc;
const char *prefix; /* field id string prefix (optional) */
+ /* FIXME: convert to proper usage of const pointers here */
void *(*data_fn)(void *object); /* callback from report_object() */
};
@@ -1659,17 +2254,25 @@ struct dm_report_field;
/*
* dm_report_field_type flags
*/
-#define DM_REPORT_FIELD_MASK 0x00000FFF
-#define DM_REPORT_FIELD_ALIGN_MASK 0x0000000F
-#define DM_REPORT_FIELD_ALIGN_LEFT 0x00000001
-#define DM_REPORT_FIELD_ALIGN_RIGHT 0x00000002
-#define DM_REPORT_FIELD_TYPE_MASK 0x00000FF0
-#define DM_REPORT_FIELD_TYPE_NONE 0x00000000
-#define DM_REPORT_FIELD_TYPE_STRING 0x00000010
-#define DM_REPORT_FIELD_TYPE_NUMBER 0x00000020
-#define DM_REPORT_FIELD_TYPE_SIZE 0x00000040
-#define DM_REPORT_FIELD_TYPE_PERCENT 0x00000080
-#define DM_REPORT_FIELD_TYPE_STRING_LIST 0x00000100
+#define DM_REPORT_FIELD_MASK 0x00000FFF
+#define DM_REPORT_FIELD_ALIGN_MASK 0x0000000F
+#define DM_REPORT_FIELD_ALIGN_LEFT 0x00000001
+#define DM_REPORT_FIELD_ALIGN_RIGHT 0x00000002
+#define DM_REPORT_FIELD_TYPE_MASK 0x00000FF0
+#define DM_REPORT_FIELD_TYPE_NONE 0x00000000
+#define DM_REPORT_FIELD_TYPE_STRING 0x00000010
+#define DM_REPORT_FIELD_TYPE_NUMBER 0x00000020
+#define DM_REPORT_FIELD_TYPE_SIZE 0x00000040
+#define DM_REPORT_FIELD_TYPE_PERCENT 0x00000080
+#define DM_REPORT_FIELD_TYPE_STRING_LIST 0x00000100
+#define DM_REPORT_FIELD_TYPE_TIME 0x00000200
+
+/* For use with reserved values only! */
+#define DM_REPORT_FIELD_RESERVED_VALUE_MASK 0x0000000F
+#define DM_REPORT_FIELD_RESERVED_VALUE_NAMED 0x00000001 /* only named value, less strict form of reservation */
+#define DM_REPORT_FIELD_RESERVED_VALUE_RANGE 0x00000002 /* value is range - low and high value defined */
+#define DM_REPORT_FIELD_RESERVED_VALUE_DYNAMIC_VALUE 0x00000004 /* value is computed in runtime */
+#define DM_REPORT_FIELD_RESERVED_VALUE_FUZZY_NAMES 0x00000008 /* value names are recognized in runtime */
#define DM_REPORT_FIELD_TYPE_ID_LEN 32
#define DM_REPORT_FIELD_TYPE_HEADING_LEN 32
@@ -1703,7 +2306,8 @@ struct dm_report_field_reserved_value {
};
/*
- * Reserved value is a 'value' that is used directly if any of the 'names' is hit.
+ * Reserved value is a 'value' that is used directly if any of the 'names' is hit
+ * or in case of fuzzy names, if such fuzzy name matches.
*
* If type is any of DM_REPORT_FIELD_TYPE_*, the reserved value is recognized
* for all fields of that type.
@@ -1716,18 +2320,61 @@ struct dm_report_field_reserved_value {
* selection enabled (see also dm_report_init_with_selection function).
*/
struct dm_report_reserved_value {
- const unsigned type; /* DM_REPORT_FIELD_TYPE_* */
+ const uint32_t type; /* DM_REPORT_FIELD_RESERVED_VALUE_* and DM_REPORT_FIELD_TYPE_* */
const void *value; /* reserved value:
- struct dm_report_field_reserved_value for DM_REPORT_FIELD_TYPE_NONE
uint64_t for DM_REPORT_FIELD_TYPE_NUMBER
uint64_t for DM_REPORT_FIELD_TYPE_SIZE (number of 512-byte sectors)
uint64_t for DM_REPORT_FIELD_TYPE_PERCENT
- const char * for DM_REPORT_FIELD_TYPE_STRING */
- const char **names; /* null-terminated array of names for this reserved value */
+ const char* for DM_REPORT_FIELD_TYPE_STRING
+ struct dm_report_field_reserved_value for DM_REPORT_FIELD_TYPE_NONE
+ dm_report_reserved_handler* if DM_REPORT_FIELD_RESERVED_VALUE_{DYNAMIC_VALUE,FUZZY_NAMES} is used */
+ const char **names; /* null-terminated array of static names for this reserved value */
const char *description; /* description of the reserved value */
};
/*
+ * Available actions for dm_report_reserved_value_handler.
+ */
+typedef enum {
+ DM_REPORT_RESERVED_PARSE_FUZZY_NAME,
+ DM_REPORT_RESERVED_GET_DYNAMIC_VALUE,
+} dm_report_reserved_action_t;
+
+/*
+ * Generic reserved value handler to process reserved value names and/or values.
+ *
+ * Actions and their input/output:
+ *
+ * DM_REPORT_RESERVED_PARSE_FUZZY_NAME
+ * data_in: const char *fuzzy_name
+ * data_out: const char *canonical_name, NULL if fuzzy_name not recognized
+ *
+ * DM_REPORT_RESERVED_GET_DYNAMIC_VALUE
+ * data_in: const char *canonical_name
+ * data_out: void *value, NULL if canonical_name not recognized
+ *
+ * All actions return:
+ *
+ * -1 if action not implemented
+ * 0 on error
+ * 1 on success
+ */
+typedef int (*dm_report_reserved_handler) (struct dm_report *rh,
+ struct dm_pool *mem,
+ uint32_t field_num,
+ dm_report_reserved_action_t action,
+ const void *data_in,
+ const void **data_out);
+
+/*
+ * The dm_report_value_cache_{set,get} are helper functions to store and retrieve
+ * various values used during reporting (dm_report_field_type.report_fn) and/or
+ * selection processing (dm_report_reserved_handler instances) to avoid
+ * recalculation of these values or to share values among calls.
+ */
+int dm_report_value_cache_set(struct dm_report *rh, const char *name, const void *data);
+const void *dm_report_value_cache_get(struct dm_report *rh, const char *name);
+/*
* dm_report_init output_flags
*/
#define DM_REPORT_OUTPUT_MASK 0x000000FF
@@ -1756,7 +2403,17 @@ struct dm_report *dm_report_init_with_selection(uint32_t *report_types,
const char *selection,
const struct dm_report_reserved_value reserved_values[],
void *private_data);
+/*
+ * Report an object, pass it through the selection criteria if they
+ * are present and display the result on output if it passes the criteria.
+ */
int dm_report_object(struct dm_report *rh, void *object);
+/*
+ * The same as dm_report_object, but display the result on output only if
+ * 'do_output' arg is set. Also, save the result of selection in 'selected'
+ * arg if it's not NULL (either 1 if the object passes, otherwise 0).
+ */
+int dm_report_object_is_selected(struct dm_report *rh, void *object, int do_output, int *selected);
/*
* Compact report output so that if field value is empty for all rows in
@@ -1766,7 +2423,20 @@ int dm_report_object(struct dm_report *rh, void *object);
*/
int dm_report_compact_fields(struct dm_report *rh);
+/*
+ * Returns 1 if there is no data waiting to be output.
+ */
+int dm_report_is_empty(struct dm_report *rh);
+
int dm_report_output(struct dm_report *rh);
+
+/*
+ * Output the report headings for a columns-based report, even if they
+ * have already been shown. Useful for repeating reports that wish to
+ * issue a periodic reminder of the column headings.
+ */
+int dm_report_column_headings(struct dm_report *rh);
+
void dm_report_free(struct dm_report *rh);
/*
@@ -1804,6 +2474,153 @@ int dm_report_field_percent(struct dm_report *rh, struct dm_report_field *field,
void dm_report_field_set_value(struct dm_report_field *field, const void *value,
const void *sortvalue);
+/*
+ * Stats counter access methods
+ *
+ * Each method returns the corresponding stats counter value from the
+ * supplied dm_stats handle for the specified region_id and area_id.
+ * If either region_id or area_id uses one of the special values
+ * DM_STATS_REGION_CURRENT or DM_STATS_AREA_CURRENT then the region
+ * or area is selected according to the current state of the dm_stats
+ * handle's embedded cursor.
+ *
+ * See the kernel documentation for complete descriptions of each
+ * counter field:
+ *
+ * Documentation/device-mapper/statistics.txt
+ * Documentation/iostats.txt
+ *
+ * reads: the number of reads completed
+ * reads_merged: the number of reads merged
+ * read_sectors: the number of sectors read
+ * read_nsecs: the number of nanoseconds spent reading
+ * writes: the number of writes completed
+ * writes_merged: the number of writes merged
+ * write_sectors: the number of sectors written
+ * write_nsecs: the number of nanoseconds spent writing
+ * io_in_progress: the number of I/Os currently in progress
+ * io_nsecs: the number of nanoseconds spent doing I/Os
+ * weighted_io_nsecs: the weighted number of nanoseconds spent doing I/Os
+ * total_read_nsecs: the total time spent reading in nanoseconds
+ * total_write_nsecs: the total time spent writing in nanoseconds
+ */
+
+#define DM_STATS_REGION_CURRENT UINT64_MAX
+#define DM_STATS_AREA_CURRENT UINT64_MAX
+
+uint64_t dm_stats_get_reads(const struct dm_stats *dms,
+ uint64_t region_id, uint64_t area_id);
+
+uint64_t dm_stats_get_reads_merged(const struct dm_stats *dms,
+ uint64_t region_id, uint64_t area_id);
+
+uint64_t dm_stats_get_read_sectors(const struct dm_stats *dms,
+ uint64_t region_id, uint64_t area_id);
+
+uint64_t dm_stats_get_read_nsecs(const struct dm_stats *dms,
+ uint64_t region_id, uint64_t area_id);
+
+uint64_t dm_stats_get_writes(const struct dm_stats *dms,
+ uint64_t region_id, uint64_t area_id);
+
+uint64_t dm_stats_get_writes_merged(const struct dm_stats *dms,
+ uint64_t region_id, uint64_t area_id);
+
+uint64_t dm_stats_get_write_sectors(const struct dm_stats *dms,
+ uint64_t region_id, uint64_t area_id);
+
+uint64_t dm_stats_get_write_nsecs(const struct dm_stats *dms,
+ uint64_t region_id, uint64_t area_id);
+
+uint64_t dm_stats_get_io_in_progress(const struct dm_stats *dms,
+ uint64_t region_id, uint64_t area_id);
+
+uint64_t dm_stats_get_io_nsecs(const struct dm_stats *dms,
+ uint64_t region_id, uint64_t area_id);
+
+uint64_t dm_stats_get_weighted_io_nsecs(const struct dm_stats *dms,
+ uint64_t region_id, uint64_t area_id);
+
+uint64_t dm_stats_get_total_read_nsecs(const struct dm_stats *dms,
+ uint64_t region_id, uint64_t area_id);
+
+uint64_t dm_stats_get_total_write_nsecs(const struct dm_stats *dms,
+ uint64_t region_id, uint64_t area_id);
+
+/*
+ * Derived statistics access methods
+ *
+ * Each method returns the corresponding value calculated from the
+ * counters stored in the supplied dm_stats handle for the specified
+ * region_id and area_id. If either region_id or area_id uses one of the
+ * special values DM_STATS_REGION_CURRENT or DM_STATS_AREA_CURRENT then
+ * the region or area is selected according to the current state of the
+ * dm_stats handle's embedded cursor.
+ *
+ * The set of metrics is based on the fields provided by the Linux
+ * iostats program.
+ *
+ * rd_merges_per_sec: the number of reads merged per second
+ * wr_merges_per_sec: the number of writes merged per second
+ * reads_per_sec: the number of reads completed per second
+ * writes_per_sec: the number of writes completed per second
+ * read_sectors_per_sec: the number of sectors read per second
+ * write_sectors_per_sec: the number of sectors written per second
+ * average_request_size: the average size of requests submitted
+ * service_time: the average service time (in ns) for requests issued
+ * average_queue_size: the average queue length
+ * average_wait_time: the average time for requests to be served (in ns)
+ * average_rd_wait_time: the average read wait time
+ * average_wr_wait_time: the average write wait time
+ */
+
+int dm_stats_get_rd_merges_per_sec(const struct dm_stats *dms, double *rrqm,
+ uint64_t region_id, uint64_t area_id);
+
+int dm_stats_get_wr_merges_per_sec(const struct dm_stats *dms, double *rrqm,
+ uint64_t region_id, uint64_t area_id);
+
+int dm_stats_get_reads_per_sec(const struct dm_stats *dms, double *rd_s,
+ uint64_t region_id, uint64_t area_id);
+
+int dm_stats_get_writes_per_sec(const struct dm_stats *dms, double *wr_s,
+ uint64_t region_id, uint64_t area_id);
+
+int dm_stats_get_read_sectors_per_sec(const struct dm_stats *dms,
+ double *rsec_s, uint64_t region_id,
+ uint64_t area_id);
+
+int dm_stats_get_write_sectors_per_sec(const struct dm_stats *dms,
+ double *wr_s, uint64_t region_id,
+ uint64_t area_id);
+
+int dm_stats_get_average_request_size(const struct dm_stats *dms,
+ double *arqsz, uint64_t region_id,
+ uint64_t area_id);
+
+int dm_stats_get_service_time(const struct dm_stats *dms, double *svctm,
+ uint64_t region_id, uint64_t area_id);
+
+int dm_stats_get_average_queue_size(const struct dm_stats *dms, double *qusz,
+ uint64_t region_id, uint64_t area_id);
+
+int dm_stats_get_average_wait_time(const struct dm_stats *dms, double *await,
+ uint64_t region_id, uint64_t area_id);
+
+int dm_stats_get_average_rd_wait_time(const struct dm_stats *dms,
+ double *await, uint64_t region_id,
+ uint64_t area_id);
+
+int dm_stats_get_average_wr_wait_time(const struct dm_stats *dms,
+ double *await, uint64_t region_id,
+ uint64_t area_id);
+
+int dm_stats_get_throughput(const struct dm_stats *dms, double *tput,
+ uint64_t region_id, uint64_t area_id);
+
+int dm_stats_get_utilization(const struct dm_stats *dms, dm_percent_t *util,
+ uint64_t region_id, uint64_t area_id);
+
/*************************
* config file parse/print
*************************/
@@ -1825,6 +2642,7 @@ struct dm_config_value {
} v;
struct dm_config_value *next; /* For arrays */
+ uint32_t format_flags;
};
struct dm_config_node {
@@ -1927,6 +2745,24 @@ struct dm_config_node *dm_config_create_node(struct dm_config_tree *cft, const c
struct dm_config_value *dm_config_create_value(struct dm_config_tree *cft);
struct dm_config_node *dm_config_clone_node(struct dm_config_tree *cft, const struct dm_config_node *cn, int siblings);
+/*
+ * Common formatting flags applicable to all config node types (lower 16 bits).
+ */
+#define DM_CONFIG_VALUE_FMT_COMMON_ARRAY 0x00000001 /* value is array */
+#define DM_CONFIG_VALUE_FMT_COMMON_EXTRA_SPACES 0x00000002 /* add spaces in "key = value" pairs in contrast to "key=value" for better readability */
+
+/*
+ * Type-related config node formatting flags (higher 16 bits).
+ */
+/* int-related formatting flags */
+#define DM_CONFIG_VALUE_FMT_INT_OCTAL 0x00010000 /* print number in octal form */
+
+/* string-related formatting flags */
+#define DM_CONFIG_VALUE_FMT_STRING_NO_QUOTES 0x00010000 /* do not print quotes around string value */
+
+void dm_config_value_set_format_flags(struct dm_config_value *cv, uint32_t format_flags);
+uint32_t dm_config_value_get_format_flags(struct dm_config_value *cv);
+
struct dm_pool *dm_config_memory(struct dm_config_tree *cft);
/* Udev device directory. */
diff --git a/libdm/libdevmapper.pc.in b/libdm/libdevmapper.pc.in
index eb7071db2..63c07201d 100644
--- a/libdm/libdevmapper.pc.in
+++ b/libdm/libdevmapper.pc.in
@@ -8,4 +8,4 @@ Description: device-mapper library
Version: @DM_LIB_PATCHLEVEL@
Cflags: -I${includedir}
Libs: -L${libdir} -ldevmapper
-Requires.private: @SELINUX_PC@ @UDEV_PC@
+Requires.private: @SELINUX_PC@ @UDEV_PC@ @RT_PC@
diff --git a/libdm/libdm-common.c b/libdm/libdm-common.c
index bd51645eb..e1bd44e96 100644
--- a/libdm/libdm-common.c
+++ b/libdm/libdm-common.c
@@ -110,7 +110,7 @@ void dm_lib_init(void)
__attribute__((format(printf, 5, 0)))
static void _default_log_line(int level,
const char *file __attribute__((unused)),
- int line __attribute__((unused)), int dm_errno_or_class,
+ int line __attribute__((unused)), int dm_errno_or_class,
const char *f, va_list ap)
{
static int _abort_on_internal_errors = -1;
@@ -138,7 +138,7 @@ static void _default_log_line(int level,
__attribute__((format(printf, 5, 6)))
static void _default_log_with_errno(int level,
const char *file __attribute__((unused)),
- int line __attribute__((unused)), int dm_errno_or_class,
+ int line __attribute__((unused)), int dm_errno_or_class,
const char *f, ...)
{
va_list ap;
@@ -277,6 +277,7 @@ struct dm_task *dm_task_create(int type)
dmt->query_inactive_table = 0;
dmt->new_uuid = 0;
dmt->secure_data = 0;
+ dmt->record_timestamp = 0;
return dmt;
}
@@ -546,35 +547,57 @@ static int _dm_task_set_name_from_path(struct dm_task *dmt, const char *path,
{
char buf[PATH_MAX];
struct stat st1, st2;
- const char *final_name;
+ const char *final_name = NULL;
+ size_t len;
if (dmt->type == DM_DEVICE_CREATE) {
log_error("Name \"%s\" invalid. It contains \"/\".", path);
return 0;
}
- if (stat(path, &st1)) {
- log_error("Device %s not found", path);
- return 0;
+ if (!stat(path, &st1)) {
+ /*
+ * Found directly.
+ * If supplied path points to same device as last component
+ * under /dev/mapper, use that name directly.
+ */
+ if (dm_snprintf(buf, sizeof(buf), "%s/%s", _dm_dir, name) == -1) {
+ log_error("Couldn't create path for %s", name);
+ return 0;
+ }
+
+ if (!stat(buf, &st2) && (st1.st_rdev == st2.st_rdev))
+ final_name = name;
+ } else {
+ /* Not found. */
+ /* If there is exactly one '/' try a prefix of /dev */
+ if ((len = strlen(path)) < 3 || path[0] == '/' ||
+ dm_count_chars(path, len, '/') != 1) {
+ log_error("Device %s not found", path);
+ return 0;
+ }
+ if (dm_snprintf(buf, sizeof(buf), "%s/../%s", _dm_dir, path) == -1) {
+ log_error("Couldn't create /dev path for %s", path);
+ return 0;
+ }
+ if (stat(buf, &st1)) {
+ log_error("Device %s not found", path);
+ return 0;
+ }
+ /* Found */
}
/*
- * If supplied path points to same device as last component
- * under /dev/mapper, use that name directly. Otherwise call
- * _find_dm_name_of_device() to scan _dm_dir for a match.
+ * If we don't have the dm name yet, Call _find_dm_name_of_device() to
+ * scan _dm_dir for a match.
*/
- if (dm_snprintf(buf, sizeof(buf), "%s/%s", _dm_dir, name) == -1) {
- log_error("Couldn't create path for %s", name);
- return 0;
- }
-
- if (!stat(buf, &st2) && (st1.st_rdev == st2.st_rdev))
- final_name = name;
- else if (_find_dm_name_of_device(st1.st_rdev, buf, sizeof(buf)))
- final_name = buf;
- else {
- log_error("Device %s not found", name);
- return 0;
+ if (!final_name) {
+ if (_find_dm_name_of_device(st1.st_rdev, buf, sizeof(buf)))
+ final_name = buf;
+ else {
+ log_error("Device %s not found", name);
+ return 0;
+ }
}
/* This is an already existing path - do not mangle! */
@@ -959,7 +982,7 @@ static int _add_dev_node(const char *dev_name, uint32_t major, uint32_t minor,
{
char path[PATH_MAX];
struct stat info;
- dev_t dev = MKDEV((dev_t)major, minor);
+ dev_t dev = MKDEV((dev_t)major, (dev_t)minor);
mode_t old_mask;
if (!_build_dev_path(path, sizeof(path), dev_name))
@@ -1219,7 +1242,7 @@ static int _set_read_ahead(const char *dev_name, uint32_t major, uint32_t minor,
}
/* Sysfs is kB based, round up to kB */
- if ((len = dm_snprintf(buf, sizeof(buf), "%" PRIu32,
+ if ((len = dm_snprintf(buf, sizeof(buf), FMTu32,
(read_ahead + 1) / 2)) < 0) {
log_error("Failed to build size in kB.");
return 0;
diff --git a/libdm/libdm-config.c b/libdm/libdm-config.c
index 1ec604f34..b2082c865 100644
--- a/libdm/libdm-config.c
+++ b/libdm/libdm-config.c
@@ -273,11 +273,13 @@ static int _line_end(const struct dm_config_node *cn, struct config_output *out)
static int _write_value(struct config_output *out, const struct dm_config_value *v)
{
char *buf;
+ const char *s;
switch (v->type) {
case DM_CFG_STRING:
buf = alloca(dm_escaped_len(v->v.str));
- line_append("\"%s\"", dm_escape_double_quotes(buf, v->v.str));
+ s = (v->format_flags & DM_CONFIG_VALUE_FMT_STRING_NO_QUOTES) ? "" : "\"";
+ line_append("%s%s%s", s, dm_escape_double_quotes(buf, v->v.str), s);
break;
case DM_CFG_FLOAT:
@@ -285,11 +287,15 @@ static int _write_value(struct config_output *out, const struct dm_config_value
break;
case DM_CFG_INT:
- line_append("%" PRId64, v->v.i);
+ if (v->format_flags & DM_CONFIG_VALUE_FMT_INT_OCTAL)
+ line_append("0%" PRIo64, v->v.i);
+ else
+ line_append(FMTd64, v->v.i);
break;
case DM_CFG_EMPTY_ARRAY:
- line_append("[]");
+ s = (v->format_flags & DM_CONFIG_VALUE_FMT_COMMON_EXTRA_SPACES) ? " " : "";
+ line_append("[%s]", s);
break;
default:
@@ -303,6 +309,8 @@ static int _write_value(struct config_output *out, const struct dm_config_value
static int _write_config(const struct dm_config_node *n, int only_one,
struct config_output *out, int level)
{
+ const char *extra_space;
+ int format_array;
char space[MAX_INDENT + 1];
int l = (level < MAX_INDENT) ? level : MAX_INDENT;
int i;
@@ -316,6 +324,9 @@ static int _write_config(const struct dm_config_node *n, int only_one,
space[i] = '\0';
do {
+ extra_space = (n->v && (n->v->format_flags & DM_CONFIG_VALUE_FMT_COMMON_EXTRA_SPACES)) ? " " : "";
+ format_array = (n->v && (n->v->format_flags & DM_CONFIG_VALUE_FMT_COMMON_ARRAY));
+
if (out->spec && out->spec->prefix_fn)
out->spec->prefix_fn(n, space, out->baton);
@@ -341,20 +352,25 @@ static int _write_config(const struct dm_config_node *n, int only_one,
} else {
/* it's a value */
const struct dm_config_value *v = n->v;
- line_append("=");
+ line_append("%s=%s", extra_space, extra_space);
if (v->next) {
- line_append("[");
+ line_append("[%s", extra_space);
while (v && v->type != DM_CFG_EMPTY_ARRAY) {
if (!_write_value(out, v))
return_0;
v = v->next;
if (v && v->type != DM_CFG_EMPTY_ARRAY)
- line_append(", ");
+ line_append(",%s", extra_space);
}
- line_append("]");
- } else
+ line_append("%s]", extra_space);
+ } else {
+ if (format_array && (v->type != DM_CFG_EMPTY_ARRAY))
+ line_append("[%s", extra_space);
if (!_write_value(out, v))
return_0;
+ if (format_array && (v->type != DM_CFG_EMPTY_ARRAY))
+ line_append("%s]", extra_space);
+ }
}
if (!_line_end(n, out))
return_0;
@@ -522,7 +538,7 @@ static struct dm_config_node *_section(struct parser *p, struct dm_config_node *
{
/* IDENTIFIER SECTION_B_CHAR VALUE* SECTION_E_CHAR */
- struct dm_config_node *root, *n;
+ struct dm_config_node *root;
struct dm_config_value *value;
char *str;
@@ -555,7 +571,7 @@ static struct dm_config_node *_section(struct parser *p, struct dm_config_node *
if (p->t == TOK_SECTION_B) {
match(TOK_SECTION_B);
while (p->t != TOK_SECTION_E) {
- if (!(n = _section(p, root)))
+ if (!(_section(p, root)))
return_NULL;
}
match(TOK_SECTION_E);
@@ -1292,6 +1308,8 @@ struct dm_config_node *dm_config_clone_node_with_mem(struct dm_pool *mem, const
return NULL;
}
+ new_cn->id = cn->id;
+
if ((cn->v && !(new_cn->v = _clone_config_value(mem, cn->v))) ||
(cn->child && !(new_cn->child = dm_config_clone_node_with_mem(mem, cn->child, 1))) ||
(siblings && cn->sib && !(new_cn->sib = dm_config_clone_node_with_mem(mem, cn->sib, siblings))))
@@ -1328,6 +1346,22 @@ struct dm_config_value *dm_config_create_value(struct dm_config_tree *cft)
return _create_value(cft->mem);
}
+void dm_config_value_set_format_flags(struct dm_config_value *cv, uint32_t format_flags)
+{
+ if (!cv)
+ return;
+
+ cv->format_flags = format_flags;
+}
+
+uint32_t dm_config_value_get_format_flags(struct dm_config_value *cv)
+{
+ if (!cv)
+ return 0;
+
+ return cv->format_flags;
+}
+
struct dm_pool *dm_config_memory(struct dm_config_tree *cft)
{
return cft->mem;
diff --git a/libdm/libdm-deptree.c b/libdm/libdm-deptree.c
index b423323f7..f0e4ae122 100644
--- a/libdm/libdm-deptree.c
+++ b/libdm/libdm-deptree.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2005-2014 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2005-2016 Red Hat, Inc. All rights reserved.
*
* This file is part of the device-mapper userspace tools.
*
@@ -12,15 +12,6 @@
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
-/* HM FIXME: REMOVEME: devel output */
-#ifdef USE_PFL
-#define PFL() printf("%s %u\n", __func__, __LINE__);
-#define PFLA(format, arg...) printf("%s %u " format "\n", __func__, __LINE__, arg);
-#else
-#define PFL()
-#define PFLA(format, arg...)
-#endif
-
#include "dmlib.h"
#include "libdm-targets.h"
#include "libdm-common.h"
@@ -54,7 +45,10 @@ enum {
SEG_RAID0,
SEG_RAID0_META,
SEG_RAID1,
- SEG_RAID10,
+ SEG_RAID01,
+ SEG_RAID10_NEAR,
+ SEG_RAID10_OFFSET,
+ SEG_RAID10_FAR,
SEG_RAID4,
SEG_RAID5_N,
SEG_RAID5_LA,
@@ -94,7 +88,10 @@ static const struct {
{ SEG_RAID0, "raid0"},
{ SEG_RAID0_META, "raid0_meta"},
{ SEG_RAID1, "raid1"},
- { SEG_RAID10, "raid10"},
+ { SEG_RAID01, "raid01"},
+ { SEG_RAID10_NEAR, "raid10_near"},
+ { SEG_RAID10_OFFSET, "raid10_offset"},
+ { SEG_RAID10_FAR, "raid10_far"},
{ SEG_RAID4, "raid4"},
{ SEG_RAID5_N, "raid5_n"},
{ SEG_RAID5_LA, "raid5_la"},
@@ -116,6 +113,7 @@ static const struct {
*/
{ SEG_RAID5_LS, "raid5"}, /* same as "raid5_ls" (default for MD also) */
{ SEG_RAID6_ZR, "raid6"}, /* same as "raid6_zr" */
+ { SEG_RAID10_NEAR, "raid10" } /* same as "raid10_near" */
};
/* Some segment types have a list of areas of other devices attached */
@@ -184,7 +182,7 @@ struct load_segment {
uint32_t stripe_size; /* Striped + raid */
int persistent; /* Snapshot */
- uint32_t chunk_size; /* Snapshot + cache */
+ uint32_t chunk_size; /* Snapshot */
struct dm_tree_node *cow; /* Snapshot */
struct dm_tree_node *origin; /* Snapshot + Snapshot origin + Cache */
struct dm_tree_node *merge; /* Snapshot */
@@ -220,6 +218,7 @@ struct load_segment {
uint32_t writebehind; /* raid */
uint32_t max_recovery_rate; /* raid kB/sec/disk */
uint32_t min_recovery_rate; /* raid kB/sec/disk */
+ uint32_t data_copies; /* raid10 data_copies */
struct dm_tree_node *metadata; /* Thin_pool + Cache */
struct dm_tree_node *pool; /* Thin_pool, Thin */
@@ -227,10 +226,12 @@ struct load_segment {
struct dm_list thin_messages; /* Thin_pool */
uint64_t transaction_id; /* Thin_pool */
uint64_t low_water_mark; /* Thin_pool */
- uint32_t data_block_size; /* Thin_pool */
+ uint32_t data_block_size; /* Thin_pool + cache */
unsigned skip_block_zeroing; /* Thin_pool */
unsigned ignore_discard; /* Thin_pool target vsn 1.1 */
unsigned no_discard_passdown; /* Thin_pool target vsn 1.1 */
+ unsigned error_if_no_space; /* Thin pool target vsn 1.10 */
+ unsigned read_only; /* Thin pool target vsn 1.3 */
uint32_t device_id; /* Thin */
};
@@ -266,8 +267,15 @@ struct load_properties {
*/
unsigned delay_resume_if_new;
- /* Send messages for this node in preload */
+ /*
+ * Call node_send_messages(), set to 2 if there are messages
+ * When != 0, it validates matching transaction id, thus thin-pools
+ * where transaction_id is passed as 0 are never validated, this
+ * allows external management of thin-pool TID.
+ */
unsigned send_messages;
+ /* Skip suspending node's children, used when sending messages to thin-pool */
+ int skip_suspend;
};
/* Two of these used to join two nodes with uses and used_by. */
@@ -326,6 +334,7 @@ struct dm_tree {
int no_flush; /* 1 sets noflush (mirrors/multipath) */
int retry_remove; /* 1 retries remove if not successful */
uint32_t cookie;
+ char buf[DM_NAME_LEN + 32]; /* print buffer for device_name (major:minor) */
const char **optional_uuid_suffixes; /* uuid suffixes ignored when matching */
};
@@ -537,7 +546,7 @@ static struct dm_tree_node *_create_dm_tree_node(struct dm_tree *dtree,
dm_list_init(&node->activated);
dm_list_init(&node->props.segs);
- dev = MKDEV((dev_t)info->major, info->minor);
+ dev = MKDEV((dev_t)info->major, (dev_t)info->minor);
if (!dm_hash_insert_binary(dtree->devs, (const char *) &dev,
sizeof(dev), node)) {
@@ -561,7 +570,7 @@ static struct dm_tree_node *_create_dm_tree_node(struct dm_tree *dtree,
static struct dm_tree_node *_find_dm_tree_node(struct dm_tree *dtree,
uint32_t major, uint32_t minor)
{
- dev_t dev = MKDEV((dev_t)major, minor);
+ dev_t dev = MKDEV((dev_t)major, (dev_t)minor);
return dm_hash_lookup_binary(dtree->devs, (const char *) &dev,
sizeof(dev));
@@ -620,6 +629,19 @@ static struct dm_tree_node *_find_dm_tree_node_by_uuid(struct dm_tree *dtree,
return NULL;
}
+/* Return node's device_name (major:minor) for debug messages */
+static const char *_node_name(struct dm_tree_node *dnode)
+{
+ if (dm_snprintf(dnode->dtree->buf, sizeof(dnode->dtree->buf),
+ "%s (%" PRIu32 ":%" PRIu32 ")",
+ dnode->name, dnode->info.major, dnode->info.minor) < 0) {
+ stack;
+ return dnode->name;
+ }
+
+ return dnode->dtree->buf;
+}
+
void dm_tree_node_set_udev_flags(struct dm_tree_node *dnode, uint16_t udev_flags)
{
@@ -1462,12 +1484,14 @@ static int _thin_pool_status_transaction_id(struct dm_tree_node *dnode, uint64_t
goto out;
}
- if (!params || (sscanf(params, "%" PRIu64, transaction_id) != 1)) {
+ if (!params || (sscanf(params, FMTu64, transaction_id) != 1)) {
log_error("Failed to parse transaction_id from %s.", params);
goto out;
}
- log_debug_activation("Thin pool transaction id: %" PRIu64 " status: %s.", *transaction_id, params);
+ log_debug_activation("Found transaction id %" PRIu64 " for thin pool %s "
+ "with status line: %s.",
+ *transaction_id, _node_name(dnode), params);
r = 1;
out:
@@ -1532,11 +1556,13 @@ static int _thin_pool_node_message(struct dm_tree_node *dnode, struct thin_messa
if (!dm_task_set_message(dmt, buf))
goto_out;
- /* Internal functionality of dm_task */
+ /* Internal functionality of dm_task */
dmt->expected_errno = tm->expected_errno;
- if (!dm_task_run(dmt))
- goto_out;
+ if (!dm_task_run(dmt)) {
+ log_error("Failed to process thin pool message \"%s\".", buf);
+ goto out;
+ }
r = 1;
out:
@@ -1579,15 +1605,16 @@ static int _node_send_messages(struct dm_tree_node *dnode,
if (trans_id == seg->transaction_id) {
dnode->props.send_messages = 0; /* messages already committed */
if (have_messages)
- log_debug_activation("Thin pool transaction_id matches %" PRIu64
- ", skipping messages.", trans_id);
+ log_debug_activation("Thin pool %s transaction_id matches %"
+ PRIu64 ", skipping messages.",
+ _node_name(dnode), trans_id);
return 1;
}
/* Error if there are no stacked messages or id mismatches */
- if (trans_id != (seg->transaction_id - have_messages)) {
- log_error("Thin pool transaction_id is %" PRIu64 ", while expected %" PRIu64 ".",
- trans_id, seg->transaction_id - have_messages);
+ if ((trans_id + 1) != seg->transaction_id) {
+ log_error("Thin pool %s transaction_id is %" PRIu64 ", while expected %" PRIu64 ".",
+ _node_name(dnode), trans_id, seg->transaction_id - have_messages);
return 0;
}
@@ -1601,9 +1628,9 @@ static int _node_send_messages(struct dm_tree_node *dnode,
if (!_thin_pool_status_transaction_id(dnode, &trans_id))
return_0;
if (trans_id != tmsg->message.u.m_set_transaction_id.new_id) {
- log_error("Thin pool transaction_id is %" PRIu64
+ log_error("Thin pool %s transaction_id is %" PRIu64
" and does not match expected %" PRIu64 ".",
- trans_id,
+ _node_name(dnode), trans_id,
tmsg->message.u.m_set_transaction_id.new_id);
return 0;
}
@@ -1774,6 +1801,19 @@ int dm_tree_suspend_children(struct dm_tree_node *dnode,
!info.exists || info.suspended)
continue;
+ /* If child has some real messages send them */
+ if ((child->props.send_messages > 1) && r) {
+ if (!(r = _node_send_messages(child, uuid_prefix, uuid_prefix_len, 1)))
+ stack;
+ else {
+ log_debug_activation("Sent messages to thin-pool %s."
+ "skipping suspend of its children.",
+ _node_name(child));
+ child->props.skip_suspend++;
+ }
+ continue;
+ }
+
if (!_suspend_node(name, info.major, info.minor,
child->dtree->skip_lockfs,
child->dtree->no_flush, &newinfo)) {
@@ -1792,6 +1832,9 @@ int dm_tree_suspend_children(struct dm_tree_node *dnode,
handle = NULL;
while ((child = dm_tree_next_child(&handle, dnode, 0))) {
+ if (child->props.skip_suspend)
+ continue;
+
if (!(uuid = dm_tree_node_get_uuid(child))) {
stack;
continue;
@@ -2118,7 +2161,10 @@ static int _emit_areas_line(struct dm_task *dmt __attribute__((unused)),
case SEG_RAID0:
case SEG_RAID0_META:
case SEG_RAID1:
- case SEG_RAID10:
+ case SEG_RAID01:
+ case SEG_RAID10_NEAR:
+ case SEG_RAID10_FAR:
+ case SEG_RAID10_OFFSET:
case SEG_RAID4:
case SEG_RAID5_N:
case SEG_RAID5_LA:
@@ -2209,6 +2255,7 @@ static int _mirror_emit_segment_line(struct dm_task *dmt, struct load_segment *s
const char *logtype;
unsigned kmaj = 0, kmin = 0, krel = 0;
+PFL();
if (uname(&uts) == -1) {
log_error("Cannot read kernel release version.");
return 0;
@@ -2317,7 +2364,7 @@ static int _mirror_emit_segment_line(struct dm_task *dmt, struct load_segment *s
if (handle_errors)
EMIT_PARAMS(pos, " 1 handle_errors");
-
+PFLA("params=%s", params);
return 1;
}
@@ -2355,7 +2402,6 @@ static int _raid_emit_segment_line(struct dm_task *dmt, uint32_t major,
if (seg->area_count % 2)
return 0;
-PFLA("seg->area_count=%u", seg->area_count);
if ((seg->flags & DM_NOSYNC) || (seg->flags & DM_FORCESYNC))
param_count++;
@@ -2364,41 +2410,51 @@ PFLA("seg->area_count=%u", seg->area_count);
_2_if_value(seg->region_size) +
_2_if_value(seg->writebehind) +
_2_if_value(seg->min_recovery_rate) +
- _2_if_value(seg->max_recovery_rate);
+ _2_if_value(seg->max_recovery_rate) +
+ _2_if_value(seg->data_copies > 1);
/* rebuilds and writemostly are 4 * 64 bits */
param_count += _get_params_count(seg->rebuilds);
param_count += _get_params_count(seg->writemostly);
if ((seg->type == SEG_RAID1) && seg->stripe_size)
- log_error("WARNING: Ignoring RAID1 stripe size");
+ log_info("WARNING: Ignoring RAID1 stripe size");
/* Kernel only expects "raid0", not "raid0_meta" */
type = seg->type;
- if (seg->type == SEG_RAID0_META)
+ if (type == SEG_RAID0_META)
type = SEG_RAID0;
-
- EMIT_PARAMS(pos, "%s %d %u", _dm_segtypes[type].target,
+ /* Kernel only expects "raid10", not "raid10_{far,offset}" */
+ else if (type == SEG_RAID10_FAR ||
+ type == SEG_RAID10_OFFSET) {
+ param_count += 2;
+ type = SEG_RAID10_NEAR;
+ } else if (type == SEG_RAID01)
+ type = SEG_RAID1;
+
+ EMIT_PARAMS(pos, "%s %d %u",
+ type == SEG_RAID10_NEAR ? "raid10" : _dm_segtypes[type].target,
param_count, seg->stripe_size);
+ if (seg->type == SEG_RAID10_FAR)
+ EMIT_PARAMS(pos, " raid10_format far");
+ else if (seg->type == SEG_RAID10_OFFSET)
+ EMIT_PARAMS(pos, " raid10_format offset");
+
+ if (seg->data_copies > 1 && type == SEG_RAID10_NEAR)
+ EMIT_PARAMS(pos, " raid10_copies %u", seg->data_copies);
+
if (seg->flags & DM_NOSYNC)
EMIT_PARAMS(pos, " nosync");
else if (seg->flags & DM_FORCESYNC)
EMIT_PARAMS(pos, " sync");
-#if 0
- if (seg->raid10_format)
- EMIT_PARAMS(pos, " %s", seg->raid10_format);
-
- if (seg->raid10_copies)
- EMIT_PARAMS(pos, " raid10_copies %u", seg->raid_copies);
-#endif
-
if (seg->region_size)
EMIT_PARAMS(pos, " region_size %u", seg->region_size);
+	/* If seg->data_offset == 1, the kernel needs a zero offset to adjust to it */
if (seg->data_offset)
- EMIT_PARAMS(pos, " data_offset %d", seg->data_offset);
+ EMIT_PARAMS(pos, " data_offset %d", seg->data_offset == 1 ? 0 : seg->data_offset);
if (seg->delta_disks)
EMIT_PARAMS(pos, " delta_disks %d", seg->delta_disks);
@@ -2412,16 +2468,20 @@ PFLA("seg->area_count=%u", seg->area_count);
EMIT_PARAMS(pos, " write_mostly %u", i);
if (seg->writebehind)
- EMIT_PARAMS(pos, " writebehind %u", seg->writebehind);
-
- if (seg->min_recovery_rate)
- EMIT_PARAMS(pos, " min_recovery_rate %u",
- seg->min_recovery_rate);
+ EMIT_PARAMS(pos, " max_write_behind %u", seg->writebehind);
+	/*
+	 * Has to be emitted before "min_recovery_rate" or the kernel's
+	 * check will fail when both are set and min > the previous max
+	 */
if (seg->max_recovery_rate)
EMIT_PARAMS(pos, " max_recovery_rate %u",
seg->max_recovery_rate);
+ if (seg->min_recovery_rate)
+ EMIT_PARAMS(pos, " min_recovery_rate %u",
+ seg->min_recovery_rate);
+
/* Print number of metadata/data device pairs */
EMIT_PARAMS(pos, " %u", area_count);
@@ -2457,8 +2517,8 @@ static int _cache_emit_segment_line(struct dm_task *dmt,
EMIT_PARAMS(pos, " %s %s %s", metadata, data, origin);
- /* Chunk size */
- EMIT_PARAMS(pos, " %u", seg->chunk_size);
+ /* Data block size */
+ EMIT_PARAMS(pos, " %u", seg->data_block_size);
/* Features */
/* feature_count = hweight32(seg->flags); */
@@ -2489,9 +2549,11 @@ static int _thin_pool_emit_segment_line(struct dm_task *dmt,
{
int pos = 0;
char pool[DM_FORMAT_DEV_BUFSIZE], metadata[DM_FORMAT_DEV_BUFSIZE];
- int features = (seg->skip_block_zeroing ? 1 : 0) +
- (seg->ignore_discard ? 1 : 0) +
- (seg->no_discard_passdown ? 1 : 0);
+ int features = (seg->error_if_no_space ? 1 : 0) +
+ (seg->read_only ? 1 : 0) +
+ (seg->ignore_discard ? 1 : 0) +
+ (seg->no_discard_passdown ? 1 : 0) +
+ (seg->skip_block_zeroing ? 1 : 0);
if (!_build_dev_string(metadata, sizeof(metadata), seg->metadata))
return_0;
@@ -2499,8 +2561,10 @@ static int _thin_pool_emit_segment_line(struct dm_task *dmt,
if (!_build_dev_string(pool, sizeof(pool), seg->pool))
return_0;
- EMIT_PARAMS(pos, "%s %s %d %" PRIu64 " %d%s%s%s", metadata, pool,
+ EMIT_PARAMS(pos, "%s %s %d %" PRIu64 " %d%s%s%s%s%s", metadata, pool,
seg->data_block_size, seg->low_water_mark, features,
+ seg->error_if_no_space ? " error_if_no_space" : "",
+ seg->read_only ? " read_only" : "",
seg->skip_block_zeroing ? " skip_block_zeroing" : "",
seg->ignore_discard ? " ignore_discard" : "",
seg->no_discard_passdown ? " no_discard_passdown" : ""
@@ -2543,7 +2607,7 @@ static int _emit_segment_line(struct dm_task *dmt, uint32_t major,
int r;
int target_type_is_raid = 0;
char originbuf[DM_FORMAT_DEV_BUFSIZE], cowbuf[DM_FORMAT_DEV_BUFSIZE];
-
+PFL()
switch(seg->type) {
case SEG_ERROR:
case SEG_ZERO:
@@ -2597,7 +2661,10 @@ static int _emit_segment_line(struct dm_task *dmt, uint32_t major,
case SEG_RAID0:
case SEG_RAID0_META:
case SEG_RAID1:
- case SEG_RAID10:
+ case SEG_RAID01:
+ case SEG_RAID10_NEAR:
+ case SEG_RAID10_FAR:
+ case SEG_RAID10_OFFSET:
case SEG_RAID4:
case SEG_RAID5_N:
case SEG_RAID5_LA:
@@ -2819,6 +2886,7 @@ int dm_tree_preload_children(struct dm_tree_node *dnode,
struct dm_info newinfo;
int update_devs_flag = 0;
+PFL();
/* Preload children first */
while ((child = dm_tree_next_child(&handle, dnode, 0))) {
/* Skip existing non-device-mapper devices */
@@ -3108,7 +3176,7 @@ int dm_get_status_snapshot(struct dm_pool *mem, const char *params,
return 0;
}
- r = sscanf(params, "%" PRIu64 "/%" PRIu64 " %" PRIu64,
+ r = sscanf(params, FMTu64 "/" FMTu64 " " FMTu64,
&s->used_sectors, &s->total_sectors,
&s->metadata_sectors);
@@ -3281,6 +3349,7 @@ PFL();
seg->writebehind = p->writebehind;
seg->min_recovery_rate = p->min_recovery_rate;
seg->max_recovery_rate = p->max_recovery_rate;
+ seg->data_copies = p->data_copies;
seg->flags = p->flags;
return 1;
@@ -3436,11 +3505,37 @@ int dm_tree_node_add_cache_target(struct dm_tree_node *node,
const char *origin_uuid,
const char *policy_name,
const struct dm_config_node *policy_settings,
- uint32_t chunk_size)
+ uint32_t data_block_size)
{
struct dm_config_node *cn;
struct load_segment *seg;
+ switch (feature_flags &
+ (DM_CACHE_FEATURE_PASSTHROUGH |
+ DM_CACHE_FEATURE_WRITETHROUGH |
+ DM_CACHE_FEATURE_WRITEBACK)) {
+ case DM_CACHE_FEATURE_PASSTHROUGH:
+ case DM_CACHE_FEATURE_WRITETHROUGH:
+ case DM_CACHE_FEATURE_WRITEBACK:
+ break;
+ default:
+ log_error("Invalid cache's feature flag " FMTu64 ".",
+ feature_flags);
+ return 0;
+ }
+
+ if (data_block_size < DM_CACHE_MIN_DATA_BLOCK_SIZE) {
+ log_error("Data block size %u is lower then %u sectors.",
+ data_block_size, DM_CACHE_MIN_DATA_BLOCK_SIZE);
+ return 0;
+ }
+
+ if (data_block_size > DM_CACHE_MAX_DATA_BLOCK_SIZE) {
+ log_error("Data block size %u is higher then %u sectors.",
+ data_block_size, DM_CACHE_MAX_DATA_BLOCK_SIZE);
+ return 0;
+ }
+
if (!(seg = _add_segment(node, SEG_CACHE, size)))
return_0;
@@ -3462,7 +3557,6 @@ int dm_tree_node_add_cache_target(struct dm_tree_node *node,
if (!_link_tree_nodes(node, seg->metadata))
return_0;
-
if (!(seg->origin = dm_tree_find_node_by_uuid(node->dtree,
origin_uuid))) {
log_error("Missing cache's origin uuid %s.",
@@ -3472,7 +3566,7 @@ int dm_tree_node_add_cache_target(struct dm_tree_node *node,
if (!_link_tree_nodes(node, seg->origin))
return_0;
- seg->chunk_size = chunk_size;
+ seg->data_block_size = data_block_size;
seg->flags = feature_flags;
seg->policy_name = policy_name;
@@ -3491,7 +3585,6 @@ int dm_tree_node_add_cache_target(struct dm_tree_node *node,
}
}
-
return 1;
}
@@ -3947,7 +4040,8 @@ int dm_tree_node_add_thin_pool_message(struct dm_tree_node *node,
tm->message.type = type;
dm_list_add(&seg->thin_messages, &tm->list);
- node->props.send_messages = 1;
+	/* A value >1 indicates there really are messages to send */
+ node->props.send_messages = 2;
return 1;
}
@@ -3967,6 +4061,32 @@ int dm_tree_node_set_thin_pool_discard(struct dm_tree_node *node,
return 1;
}
+int dm_tree_node_set_thin_pool_error_if_no_space(struct dm_tree_node *node,
+ unsigned error_if_no_space)
+{
+ struct load_segment *seg;
+
+ if (!(seg = _get_single_load_segment(node, SEG_THIN_POOL)))
+ return_0;
+
+ seg->error_if_no_space = error_if_no_space;
+
+ return 1;
+}
+
+int dm_tree_node_set_thin_pool_read_only(struct dm_tree_node *node,
+ unsigned read_only)
+{
+ struct load_segment *seg;
+
+ if (!(seg = _get_single_load_segment(node, SEG_THIN_POOL)))
+ return_0;
+
+ seg->read_only = read_only;
+
+ return 1;
+}
+
int dm_tree_node_add_thin_target(struct dm_tree_node *node,
uint64_t size,
const char *pool_uuid,
@@ -4036,7 +4156,7 @@ int dm_get_status_thin_pool(struct dm_pool *mem, const char *params,
}
/* FIXME: add support for held metadata root */
- if (sscanf(params, "%" PRIu64 " %" PRIu64 "/%" PRIu64 " %" PRIu64 "/%" PRIu64 "%n",
+ if (sscanf(params, FMTu64 " " FMTu64 "/" FMTu64 " " FMTu64 "/" FMTu64 "%n",
&s->transaction_id,
&s->used_metadata_blocks,
&s->total_metadata_blocks,
@@ -4055,7 +4175,15 @@ int dm_get_status_thin_pool(struct dm_pool *mem, const char *params,
else /* default discard_passdown */
s->discards = DM_THIN_DISCARDS_PASSDOWN;
- s->read_only = (strstr(params + pos, "ro ")) ? 1 : 0;
+ if (strstr(params + pos, "ro "))
+ s->read_only = 1;
+ else if (strstr(params + pos, "fail"))
+ s->fail = 1;
+ else if (strstr(params + pos, "out_of_data_space"))
+ s->out_of_data_space = 1;
+
+ if (strstr(params + pos, "error_if_no_space"))
+ s->error_if_no_space = 1;
*status = s;
@@ -4080,7 +4208,7 @@ int dm_get_status_thin(struct dm_pool *mem, const char *params,
if (strchr(params, '-')) {
s->mapped_sectors = 0;
s->highest_mapped_sector = 0;
- } else if (sscanf(params, "%" PRIu64 " %" PRIu64,
+ } else if (sscanf(params, FMTu64 " " FMTu64,
&s->mapped_sectors,
&s->highest_mapped_sector) != 2) {
dm_pool_free(mem, s);
@@ -4171,6 +4299,7 @@ int dm_tree_node_add_null_area(struct dm_tree_node *node, uint64_t offset)
switch (seg->type) {
case SEG_RAID0:
case SEG_RAID0_META:
+ case SEG_RAID01:
case SEG_RAID1:
case SEG_RAID4:
case SEG_RAID5_N:
diff --git a/libdm/libdm-file.c b/libdm/libdm-file.c
index 26c8241df..52f9d4540 100644
--- a/libdm/libdm-file.c
+++ b/libdm/libdm-file.c
@@ -199,10 +199,10 @@ retry_fcntl:
fail_close_unlink:
if (unlink(lockfile))
- stack;
+ log_sys_debug("unlink", lockfile);
fail_close:
if (close(fd))
- stack;
+ log_sys_debug("close", lockfile);
return 0;
}
diff --git a/libdm/libdm-report.c b/libdm/libdm-report.c
index 5ff6cda39..f9300681e 100644
--- a/libdm/libdm-report.c
+++ b/libdm/libdm-report.c
@@ -18,6 +18,7 @@
#include <ctype.h>
#include <math.h> /* fabs() */
#include <float.h> /* DBL_EPSILON */
+#include <time.h>
/*
* Internal flags
@@ -34,6 +35,12 @@ struct selection {
struct dm_report {
struct dm_pool *mem;
+ /**
+ * Cache the first row allocated so that all rows and fields
+ * can be disposed of in a single dm_pool_free() call.
+ */
+ struct row *first_row;
+
/* To report all available types */
#define REPORT_TYPES_ALL UINT32_MAX
uint32_t report_types;
@@ -52,6 +59,7 @@ struct dm_report {
/* Array of field definitions */
const struct dm_report_field_type *fields;
+ const char **canonical_field_ids;
const struct dm_report_object_type *types;
/* To store caller private data */
@@ -62,6 +70,7 @@ struct dm_report {
/* Null-terminated array of reserved values */
const struct dm_report_reserved_value *reserved_values;
+ struct dm_hash_table *value_cache;
};
/*
@@ -100,11 +109,13 @@ struct op_def {
#define FLD_CMP_LT 0x01000000
#define FLD_CMP_REGEX 0x02000000
#define FLD_CMP_NUMBER 0x04000000
+#define FLD_CMP_TIME 0x08000000
/*
- * #define FLD_CMP_STRING 0x08000000
- * We could defined FLD_CMP_STRING here for completeness here,
+ * #define FLD_CMP_STRING 0x10000000
+ * We could define FLD_CMP_STRING here for completeness,
* but it's not needed - we can check operator compatibility with
- * field type by using FLD_CMP_REGEX and FLD_CMP_NUMBER flags only.
+ * field type by using FLD_CMP_REGEX, FLD_CMP_NUMBER and
+ * FLD_CMP_TIME flags only.
*/
/*
@@ -115,12 +126,16 @@ struct op_def {
static struct op_def _op_cmp[] = {
{ "=~", FLD_CMP_REGEX, "Matching regular expression. [regex]" },
{ "!~", FLD_CMP_REGEX|FLD_CMP_NOT, "Not matching regular expression. [regex]" },
- { "=", FLD_CMP_EQUAL, "Equal to. [number, size, percent, string, string list]" },
- { "!=", FLD_CMP_NOT|FLD_CMP_EQUAL, "Not equal to. [number, size, percent, string, string_list]" },
- { ">=", FLD_CMP_NUMBER|FLD_CMP_GT|FLD_CMP_EQUAL, "Greater than or equal to. [number, size, percent]" },
- { ">", FLD_CMP_NUMBER|FLD_CMP_GT, "Greater than. [number, size, percent]" },
- { "<=", FLD_CMP_NUMBER|FLD_CMP_LT|FLD_CMP_EQUAL, "Less than or equal to. [number, size, percent]" },
- { "<", FLD_CMP_NUMBER|FLD_CMP_LT, "Less than. [number, size, percent]" },
+ { "=", FLD_CMP_EQUAL, "Equal to. [number, size, percent, string, string list, time]" },
+ { "!=", FLD_CMP_NOT|FLD_CMP_EQUAL, "Not equal to. [number, size, percent, string, string_list, time]" },
+ { ">=", FLD_CMP_NUMBER|FLD_CMP_TIME|FLD_CMP_GT|FLD_CMP_EQUAL, "Greater than or equal to. [number, size, percent, time]" },
+ { ">", FLD_CMP_NUMBER|FLD_CMP_TIME|FLD_CMP_GT, "Greater than. [number, size, percent, time]" },
+ { "<=", FLD_CMP_NUMBER|FLD_CMP_TIME|FLD_CMP_LT|FLD_CMP_EQUAL, "Less than or equal to. [number, size, percent, time]" },
+ { "<", FLD_CMP_NUMBER|FLD_CMP_TIME|FLD_CMP_LT, "Less than. [number, size, percent, time]" },
+ { "since", FLD_CMP_TIME|FLD_CMP_GT|FLD_CMP_EQUAL, "Since specified time (same as '>='). [time]" },
+ { "after", FLD_CMP_TIME|FLD_CMP_GT, "After specified time (same as '>'). [time]"},
+ { "until", FLD_CMP_TIME|FLD_CMP_LT|FLD_CMP_EQUAL, "Until specified time (same as '<='). [time]"},
+ { "before", FLD_CMP_TIME|FLD_CMP_LT, "Before specified time (same as '<'). [time]"},
{ NULL, 0, NULL }
};
@@ -143,18 +158,18 @@ static struct op_def _op_cmp[] = {
#define SEL_LIST_SUBSET_LE 0x00080000
static struct op_def _op_log[] = {
- { "&&", SEL_AND, "All fields must match" },
+ { "&&", SEL_AND, "All fields must match" },
{ ",", SEL_AND, "All fields must match" },
- { "||", SEL_OR, "At least one field must match" },
+ { "||", SEL_OR, "At least one field must match" },
{ "#", SEL_OR, "At least one field must match" },
- { "!", SEL_MODIFIER_NOT, "Logical negation" },
- { "(", SEL_PRECEDENCE_PS, "Left parenthesis" },
- { ")", SEL_PRECEDENCE_PE, "Right parenthesis" },
- { "[", SEL_LIST_LS, "List start" },
- { "]", SEL_LIST_LE, "List end"},
- { "{", SEL_LIST_SUBSET_LS, "List subset start"},
- { "}", SEL_LIST_SUBSET_LE, "List subset end"},
- { NULL, 0, NULL},
+ { "!", SEL_MODIFIER_NOT, "Logical negation" },
+ { "(", SEL_PRECEDENCE_PS, "Left parenthesis" },
+ { ")", SEL_PRECEDENCE_PE, "Right parenthesis" },
+ { "[", SEL_LIST_LS, "List start" },
+ { "]", SEL_LIST_LE, "List end"},
+ { "{", SEL_LIST_SUBSET_LS, "List subset start"},
+ { "}", SEL_LIST_SUBSET_LE, "List subset end"},
+ { NULL, 0, NULL},
};
struct selection_str_list {
@@ -162,16 +177,22 @@ struct selection_str_list {
struct dm_list *list;
};
-struct field_selection {
- struct field_properties *fp;
- uint32_t flags;
+struct field_selection_value {
union {
const char *s;
uint64_t i;
+ time_t t;
double d;
struct dm_regex *r;
struct selection_str_list *l;
} v;
+ struct field_selection_value *next;
+};
+
+struct field_selection {
+ struct field_properties *fp;
+ uint32_t flags;
+ struct field_selection_value *value;
};
struct selection_node {
@@ -183,6 +204,12 @@ struct selection_node {
} selection;
};
+struct reserved_value_wrapper {
+ const char *matched_name;
+ const struct dm_report_reserved_value *reserved;
+ const void *value;
+};
+
/*
* Report data field
*/
@@ -230,7 +257,7 @@ static int _selected_disp(struct dm_report *rh,
const void *data,
void *private __attribute__((unused)))
{
- struct row *row = (struct row *)data;
+ const struct row *row = (const struct row *)data;
return dm_report_field_int(rh, field, &row->selected);
}
@@ -619,7 +646,7 @@ int dm_report_field_uint64(struct dm_report *rh,
return 0;
}
- if (dm_snprintf(repstr, 21, "%" PRIu64 , value) < 0) {
+ if (dm_snprintf(repstr, 21, FMTu64 , value) < 0) {
log_error("dm_report_field_uint64: uint64 too big: %" PRIu64, value);
return 0;
}
@@ -651,6 +678,7 @@ static const char *_get_field_type_name(unsigned field_type)
case DM_REPORT_FIELD_TYPE_NUMBER: return "number";
case DM_REPORT_FIELD_TYPE_SIZE: return "size";
case DM_REPORT_FIELD_TYPE_PERCENT: return "percent";
+ case DM_REPORT_FIELD_TYPE_TIME: return "time";
case DM_REPORT_FIELD_TYPE_STRING_LIST: return "string list";
default: return "unknown";
}
@@ -767,7 +795,7 @@ static struct field_properties * _add_field(struct dm_report *rh,
{
struct field_properties *fp;
- if (!(fp = dm_pool_zalloc(rh->mem, sizeof(struct field_properties)))) {
+ if (!(fp = dm_pool_zalloc(rh->mem, sizeof(*fp)))) {
log_error("dm_report: struct field_properties allocation "
"failed");
return NULL;
@@ -793,25 +821,52 @@ static struct field_properties * _add_field(struct dm_report *rh,
return fp;
}
+static int _get_canonical_field_name(const char *field,
+ size_t flen,
+ char *canonical_field,
+ size_t fcanonical_len,
+ int *differs)
+{
+ size_t i;
+ int diff = 0;
+
+ for (i = 0; *field && flen; field++, flen--) {
+ if (*field == '_') {
+ diff = 1;
+ continue;
+ }
+ if (i >= fcanonical_len) {
+ log_error("%s: field name too long", field);
+ return 0;
+ }
+ canonical_field[i++] = *field;
+ }
+
+ canonical_field[i] = '\0';
+ if (differs)
+ *differs = diff;
+ return 1;
+}
+
/*
- * Compare name1 against name2 or prefix plus name2
- * name2 is not necessarily null-terminated.
- * len2 is the length of name2.
+ * Compare canonical_name1 against canonical_name2 or prefix
+ * plus canonical_name2. Canonical name is a name where all
+ * superfluous characters are removed (underscores for now).
+ * Both names are always null-terminated.
*/
-static int _is_same_field(const char *name1, const char *name2,
- size_t len2, const char *prefix)
+static int _is_same_field(const char *canonical_name1, const char *canonical_name2,
+ const char *prefix)
{
size_t prefix_len;
/* Exact match? */
- if (!strncasecmp(name1, name2, len2) && strlen(name1) == len2)
+ if (!strcasecmp(canonical_name1, canonical_name2))
return 1;
/* Match including prefix? */
- prefix_len = strlen(prefix);
- if (!strncasecmp(prefix, name1, prefix_len) &&
- !strncasecmp(name1 + prefix_len, name2, len2) &&
- strlen(name1) == prefix_len + len2)
+ prefix_len = strlen(prefix) - 1;
+ if (!strncasecmp(prefix, canonical_name1, prefix_len) &&
+ !strcasecmp(canonical_name1 + prefix_len, canonical_name2))
return 1;
return 0;
@@ -825,15 +880,20 @@ static void _all_match_combine(const struct dm_report_object_type *types,
const char *field, size_t flen,
uint32_t *report_types)
{
+ char field_canon[DM_REPORT_FIELD_TYPE_ID_LEN];
const struct dm_report_object_type *t;
size_t prefix_len;
+ if (!_get_canonical_field_name(field, flen, field_canon, DM_REPORT_FIELD_TYPE_ID_LEN, NULL))
+ return;
+ flen = strlen(field_canon);
+
for (t = types; t->data_fn; t++) {
- prefix_len = strlen(t->prefix);
+ prefix_len = strlen(t->prefix) - 1;
- if (!strncasecmp(t->prefix, field, prefix_len) &&
+ if (!strncasecmp(t->prefix, field_canon, prefix_len) &&
((unprefixed_all_matched && (flen == prefix_len)) ||
- (!strncasecmp(field + prefix_len, "all", 3) &&
+ (!strncasecmp(field_canon + prefix_len, "all", 3) &&
(flen == prefix_len + 3))))
*report_types |= t->id;
}
@@ -878,13 +938,17 @@ static int _add_all_fields(struct dm_report *rh, uint32_t type)
static int _get_field(struct dm_report *rh, const char *field, size_t flen,
uint32_t *f_ret, int *implicit)
{
+ char field_canon[DM_REPORT_FIELD_TYPE_ID_LEN];
uint32_t f;
if (!flen)
return 0;
+ if (!_get_canonical_field_name(field, flen, field_canon, DM_REPORT_FIELD_TYPE_ID_LEN, NULL))
+ return 0;
+
for (f = 0; _implicit_report_fields[f].report_fn; f++) {
- if (_is_same_field(_implicit_report_fields[f].id, field, flen, rh->field_prefix)) {
+ if (_is_same_field(_implicit_report_fields[f].id, field_canon, rh->field_prefix)) {
*f_ret = f;
*implicit = 1;
return 1;
@@ -892,7 +956,7 @@ static int _get_field(struct dm_report *rh, const char *field, size_t flen,
}
for (f = 0; rh->fields[f].report_fn; f++) {
- if (_is_same_field(rh->fields[f].id, field, flen, rh->field_prefix)) {
+ if (_is_same_field(rh->canonical_field_ids[f], field_canon, rh->field_prefix)) {
*f_ret = f;
*implicit = 0;
return 1;
@@ -971,6 +1035,7 @@ static int _add_sort_key(struct dm_report *rh, uint32_t field_num, int implicit,
static int _key_match(struct dm_report *rh, const char *key, size_t len,
unsigned report_type_only)
{
+ char key_canon[DM_REPORT_FIELD_TYPE_ID_LEN];
uint32_t f;
uint32_t flags;
@@ -993,12 +1058,15 @@ static int _key_match(struct dm_report *rh, const char *key, size_t len,
return 0;
}
+ if (!_get_canonical_field_name(key, len, key_canon, DM_REPORT_FIELD_TYPE_ID_LEN, NULL))
+ return 0;
+
for (f = 0; _implicit_report_fields[f].report_fn; f++)
- if (_is_same_field(_implicit_report_fields[f].id, key, len, rh->field_prefix))
+ if (_is_same_field(_implicit_report_fields[f].id, key_canon, rh->field_prefix))
return _add_sort_key(rh, f, 1, flags, report_type_only);
for (f = 0; rh->fields[f].report_fn; f++)
- if (_is_same_field(rh->fields[f].id, key, len, rh->field_prefix))
+ if (_is_same_field(rh->canonical_field_ids[f], key_canon, rh->field_prefix))
return _add_sort_key(rh, f, 0, flags, report_type_only);
return 0;
@@ -1106,6 +1174,36 @@ static int _help_requested(struct dm_report *rh)
return 0;
}
+static int _canonicalize_field_ids(struct dm_report *rh)
+{
+ size_t registered_field_count = 0, i;
+ char canonical_field[DM_REPORT_FIELD_TYPE_ID_LEN];
+ char *canonical_field_dup;
+ int differs;
+
+ while (*rh->fields[registered_field_count].id)
+ registered_field_count++;
+
+ if (!(rh->canonical_field_ids = dm_pool_alloc(rh->mem, registered_field_count * sizeof(const char *)))) {
+ log_error("_canonicalize_field_ids: dm_pool_alloc failed");
+ return 0;
+ }
+
+ for (i = 0; i < registered_field_count; i++) {
+ if (!_get_canonical_field_name(rh->fields[i].id, strlen(rh->fields[i].id),
+ canonical_field, DM_REPORT_FIELD_TYPE_ID_LEN, &differs))
+ return_0;
+
+ if (differs) {
+ canonical_field_dup = dm_pool_strdup(rh->mem, canonical_field);
+ rh->canonical_field_ids[i] = canonical_field_dup;
+ } else
+ rh->canonical_field_ids[i] = rh->fields[i].id;
+ }
+
+ return 1;
+}
+
struct dm_report *dm_report_init(uint32_t *report_types,
const struct dm_report_object_type *types,
const struct dm_report_field_type *fields,
@@ -1166,6 +1264,11 @@ struct dm_report *dm_report_init(uint32_t *report_types,
return NULL;
}
+ if (!_canonicalize_field_ids(rh)) {
+ dm_report_free(rh);
+ return NULL;
+ }
+
/*
* To keep the code needed to add the "all" field to a minimum, we parse
* the field lists twice. The first time we only update the report type.
@@ -1202,6 +1305,8 @@ void dm_report_free(struct dm_report *rh)
{
if (rh->selection)
dm_pool_destroy(rh->selection->mem);
+ if (rh->value_cache)
+ dm_hash_destroy(rh->value_cache);
dm_pool_destroy(rh->mem);
dm_free(rh);
}
@@ -1257,40 +1362,132 @@ static void *_report_get_implicit_field_data(struct dm_report *rh __attribute__(
return NULL;
}
-static int _close_enough(double d1, double d2)
+static int _dbl_equal(double d1, double d2)
{
return fabs(d1 - d2) < DBL_EPSILON;
}
+static int _dbl_greater(double d1, double d2)
+{
+ return (d1 > d2) && !_dbl_equal(d1, d2);
+}
+
+static int _dbl_less(double d1, double d2)
+{
+ return (d1 < d2) && !_dbl_equal(d1, d2);
+}
+
+static int _dbl_greater_or_equal(double d1, double d2)
+{
+ return _dbl_greater(d1, d2) || _dbl_equal(d1, d2);
+}
+
+static int _dbl_less_or_equal(double d1, double d2)
+{
+ return _dbl_less(d1, d2) || _dbl_equal(d1, d2);
+}
+
+#define _uint64 *(const uint64_t *)
+#define _uint64arr(var,index) ((const uint64_t *)var)[index]
+#define _str (const char *)
+#define _dbl *(const double *)
+#define _dblarr(var,index) ((const double *)var)[index]
+
+static int _do_check_value_is_strictly_reserved(unsigned type, const void *res_val, int res_range,
+ const void *val, struct field_selection *fs)
+{
+ int sel_range = fs ? fs->value->next != NULL : 0;
+
+ switch (type & DM_REPORT_FIELD_TYPE_MASK) {
+ case DM_REPORT_FIELD_TYPE_NUMBER:
+ if (res_range && sel_range) {
+ /* both reserved value and selection value are ranges */
+ if (((_uint64 val >= _uint64arr(res_val,0)) && (_uint64 val <= _uint64arr(res_val,1))) ||
+ (fs && ((fs->value->v.i == _uint64arr(res_val,0)) && (fs->value->next->v.i == _uint64arr(res_val,1)))))
+ return 1;
+ } else if (res_range) {
+ /* only reserved value is a range */
+ if (((_uint64 val >= _uint64arr(res_val,0)) && (_uint64 val <= _uint64arr(res_val,1))) ||
+ (fs && ((fs->value->v.i >= _uint64arr(res_val,0)) && (fs->value->v.i <= _uint64arr(res_val,1)))))
+ return 1;
+ } else if (sel_range) {
+ /* only selection value is a range */
+ if (((_uint64 val >= _uint64 res_val) && (_uint64 val <= _uint64 res_val)) ||
+ (fs && ((fs->value->v.i >= _uint64 res_val) && (fs->value->next->v.i <= _uint64 res_val))))
+ return 1;
+ } else {
+ /* neither selection value nor reserved value is a range */
+ if ((_uint64 val == _uint64 res_val) ||
+ (fs && (fs->value->v.i == _uint64 res_val)))
+ return 1;
+ }
+ break;
+
+ case DM_REPORT_FIELD_TYPE_STRING:
+ /* there are no ranges for string type yet */
+ if ((!strcmp(_str val, _str res_val)) ||
+ (fs && (!strcmp(fs->value->v.s, _str res_val))))
+ return 1;
+ break;
+
+ case DM_REPORT_FIELD_TYPE_SIZE:
+ if (res_range && sel_range) {
+ /* both reserved value and selection value are ranges */
+ if ((_dbl_greater_or_equal(_dbl val, _dblarr(res_val,0)) && _dbl_less_or_equal(_dbl val, _dblarr(res_val,1))) ||
+ (fs && (_dbl_equal(fs->value->v.d, _dblarr(res_val,0)) && (_dbl_equal(fs->value->next->v.d, _dblarr(res_val,1))))))
+ return 1;
+ } else if (res_range) {
+ /* only reserved value is a range */
+ if ((_dbl_greater_or_equal(_dbl val, _dblarr(res_val,0)) && _dbl_less_or_equal(_dbl val, _dblarr(res_val,1))) ||
+ (fs && (_dbl_greater_or_equal(fs->value->v.d, _dblarr(res_val,0)) && _dbl_less_or_equal(fs->value->v.d, _dblarr(res_val,1)))))
+ return 1;
+ } else if (sel_range) {
+ /* only selection value is a range */
+ if ((_dbl_greater_or_equal(_dbl val, _dbl res_val) && (_dbl_less_or_equal(_dbl val, _dbl res_val))) ||
+ (fs && (_dbl_greater_or_equal(fs->value->v.d, _dbl res_val) && _dbl_less_or_equal(fs->value->next->v.d, _dbl res_val))))
+ return 1;
+ } else {
+ /* neither selection value nor reserved value is a range */
+ if ((_dbl_equal(_dbl val, _dbl res_val)) ||
+ (fs && (_dbl_equal(fs->value->v.d, _dbl res_val))))
+ return 1;
+ }
+ break;
+
+ case DM_REPORT_FIELD_TYPE_STRING_LIST:
+ /* FIXME Add comparison for string list */
+ break;
+ case DM_REPORT_FIELD_TYPE_TIME:
+ /* FIXME Add comparison for time */
+ break;
+ }
+
+ return 0;
+}
+
/*
* Used to check whether a value of certain type used in selection is reserved.
*/
-static int _check_value_is_reserved(struct dm_report *rh, unsigned type, const void *value)
+static int _check_value_is_strictly_reserved(struct dm_report *rh, uint32_t field_num, unsigned type,
+ const void *val, struct field_selection *fs)
{
const struct dm_report_reserved_value *iter = rh->reserved_values;
+ const struct dm_report_field_reserved_value *frv;
+ int res_range;
if (!iter)
return 0;
- while (iter->type) {
- if (iter->type & type) {
- switch (type) {
- case DM_REPORT_FIELD_TYPE_NUMBER:
- if (*(uint64_t *)iter->value == *(uint64_t *)value)
- return 1;
- break;
- case DM_REPORT_FIELD_TYPE_STRING:
- if (!strcmp((const char *)iter->value, (const char *) value))
- return 1;
- break;
- case DM_REPORT_FIELD_TYPE_SIZE:
- if (_close_enough(*(double *)iter->value, *(double *) value))
- return 1;
- break;
- case DM_REPORT_FIELD_TYPE_STRING_LIST:
- /* FIXME Add comparison for string list */
- break;
- }
+ while (iter->value) {
+ /* Only check strict reserved values, not the weaker form ("named" reserved value). */
+ if (!(iter->type & DM_REPORT_FIELD_RESERVED_VALUE_NAMED)) {
+ res_range = iter->type & DM_REPORT_FIELD_RESERVED_VALUE_RANGE;
+ if ((iter->type & DM_REPORT_FIELD_TYPE_MASK) == DM_REPORT_FIELD_TYPE_NONE) {
+ frv = (const struct dm_report_field_reserved_value *) iter->value;
+ if (frv->field_num == field_num && _do_check_value_is_strictly_reserved(type, frv->value, res_range, val, fs))
+ return 1;
+ } else if (iter->type & type && _do_check_value_is_strictly_reserved(type, iter->value, res_range, val, fs))
+ return 1;
}
iter++;
}
@@ -1298,22 +1495,40 @@ static int _check_value_is_reserved(struct dm_report *rh, unsigned type, const v
return 0;
}
-static int _cmp_field_int(struct dm_report *rh, const char *field_id,
- uint64_t a, uint64_t b, uint32_t flags)
+static int _cmp_field_int(struct dm_report *rh, uint32_t field_num, const char *field_id,
+ uint64_t val, struct field_selection *fs)
{
- switch(flags & FLD_CMP_MASK) {
+ int range = fs->value->next != NULL;
+ const uint64_t sel1 = fs->value->v.i;
+ const uint64_t sel2 = range ? fs->value->next->v.i : 0;
+
+ switch(fs->flags & FLD_CMP_MASK) {
case FLD_CMP_EQUAL:
- return a == b;
+ return range ? ((val >= sel1) && (val <= sel2)) : val == sel1;
+
case FLD_CMP_NOT|FLD_CMP_EQUAL:
- return a != b;
+ return range ? !((val >= sel1) && (val <= sel2)) : val != sel1;
+
case FLD_CMP_NUMBER|FLD_CMP_GT:
- return _check_value_is_reserved(rh, DM_REPORT_FIELD_TYPE_NUMBER, &a) ? 0 : a > b;
+ if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_NUMBER, &val, fs))
+ return 0;
+ return range ? val > sel2 : val > sel1;
+
case FLD_CMP_NUMBER|FLD_CMP_GT|FLD_CMP_EQUAL:
- return _check_value_is_reserved(rh, DM_REPORT_FIELD_TYPE_NUMBER, &a) ? 0 : a >= b;
+ if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_NUMBER, &val, fs))
+ return 0;
+ return val >= sel1;
+
case FLD_CMP_NUMBER|FLD_CMP_LT:
- return _check_value_is_reserved(rh, DM_REPORT_FIELD_TYPE_NUMBER, &a) ? 0 : a < b;
+ if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_NUMBER, &val, fs))
+ return 0;
+ return val < sel1;
+
case FLD_CMP_NUMBER|FLD_CMP_LT|FLD_CMP_EQUAL:
- return _check_value_is_reserved(rh, DM_REPORT_FIELD_TYPE_NUMBER, &a) ? 0 : a <= b;
+ if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_NUMBER, &val, fs))
+ return 0;
+ return range ? val <= sel2 : val <= sel1;
+
default:
log_error(INTERNAL_ERROR "_cmp_field_int: unsupported number "
"comparison type for field %s", field_id);
@@ -1322,22 +1537,43 @@ static int _cmp_field_int(struct dm_report *rh, const char *field_id,
return 0;
}
-static int _cmp_field_double(struct dm_report *rh, const char *field_id,
- double a, double b, uint32_t flags)
+static int _cmp_field_double(struct dm_report *rh, uint32_t field_num, const char *field_id,
+ double val, struct field_selection *fs)
{
- switch(flags & FLD_CMP_MASK) {
+ int range = fs->value->next != NULL;
+ double sel1 = fs->value->v.d;
+ double sel2 = range ? fs->value->next->v.d : 0;
+
+ switch(fs->flags & FLD_CMP_MASK) {
case FLD_CMP_EQUAL:
- return _close_enough(a, b);
+ return range ? (_dbl_greater_or_equal(val, sel1) && _dbl_less_or_equal(val, sel2))
+ : _dbl_equal(val, sel1);
+
case FLD_CMP_NOT|FLD_CMP_EQUAL:
- return !_close_enough(a, b);
+ return range ? !(_dbl_greater_or_equal(val, sel1) && _dbl_less_or_equal(val, sel2))
+ : !_dbl_equal(val, sel1);
+
case FLD_CMP_NUMBER|FLD_CMP_GT:
- return _check_value_is_reserved(rh, DM_REPORT_FIELD_TYPE_SIZE, &a) ? 0 : (a > b) && !_close_enough(a, b);
+ if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_SIZE, &val, fs))
+ return 0;
+ return range ? _dbl_greater(val, sel2)
+ : _dbl_greater(val, sel1);
+
case FLD_CMP_NUMBER|FLD_CMP_GT|FLD_CMP_EQUAL:
- return _check_value_is_reserved(rh, DM_REPORT_FIELD_TYPE_SIZE, &a) ? 0 : (a > b) || _close_enough(a, b);
+ if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_SIZE, &val, fs))
+ return 0;
+ return _dbl_greater_or_equal(val, sel1);
+
case FLD_CMP_NUMBER|FLD_CMP_LT:
- return _check_value_is_reserved(rh, DM_REPORT_FIELD_TYPE_SIZE, &a) ? 0 : (a < b) && !_close_enough(a, b);
+ if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_SIZE, &val, fs))
+ return 0;
+ return _dbl_less(val, sel1);
+
case FLD_CMP_NUMBER|FLD_CMP_LT|FLD_CMP_EQUAL:
- return _check_value_is_reserved(rh, DM_REPORT_FIELD_TYPE_SIZE, &a) ? 0 : a < b || _close_enough(a, b);
+ if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_SIZE, &val, fs))
+ return 0;
+ return range ? _dbl_less_or_equal(val, sel2) : _dbl_less_or_equal(val, sel1);
+
default:
log_error(INTERNAL_ERROR "_cmp_field_double: unsupported number "
"comparison type for selection field %s", field_id);
@@ -1346,14 +1582,17 @@ static int _cmp_field_double(struct dm_report *rh, const char *field_id,
return 0;
}
-static int _cmp_field_string(struct dm_report *rh __attribute__((unused)), const char *field_id,
- const char *a, const char *b, uint32_t flags)
+static int _cmp_field_string(struct dm_report *rh __attribute__((unused)),
+ uint32_t field_num, const char *field_id,
+ const char *val, struct field_selection *fs)
{
- switch (flags & FLD_CMP_MASK) {
+ const char *sel = fs->value->v.s;
+
+ switch (fs->flags & FLD_CMP_MASK) {
case FLD_CMP_EQUAL:
- return !strcmp(a, b);
+ return !strcmp(val, sel);
case FLD_CMP_NOT|FLD_CMP_EQUAL:
- return strcmp(a, b);
+ return strcmp(val, sel);
default:
log_error(INTERNAL_ERROR "_cmp_field_string: unsupported string "
"comparison type for selection field %s", field_id);
@@ -1362,6 +1601,43 @@ static int _cmp_field_string(struct dm_report *rh __attribute__((unused)), const
return 0;
}
+static int _cmp_field_time(struct dm_report *rh,
+ uint32_t field_num, const char *field_id,
+ time_t val, struct field_selection *fs)
+{
+ int range = fs->value->next != NULL;
+ time_t sel1 = fs->value->v.t;
+ time_t sel2 = range ? fs->value->next->v.t : 0;
+
+ switch(fs->flags & FLD_CMP_MASK) {
+ case FLD_CMP_EQUAL:
+ return range ? ((val >= sel1) && (val <= sel2)) : val == sel1;
+ case FLD_CMP_NOT|FLD_CMP_EQUAL:
+ return range ? ((val >= sel1) && (val <= sel2)) : val != sel1;
+ case FLD_CMP_TIME|FLD_CMP_GT:
+ if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_TIME, &val, fs))
+ return 0;
+ return range ? val > sel2 : val > sel1;
+ case FLD_CMP_TIME|FLD_CMP_GT|FLD_CMP_EQUAL:
+ if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_TIME, &val, fs))
+ return 0;
+ return val >= sel1;
+ case FLD_CMP_TIME|FLD_CMP_LT:
+ if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_TIME, &val, fs))
+ return 0;
+ return val < sel1;
+ case FLD_CMP_TIME|FLD_CMP_LT|FLD_CMP_EQUAL:
+ if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_TIME, &val, fs))
+ return 0;
+ return range ? val <= sel2 : val <= sel1;
+ default:
+ log_error(INTERNAL_ERROR "_cmp_field_time: unsupported time "
+ "comparison type for field %s", field_id);
+ }
+
+ return 0;
+}
+
/* Matches if all items from selection string list match list value strictly 1:1. */
static int _cmp_field_string_list_strict_all(const struct str_list_sort_value *val,
const struct selection_str_list *sel)
@@ -1440,13 +1716,14 @@ static int _cmp_field_string_list_any(const struct str_list_sort_value *val,
}
static int _cmp_field_string_list(struct dm_report *rh __attribute__((unused)),
- const char *field_id,
- const struct str_list_sort_value *value,
- const struct selection_str_list *selection, uint32_t flags)
+ uint32_t field_num, const char *field_id,
+ const struct str_list_sort_value *val,
+ struct field_selection *fs)
{
+ const struct selection_str_list *sel = fs->value->v.l;
int subset, r;
- switch (selection->type & SEL_LIST_MASK) {
+ switch (sel->type & SEL_LIST_MASK) {
case SEL_LIST_LS:
subset = 0;
break;
@@ -1458,13 +1735,13 @@ static int _cmp_field_string_list(struct dm_report *rh __attribute__((unused)),
return 0;
}
- switch (selection->type & SEL_MASK) {
+ switch (sel->type & SEL_MASK) {
case SEL_AND:
- r = subset ? _cmp_field_string_list_subset_all(value, selection)
- : _cmp_field_string_list_strict_all(value, selection);
+ r = subset ? _cmp_field_string_list_subset_all(val, sel)
+ : _cmp_field_string_list_strict_all(val, sel);
break;
case SEL_OR:
- r = _cmp_field_string_list_any(value, selection);
+ r = _cmp_field_string_list_any(val, sel);
break;
default:
log_error(INTERNAL_ERROR "_cmp_field_string_list: unsupported string "
@@ -1473,13 +1750,13 @@ static int _cmp_field_string_list(struct dm_report *rh __attribute__((unused)),
return 0;
}
- return flags & FLD_CMP_NOT ? !r : r;
+ return fs->flags & FLD_CMP_NOT ? !r : r;
}
-static int _cmp_field_regex(const char *s, struct dm_regex *r, uint32_t flags)
+static int _cmp_field_regex(const char *s, struct field_selection *fs)
{
- int match = dm_regex_match(r, s) >= 0;
- return flags & FLD_CMP_NOT ? !match : match;
+ int match = dm_regex_match(fs->value->v.r, s) >= 0;
+ return fs->flags & FLD_CMP_NOT ? !match : match;
}
static int _compare_selection_field(struct dm_report *rh,
@@ -1498,7 +1775,7 @@ static int _compare_selection_field(struct dm_report *rh,
}
if (fs->flags & FLD_CMP_REGEX)
- r = _cmp_field_regex((const char *) f->sort_value, fs->v.r, fs->flags);
+ r = _cmp_field_regex((const char *) f->sort_value, fs);
else {
switch(f->props->flags & DM_REPORT_FIELD_TYPE_MASK) {
case DM_REPORT_FIELD_TYPE_PERCENT:
@@ -1510,17 +1787,19 @@ static int _compare_selection_field(struct dm_report *rh,
return 0;
/* fall through */
case DM_REPORT_FIELD_TYPE_NUMBER:
- r = _cmp_field_int(rh, field_id, *(const uint64_t *) f->sort_value, fs->v.i, fs->flags);
+ r = _cmp_field_int(rh, f->props->field_num, field_id, *(const uint64_t *) f->sort_value, fs);
break;
case DM_REPORT_FIELD_TYPE_SIZE:
- r = _cmp_field_double(rh, field_id, *(const uint64_t *) f->sort_value, fs->v.d, fs->flags);
+ r = _cmp_field_double(rh, f->props->field_num, field_id, *(const double *) f->sort_value, fs);
break;
case DM_REPORT_FIELD_TYPE_STRING:
- r = _cmp_field_string(rh, field_id, (const char *) f->sort_value, fs->v.s, fs->flags);
+ r = _cmp_field_string(rh, f->props->field_num, field_id, (const char *) f->sort_value, fs);
break;
case DM_REPORT_FIELD_TYPE_STRING_LIST:
- r = _cmp_field_string_list(rh, field_id, (const struct str_list_sort_value *) f->sort_value,
- fs->v.l, fs->flags);
+ r = _cmp_field_string_list(rh, f->props->field_num, field_id, (const struct str_list_sort_value *) f->sort_value, fs);
+ break;
+ case DM_REPORT_FIELD_TYPE_TIME:
+ r = _cmp_field_time(rh, f->props->field_num, field_id, *(const time_t *) f->sort_value, fs);
break;
default:
log_error(INTERNAL_ERROR "_compare_selection_field: unknown field type for field %s", field_id);
@@ -1575,7 +1854,7 @@ static int _check_report_selection(struct dm_report *rh, struct dm_list *fields)
return _check_selection(rh, rh->selection->selection_root, fields);
}
-int dm_report_object(struct dm_report *rh, void *object)
+static int _do_report_object(struct dm_report *rh, void *object, int do_output, int *selected)
{
const struct dm_report_field_type *fields;
struct field_properties *fp;
@@ -1586,7 +1865,13 @@ int dm_report_object(struct dm_report *rh, void *object)
int r = 0;
if (!rh) {
- log_error(INTERNAL_ERROR "dm_report handler is NULL.");
+ log_error(INTERNAL_ERROR "_do_report_object: dm_report handler is NULL.");
+ return 0;
+ }
+
+ if (!do_output && !selected) {
+ log_error(INTERNAL_ERROR "_do_report_object: output not requested and "
+ "selected output variable is NULL too.");
return 0;
}
@@ -1594,17 +1879,20 @@ int dm_report_object(struct dm_report *rh, void *object)
return 1;
if (!(row = dm_pool_zalloc(rh->mem, sizeof(*row)))) {
- log_error("dm_report_object: struct row allocation failed");
+ log_error("_do_report_object: struct row allocation failed");
return 0;
}
+ if (!rh->first_row)
+ rh->first_row = row;
+
row->rh = rh;
if ((rh->flags & RH_SORT_REQUIRED) &&
!(row->sort_fields =
dm_pool_zalloc(rh->mem, sizeof(struct dm_report_field *) *
rh->keys_count))) {
- log_error("dm_report_object: "
+ log_error("_do_report_object: "
"row sort value structure allocation failed");
goto out;
}
@@ -1615,7 +1903,7 @@ int dm_report_object(struct dm_report *rh, void *object)
/* For each field to be displayed, call its report_fn */
dm_list_iterate_items(fp, &rh->field_props) {
if (!(field = dm_pool_zalloc(rh->mem, sizeof(*field)))) {
- log_error("dm_report_object: "
+ log_error("_do_report_object: "
"struct dm_report_field allocation failed");
goto out;
}
@@ -1632,7 +1920,7 @@ int dm_report_object(struct dm_report *rh, void *object)
data = fp->implicit ? _report_get_implicit_field_data(rh, fp, row)
: _report_get_field_data(rh, fp, object);
if (!data) {
- log_error("dm_report_object: "
+ log_error("_do_report_object: "
"no data assigned to field %s",
fields[fp->field_num].id);
goto out;
@@ -1641,7 +1929,7 @@ int dm_report_object(struct dm_report *rh, void *object)
if (!fields[fp->field_num].report_fn(rh, rh->mem,
field, data,
rh->private)) {
- log_error("dm_report_object: "
+ log_error("_do_report_object: "
"report function failed for field %s",
fields[fp->field_num].id);
goto out;
@@ -1650,11 +1938,14 @@ int dm_report_object(struct dm_report *rh, void *object)
dm_list_add(&row->fields, &field->list);
}
+ r = 1;
+
if (!_check_report_selection(rh, &row->fields)) {
- if (!field_sel_status) {
- r = 1;
+ row->selected = 0;
+
+ if (!field_sel_status)
goto out;
- }
+
/*
* If field with id "selected" is reported,
* report the row although it does not pass
@@ -1662,7 +1953,6 @@ int dm_report_object(struct dm_report *rh, void *object)
* The "selected" field reports the result
* of the selection.
*/
- row->selected = 0;
_implicit_report_fields[field_sel_status->props->field_num].report_fn(rh,
rh->mem, field_sel_status, row, rh->private);
/*
@@ -1670,12 +1960,13 @@ int dm_report_object(struct dm_report *rh, void *object)
* because it is part of the sort field list,
* skip the display of the row as usual.
*/
- if (field_sel_status->props->flags & FLD_HIDDEN) {
- r = 1;
+ if (field_sel_status->props->flags & FLD_HIDDEN)
goto out;
- }
}
+ if (!do_output)
+ goto out;
+
dm_list_add(&rh->rows, &row->list);
dm_list_iterate_items(field, &row->fields) {
@@ -1691,10 +1982,10 @@ int dm_report_object(struct dm_report *rh, void *object)
if (!(rh->flags & DM_REPORT_OUTPUT_BUFFERED))
return dm_report_output(rh);
-
- r = 1;
out:
- if (!r)
+ if (selected)
+ *selected = row->selected;
+ if (!do_output || !r)
dm_pool_free(rh->mem, row);
return r;
}
@@ -1705,15 +1996,15 @@ int dm_report_compact_fields(struct dm_report *rh)
struct field_properties *fp;
struct row *row;
- if (!(rh->flags & DM_REPORT_OUTPUT_BUFFERED) ||
- dm_list_empty(&rh->rows))
- return 1;
-
if (!rh) {
log_error("dm_report_enable_compact_output: dm report handler is NULL.");
return 0;
}
+ if (!(rh->flags & DM_REPORT_OUTPUT_BUFFERED) ||
+ dm_list_empty(&rh->rows))
+ return 1;
+
/*
* At first, mark all fields with FLD_HIDDEN flag.
* Also, mark field with FLD_COMPACTED flag, but only
@@ -1751,6 +2042,16 @@ int dm_report_compact_fields(struct dm_report *rh)
return 1;
}
+int dm_report_object(struct dm_report *rh, void *object)
+{
+ return _do_report_object(rh, object, 1, NULL);
+}
+
+int dm_report_object_is_selected(struct dm_report *rh, void *object, int do_output, int *selected)
+{
+ return _do_report_object(rh, object, do_output, selected);
+}
+
/*
* Selection parsing
*/
@@ -1907,14 +2208,48 @@ static const char *_tok_value_string(const char *s,
return s;
}
-static const char *_reserved_name(const char **names, const char *s, size_t len)
+static const char *_reserved_name(struct dm_report *rh,
+ const struct dm_report_reserved_value *reserved,
+ const struct dm_report_field_reserved_value *frv,
+ uint32_t field_num, const char *s, size_t len)
{
- const char **name = names;
+ dm_report_reserved_handler handler;
+ const char *canonical_name;
+ const char **name;
+ char *tmp_s;
+ char c;
+ int r;
+
+ name = reserved->names;
while (*name) {
if ((strlen(*name) == len) && !strncmp(*name, s, len))
return *name;
name++;
}
+
+ if (reserved->type & DM_REPORT_FIELD_RESERVED_VALUE_FUZZY_NAMES) {
+ handler = (dm_report_reserved_handler) frv ? frv->value : reserved->value;
+ c = s[len];
+ tmp_s = (char *) s;
+ tmp_s[len] = '\0';
+ if ((r = handler(rh, rh->selection->mem, field_num,
+ DM_REPORT_RESERVED_PARSE_FUZZY_NAME,
+ tmp_s, (const void **) &canonical_name)) <= 0) {
+ if (r == -1)
+ log_error(INTERNAL_ERROR "%s reserved value handler for field %s has missing "
+ "implementation of DM_REPORT_RESERVED_PARSE_FUZZY_NAME action",
+ (reserved->type & DM_REPORT_FIELD_TYPE_MASK) ? "type-specific" : "field-specific",
+ rh->fields[field_num].id);
+ else
+ log_error("Error occured while processing %s reserved value handler for field %s",
+ (reserved->type & DM_REPORT_FIELD_TYPE_MASK) ? "type-specific" : "field-specific",
+ rh->fields[field_num].id);
+ }
+ tmp_s[len] = c;
+ if (r && canonical_name)
+ return canonical_name;
+ }
+
return NULL;
}
@@ -1925,14 +2260,15 @@ static const char *_reserved_name(const char **names, const char *s, size_t len)
static const char *_get_reserved(struct dm_report *rh, unsigned type,
uint32_t field_num, int implicit,
const char *s, const char **begin, const char **end,
- const struct dm_report_reserved_value **reserved)
+ struct reserved_value_wrapper *rvw)
{
const struct dm_report_reserved_value *iter = implicit ? NULL : rh->reserved_values;
+ const struct dm_report_field_reserved_value *frv;
const char *tmp_begin, *tmp_end, *tmp_s = s;
const char *name = NULL;
char c;
- *reserved = NULL;
+ rvw->reserved = NULL;
if (!iter)
return s;
@@ -1942,14 +2278,16 @@ static const char *_get_reserved(struct dm_report *rh, unsigned type,
return s;
while (iter->value) {
- if (!iter->type) {
+ if (!(iter->type & DM_REPORT_FIELD_TYPE_MASK)) {
/* DM_REPORT_FIELD_TYPE_NONE - per-field reserved value */
- if (((((const struct dm_report_field_reserved_value *) iter->value)->field_num) == field_num) &&
- (name = _reserved_name(iter->names, tmp_begin, tmp_end - tmp_begin)))
+ frv = (const struct dm_report_field_reserved_value *) iter->value;
+ if ((frv->field_num == field_num) && (name = _reserved_name(rh, iter, frv, field_num,
+ tmp_begin, tmp_end - tmp_begin)))
break;
} else if (iter->type & type) {
/* DM_REPORT_FIELD_TYPE_* - per-type reserved value */
- if ((name = _reserved_name(iter->names, tmp_begin, tmp_end - tmp_begin)))
+ if ((name = _reserved_name(rh, iter, NULL, field_num,
+ tmp_begin, tmp_end - tmp_begin)))
break;
}
iter++;
@@ -1960,7 +2298,8 @@ static const char *_get_reserved(struct dm_report *rh, unsigned type,
*begin = tmp_begin;
*end = tmp_end;
s = tmp_s;
- *reserved = iter;
+ rvw->reserved = iter;
+ rvw->matched_name = name;
}
return s;
@@ -1991,6 +2330,21 @@ dm_percent_t dm_make_percent(uint64_t numerator, uint64_t denominator)
}
}
+int dm_report_value_cache_set(struct dm_report *rh, const char *name, const void *data)
+{
+ if (!rh->value_cache && (!(rh->value_cache = dm_hash_create(64)))) {
+ log_error("Failed to create cache for values used during reporting.");
+ return 0;
+ }
+
+ return dm_hash_insert(rh->value_cache, name, (void *) data);
+}
+
+const void *dm_report_value_cache_get(struct dm_report *rh, const char *name)
+{
+ return (rh->value_cache) ? dm_hash_lookup(rh->value_cache, name) : NULL;
+}
+
/*
* Used to check whether the reserved_values definition passed to
* dm_report_init_with_selection contains only supported reserved value types.
@@ -2004,7 +2358,14 @@ static int _check_reserved_values_supported(const struct dm_report_field_type fi
static uint32_t supported_reserved_types = DM_REPORT_FIELD_TYPE_NUMBER |
DM_REPORT_FIELD_TYPE_SIZE |
DM_REPORT_FIELD_TYPE_PERCENT |
- DM_REPORT_FIELD_TYPE_STRING;
+ DM_REPORT_FIELD_TYPE_STRING |
+ DM_REPORT_FIELD_TYPE_TIME;
+ static uint32_t supported_reserved_types_with_range = DM_REPORT_FIELD_RESERVED_VALUE_RANGE |
+ DM_REPORT_FIELD_TYPE_NUMBER |
+ DM_REPORT_FIELD_TYPE_SIZE |
+ DM_REPORT_FIELD_TYPE_PERCENT |
+ DM_REPORT_FIELD_TYPE_TIME;
+
if (!reserved_values)
return 1;
@@ -2012,8 +2373,10 @@ static int _check_reserved_values_supported(const struct dm_report_field_type fi
iter = reserved_values;
while (iter->value) {
- if (iter->type) {
- if (!(iter->type & supported_reserved_types)) {
+ if (iter->type & DM_REPORT_FIELD_TYPE_MASK) {
+ if (!(iter->type & supported_reserved_types) ||
+ ((iter->type & DM_REPORT_FIELD_RESERVED_VALUE_RANGE) &&
+ !(iter->type & supported_reserved_types_with_range))) {
log_error(INTERNAL_ERROR "_check_reserved_values_supported: "
"global reserved value for type 0x%x not supported",
iter->type);
@@ -2022,7 +2385,9 @@ static int _check_reserved_values_supported(const struct dm_report_field_type fi
} else {
field_res = (const struct dm_report_field_reserved_value *) iter->value;
field = &fields[field_res->field_num];
- if (!(field->flags & supported_reserved_types)) {
+ if (!(field->flags & supported_reserved_types) ||
+ ((iter->type & DM_REPORT_FIELD_RESERVED_VALUE_RANGE) &&
+ !(iter->type & supported_reserved_types_with_range))) {
log_error(INTERNAL_ERROR "_check_reserved_values_supported: "
"field-specific reserved value of type 0x%x for "
"field %s not supported",
@@ -2049,10 +2414,10 @@ static const char *_tok_value_regex(struct dm_report *rh,
const struct dm_report_field_type *ft,
const char *s, const char **begin,
const char **end, uint32_t *flags,
- const struct dm_report_reserved_value **reserved)
+ struct reserved_value_wrapper *rvw)
{
char c;
- *reserved = NULL;
+ rvw->reserved = NULL;
s = _skip_space(s);
@@ -2081,8 +2446,8 @@ static const char *_tok_value_regex(struct dm_report *rh,
static int _str_list_item_cmp(const void *a, const void *b)
{
- const struct dm_str_list **item_a = (const struct dm_str_list **) a;
- const struct dm_str_list **item_b = (const struct dm_str_list **) b;
+ const struct dm_str_list * const *item_a = (const struct dm_str_list * const *) a;
+ const struct dm_str_list * const *item_b = (const struct dm_str_list * const *) b;
return strcmp((*item_a)->str, (*item_b)->str);
}
@@ -2257,6 +2622,456 @@ bad:
return s;
}
+struct time_value {
+ int range;
+ time_t t1;
+ time_t t2;
+};
+
+static const char *_out_of_range_msg = "Field selection value %s out of supported range for field %s.";
+
+/*
+ * Standard formatted date and time - ISO8601.
+ *
+ * date time timezone
+ *
+ * date:
+ * YYYY-MM-DD (or shortly YYYYMMDD)
+ * YYYY-MM (shortly YYYYMM), auto DD=1
+ * YYYY, auto MM=01 and DD=01
+ *
+ * time:
+ * hh:mm:ss (or shortly hhmmss)
+ * hh:mm (or shortly hhmm), auto ss=0
+ * hh (or shortly hh), auto mm=0, auto ss=0
+ *
+ * timezone:
+ * +hh:mm or -hh:mm (or shortly +hhmm or -hhmm)
+ * +hh or -hh
+*/
+
+#define DELIM_DATE '-'
+#define DELIM_TIME ':'
+
+static int _days_in_month[12] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
+
+static int _is_leap_year(long year)
+{
+ return (((year % 4==0) && (year % 100 != 0)) || (year % 400 == 0));
+}
+
+static int _get_days_in_month(long month, long year)
+{
+ return (month == 2 && _is_leap_year(year)) ? _days_in_month[month-1] + 1
+ : _days_in_month[month-1];
+}
+
+typedef enum {
+ RANGE_NONE,
+ RANGE_SECOND,
+ RANGE_MINUTE,
+ RANGE_HOUR,
+ RANGE_DAY,
+ RANGE_MONTH,
+ RANGE_YEAR
+} time_range_t;
+
+static char *_get_date(char *str, struct tm *tm, time_range_t *range)
+{
+ static const char incorrect_date_format_msg[] = "Incorrect date format.";
+ time_range_t tmp_range = RANGE_NONE;
+ long n1, n2 = -1, n3 = -1;
+ char *s = str, *end;
+ size_t len = 0;
+
+ if (!isdigit(*s))
+ /* we need a year at least */
+ return NULL;
+
+ n1 = strtol(s, &end, 10);
+ if (*end == DELIM_DATE) {
+ len += (4 - (end - s)); /* diff in length from standard YYYY */
+ s = end + 1;
+ if (isdigit(*s)) {
+ n2 = strtol(s, &end, 10);
+ len += (2 - (end - s)); /* diff in length from standard MM */
+ if (*end == DELIM_DATE) {
+ s = end + 1;
+ n3 = strtol(s, &end, 10);
+ len += (2 - (end - s)); /* diff in length from standard DD */
+ }
+ }
+ }
+
+ len = len + end - str;
+
+ /* variations from standard YYYY-MM-DD */
+ if (n3 == -1) {
+ if (n2 == -1) {
+ if (len == 4) {
+ /* YYYY */
+ tmp_range = RANGE_YEAR;
+ n3 = n2 = 1;
+ } else if (len == 6) {
+ /* YYYYMM */
+ tmp_range = RANGE_MONTH;
+ n3 = 1;
+ n2 = n1 % 100;
+ n1 = n1 / 100;
+ } else if (len == 8) {
+ tmp_range = RANGE_DAY;
+ /* YYYYMMDD */
+ n3 = n1 % 100;
+ n2 = (n1 / 100) % 100;
+ n1 = n1 / 10000;
+ } else {
+ log_error(incorrect_date_format_msg);
+ return NULL;
+ }
+ } else {
+ if (len == 7) {
+ tmp_range = RANGE_MONTH;
+ /* YYYY-MM */
+ n3 = 1;
+ } else {
+ log_error(incorrect_date_format_msg);
+ return NULL;
+ }
+ }
+ }
+
+ if (n2 < 1 || n2 > 12) {
+ log_error("Specified month out of range.");
+ return NULL;
+ }
+
+ if (n3 < 1 || n3 > _get_days_in_month(n2, n1)) {
+ log_error("Specified day out of range.");
+ return NULL;
+ }
+
+ if (tmp_range == RANGE_NONE)
+ tmp_range = RANGE_DAY;
+
+ tm->tm_year = n1 - 1900;
+ tm->tm_mon = n2 - 1;
+ tm->tm_mday = n3;
+ *range = tmp_range;
+
+ return (char *) _skip_space(end);
+}
+
+static char *_get_time(char *str, struct tm *tm, time_range_t *range)
+{
+ static const char incorrect_time_format_msg[] = "Incorrect time format.";
+ time_range_t tmp_range = RANGE_NONE;
+ long n1, n2 = -1, n3 = -1;
+ char *s = str, *end;
+ size_t len = 0;
+
+ if (!isdigit(*s)) {
+ /* time is not compulsory */
+ tm->tm_hour = tm->tm_min = tm->tm_sec = 0;
+ return (char *) _skip_space(s);
+ }
+
+ n1 = strtol(s, &end, 10);
+ if (*end == DELIM_TIME) {
+ len += (2 - (end - s)); /* diff in length from standard HH */
+ s = end + 1;
+ if (isdigit(*s)) {
+ n2 = strtol(s, &end, 10);
+ len += (2 - (end - s)); /* diff in length from standard MM */
+ if (*end == DELIM_TIME) {
+ s = end + 1;
+ n3 = strtol(s, &end, 10);
+ len += (2 - (end - s)); /* diff in length from standard SS */
+ }
+ }
+ }
+
+ len = len + end - str;
+
+ /* variations from standard HH:MM:SS */
+ if (n3 == -1) {
+ if (n2 == -1) {
+ if (len == 2) {
+ /* HH */
+ tmp_range = RANGE_HOUR;
+ n3 = n2 = 0;
+ } else if (len == 4) {
+ /* HHMM */
+ tmp_range = RANGE_MINUTE;
+ n3 = 0;
+ n2 = n1 % 100;
+ n1 = n1 / 100;
+ } else if (len == 6) {
+ /* HHMMSS */
+ tmp_range = RANGE_SECOND;
+ n3 = n1 % 100;
+ n2 = (n1 / 100) % 100;
+ n1 = n1 / 10000;
+ } else {
+ log_error(incorrect_time_format_msg);
+ return NULL;
+ }
+ } else {
+ if (len == 5) {
+ /* HH:MM */
+ tmp_range = RANGE_MINUTE;
+ n3 = 0;
+ } else {
+ log_error(incorrect_time_format_msg);
+ return NULL;
+ }
+ }
+ }
+
+ if (n1 < 0 || n1 > 23) {
+ log_error("Specified hours out of range.");
+ return NULL;
+ }
+
+ if (n2 < 0 || n2 > 60) {
+ log_error("Specified minutes out of range.");
+ return NULL;
+ }
+
+ if (n3 < 0 || n3 > 60) {
+ log_error("Specified seconds out of range.");
+ return NULL;
+ }
+
+ /* Just time without exact date is incomplete! */
+ if (*range != RANGE_DAY) {
+ log_error("Full date specification needed.");
+ return NULL;
+ }
+
+ tm->tm_hour = n1;
+ tm->tm_min = n2;
+ tm->tm_sec = n3;
+ *range = tmp_range;
+
+ return (char *) _skip_space(end);
+}
+
+/* The offset is always an absolute offset against GMT! */
+static char *_get_tz(char *str, int *tz_supplied, int *offset)
+{
+ long n1, n2 = -1;
+ char *s = str, *end;
+ int sign = 1; /* +HH:MM by default */
+ size_t len = 0;
+
+ *tz_supplied = 0;
+ *offset = 0;
+
+ if (!isdigit(*s)) {
+ if (*s == '+') {
+ sign = 1;
+ s = s + 1;
+ } else if (*s == '-') {
+ sign = -1;
+ s = s + 1;
+ } else
+ return (char *) _skip_space(s);
+ }
+
+ n1 = strtol(s, &end, 10);
+ if (*end == DELIM_TIME) {
+ len = (2 - (end - s)); /* diff in length from standard HH */
+ s = end + 1;
+ if (isdigit(*s)) {
+ n2 = strtol(s, &end, 10);
+ len = (2 - (end - s)); /* diff in length from standard MM */
+ }
+ }
+
+ len = len + end - s;
+
+ /* variations from standard HH:MM */
+ if (n2 == -1) {
+ if (len == 2) {
+ /* HH */
+ n2 = 0;
+ } else if (len == 4) {
+ /* HHMM */
+ n2 = n1 % 100;
+ n1 = n1 / 100;
+ } else
+ return NULL;
+ }
+
+ if (n2 < 0 || n2 > 60)
+ return NULL;
+
+ if (n1 < 0 || n1 > 14)
+ return NULL;
+
+ /* timezone offset in seconds */
+ *offset = sign * ((n1 * 3600) + (n2 * 60));
+ *tz_supplied = 1;
+ return (char *) _skip_space(end);
+}
+
+static int _local_tz_offset(time_t t_local)
+{
+ struct tm tm_gmt;
+ time_t t_gmt;
+
+ gmtime_r(&t_local, &tm_gmt);
+ t_gmt = mktime(&tm_gmt);
+
+ /*
+ * gmtime returns time that is adjusted
+ * for DST.Subtract this adjustment back
+ * to give us proper *absolute* offset
+ * for our local timezone.
+ */
+ if (tm_gmt.tm_isdst)
+ t_gmt -= 3600;
+
+ return t_local - t_gmt;
+}
+
+static void _get_final_time(time_range_t range, struct tm *tm,
+ int tz_supplied, int offset,
+ struct time_value *tval)
+{
+
+ struct tm tm_up = *tm;
+
+ switch (range) {
+ case RANGE_SECOND:
+ if (tm_up.tm_sec < 59) {
+ tm_up.tm_sec += 1;
+ break;
+ }
+ case RANGE_MINUTE:
+ if (tm_up.tm_min < 59) {
+ tm_up.tm_min += 1;
+ break;
+ }
+ case RANGE_HOUR:
+ if (tm_up.tm_hour < 23) {
+ tm_up.tm_hour += 1;
+ break;
+ }
+ case RANGE_DAY:
+ if (tm_up.tm_mday < _get_days_in_month(tm_up.tm_mon, tm_up.tm_year)) {
+ tm_up.tm_mday += 1;
+ break;
+ }
+ case RANGE_MONTH:
+ if (tm_up.tm_mon < 11) {
+ tm_up.tm_mon += 1;
+ break;
+ }
+ case RANGE_YEAR:
+ tm_up.tm_year += 1;
+ break;
+ case RANGE_NONE:
+ /* nothing to do here */
+ break;
+ }
+
+ tval->range = (range != RANGE_NONE);
+ tval->t1 = mktime(tm);
+ tval->t2 = mktime(&tm_up) - 1;
+
+ if (tz_supplied) {
+ /*
+ * The 'offset' is with respect to the GMT.
+ * Calculate what the offset is with respect
+ * to our local timezone and adjust times
+ * so they represent time in our local timezone.
+ */
+ offset -= _local_tz_offset(tval->t1);
+ tval->t1 -= offset;
+ tval->t2 -= offset;
+ }
+}
+
+static int _parse_formatted_date_time(char *str, struct time_value *tval)
+{
+ time_range_t range = RANGE_NONE;
+ struct tm tm = {0};
+ int gmt_offset;
+ int tz_supplied;
+
+ tm.tm_year = tm.tm_mday = tm.tm_mon = -1;
+ tm.tm_hour = tm.tm_min = tm.tm_sec = -1;
+ tm.tm_isdst = tm.tm_wday = tm.tm_yday = -1;
+
+ if (!(str = _get_date(str, &tm, &range)))
+ return 0;
+
+ if (!(str = _get_time(str, &tm, &range)))
+ return 0;
+
+ if (!(str = _get_tz(str, &tz_supplied, &gmt_offset)))
+ return 0;
+
+ if (*str)
+ return 0;
+
+ _get_final_time(range, &tm, tz_supplied, gmt_offset, tval);
+
+ return 1;
+}
+
+static const char *_tok_value_time(const struct dm_report_field_type *ft,
+ struct dm_pool *mem, const char *s,
+ const char **begin, const char **end,
+ struct time_value *tval)
+{
+ char *time_str = NULL;
+ const char *r = NULL;
+ uint64_t t;
+ char c;
+
+ s = _skip_space(s);
+
+ if (*s == '@') {
+ /* Absolute time value in number of seconds since epoch. */
+ if (!(s = _tok_value_number(s+1, begin, end)))
+ goto_out;
+
+ if (!(time_str = dm_pool_strndup(mem, *begin, *end - *begin))) {
+ log_error("_tok_value_time: dm_pool_strndup failed");
+ goto out;
+ }
+
+ if (((t = strtoull(time_str, NULL, 10)) == ULLONG_MAX) && errno == ERANGE) {
+ log_error(_out_of_range_msg, time_str, ft->id);
+ goto out;
+ }
+
+ tval->range = 0;
+ tval->t1 = (time_t) t;
+ tval->t2 = 0;
+ r = s;
+ } else {
+ c = _get_and_skip_quote_char(&s);
+ if (!(s = _tok_value_string(s, begin, end, c, SEL_AND | SEL_OR | SEL_PRECEDENCE_PE, NULL)))
+ goto_out;
+
+ if (!(time_str = dm_pool_strndup(mem, *begin, *end - *begin))) {
+ log_error("tok_value_time: dm_pool_strndup failed");
+ goto out;
+ }
+
+ if (!_parse_formatted_date_time(time_str, tval))
+ goto_out;
+ r = s;
+ }
+out:
+ if (time_str)
+ dm_pool_free(mem, time_str);
+ return r;
+}
+
/*
* Input:
* ft - field type for which the value is parsed
@@ -2275,19 +3090,28 @@ static const char *_tok_value(struct dm_report *rh,
const char *s,
const char **begin, const char **end,
uint32_t *flags,
- const struct dm_report_reserved_value **reserved,
+ struct reserved_value_wrapper *rvw,
struct dm_pool *mem, void *custom)
{
int expected_type = ft->flags & DM_REPORT_FIELD_TYPE_MASK;
struct selection_str_list **str_list;
+ struct time_value *tval;
uint64_t *factor;
const char *tmp;
char c;
s = _skip_space(s);
- s = _get_reserved(rh, expected_type, field_num, implicit, s, begin, end, reserved);
- if (*reserved) {
+ s = _get_reserved(rh, expected_type, field_num, implicit, s, begin, end, rvw);
+ if (rvw->reserved) {
+ /*
+ * FLD_CMP_NUMBER shares operators with FLD_CMP_TIME,
+ * so adjust flags here based on expected type.
+ */
+ if (expected_type == DM_REPORT_FIELD_TYPE_TIME)
+ *flags &= ~FLD_CMP_NUMBER;
+ else if (expected_type == DM_REPORT_FIELD_TYPE_NUMBER)
+ *flags &= ~FLD_CMP_TIME;
*flags |= expected_type;
return s;
}
@@ -2359,6 +3183,28 @@ static const char *_tok_value(struct dm_report *rh,
}
*flags |= expected_type;
+ /*
+ * FLD_CMP_NUMBER shares operators with FLD_CMP_TIME,
+ * but we have NUMBER here, so remove FLD_CMP_TIME.
+ */
+ *flags &= ~FLD_CMP_TIME;
+ break;
+
+ case DM_REPORT_FIELD_TYPE_TIME:
+ tval = (struct time_value *) custom;
+ if (!(s = _tok_value_time(ft, mem, s, begin, end, tval))) {
+ log_error("Failed to parse time value "
+ "for selection field %s.", ft->id);
+ return NULL;
+ }
+
+ *flags |= DM_REPORT_FIELD_TYPE_TIME;
+ /*
+ * FLD_CMP_TIME shares operators with FLD_CMP_NUMBER,
+ * but we have TIME here, so remove FLD_CMP_NUMBER.
+ */
+ *flags &= ~FLD_CMP_NUMBER;
+ break;
}
return s;
@@ -2389,12 +3235,45 @@ static const char *_tok_field_name(const char *s,
return s;
}
-static const void *_get_reserved_value(const struct dm_report_reserved_value *reserved)
+static int _get_reserved_value(struct dm_report *rh, uint32_t field_num,
+ struct reserved_value_wrapper *rvw)
{
- if (reserved->type)
- return reserved->value;
+ const void *tmp_value;
+ dm_report_reserved_handler handler;
+ int r;
+
+ if (!rvw->reserved) {
+ rvw->value = NULL;
+ return 1;
+ }
+
+ if (rvw->reserved->type & DM_REPORT_FIELD_TYPE_MASK)
+ /* type reserved value */
+ tmp_value = rvw->reserved->value;
else
- return ((const struct dm_report_field_reserved_value *) reserved->value)->value;
+ /* per-field reserved value */
+ tmp_value = ((const struct dm_report_field_reserved_value *) rvw->reserved->value)->value;
+
+ if (rvw->reserved->type & (DM_REPORT_FIELD_RESERVED_VALUE_DYNAMIC_VALUE | DM_REPORT_FIELD_RESERVED_VALUE_FUZZY_NAMES)) {
+ handler = (dm_report_reserved_handler) tmp_value;
+ if ((r = handler(rh, rh->selection->mem, field_num,
+ DM_REPORT_RESERVED_GET_DYNAMIC_VALUE,
+ rvw->matched_name, &tmp_value) <= 0)) {
+ if (r == -1)
+ log_error(INTERNAL_ERROR "%s reserved value handler for field %s has missing"
+ "implementation of DM_REPORT_RESERVED_GET_DYNAMIC_VALUE action",
+ (rvw->reserved->type) & DM_REPORT_FIELD_TYPE_MASK ? "type-specific" : "field-specific",
+ rh->fields[field_num].id);
+ else
+ log_error("Error occured while processing %s reserved value handler for field %s",
+ (rvw->reserved->type) & DM_REPORT_FIELD_TYPE_MASK ? "type-specific" : "field-specific",
+ rh->fields[field_num].id);
+ return 0;
+ }
+ }
+
+ rvw->value = tmp_value;
+ return 1;
}
static struct field_selection *_create_field_selection(struct dm_report *rh,
@@ -2403,15 +3282,16 @@ static struct field_selection *_create_field_selection(struct dm_report *rh,
const char *v,
size_t len,
uint32_t flags,
- const struct dm_report_reserved_value *reserved,
+ struct reserved_value_wrapper *rvw,
void *custom)
{
- static const char *_out_of_range_msg = "Field selection value %s out of supported range for field %s.";
+ static const char *_field_selection_value_alloc_failed_msg = "dm_report: struct field_selection_value allocation failed for selection field %s";
const struct dm_report_field_type *fields = implicit ? _implicit_report_fields
: rh->fields;
struct field_properties *fp, *found = NULL;
struct field_selection *fs;
const char *field_id;
+ struct time_value *tval;
uint64_t factor;
char *s;
@@ -2443,9 +3323,29 @@ static struct field_selection *_create_field_selection(struct dm_report *rh,
"allocation failed for selection field %s", field_id);
return NULL;
}
+
+ if (!(fs->value = dm_pool_zalloc(rh->selection->mem, sizeof(struct field_selection_value)))) {
+ log_error(_field_selection_value_alloc_failed_msg, field_id);
+ goto error;
+ }
+
+ if (((rvw->reserved && (rvw->reserved->type & DM_REPORT_FIELD_RESERVED_VALUE_RANGE)) ||
+ (((flags & DM_REPORT_FIELD_TYPE_MASK) == DM_REPORT_FIELD_TYPE_TIME) && ((struct time_value *) custom)->range))
+ &&
+ !(fs->value->next = dm_pool_zalloc(rh->selection->mem, sizeof(struct field_selection_value)))) {
+ log_error(_field_selection_value_alloc_failed_msg, field_id);
+ goto error;
+ }
+
fs->fp = found;
fs->flags = flags;
+ if (!_get_reserved_value(rh, field_num, rvw)) {
+ log_error("dm_report: could not get reserved value "
+ "while processing selection field %s", field_id);
+ goto error;
+ }
+
/* store comparison operand */
if (flags & FLD_CMP_REGEX) {
/* REGEX */
@@ -2457,96 +3357,118 @@ static struct field_selection *_create_field_selection(struct dm_report *rh,
memcpy(s, v, len);
s[len] = '\0';
- fs->v.r = dm_regex_create(rh->selection->mem, (const char **) &s, 1);
+ fs->value->v.r = dm_regex_create(rh->selection->mem, (const char * const *) &s, 1);
dm_free(s);
- if (!fs->v.r) {
+ if (!fs->value->v.r) {
log_error("dm_report: failed to create regex "
"matcher for selection field %s", field_id);
goto error;
}
} else {
- /* STRING, NUMBER, SIZE or STRING_LIST */
- if (!(s = dm_pool_alloc(rh->selection->mem, len + 1))) {
- log_error("dm_report: dm_pool_alloc failed to store "
- "value for selection field %s", field_id);
+ /* STRING, NUMBER, SIZE, PERCENT, STRING_LIST, TIME */
+ if (!(s = dm_pool_strndup(rh->selection->mem, v, len))) {
+ log_error("dm_report: dm_pool_strndup for value "
+ "of selection field %s", field_id);
goto error;
}
- memcpy(s, v, len);
- s[len] = '\0';
switch (flags & DM_REPORT_FIELD_TYPE_MASK) {
case DM_REPORT_FIELD_TYPE_STRING:
- if (reserved) {
- fs->v.s = (const char *) _get_reserved_value(reserved);
+ if (rvw->value) {
+ fs->value->v.s = (const char *) rvw->value;
+ if (rvw->reserved->type & DM_REPORT_FIELD_RESERVED_VALUE_RANGE)
+ fs->value->next->v.s = (((const char * const *) rvw->value)[1]);
dm_pool_free(rh->selection->mem, s);
} else {
- fs->v.s = s;
- if (_check_value_is_reserved(rh, DM_REPORT_FIELD_TYPE_STRING, fs->v.s)) {
- log_error("String value %s found in selection is reserved.", fs->v.s);
+ fs->value->v.s = s;
+ if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_STRING, fs->value->v.s, NULL)) {
+ log_error("String value %s found in selection is reserved.", fs->value->v.s);
goto error;
}
}
break;
case DM_REPORT_FIELD_TYPE_NUMBER:
- if (reserved)
- fs->v.i = *(uint64_t *) _get_reserved_value(reserved);
- else {
- if (((fs->v.i = strtoull(s, NULL, 10)) == ULLONG_MAX) &&
+ if (rvw->value) {
+ fs->value->v.i = *(const uint64_t *) rvw->value;
+ if (rvw->reserved->type & DM_REPORT_FIELD_RESERVED_VALUE_RANGE)
+ fs->value->next->v.i = (((const uint64_t *) rvw->value)[1]);
+ } else {
+ if (((fs->value->v.i = strtoull(s, NULL, 10)) == ULLONG_MAX) &&
(errno == ERANGE)) {
log_error(_out_of_range_msg, s, field_id);
goto error;
}
- if (_check_value_is_reserved(rh, DM_REPORT_FIELD_TYPE_NUMBER, &fs->v.i)) {
- log_error("Numeric value %" PRIu64 " found in selection is reserved.", fs->v.i);
+ if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_NUMBER, &fs->value->v.i, NULL)) {
+ log_error("Numeric value %" PRIu64 " found in selection is reserved.", fs->value->v.i);
goto error;
}
}
dm_pool_free(rh->selection->mem, s);
break;
case DM_REPORT_FIELD_TYPE_SIZE:
- if (reserved)
- fs->v.d = (double) * (uint64_t *) _get_reserved_value(reserved);
- else {
- fs->v.d = strtod(s, NULL);
+ if (rvw->value) {
+ fs->value->v.d = *(const double *) rvw->value;
+ if (rvw->reserved->type & DM_REPORT_FIELD_RESERVED_VALUE_RANGE)
+ fs->value->next->v.d = (((const double *) rvw->value)[1]);
+ } else {
+ fs->value->v.d = strtod(s, NULL);
if (errno == ERANGE) {
log_error(_out_of_range_msg, s, field_id);
goto error;
}
- if (custom && (factor = *((uint64_t *)custom)))
- fs->v.d *= factor;
- fs->v.d /= 512; /* store size in sectors! */
- if (_check_value_is_reserved(rh, DM_REPORT_FIELD_TYPE_SIZE, &fs->v.d)) {
- log_error("Size value %f found in selection is reserved.", fs->v.d);
+ if (custom && (factor = *((const uint64_t *)custom)))
+ fs->value->v.d *= factor;
+ fs->value->v.d /= 512; /* store size in sectors! */
+ if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_SIZE, &fs->value->v.d, NULL)) {
+ log_error("Size value %f found in selection is reserved.", fs->value->v.d);
goto error;
}
}
dm_pool_free(rh->selection->mem, s);
break;
case DM_REPORT_FIELD_TYPE_PERCENT:
- if (reserved)
- fs->v.i = *(uint64_t *) _get_reserved_value(reserved);
- else {
- fs->v.d = strtod(s, NULL);
- if ((errno == ERANGE) || (fs->v.d < 0) || (fs->v.d > 100)) {
+ if (rvw->value) {
+ fs->value->v.i = *(const uint64_t *) rvw->value;
+ if (rvw->reserved->type & DM_REPORT_FIELD_RESERVED_VALUE_RANGE)
+ fs->value->next->v.i = (((const uint64_t *) rvw->value)[1]);
+ } else {
+ fs->value->v.d = strtod(s, NULL);
+ if ((errno == ERANGE) || (fs->value->v.d < 0) || (fs->value->v.d > 100)) {
log_error(_out_of_range_msg, s, field_id);
goto error;
}
- fs->v.i = (dm_percent_t) (DM_PERCENT_1 * fs->v.d);
+ fs->value->v.i = (dm_percent_t) (DM_PERCENT_1 * fs->value->v.d);
- if (_check_value_is_reserved(rh, DM_REPORT_FIELD_TYPE_PERCENT, &fs->v.i)) {
+ if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_PERCENT, &fs->value->v.i, NULL)) {
log_error("Percent value %s found in selection is reserved.", s);
goto error;
}
}
break;
case DM_REPORT_FIELD_TYPE_STRING_LIST:
- fs->v.l = *(struct selection_str_list **)custom;
- if (_check_value_is_reserved(rh, DM_REPORT_FIELD_TYPE_STRING_LIST, fs->v.l)) {
+ fs->value->v.l = *(struct selection_str_list **)custom;
+ if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_STRING_LIST, fs->value->v.l, NULL)) {
log_error("String list value found in selection is reserved.");
goto error;
}
break;
+ case DM_REPORT_FIELD_TYPE_TIME:
+ if (rvw->value) {
+ fs->value->v.t = *(const time_t *) rvw->value;
+ if (rvw->reserved->type & DM_REPORT_FIELD_RESERVED_VALUE_RANGE)
+ fs->value->next->v.t = (((const time_t *) rvw->value)[1]);
+ } else {
+ tval = (struct time_value *) custom;
+ fs->value->v.t = tval->t1;
+ if (tval->range)
+ fs->value->next->v.t = tval->t2;
+ if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_TIME, &fs->value->v.t, NULL)) {
+ log_error("Time value found in selection is reserved.");
+ goto error;
+ }
+ }
+ break;
default:
log_error(INTERNAL_ERROR "_create_field_selection: "
"unknown type of selection field %s", field_id);
@@ -2593,7 +3515,7 @@ static void _display_selection_help(struct dm_report *rh)
log_warn(" size - Floating point value with units, 'm' unit used by default if not specified.");
log_warn(" percent - Non-negative integer with or without %% suffix.");
log_warn(" string - Characters quoted by \' or \" or unquoted.");
- log_warn(" string list - Strings enclosed by [ ] and elements delimited by either");
+ log_warn(" string list - Strings enclosed by [ ] or { } and elements delimited by either");
log_warn(" \"all items must match\" or \"at least one item must match\" operator.");
log_warn(" regular expression - Characters quoted by \' or \" or unquoted.");
log_warn(" ");
@@ -2638,7 +3560,7 @@ out_reserved_values:
log_warn(" Comparison operators:");
t = _op_cmp;
for (; t->string; t++)
- log_warn(" %4s - %s", t->string, t->desc);
+ log_warn(" %6s - %s", t->string, t->desc);
log_warn(" ");
log_warn(" Logical and grouping operators:");
t = _op_log;
@@ -2682,7 +3604,8 @@ static struct selection_node *_parse_selection(struct dm_report *rh,
int implicit;
const struct dm_report_field_type *ft;
struct selection_str_list *str_list;
- const struct dm_report_reserved_value *reserved;
+ struct reserved_value_wrapper rvw = {0};
+ struct time_value tval;
uint64_t factor;
void *custom = NULL;
char *tmp;
@@ -2733,39 +3656,54 @@ static struct selection_node *_parse_selection(struct dm_report *rh,
goto bad;
}
- /* some operators can compare only numeric fields (NUMBER, SIZE or PERCENT) */
- if ((flags & FLD_CMP_NUMBER) &&
- (ft->flags != DM_REPORT_FIELD_TYPE_NUMBER) &&
- (ft->flags != DM_REPORT_FIELD_TYPE_SIZE) &&
- (ft->flags != DM_REPORT_FIELD_TYPE_PERCENT)) {
- _display_selection_help(rh);
- log_error("Operator can be used only with number, size or percent fields: %s", ws);
- goto bad;
- }
-
/* comparison value */
if (flags & FLD_CMP_REGEX) {
- if (!(last = _tok_value_regex(rh, ft, last, &vs, &ve, &flags, &reserved)))
+ /*
+ * REGEX value
+ */
+ if (!(last = _tok_value_regex(rh, ft, last, &vs, &ve, &flags, &rvw)))
goto_bad;
} else {
+ /*
+ * STRING, NUMBER, SIZE, PERCENT, STRING_LIST, TIME value
+ */
+ if (flags & FLD_CMP_NUMBER) {
+ if (!(ft->flags & (DM_REPORT_FIELD_TYPE_NUMBER |
+ DM_REPORT_FIELD_TYPE_SIZE |
+ DM_REPORT_FIELD_TYPE_PERCENT |
+ DM_REPORT_FIELD_TYPE_TIME))) {
+ _display_selection_help(rh);
+ log_error("Operator can be used only with number, size, time or percent fields: %s", ws);
+ goto bad;
+ }
+ } else if (flags & FLD_CMP_TIME) {
+ if (!(ft->flags & DM_REPORT_FIELD_TYPE_TIME)) {
+ _display_selection_help(rh);
+ log_error("Operator can be used only with time fields: %s", ws);
+ goto bad;
+ }
+ }
+
if (ft->flags == DM_REPORT_FIELD_TYPE_SIZE ||
ft->flags == DM_REPORT_FIELD_TYPE_NUMBER ||
ft->flags == DM_REPORT_FIELD_TYPE_PERCENT)
custom = &factor;
+ else if (ft->flags & DM_REPORT_FIELD_TYPE_TIME)
+ custom = &tval;
else if (ft->flags == DM_REPORT_FIELD_TYPE_STRING_LIST)
custom = &str_list;
else
custom = NULL;
if (!(last = _tok_value(rh, ft, field_num, implicit,
last, &vs, &ve, &flags,
- &reserved, rh->selection->mem, custom)))
+ &rvw, rh->selection->mem, custom)))
goto_bad;
}
*next = _skip_space(last);
/* create selection */
- if (!(fs = _create_field_selection(rh, field_num, implicit, vs, (size_t) (ve - vs), flags, reserved, custom)))
+ if (!(fs = _create_field_selection(rh, field_num, implicit, vs, (size_t) (ve - vs), flags, &rvw, custom)))
return_NULL;
/* create selection node */
@@ -2982,9 +3920,6 @@ static int _report_headings(struct dm_report *rh)
char *buf = NULL;
size_t buf_size = 0;
- if (rh->flags & RH_HEADINGS_PRINTED)
- return 1;
-
rh->flags |= RH_HEADINGS_PRINTED;
if (!(rh->flags & DM_REPORT_OUTPUT_HEADINGS))
@@ -3041,8 +3976,12 @@ static int _report_headings(struct dm_report *rh)
log_error("dm_report: Failed to generate report headings for printing");
goto bad;
}
- log_print("%s", (char *) dm_pool_end_object(rh->mem));
+ /* print all headings */
+ heading = (char *) dm_pool_end_object(rh->mem);
+ log_print("%s", heading);
+
+ dm_pool_free(rh->mem, (void *)heading);
dm_free(buf);
return 1;
@@ -3053,6 +3992,14 @@ static int _report_headings(struct dm_report *rh)
return 0;
}
+int dm_report_column_headings(struct dm_report *rh)
+{
+ /* Columns-as-rows does not use _report_headings. */
+ if (rh->flags & DM_REPORT_OUTPUT_COLUMNS_AS_ROWS)
+ return 1;
+ return _report_headings(rh);
+}
+
/*
* Sort rows of data
*/
@@ -3067,7 +4014,8 @@ static int _row_compare(const void *a, const void *b)
sfa = (*rowa->sort_fields)[cnt];
sfb = (*rowb->sort_fields)[cnt];
if ((sfa->props->flags & DM_REPORT_FIELD_TYPE_NUMBER) ||
- (sfa->props->flags & DM_REPORT_FIELD_TYPE_SIZE)) {
+ (sfa->props->flags & DM_REPORT_FIELD_TYPE_SIZE) ||
+ (sfa->props->flags & DM_REPORT_FIELD_TYPE_TIME)) {
const uint64_t numa =
*(const uint64_t *) sfa->sort_value;
const uint64_t numb =
@@ -3230,6 +4178,19 @@ bad:
return 0;
}
+static void _destroy_rows(struct dm_report *rh)
+{
+ /*
+ * free the first row allocated to this report: since this is a
+ * pool allocation this will also free all subsequently allocated
+ * rows from the report and any associated string data.
+ */
+ if(rh->first_row)
+ dm_pool_free(rh->mem, rh->first_row);
+ rh->first_row = NULL;
+ dm_list_init(&rh->rows);
+}
+
static int _output_as_rows(struct dm_report *rh)
{
const struct dm_report_field_type *fields;
@@ -3237,6 +4198,7 @@ static int _output_as_rows(struct dm_report *rh)
struct dm_report_field *field;
struct row *row;
+PFL();
dm_list_iterate_items(fp, &rh->field_props) {
if (fp->flags & FLD_HIDDEN) {
dm_list_iterate_items(row, &rh->rows) {
@@ -3285,6 +4247,8 @@ static int _output_as_rows(struct dm_report *rh)
log_print("%s", (char *) dm_pool_end_object(rh->mem));
}
+ _destroy_rows(rh);
+
return 1;
bad:
@@ -3333,8 +4297,7 @@ static int _output_as_columns(struct dm_report *rh)
dm_list_del(&row->list);
}
- if (row)
- dm_pool_free(rh->mem, row);
+ _destroy_rows(rh);
return 1;
@@ -3343,11 +4306,15 @@ static int _output_as_columns(struct dm_report *rh)
return 0;
}
+int dm_report_is_empty(struct dm_report *rh)
+{
+ return dm_list_empty(&rh->rows) ? 1 : 0;
+}
+
int dm_report_output(struct dm_report *rh)
{
if (dm_list_empty(&rh->rows))
return 1;
-
if ((rh->flags & RH_SORT_REQUIRED))
_sort_rows(rh);
diff --git a/libdm/libdm-stats.c b/libdm/libdm-stats.c
new file mode 100644
index 000000000..4b4135af8
--- /dev/null
+++ b/libdm/libdm-stats.c
@@ -0,0 +1,1390 @@
+/*
+ * Copyright (C) 2015 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "dmlib.h"
+
+#define DM_STATS_REGION_NOT_PRESENT UINT64_MAX
+
+#define NSEC_PER_MSEC 1000000L
+#define NSEC_PER_SEC 1000000000L
+
+/*
+ * See Documentation/device-mapper/statistics.txt for full descriptions
+ * of the device-mapper statistics counter fields.
+ */
+struct dm_stats_counters {
+ uint64_t reads; /* Num reads completed */
+ uint64_t reads_merged; /* Num reads merged */
+ uint64_t read_sectors; /* Num sectors read */
+ uint64_t read_nsecs; /* Num milliseconds spent reading */
+ uint64_t writes; /* Num writes completed */
+ uint64_t writes_merged; /* Num writes merged */
+ uint64_t write_sectors; /* Num sectors written */
+ uint64_t write_nsecs; /* Num milliseconds spent writing */
+ uint64_t io_in_progress; /* Num I/Os currently in progress */
+ uint64_t io_nsecs; /* Num milliseconds spent doing I/Os */
+ uint64_t weighted_io_nsecs; /* Weighted num milliseconds doing I/Os */
+ uint64_t total_read_nsecs; /* Total time spent reading in milliseconds */
+ uint64_t total_write_nsecs; /* Total time spent writing in milliseconds */
+};
+
+struct dm_stats_region {
+ uint64_t region_id; /* as returned by @stats_list */
+ uint64_t start;
+ uint64_t len;
+ uint64_t step;
+ char *program_id;
+ char *aux_data;
+ uint64_t timescale; /* precise_timestamps is per-region */
+ struct dm_stats_counters *counters;
+};
+
+struct dm_stats {
+ /* device binding */
+ int major; /* device major that this dm_stats object is bound to */
+ int minor; /* device minor that this dm_stats object is bound to */
+ char *name; /* device-mapper device name */
+ char *uuid; /* device-mapper UUID */
+ char *program_id; /* default program_id for this handle */
+ struct dm_pool *mem; /* memory pool for region and counter tables */
+ uint64_t nr_regions; /* total number of present regions */
+ uint64_t max_region; /* size of the regions table */
+ uint64_t interval_ns; /* sampling interval in nanoseconds */
+ uint64_t timescale; /* sample value multiplier */
+ struct dm_stats_region *regions;
+ /* statistics cursor */
+ uint64_t cur_region;
+ uint64_t cur_area;
+};
+
+#define PROC_SELF_COMM "/proc/self/comm"
+static char *_program_id_from_proc(void)
+{
+ FILE *comm = NULL;
+ char buf[256];
+
+ if (!(comm = fopen(PROC_SELF_COMM, "r")))
+ return_NULL;
+
+ if (!fgets(buf, sizeof(buf), comm)) {
+ log_error("Could not read from %s", PROC_SELF_COMM);
+ if(fclose(comm))
+ stack;
+ return NULL;
+ }
+
+ if (fclose(comm))
+ stack;
+
+ return dm_strdup(buf);
+}
+
+struct dm_stats *dm_stats_create(const char *program_id)
+{
+ struct dm_stats *dms = NULL;
+
+ if (!(dms = dm_zalloc(sizeof(*dms))))
+ return_NULL;
+ if (!(dms->mem = dm_pool_create("stats_pool", 4096)))
+ goto_out;
+
+ if (!program_id || !strlen(program_id))
+ dms->program_id = _program_id_from_proc();
+ else
+ dms->program_id = dm_strdup(program_id);
+
+ dms->major = -1;
+ dms->minor = -1;
+ dms->name = NULL;
+ dms->uuid = NULL;
+
+ /* all regions currently use msec precision */
+ dms->timescale = NSEC_PER_MSEC;
+
+ dms->nr_regions = DM_STATS_REGION_NOT_PRESENT;
+ dms->max_region = DM_STATS_REGION_NOT_PRESENT;
+ dms->regions = NULL;
+
+ return dms;
+out:
+ dm_free(dms);
+ return NULL;
+}
+
+/**
+ * Test whether the stats region pointed to by region is present.
+ */
+static int _stats_region_present(const struct dm_stats_region *region)
+{
+ return !(region->region_id == DM_STATS_REGION_NOT_PRESENT);
+}
+
+static void _stats_region_destroy(struct dm_stats_region *region)
+{
+ if (!_stats_region_present(region))
+ return;
+
+ /**
+ * Don't free counters here explicitly; it will be dropped
+ * from the pool along with the corresponding regions table.
+ */
+
+ if (region->program_id)
+ dm_free(region->program_id);
+ if (region->aux_data)
+ dm_free(region->aux_data);
+}
+
+static void _stats_regions_destroy(struct dm_stats *dms)
+{
+ struct dm_pool *mem = dms->mem;
+ uint64_t i;
+
+ if (!dms->regions)
+ return;
+
+ /* walk backwards to obey pool order */
+ for (i = dms->max_region; (i != DM_STATS_REGION_NOT_PRESENT); i--)
+ _stats_region_destroy(&dms->regions[i]);
+ dm_pool_free(mem, dms->regions);
+}
+
+static int _set_stats_device(struct dm_stats *dms, struct dm_task *dmt)
+{
+ if (dms->name)
+ return dm_task_set_name(dmt, dms->name);
+ if (dms->uuid)
+ return dm_task_set_uuid(dmt, dms->uuid);
+ if (dms->major > 0)
+ return dm_task_set_major(dmt, dms->major)
+ && dm_task_set_minor(dmt, dms->minor);
+ return_0;
+}
+
+static int _stats_bound(struct dm_stats *dms)
+{
+ if (dms->major > 0 || dms->name || dms->uuid)
+ return 1;
+ /* %p format specifier expects a void pointer. */
+ log_debug("Stats handle at %p is not bound.", (void *) dms);
+ return 0;
+}
+
+static void _stats_clear_binding(struct dm_stats *dms)
+{
+ if (dms->name)
+ dm_pool_free(dms->mem, dms->name);
+ if (dms->uuid)
+ dm_pool_free(dms->mem, dms->uuid);
+
+ dms->name = dms->uuid = NULL;
+ dms->major = dms->minor = -1;
+}
+
+int dm_stats_bind_devno(struct dm_stats *dms, int major, int minor)
+{
+ _stats_clear_binding(dms);
+ _stats_regions_destroy(dms);
+
+ dms->major = major;
+ dms->minor = minor;
+
+ return 1;
+}
+
+int dm_stats_bind_name(struct dm_stats *dms, const char *name)
+{
+ _stats_clear_binding(dms);
+ _stats_regions_destroy(dms);
+
+ if (!(dms->name = dm_pool_strdup(dms->mem, name)))
+ return_0;
+
+ return 1;
+}
+
+int dm_stats_bind_uuid(struct dm_stats *dms, const char *uuid)
+{
+ _stats_clear_binding(dms);
+ _stats_regions_destroy(dms);
+
+ if (!(dms->uuid = dm_pool_strdup(dms->mem, uuid)))
+ return_0;
+
+ return 1;
+}
+
+static struct dm_task *_stats_send_message(struct dm_stats *dms, char *msg)
+{
+ struct dm_task *dmt;
+
+ if (!(dmt = dm_task_create(DM_DEVICE_TARGET_MSG)))
+ return_0;
+
+ if (!_set_stats_device(dms, dmt))
+ goto_out;
+
+ if (!dm_task_set_message(dmt, msg))
+ goto_out;
+
+ if (!dm_task_run(dmt))
+ goto_out;
+
+ return dmt;
+out:
+ dm_task_destroy(dmt);
+ return NULL;
+}
+
+static int _stats_parse_list_region(struct dm_stats_region *region, char *line)
+{
+ /* FIXME: the kernel imposes no length limit here */
+ char program_id[256], aux_data[256];
+ int r;
+
+ /* line format:
+ * <region_id>: <start_sector>+<length> <step> <program_id> <aux_data>
+ */
+ r = sscanf(line, FMTu64 ": " FMTu64 "+" FMTu64 " " FMTu64 "%255s %255s",
+ &region->region_id, &region->start, &region->len, &region->step,
+ program_id, aux_data);
+
+ if (r != 6)
+ return_0;
+
+ if (!strcmp(program_id, "-"))
+ program_id[0] = '\0';
+ if (!strcmp(aux_data, "-"))
+ aux_data[0] = '\0';
+
+ if (!(region->program_id = dm_strdup(program_id)))
+ return_0;
+ if (!(region->aux_data = dm_strdup(aux_data))) {
+ dm_free(region->program_id);
+ return_0;
+ }
+
+ region->counters = NULL;
+ return 1;
+}
+
+static int _stats_parse_list(struct dm_stats *dms, const char *resp)
+{
+ struct dm_pool *mem = dms->mem;
+ struct dm_stats_region cur;
+ uint64_t max_region = 0, nr_regions = 0;
+ FILE *list_rows;
+ /* FIXME: determine correct maximum line length based on kernel format */
+ char line[256];
+
+ if (!resp) {
+ log_error("Could not parse NULL @stats_list response.");
+ return 0;
+ }
+
+ if (dms->regions)
+ _stats_regions_destroy(dms);
+
+ /* no regions */
+ if (!strlen(resp)) {
+ dms->nr_regions = dms->max_region = 0;
+ dms->regions = NULL;
+ return 1;
+ }
+
+ /*
+ * dm_task_get_message_response() returns a 'const char *' but
+ * since fmemopen also permits "w" it expects a 'char *'.
+ */
+ if (!(list_rows = fmemopen((char *)resp, strlen(resp), "r")))
+ return_0;
+
+ if (!dm_pool_begin_object(mem, 1024))
+ goto_out;
+
+ while(fgets(line, sizeof(line), list_rows)) {
+
+ if (!_stats_parse_list_region(&cur, line))
+ goto_out;
+
+ /* handle holes in the list of region_ids */
+ if (cur.region_id > max_region) {
+ struct dm_stats_region fill;
+ memset(&fill, 0, sizeof(fill));
+ fill.region_id = DM_STATS_REGION_NOT_PRESENT;
+ do {
+ if (!dm_pool_grow_object(mem, &fill, sizeof(fill)))
+ goto_out;
+ } while (max_region++ < (cur.region_id - 1));
+ }
+
+ if (!dm_pool_grow_object(mem, &cur, sizeof(cur)))
+ goto_out;
+
+ max_region++;
+ nr_regions++;
+ }
+
+ dms->nr_regions = nr_regions;
+ dms->max_region = max_region - 1;
+ dms->regions = dm_pool_end_object(mem);
+
+ if (fclose(list_rows))
+ stack;
+
+ return 1;
+out:
+ if(fclose(list_rows))
+ stack;
+ dm_pool_abandon_object(mem);
+ return 0;
+}
+
+int dm_stats_list(struct dm_stats *dms, const char *program_id)
+{
+ struct dm_task *dmt;
+ char msg[256];
+ int r;
+
+ if (!_stats_bound(dms))
+ return_0;
+
+ /* allow zero-length program_id for list */
+ if (!program_id)
+ program_id = dms->program_id;
+
+ r = dm_snprintf(msg, sizeof(msg), "@stats_list %s", program_id);
+
+ if (r < 0) {
+ log_error("Failed to prepare stats message.");
+ return 0;
+ }
+
+ if (!(dmt = _stats_send_message(dms, msg)))
+ return 0;
+
+ if (!_stats_parse_list(dms, dm_task_get_message_response(dmt))) {
+ log_error("Could not parse @stats_list response.");
+ goto out;
+ }
+
+ dm_task_destroy(dmt);
+ return 1;
+
+out:
+ dm_task_destroy(dmt);
+ return 0;
+}
+
+static int _stats_parse_region(struct dm_pool *mem, const char *resp,
+ struct dm_stats_region *region,
+ uint64_t timescale)
+{
+ struct dm_stats_counters cur;
+ FILE *stats_rows = NULL;
+ uint64_t start, len;
+ char row[256];
+ int r;
+
+ if (!resp) {
+ log_error("Could not parse empty @stats_print response.");
+ return 0;
+ }
+
+ region->start = UINT64_MAX;
+
+ if (!dm_pool_begin_object(mem, 512))
+ goto_out;
+
+ /*
+ * dm_task_get_message_response() returns a 'const char *' but
+ * since fmemopen also permits "w" it expects a 'char *'.
+ */
+ stats_rows = fmemopen((char *)resp, strlen(resp), "r");
+ if (!stats_rows)
+ goto_out;
+
+ /*
+ * Output format for each step-sized area of a region:
+ *
+ * <start_sector>+<length> counters
+ *
+ * The first 11 counters have the same meaning as
+ * /sys/block/ * /stat or /proc/diskstats.
+ *
+ * Please refer to Documentation/iostats.txt for details.
+ *
+ * 1. the number of reads completed
+ * 2. the number of reads merged
+ * 3. the number of sectors read
+ * 4. the number of milliseconds spent reading
+ * 5. the number of writes completed
+ * 6. the number of writes merged
+ * 7. the number of sectors written
+ * 8. the number of milliseconds spent writing
+ * 9. the number of I/Os currently in progress
+ * 10. the number of milliseconds spent doing I/Os
+ * 11. the weighted number of milliseconds spent doing I/Os
+ *
+ * Additional counters:
+ * 12. the total time spent reading in milliseconds
+ * 13. the total time spent writing in milliseconds
+ *
+ */
+ while (fgets(row, sizeof(row), stats_rows)) {
+ r = sscanf(row, FMTu64 "+" FMTu64 /* start+len */
+ /* reads */
+ FMTu64 " " FMTu64 " " FMTu64 " " FMTu64 " "
+ /* writes */
+ FMTu64 " " FMTu64 " " FMTu64 " " FMTu64 " "
+ /* in flight & io nsecs */
+ FMTu64 " " FMTu64 " " FMTu64 " "
+ /* tot read/write nsecs */
+ FMTu64 " " FMTu64, &start, &len,
+ &cur.reads, &cur.reads_merged, &cur.read_sectors,
+ &cur.read_nsecs,
+ &cur.writes, &cur.writes_merged, &cur.write_sectors,
+ &cur.write_nsecs,
+ &cur.io_in_progress,
+ &cur.io_nsecs, &cur.weighted_io_nsecs,
+ &cur.total_read_nsecs, &cur.total_write_nsecs);
+ if (r != 15) {
+ log_error("Could not parse @stats_print row.");
+ goto out;
+ }
+
+ /* scale time values up if needed */
+ if (timescale != 1) {
+ cur.read_nsecs *= timescale;
+ cur.write_nsecs *= timescale;
+ cur.io_nsecs *= timescale;
+ cur.weighted_io_nsecs *= timescale;
+ cur.total_read_nsecs *= timescale;
+ cur.total_write_nsecs *= timescale;
+ }
+
+ if(!dm_pool_grow_object(mem, &cur, sizeof(cur)))
+ goto_out;
+ if (region->start == UINT64_MAX) {
+ region->start = start;
+ region->step = len; /* area size is always uniform. */
+ }
+ }
+
+ region->len = (start + len) - region->start;
+ region->timescale = timescale;
+ region->counters = dm_pool_end_object(mem);
+
+ if (fclose(stats_rows))
+ stack;
+
+ return 1;
+
+out:
+
+ if (stats_rows)
+ if(fclose(stats_rows))
+ stack;
+ dm_pool_abandon_object(mem);
+ return 0;
+}
+
+static uint64_t _nr_areas(uint64_t len, uint64_t step)
+{
+ /* Default is one area. */
+ if (!len || !step)
+ return 1;
+ /*
+ * drivers/md/dm-stats.c::message_stats_create()
+ * A region may be sub-divided into areas with their own counters.
+ * Any partial area at the end of the region is treated as an
+ * additional complete area.
+ */
+ return (len + step - 1) / step;
+}
+
+static uint64_t _nr_areas_region(struct dm_stats_region *region)
+{
+ return _nr_areas(region->len, region->step);
+}
+
+static void _stats_walk_next(const struct dm_stats *dms, int region,
+ uint64_t *cur_r, uint64_t *cur_a)
+{
+ struct dm_stats_region *cur = NULL;
+ int present;
+
+ if (!dms || !dms->regions)
+ return;
+
+ cur = &dms->regions[*cur_r];
+ present = _stats_region_present(cur);
+
+ if (region && present)
+ *cur_a = _nr_areas_region(cur);
+
+ if (region || !present || ++(*cur_a) == _nr_areas_region(cur)) {
+ *cur_a = 0;
+ while(!dm_stats_region_present(dms, ++(*cur_r))
+ && *cur_r < dms->max_region)
+ ; /* keep walking until a present region is found
+ * or the end of the table is reached. */
+ }
+
+}
+
+static void _stats_walk_start(const struct dm_stats *dms,
+ uint64_t *cur_r, uint64_t *cur_a)
+{
+ if (!dms || !dms->regions)
+ return;
+
+ *cur_r = 0;
+ *cur_a = 0;
+
+ /* advance to the first present region */
+ if (!dm_stats_region_present(dms, dms->cur_region))
+ _stats_walk_next(dms, 0, cur_r, cur_a);
+}
+
+void dm_stats_walk_start(struct dm_stats *dms)
+{
+ _stats_walk_start(dms, &dms->cur_region, &dms->cur_area);
+}
+
+void dm_stats_walk_next(struct dm_stats *dms)
+{
+ _stats_walk_next(dms, 0, &dms->cur_region, &dms->cur_area);
+}
+
+void dm_stats_walk_next_region(struct dm_stats *dms)
+{
+ _stats_walk_next(dms, 1, &dms->cur_region, &dms->cur_area);
+}
+
+static int _stats_walk_end(const struct dm_stats *dms,
+ uint64_t *cur_r, uint64_t *cur_a)
+{
+ struct dm_stats_region *region = NULL;
+ int end = 0;
+
+ if (!dms || !dms->regions)
+ return 1;
+
+ region = &dms->regions[*cur_r];
+ end = (*cur_r > dms->max_region
+ || (*cur_r == dms->max_region
+ && *cur_a >= _nr_areas_region(region)));
+
+ return end;
+}
+
+int dm_stats_walk_end(struct dm_stats *dms)
+{
+ return _stats_walk_end(dms, &dms->cur_region, &dms->cur_area);
+}
+
+uint64_t dm_stats_get_region_nr_areas(const struct dm_stats *dms,
+ uint64_t region_id)
+{
+ struct dm_stats_region *region = &dms->regions[region_id];
+ return _nr_areas_region(region);
+}
+
+uint64_t dm_stats_get_current_nr_areas(const struct dm_stats *dms)
+{
+ return dm_stats_get_region_nr_areas(dms, dms->cur_region);
+}
+
+uint64_t dm_stats_get_nr_areas(const struct dm_stats *dms)
+{
+ uint64_t nr_areas = 0;
+ /* use a separate cursor */
+ uint64_t cur_region, cur_area;
+
+ _stats_walk_start(dms, &cur_region, &cur_area);
+ do {
+ nr_areas += dm_stats_get_current_nr_areas(dms);
+ _stats_walk_next(dms, 1, &cur_region, &cur_area);
+ } while (!_stats_walk_end(dms, &cur_region, &cur_area));
+ return nr_areas;
+}
+
+int dm_stats_create_region(struct dm_stats *dms, uint64_t *region_id,
+ uint64_t start, uint64_t len, int64_t step,
+ const char *program_id, const char *aux_data)
+{
+ struct dm_task *dmt = NULL;
+ char msg[1024], range[64];
+ const char *err_fmt = "Could not prepare @stats_create %s.";
+ const char *resp;
+
+ if (!_stats_bound(dms))
+ return_0;
+
+ if (!program_id || !strlen(program_id))
+ program_id = dms->program_id;
+
+ if (start || len) {
+ if (!dm_snprintf(range, sizeof(range), FMTu64 "+" FMTu64,
+ start, len)) {
+ log_error(err_fmt, "range");
+ goto out;
+ }
+ }
+
+ if (!dm_snprintf(msg, sizeof(msg), "@stats_create %s %s" FMTu64 " %s %s",
+ (start || len) ? range : "-",
+ (step < 0) ? "/" : "",
+ (uint64_t)llabs(step), program_id, aux_data)) {
+ log_error(err_fmt, "message");
+ goto out;
+ }
+
+ if (!(dmt = _stats_send_message(dms, msg)))
+ goto out;
+
+ resp = dm_task_get_message_response(dmt);
+ if (!resp) {
+ log_error("Could not parse empty @stats_create response.");
+ goto out;
+ }
+
+ if (region_id) {
+ char *endptr = NULL;
+ *region_id = strtoull(resp, &endptr, 10);
+ if (resp == endptr)
+ goto_out;
+ }
+
+ dm_task_destroy(dmt);
+
+ return 1;
+out:
+ if(dmt)
+ dm_task_destroy(dmt);
+ return 0;
+}
+
+int dm_stats_delete_region(struct dm_stats *dms, uint64_t region_id)
+{
+ struct dm_task *dmt;
+ char msg[1024];
+
+ if (!_stats_bound(dms))
+ return_0;
+
+ if (!dm_snprintf(msg, sizeof(msg), "@stats_delete " FMTu64, region_id)) {
+ log_error("Could not prepare @stats_delete message.");
+ goto out;
+ }
+
+ dmt = _stats_send_message(dms, msg);
+ if (!dmt)
+ goto_out;
+ dm_task_destroy(dmt);
+ return 1;
+
+out:
+ return 0;
+}
+
+int dm_stats_clear_region(struct dm_stats *dms, uint64_t region_id)
+{
+ struct dm_task *dmt;
+ char msg[1024];
+
+ if (!_stats_bound(dms))
+ return_0;
+
+ if (!dm_snprintf(msg, sizeof(msg), "@stats_clear " FMTu64, region_id)) {
+ log_error("Could not prepare @stats_clear message.");
+ goto out;
+ }
+
+ dmt = _stats_send_message(dms, msg);
+ if (!dmt)
+ goto_out;
+ dm_task_destroy(dmt);
+ return 1;
+
+out:
+ return 0;
+}
+
+/*
+ * Issue an @stats_print (or @stats_print_clear when 'clear' is set)
+ * message for region_id, optionally restricted to num_lines lines
+ * beginning at start_line (0/0 means all lines).
+ *
+ * Returns the dm_task holding the message response, or NULL on error.
+ * The caller is responsible for destroying the returned task with
+ * dm_task_destroy().
+ */
+static struct dm_task *_stats_print_region(struct dm_stats *dms,
+				    uint64_t region_id, unsigned start_line,
+				    unsigned num_lines, unsigned clear)
+{
+	struct dm_task *dmt = NULL;
+	/* @stats_print[_clear] <region_id> [<start_line> <num_lines>] */
+	const char *clear_str = "_clear", *lines_fmt = "%u %u";
+	const char *msg_fmt = "@stats_print%s " FMTu64 " %s";
+	const char *err_fmt = "Could not prepare @stats_print %s.";
+	char msg[1024], lines[64];
+
+	/* 'lines' is only read below when a row range was requested. */
+	if (start_line || num_lines)
+		if (!dm_snprintf(lines, sizeof(lines),
+				 lines_fmt, start_line, num_lines)) {
+			log_error(err_fmt, "row specification");
+			goto out;
+		}
+
+	if (!dm_snprintf(msg, sizeof(msg), msg_fmt, (clear) ? clear_str : "",
+			 region_id, (start_line || num_lines) ? lines : "")) {
+		log_error(err_fmt, "message");
+		goto out;
+	}
+
+	if (!(dmt = _stats_send_message(dms, msg)))
+		goto out;
+
+	return dmt;
+
+out:
+	return NULL;
+}
+
+/*
+ * Return a pool-allocated copy of the raw @stats_print response for
+ * region_id, or NULL on error.  Release the buffer with
+ * dm_stats_buffer_destroy().
+ */
+char *dm_stats_print_region(struct dm_stats *dms, uint64_t region_id,
+			    unsigned start_line, unsigned num_lines,
+			    unsigned clear)
+{
+	char *resp = NULL;
+	const char *response;
+	struct dm_task *dmt = NULL;
+
+	if (!_stats_bound(dms))
+		return_0;
+
+	dmt = _stats_print_region(dms, region_id,
+				  start_line, num_lines, clear);
+
+	if (!dmt)
+		return NULL;
+
+	/*
+	 * Guard against a NULL message response: dm_pool_strdup() would
+	 * dereference it (the same check is made after @stats_create).
+	 */
+	if (!(response = dm_task_get_message_response(dmt))) {
+		log_error("Could not parse empty @stats_print response.");
+		dm_task_destroy(dmt);
+		return NULL;
+	}
+
+	resp = dm_pool_strdup(dms->mem, response);
+	dm_task_destroy(dmt);
+
+	if (!resp)
+		log_error("Could not allocate memory for response buffer.");
+
+	return resp;
+}
+
+/* Release a response buffer returned by dm_stats_print_region(). */
+void dm_stats_buffer_destroy(struct dm_stats *dms, char *buffer)
+{
+	dm_pool_free(dms->mem, buffer);
+}
+
+/*
+ * Return the number of regions in the most recently loaded region table,
+ * or 0 when no handle or no table is present.
+ */
+uint64_t dm_stats_get_nr_regions(const struct dm_stats *dms)
+{
+	if (!dms || !dms->regions)
+		return 0;
+	return dms->nr_regions;
+}
+
+/**
+ * Test whether region_id is present in this set of stats data.
+ */
+int dm_stats_region_present(const struct dm_stats *dms, uint64_t region_id)
+{
+	/* Also guard dms itself, consistent with dm_stats_get_nr_regions(). */
+	if (!dms || !dms->regions)
+		return 0;
+
+	if (region_id > dms->max_region)
+		return 0;
+
+	return _stats_region_present(&dms->regions[region_id]);
+}
+
+/*
+ * Parse one @stats_print response ('resp') into the counter data of the
+ * region table entry for region_id.  Returns 1 on success, 0 on failure.
+ *
+ * NOTE(review): dms->regions must already be allocated and region_id in
+ * range — callers go through dm_stats_populate() which guarantees this.
+ */
+static int _dm_stats_populate_region(struct dm_stats *dms, uint64_t region_id,
+				     const char *resp)
+{
+	struct dm_stats_region *region = &dms->regions[region_id];
+
+	if (!_stats_bound(dms))
+		return_0;
+
+	if (!_stats_parse_region(dms->mem, resp, region, dms->timescale)) {
+		log_error("Could not parse @stats_print message response.");
+		return 0;
+	}
+	region->region_id = region_id;
+	return 1;
+}
+
+/*
+ * Fetch and parse counter data for region_id (or for every region when
+ * region_id == DM_STATS_REGIONS_ALL) on the bound device.  Counters are
+ * cleared in the kernel as they are read.
+ *
+ * Returns 1 on success.  Returns 0 on failure, or when the device has no
+ * registered regions.  On failure any partially populated region table is
+ * destroyed.
+ *
+ * NOTE(review): the single-region path relies on a region table from a
+ * prior dm_stats_list() call (dms->nr_regions check below) — confirm
+ * callers always list first in that case.
+ */
+int dm_stats_populate(struct dm_stats *dms, const char *program_id,
+		      uint64_t region_id)
+{
+	int all_regions = (region_id == DM_STATS_REGIONS_ALL);
+
+	if (!_stats_bound(dms))
+		return_0;
+
+	/* allow zero-length program_id for populate */
+	if (!program_id)
+		program_id = dms->program_id;
+
+	if (all_regions && !dm_stats_list(dms, program_id)) {
+		log_error("Could not parse @stats_list response.");
+		goto out;
+	}
+
+	/* successful list but no regions registered */
+	if (!dms->nr_regions)
+		return 0;
+
+	dm_stats_walk_start(dms);
+	do {
+		struct dm_task *dmt = NULL; /* @stats_print task */
+		const char *resp;
+
+		region_id = (all_regions)
+			     ? dm_stats_get_current_region(dms) : region_id;
+
+		/* obtain all lines and clear counter values */
+		if (!(dmt = _stats_print_region(dms, region_id, 0, 0, 1)))
+			goto_out;
+
+		resp = dm_task_get_message_response(dmt);
+		if (!_dm_stats_populate_region(dms, region_id, resp)) {
+			dm_task_destroy(dmt);
+			goto_out;
+		}
+
+		dm_task_destroy(dmt);
+		dm_stats_walk_next_region(dms);
+
+	} while (all_regions && !dm_stats_walk_end(dms));
+
+	return 1;
+
+out:
+	/* invalidate the region table: it may be partially populated */
+	_stats_regions_destroy(dms);
+	dms->regions = NULL;
+	return 0;
+}
+
+/**
+ * destroy a dm_stats object and all associated regions and counter sets.
+ */
+void dm_stats_destroy(struct dm_stats *dms)
+{
+	_stats_regions_destroy(dms);
+	_stats_clear_binding(dms);
+	dm_pool_destroy(dms->mem);
+	dm_free(dms->program_id);
+	dm_free(dms);
+}
+
+/**
+ * Methods for accessing counter fields. All methods share the
+ * following naming scheme and prototype:
+ *
+ * uint64_t dm_stats_get_COUNTER(struct dm_stats *, uint64_t, uint64_t)
+ *
+ * Where the two integer arguments are the region_id and area_id
+ * respectively.
+ *
+ * DM_STATS_REGION_CURRENT for either argument selects the current
+ * walk position.  NOTE(review): no bounds or NULL checking is done
+ * here — callers must pass valid, populated region/area ids.
+ */
+#define MK_STATS_GET_COUNTER_FN(counter) \
+uint64_t dm_stats_get_ ## counter(const struct dm_stats *dms, \
+				  uint64_t region_id, uint64_t area_id) \
+{ \
+	region_id = (region_id == DM_STATS_REGION_CURRENT) \
+		     ? dms->cur_region : region_id ; \
+	area_id = (area_id == DM_STATS_REGION_CURRENT) \
+		   ? dms->cur_area : area_id ; \
+	return dms->regions[region_id].counters[area_id].counter; \
+}
+
+MK_STATS_GET_COUNTER_FN(reads)
+MK_STATS_GET_COUNTER_FN(reads_merged)
+MK_STATS_GET_COUNTER_FN(read_sectors)
+MK_STATS_GET_COUNTER_FN(read_nsecs)
+MK_STATS_GET_COUNTER_FN(writes)
+MK_STATS_GET_COUNTER_FN(writes_merged)
+MK_STATS_GET_COUNTER_FN(write_sectors)
+MK_STATS_GET_COUNTER_FN(write_nsecs)
+MK_STATS_GET_COUNTER_FN(io_in_progress)
+MK_STATS_GET_COUNTER_FN(io_nsecs)
+MK_STATS_GET_COUNTER_FN(weighted_io_nsecs)
+MK_STATS_GET_COUNTER_FN(total_read_nsecs)
+MK_STATS_GET_COUNTER_FN(total_write_nsecs)
+#undef MK_STATS_GET_COUNTER_FN
+
+/*
+ * Return the rate of read merges per second (iostat rrqm/s) over the
+ * configured sampling interval.
+ */
+int dm_stats_get_rd_merges_per_sec(const struct dm_stats *dms, double *rrqm,
+				   uint64_t region_id, uint64_t area_id)
+{
+	struct dm_stats_counters *c;
+
+	if (!dms->interval_ns)
+		return_0;
+
+	region_id = (region_id == DM_STATS_REGION_CURRENT)
+		     ? dms->cur_region : region_id ;
+	area_id = (area_id == DM_STATS_REGION_CURRENT)
+		   ? dms->cur_area : area_id ;
+
+	c = &(dms->regions[region_id].counters[area_id]);
+	/*
+	 * Scale by NSEC_PER_SEC so the result is per second rather than
+	 * per nanosecond, matching dm_stats_get_reads_per_sec() et al.
+	 */
+	*rrqm = ((double) c->reads_merged * (double) NSEC_PER_SEC)
+		 / (double) dms->interval_ns;
+	return 1;
+}
+
+/*
+ * Return the rate of write merges per second (iostat wrqm/s) over the
+ * configured sampling interval.
+ */
+int dm_stats_get_wr_merges_per_sec(const struct dm_stats *dms, double *wrqm,
+				   uint64_t region_id, uint64_t area_id)
+{
+	struct dm_stats_counters *c;
+
+	if (!dms->interval_ns)
+		return_0;
+
+	region_id = (region_id == DM_STATS_REGION_CURRENT)
+		     ? dms->cur_region : region_id ;
+	area_id = (area_id == DM_STATS_REGION_CURRENT)
+		   ? dms->cur_area : area_id ;
+
+	c = &(dms->regions[region_id].counters[area_id]);
+	/*
+	 * Scale by NSEC_PER_SEC so the result is per second rather than
+	 * per nanosecond, matching dm_stats_get_writes_per_sec() et al.
+	 */
+	*wrqm = ((double) c->writes_merged * (double) NSEC_PER_SEC)
+		 / (double) dms->interval_ns;
+	return 1;
+}
+
+/* Read operations completed per second over the sampling interval. */
+int dm_stats_get_reads_per_sec(const struct dm_stats *dms, double *rd_s,
+			       uint64_t region_id, uint64_t area_id)
+{
+	struct dm_stats_counters *c;
+
+	/* a sampling interval must be set to derive a rate */
+	if (!dms->interval_ns)
+		return_0;
+
+	region_id = (region_id == DM_STATS_REGION_CURRENT)
+		     ? dms->cur_region : region_id ;
+	area_id = (area_id == DM_STATS_REGION_CURRENT)
+		   ? dms->cur_area : area_id ;
+
+	c = &(dms->regions[region_id].counters[area_id]);
+	*rd_s = ((double) c->reads * NSEC_PER_SEC) / (double) dms->interval_ns;
+	return 1;
+}
+
+/* Write operations completed per second over the sampling interval. */
+int dm_stats_get_writes_per_sec(const struct dm_stats *dms, double *wr_s,
+				uint64_t region_id, uint64_t area_id)
+{
+	struct dm_stats_counters *c;
+
+	if (!dms->interval_ns)
+		return_0;
+
+	region_id = (region_id == DM_STATS_REGION_CURRENT)
+		     ? dms->cur_region : region_id ;
+	area_id = (area_id == DM_STATS_REGION_CURRENT)
+		   ? dms->cur_area : area_id ;
+
+	c = &(dms->regions[region_id].counters[area_id]);
+	*wr_s = ((double) c->writes * (double) NSEC_PER_SEC)
+		 / (double) dms->interval_ns;
+
+	return 1;
+}
+
+/* Sectors read per second over the sampling interval. */
+int dm_stats_get_read_sectors_per_sec(const struct dm_stats *dms, double *rsec_s,
+				      uint64_t region_id, uint64_t area_id)
+{
+	struct dm_stats_counters *c;
+
+	if (!dms->interval_ns)
+		return_0;
+
+	region_id = (region_id == DM_STATS_REGION_CURRENT)
+		     ? dms->cur_region : region_id ;
+	area_id = (area_id == DM_STATS_REGION_CURRENT)
+		   ? dms->cur_area : area_id ;
+
+	c = &(dms->regions[region_id].counters[area_id]);
+	*rsec_s = ((double) c->read_sectors * (double) NSEC_PER_SEC)
+		   / (double) dms->interval_ns;
+
+	return 1;
+}
+
+/* Sectors written per second over the sampling interval. */
+int dm_stats_get_write_sectors_per_sec(const struct dm_stats *dms, double *wsec_s,
+				       uint64_t region_id, uint64_t area_id)
+{
+	struct dm_stats_counters *c;
+
+	if (!dms->interval_ns)
+		return_0;
+
+	region_id = (region_id == DM_STATS_REGION_CURRENT)
+		     ? dms->cur_region : region_id ;
+	area_id = (area_id == DM_STATS_REGION_CURRENT)
+		   ? dms->cur_area : area_id ;
+
+	c = &(dms->regions[region_id].counters[area_id]);
+	*wsec_s = ((double) c->write_sectors * (double) NSEC_PER_SEC)
+		   / (double) dms->interval_ns;
+	return 1;
+}
+
+/* Mean request size in sectors (iostat avgrq-sz); 0.0 when no I/O. */
+int dm_stats_get_average_request_size(const struct dm_stats *dms, double *arqsz,
+				      uint64_t region_id, uint64_t area_id)
+{
+	struct dm_stats_counters *c;
+	uint64_t nr_ios, nr_sectors;
+
+	if (!dms->interval_ns)
+		return_0;
+
+	*arqsz = 0.0;
+
+	region_id = (region_id == DM_STATS_REGION_CURRENT)
+		     ? dms->cur_region : region_id ;
+	area_id = (area_id == DM_STATS_REGION_CURRENT)
+		   ? dms->cur_area : area_id ;
+
+	c = &(dms->regions[region_id].counters[area_id]);
+	nr_ios = c->reads + c->writes;
+	nr_sectors = c->read_sectors + c->write_sectors;
+	if (nr_ios)
+		*arqsz = (double) nr_sectors / (double) nr_ios;
+	return 1;
+}
+
+/* Mean queue length (iostat avgqu-sz) derived from weighted io time. */
+int dm_stats_get_average_queue_size(const struct dm_stats *dms, double *qusz,
+				    uint64_t region_id, uint64_t area_id)
+{
+	struct dm_stats_counters *c;
+	uint64_t io_ticks;
+
+	if (!dms->interval_ns)
+		return_0;
+
+	*qusz = 0.0;
+
+	region_id = (region_id == DM_STATS_REGION_CURRENT)
+		     ? dms->cur_region : region_id ;
+	area_id = (area_id == DM_STATS_REGION_CURRENT)
+		   ? dms->cur_area : area_id ;
+
+	c = &(dms->regions[region_id].counters[area_id]);
+	io_ticks = c->weighted_io_nsecs;
+	if (io_ticks)
+		*qusz = (double) io_ticks / (double) dms->interval_ns;
+	return 1;
+}
+
+/* Mean wait time per I/O in nanoseconds (iostat await); 0.0 when no I/O. */
+int dm_stats_get_average_wait_time(const struct dm_stats *dms, double *await,
+				   uint64_t region_id, uint64_t area_id)
+{
+	struct dm_stats_counters *c;
+	uint64_t io_ticks, nr_ios;
+
+	if (!dms->interval_ns)
+		return_0;
+
+	*await = 0.0;
+
+	region_id = (region_id == DM_STATS_REGION_CURRENT)
+		     ? dms->cur_region : region_id ;
+	area_id = (area_id == DM_STATS_REGION_CURRENT)
+		   ? dms->cur_area : area_id ;
+
+	c = &(dms->regions[region_id].counters[area_id]);
+	io_ticks = c->read_nsecs + c->write_nsecs;
+	nr_ios = c->reads + c->writes;
+	if (nr_ios)
+		*await = (double) io_ticks / (double) nr_ios;
+	return 1;
+}
+
+/* Mean wait time per read in nanoseconds (iostat r_await). */
+int dm_stats_get_average_rd_wait_time(const struct dm_stats *dms,
+				      double *await, uint64_t region_id,
+				      uint64_t area_id)
+{
+	struct dm_stats_counters *c;
+	uint64_t rd_io_ticks, nr_rd_ios;
+
+	if (!dms->interval_ns)
+		return_0;
+
+	*await = 0.0;
+
+	region_id = (region_id == DM_STATS_REGION_CURRENT)
+		     ? dms->cur_region : region_id ;
+	area_id = (area_id == DM_STATS_REGION_CURRENT)
+		   ? dms->cur_area : area_id ;
+
+	c = &(dms->regions[region_id].counters[area_id]);
+	rd_io_ticks = c->read_nsecs;
+	nr_rd_ios = c->reads;
+	/*
+	 * Guard nr_rd_ios as well as rd_io_ticks to avoid a division by
+	 * zero, consistent with dm_stats_get_average_wr_wait_time().
+	 */
+	if (rd_io_ticks && nr_rd_ios)
+		*await = (double) rd_io_ticks / (double) nr_rd_ios;
+	return 1;
+}
+
+/* Mean wait time per write in nanoseconds (iostat w_await). */
+int dm_stats_get_average_wr_wait_time(const struct dm_stats *dms,
+				      double *await, uint64_t region_id,
+				      uint64_t area_id)
+{
+	struct dm_stats_counters *c;
+	uint64_t wr_io_ticks, nr_wr_ios;
+
+	if (!dms->interval_ns)
+		return_0;
+
+	*await = 0.0;
+
+	region_id = (region_id == DM_STATS_REGION_CURRENT)
+		     ? dms->cur_region : region_id ;
+	area_id = (area_id == DM_STATS_REGION_CURRENT)
+		   ? dms->cur_area : area_id ;
+
+	c = &(dms->regions[region_id].counters[area_id]);
+	wr_io_ticks = c->write_nsecs;
+	nr_wr_ios = c->writes;
+	/* both guarded: avoids dividing by a zero write count */
+	if (wr_io_ticks && nr_wr_ios)
+		*await = (double) wr_io_ticks / (double) nr_wr_ios;
+	return 1;
+}
+
+/*
+ * Mean service time per I/O in nanoseconds (iostat svctm), derived from
+ * throughput and utilization.  Returns 0 if either underlying value
+ * cannot be obtained.
+ */
+int dm_stats_get_service_time(const struct dm_stats *dms, double *svctm,
+			      uint64_t region_id, uint64_t area_id)
+{
+	dm_percent_t util;
+	double tput;
+
+	if (!dm_stats_get_throughput(dms, &tput, region_id, area_id))
+		return 0;
+
+	if (!dm_stats_get_utilization(dms, &util, region_id, area_id))
+		return 0;
+
+	/* avoid NAN with zero counter values */
+	if ( (uint64_t) tput == 0 || (uint64_t) util == 0) {
+		*svctm = 0.0;
+		return 1;
+	}
+	*svctm = ((double) NSEC_PER_SEC * dm_percent_to_float(util))
+		  / (100.0 * tput);
+	return 1;
+}
+
+/* Total I/O operations per second over the sampling interval. */
+int dm_stats_get_throughput(const struct dm_stats *dms, double *tput,
+			    uint64_t region_id, uint64_t area_id)
+{
+	struct dm_stats_counters *c;
+
+	if (!dms->interval_ns)
+		return_0;
+
+	region_id = (region_id == DM_STATS_REGION_CURRENT)
+		     ? dms->cur_region : region_id ;
+	area_id = (area_id == DM_STATS_REGION_CURRENT)
+		   ? dms->cur_area : area_id ;
+
+	c = &(dms->regions[region_id].counters[area_id]);
+
+	*tput = (( NSEC_PER_SEC * ((double) c->reads + (double) c->writes))
+		 / (double) (dms->interval_ns));
+	return 1;
+}
+
+/* Device utilization (iostat %util) as a dm_percent_t. */
+int dm_stats_get_utilization(const struct dm_stats *dms, dm_percent_t *util,
+			     uint64_t region_id, uint64_t area_id)
+{
+	struct dm_stats_counters *c;
+	uint64_t io_nsecs;
+
+	if (!dms->interval_ns)
+		return_0;
+
+	region_id = (region_id == DM_STATS_REGION_CURRENT)
+		     ? dms->cur_region : region_id ;
+	area_id = (area_id == DM_STATS_REGION_CURRENT)
+		   ? dms->cur_area : area_id ;
+
+	c = &(dms->regions[region_id].counters[area_id]);
+
+	/**
+	 * If io_nsec > interval_ns there is something wrong with the clock
+	 * for the last interval; do not allow a value > 100% utilization
+	 * to be passed to a dm_make_percent() call. We expect to see these
+	 * at startup if counters have not been cleared before the first read.
+	 */
+	io_nsecs = (c->io_nsecs <= dms->interval_ns) ? c->io_nsecs : dms->interval_ns;
+	*util = dm_make_percent(io_nsecs, dms->interval_ns);
+
+	return 1;
+}
+
+/* Set the sampling interval used for rate calculations, in milliseconds. */
+void dm_stats_set_sampling_interval_ms(struct dm_stats *dms, uint64_t interval_ms)
+{
+	/* All times use nsecs internally. */
+	dms->interval_ns = interval_ms * NSEC_PER_MSEC;
+}
+
+/* Set the sampling interval used for rate calculations, in nanoseconds. */
+void dm_stats_set_sampling_interval_ns(struct dm_stats *dms, uint64_t interval_ns)
+{
+	dms->interval_ns = interval_ns;
+}
+
+/* Return the current sampling interval in milliseconds (truncated). */
+uint64_t dm_stats_get_sampling_interval_ms(const struct dm_stats *dms)
+{
+	/* All times use nsecs internally. */
+	return (dms->interval_ns / NSEC_PER_MSEC);
+}
+
+/* Return the current sampling interval in nanoseconds. */
+uint64_t dm_stats_get_sampling_interval_ns(const struct dm_stats *dms)
+{
+	/* All times use nsecs internally. */
+	return (dms->interval_ns);
+}
+
+/*
+ * Set the program_id used to filter and tag regions for this handle.
+ * An empty or NULL program_id is only accepted when allow_empty is set;
+ * NULL is then stored as "".  Returns 1 on success, 0 on failure.
+ */
+int dm_stats_set_program_id(struct dm_stats *dms, int allow_empty,
+			    const char *program_id)
+{
+	if (!allow_empty && (!program_id || !strlen(program_id))) {
+		log_error("Empty program_id not permitted without "
+			  "allow_empty=1");
+		return 0;
+	}
+
+	if (!program_id)
+		program_id = "";
+
+	/* NOTE(review): dm_free(NULL) is a no-op, so this guard is redundant. */
+	if (dms->program_id)
+		dm_free(dms->program_id);
+
+	if (!(dms->program_id = dm_strdup(program_id)))
+		return_0;
+
+	return 1;
+}
+
+/* Region id at the current walk position. */
+uint64_t dm_stats_get_current_region(const struct dm_stats *dms)
+{
+	return dms->cur_region;
+}
+
+/* Area id at the current walk position. */
+uint64_t dm_stats_get_current_area(const struct dm_stats *dms)
+{
+	return dms->cur_area;
+}
+
+/* Start sector of the region; returns 0 if no region table is loaded. */
+int dm_stats_get_region_start(const struct dm_stats *dms, uint64_t *start,
+			      uint64_t region_id)
+{
+	if (!dms || !dms->regions)
+		return_0;
+	*start = dms->regions[region_id].start;
+	return 1;
+}
+
+/* Length of the region in sectors. */
+int dm_stats_get_region_len(const struct dm_stats *dms, uint64_t *len,
+			    uint64_t region_id)
+{
+	if (!dms || !dms->regions)
+		return_0;
+	*len = dms->regions[region_id].len;
+	return 1;
+}
+
+/* Area (step) length of the region in sectors. */
+int dm_stats_get_region_area_len(const struct dm_stats *dms, uint64_t *len,
+				 uint64_t region_id)
+{
+	if (!dms || !dms->regions)
+		return_0;
+	*len = dms->regions[region_id].step;
+	return 1;
+}
+
+/* As dm_stats_get_region_start() for the current walk position. */
+int dm_stats_get_current_region_start(const struct dm_stats *dms,
+				      uint64_t *start)
+{
+	return dm_stats_get_region_start(dms, start, dms->cur_region);
+}
+
+/* As dm_stats_get_region_len() for the current walk position. */
+int dm_stats_get_current_region_len(const struct dm_stats *dms,
+				    uint64_t *len)
+{
+	return dm_stats_get_region_len(dms, len, dms->cur_region);
+}
+
+/* As dm_stats_get_region_area_len() for the current walk position. */
+int dm_stats_get_current_region_area_len(const struct dm_stats *dms,
+					 uint64_t *step)
+{
+	return dm_stats_get_region_area_len(dms, step, dms->cur_region);
+}
+
+/* Absolute start sector of an area within a region. */
+int dm_stats_get_area_start(const struct dm_stats *dms, uint64_t *start,
+			    uint64_t region_id, uint64_t area_id)
+{
+	struct dm_stats_region *region;
+	if (!dms || !dms->regions)
+		return_0;
+	region = &dms->regions[region_id];
+	*start = region->start + region->step * area_id;
+	return 1;
+}
+
+/* Offset of an area from the start of its region, in sectors. */
+int dm_stats_get_area_offset(const struct dm_stats *dms, uint64_t *offset,
+			     uint64_t region_id, uint64_t area_id)
+{
+	if (!dms || !dms->regions)
+		return_0;
+	*offset = dms->regions[region_id].step * area_id;
+	return 1;
+}
+
+/* As dm_stats_get_area_start() for the current walk position. */
+int dm_stats_get_current_area_start(const struct dm_stats *dms,
+				    uint64_t *start)
+{
+	return dm_stats_get_area_start(dms, start,
+				       dms->cur_region, dms->cur_area);
+}
+
+/* As dm_stats_get_area_offset() for the current walk position. */
+int dm_stats_get_current_area_offset(const struct dm_stats *dms,
+				     uint64_t *offset)
+{
+	return dm_stats_get_area_offset(dms, offset,
+					dms->cur_region, dms->cur_area);
+}
+
+/* Area length for the current walk position. */
+int dm_stats_get_current_area_len(const struct dm_stats *dms,
+				  uint64_t *len)
+{
+	return dm_stats_get_region_area_len(dms, len, dms->cur_region);
+}
+
+/* Program id a region was registered with ("" if none recorded). */
+const char *dm_stats_get_region_program_id(const struct dm_stats *dms,
+					   uint64_t region_id)
+{
+	const char *program_id = dms->regions[region_id].program_id;
+	return (program_id) ? program_id : "";
+}
+
+/* Auxiliary data string of a region ("" if none recorded). */
+const char *dm_stats_get_region_aux_data(const struct dm_stats *dms,
+					 uint64_t region_id)
+{
+	const char *aux_data = dms->regions[region_id].aux_data;
+	return (aux_data) ? aux_data : "" ;
+}
+
+/* As dm_stats_get_region_program_id() for the current walk position. */
+const char *dm_stats_get_current_region_program_id(const struct dm_stats *dms)
+{
+	return dm_stats_get_region_program_id(dms, dms->cur_region);
+}
+
+/* As dm_stats_get_region_aux_data() for the current walk position. */
+const char *dm_stats_get_current_region_aux_data(const struct dm_stats *dms)
+{
+	return dm_stats_get_region_aux_data(dms, dms->cur_region);
+}
diff --git a/libdm/libdm-string.c b/libdm/libdm-string.c
index bc41b7042..587abfe34 100644
--- a/libdm/libdm-string.c
+++ b/libdm/libdm-string.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2006-2012 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2006-2015 Red Hat, Inc. All rights reserved.
*
* This file is part of the device-mapper userspace tools.
*
@@ -443,6 +443,161 @@ static int _close_enough(double d1, double d2)
return fabs(d1 - d2) < DBL_EPSILON;
}
+#define BASE_UNKNOWN 0
+#define BASE_SHARED 1
+#define BASE_1024 8
+#define BASE_1000 15
+#define BASE_SPECIAL 21
+#define NUM_UNIT_PREFIXES 6
+#define NUM_SPECIAL 3
+
+#define SIZE_BUF 128
+
+/*
+ * Format a size (given in 512-byte sectors) into a pool-allocated string.
+ *
+ * unit_type selects the output unit: a letter from the tables below, or
+ * 'h'/'H' for human-readable scaling (1024- or 1000-based respectively).
+ * use_si_units selects the case-sensitive SI table pair; unit_factor, if
+ * non-zero, overrides the factor derived from unit_type; include_suffix
+ * appends the unit name chosen by suffix_type.
+ *
+ * Returns "" (not NULL) on failure so the result is always printable.
+ */
+const char *dm_size_to_string(struct dm_pool *mem, uint64_t size,
+			      char unit_type, int use_si_units,
+			      uint64_t unit_factor, int include_suffix,
+			      dm_size_suffix_t suffix_type)
+{
+	unsigned base = BASE_UNKNOWN;
+	unsigned s;
+	int precision;
+	uint64_t byte = UINT64_C(0);
+	uint64_t units = UINT64_C(1024);
+	char *size_buf = NULL;
+	char new_unit_type = '\0', unit_type_buf[2];
+	/* [base + s][suffix_type] indexes a verbose/abbreviated/letter name */
+	const char * const size_str[][3] = {
+		/* BASE_UNKNOWN */
+		{"         ", "   ", " "}, /* [0] */
+
+		/* BASE_SHARED - Used if use_si_units = 0 */
+		{" Exabyte", " EB", "E"}, /* [1] */
+		{" Petabyte", " PB", "P"}, /* [2] */
+		{" Terabyte", " TB", "T"}, /* [3] */
+		{" Gigabyte", " GB", "G"}, /* [4] */
+		{" Megabyte", " MB", "M"}, /* [5] */
+		{" Kilobyte", " KB", "K"}, /* [6] */
+		{" Byte    ", " B", "B"}, /* [7] */
+
+		/* BASE_1024 - Used if use_si_units = 1 */
+		{" Exbibyte", " EiB", "e"}, /* [8] */
+		{" Pebibyte", " PiB", "p"}, /* [9] */
+		{" Tebibyte", " TiB", "t"}, /* [10] */
+		{" Gibibyte", " GiB", "g"}, /* [11] */
+		{" Mebibyte", " MiB", "m"}, /* [12] */
+		{" Kibibyte", " KiB", "k"}, /* [13] */
+		{" Byte    ", " B", "b"}, /* [14] */
+
+		/* BASE_1000 - Used if use_si_units = 1 */
+		{" Exabyte", " EB", "E"}, /* [15] */
+		{" Petabyte", " PB", "P"}, /* [16] */
+		{" Terabyte", " TB", "T"}, /* [17] */
+		{" Gigabyte", " GB", "G"}, /* [18] */
+		{" Megabyte", " MB", "M"}, /* [19] */
+		{" Kilobyte", " kB", "K"}, /* [20] */
+
+		/* BASE_SPECIAL */
+		{" Byte    ", " B ", "B"}, /* [21] (shared with BASE_1000) */
+		{" Units   ", " Un", "U"}, /* [22] */
+		{" Sectors ", " Se", "S"}, /* [23] */
+	};
+
+	if (!(size_buf = dm_pool_alloc(mem, SIZE_BUF))) {
+		log_error("no memory for size display buffer");
+		return "";
+	}
+
+	if (!use_si_units) {
+		/* Case-independent match */
+		for (s = 0; s < NUM_UNIT_PREFIXES; s++)
+			if (toupper((int) unit_type) ==
+			    *size_str[BASE_SHARED + s][2]) {
+				base = BASE_SHARED;
+				break;
+			}
+	} else {
+		/* Case-dependent match for powers of 1000 */
+		for (s = 0; s < NUM_UNIT_PREFIXES; s++)
+			if (unit_type == *size_str[BASE_1000 + s][2]) {
+				base = BASE_1000;
+				break;
+			}
+
+		/* Case-dependent match for powers of 1024 */
+		if (base == BASE_UNKNOWN)
+			for (s = 0; s < NUM_UNIT_PREFIXES; s++)
+				if (unit_type == *size_str[BASE_1024 + s][2]) {
+					base = BASE_1024;
+					break;
+				}
+	}
+
+	if (base == BASE_UNKNOWN)
+		/* Check for special units - s, b or u */
+		for (s = 0; s < NUM_SPECIAL; s++)
+			if (toupper((int) unit_type) ==
+			    *size_str[BASE_SPECIAL + s][2]) {
+				base = BASE_SPECIAL;
+				break;
+			}
+
+	if (size == UINT64_C(0)) {
+		/* 's' still holds the matched prefix offset here */
+		if (base == BASE_UNKNOWN)
+			s = 0;
+		sprintf(size_buf, "0%s", include_suffix ? size_str[base + s][suffix_type] : "");
+		return size_buf;
+	}
+
+	/* convert sectors to bytes */
+	size *= UINT64_C(512);
+
+	if (base != BASE_UNKNOWN) {
+		if (!unit_factor) {
+			unit_type_buf[0] = unit_type;
+			unit_type_buf[1] = '\0';
+			if (!(unit_factor = dm_units_to_factor(&unit_type_buf[0], &new_unit_type, 1, NULL)) ||
+			    unit_type != new_unit_type) {
+				/* The two functions should match (and unrecognised units get treated like 'h'). */
+				log_error(INTERNAL_ERROR "Inconsistent units: %c and %c.", unit_type, new_unit_type);
+				return "";
+			}
+		}
+		byte = unit_factor;
+	} else {
+		/* Human-readable style */
+		if (unit_type == 'H') {
+			units = UINT64_C(1000);
+			base = BASE_1000;
+		} else {
+			units = UINT64_C(1024);
+			base = BASE_1024;
+		}
+
+		if (!use_si_units)
+			base = BASE_SHARED;
+
+		/* start from the largest prefix and scale down to fit */
+		byte = units * units * units * units * units * units;
+
+		for (s = 0; s < NUM_UNIT_PREFIXES && size < byte; s++)
+			byte /= units;
+
+		include_suffix = 1;
+	}
+
+	/* FIXME Make precision configurable */
+	switch (toupper(*size_str[base + s][DM_SIZE_UNIT])) {
+	case 'B':
+	case 'S':
+		precision = 0;
+		break;
+	default:
+		precision = 2;
+	}
+
+	snprintf(size_buf, SIZE_BUF - 1, "%.*f%s", precision,
+		 (double) size / byte, include_suffix ? size_str[base + s][suffix_type] : "");
+
+	return size_buf;
+}
+
uint64_t dm_units_to_factor(const char *units, char *unit_type,
int strict, const char **endptr)
{
diff --git a/libdm/libdm-timestamp.c b/libdm/libdm-timestamp.c
new file mode 100644
index 000000000..9be93a48a
--- /dev/null
+++ b/libdm/libdm-timestamp.c
@@ -0,0 +1,178 @@
+/*
+ * Copyright (C) 2006 Rackable Systems All rights reserved.
+ * Copyright (C) 2015 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/*
+ * Abstract out the time methods used so they can be adjusted later -
+ * the results of these routines should stay in-core.
+ */
+
+#include "dmlib.h"
+
+#include <stdlib.h>
+
+#define NSEC_PER_USEC UINT64_C(1000)
+#define NSEC_PER_MSEC UINT64_C(1000000)
+#define NSEC_PER_SEC UINT64_C(1000000000)
+
+/*
+ * The realtime section uses clock_gettime with the CLOCK_MONOTONIC
+ * parameter to prevent issues with time warps
+ * This implementation requires librt.
+ */
+#ifdef HAVE_REALTIME
+
+#include <time.h>
+
+struct dm_timestamp {
+ struct timespec t;
+};
+
+/* Collapse a timestamp into a single nanosecond count for arithmetic. */
+static uint64_t _timestamp_to_uint64(struct dm_timestamp *ts)
+{
+	uint64_t stamp = 0;
+
+	stamp += (uint64_t) ts->t.tv_sec * NSEC_PER_SEC;
+	stamp += (uint64_t) ts->t.tv_nsec;
+
+	return stamp;
+}
+
+/* Allocate a zeroed timestamp object; returns NULL on failure. */
+struct dm_timestamp *dm_timestamp_alloc(void)
+{
+	struct dm_timestamp *ts = NULL;
+
+	if (!(ts = dm_zalloc(sizeof(*ts))))
+		stack;
+
+	return ts;
+}
+
+/*
+ * Capture the current monotonic time into ts.  Returns 1 on success;
+ * on failure ts is zeroed and 0 is returned.
+ */
+int dm_timestamp_get(struct dm_timestamp *ts)
+{
+	if (!ts)
+		return 0;
+
+	if (clock_gettime(CLOCK_MONOTONIC, &ts->t)) {
+		log_sys_error("clock_gettime", "get_timestamp");
+		ts->t.tv_sec = 0;
+		ts->t.tv_nsec = 0;
+		return 0;
+	}
+
+	return 1;
+}
+
+#else /* ! HAVE_REALTIME */
+
+/*
+ * The !realtime section just uses gettimeofday and is therefore subject
+ * to ntp-type time warps - not sure if should allow that.
+ */
+
+#include <sys/time.h>
+
+struct dm_timestamp {
+ struct timeval t;
+};
+
+/*
+ * Collapse a timestamp into a single nanosecond count for arithmetic.
+ * Cast explicitly before multiplying, matching the HAVE_REALTIME
+ * variant, so the arithmetic is unambiguously 64-bit.
+ */
+static uint64_t _timestamp_to_uint64(struct dm_timestamp *ts)
+{
+	uint64_t stamp = 0;
+
+	stamp += (uint64_t) ts->t.tv_sec * NSEC_PER_SEC;
+	stamp += (uint64_t) ts->t.tv_usec * NSEC_PER_USEC;
+
+	return stamp;
+}
+
+/*
+ * Allocate a zeroed timestamp object; returns NULL on failure.
+ * Use dm_zalloc for consistency with the HAVE_REALTIME variant so a
+ * freshly allocated timestamp is never read uninitialized.
+ */
+struct dm_timestamp *dm_timestamp_alloc(void)
+{
+	struct dm_timestamp *ts;
+
+	if (!(ts = dm_zalloc(sizeof(*ts))))
+		stack;
+
+	return ts;
+}
+
+/*
+ * Capture the current wall-clock time into ts.  Returns 1 on success;
+ * on failure ts is zeroed and 0 is returned.  Subject to ntp-style
+ * time warps (see file header).
+ */
+int dm_timestamp_get(struct dm_timestamp *ts)
+{
+	if (!ts)
+		return 0;
+
+	if (gettimeofday(&ts->t, NULL)) {
+		log_sys_error("gettimeofday", "get_timestamp");
+		ts->t.tv_sec = 0;
+		ts->t.tv_usec = 0;
+		return 0;
+	}
+
+	return 1;
+}
+
+#endif /* HAVE_REALTIME */
+
+/*
+ * Compare two timestamps.
+ *
+ * Return: -1 if ts1 is less than ts2
+ *  	    0 if ts1 is equal to ts2
+ *          1 if ts1 is greater than ts2
+ */
+int dm_timestamp_compare(struct dm_timestamp *ts1, struct dm_timestamp *ts2)
+{
+	uint64_t t1, t2;
+
+	t1 = _timestamp_to_uint64(ts1);
+	t2 = _timestamp_to_uint64(ts2);
+
+	if (t2 < t1)
+		return 1;
+
+	if (t1 < t2)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Return the absolute difference in nanoseconds between
+ * the dm_timestamp objects ts1 and ts2.
+ *
+ * Callers that need to know whether ts1 is before, equal to, or after ts2
+ * in addition to the magnitude should use dm_timestamp_compare.
+ */
+uint64_t dm_timestamp_delta(struct dm_timestamp *ts1, struct dm_timestamp *ts2)
+{
+	uint64_t t1, t2;
+
+	t1 = _timestamp_to_uint64(ts1);
+	t2 = _timestamp_to_uint64(ts2);
+
+	/* unsigned values: subtract smaller from larger to avoid wrap */
+	if (t1 > t2)
+		return t1 - t2;
+
+	return t2 - t1;
+}
+
+/* Copy ts_old into ts_new (plain struct assignment). */
+void dm_timestamp_copy(struct dm_timestamp *ts_new, struct dm_timestamp *ts_old)
+{
+	*ts_new = *ts_old;
+}
+
+/* Free a timestamp allocated with dm_timestamp_alloc(). */
+void dm_timestamp_destroy(struct dm_timestamp *ts)
+{
+	dm_free(ts);
+}
diff --git a/libdm/misc/dm-ioctl.h b/libdm/misc/dm-ioctl.h
index 01a2b9e1b..cdb0c4c20 100644
--- a/libdm/misc/dm-ioctl.h
+++ b/libdm/misc/dm-ioctl.h
@@ -1,6 +1,6 @@
/*
* Copyright (C) 2001 - 2003 Sistina Software (UK) Limited.
- * Copyright (C) 2004 - 2014 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2004 - 2015 Red Hat, Inc. All rights reserved.
*
* This file is released under the LGPL.
*/
@@ -269,9 +269,9 @@ enum {
#define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
#define DM_VERSION_MAJOR 4
-#define DM_VERSION_MINOR 27
+#define DM_VERSION_MINOR 31
#define DM_VERSION_PATCHLEVEL 0
-#define DM_VERSION_EXTRA "-ioctl (2013-10-30)"
+#define DM_VERSION_EXTRA "-ioctl (2015-03-12)"
/* Status bits */
#define DM_READONLY_FLAG (1 << 0) /* In/Out */
@@ -354,4 +354,9 @@ enum {
*/
#define DM_DEFERRED_REMOVE (1 << 17) /* In/Out */
+/*
+ * If set, the device is suspended internally.
+ */
+#define DM_INTERNAL_SUSPEND_FLAG (1 << 18) /* Out */
+
#endif /* _LINUX_DM_IOCTL_H */
diff --git a/libdm/mm/dbg_malloc.c b/libdm/mm/dbg_malloc.c
index e26f05eef..ac714807f 100644
--- a/libdm/mm/dbg_malloc.c
+++ b/libdm/mm/dbg_malloc.c
@@ -22,6 +22,22 @@
#include <assert.h>
#include <stdarg.h>
+void *dm_malloc_aux(size_t s, const char *file, int line)
+ __attribute__((__malloc__)) __attribute__((__warn_unused_result__));
+void *dm_malloc_aux_debug(size_t s, const char *file, int line)
+ __attribute__((__malloc__)) __attribute__((__warn_unused_result__));
+void *dm_zalloc_aux(size_t s, const char *file, int line)
+ __attribute__((__malloc__)) __attribute__((__warn_unused_result__));
+void *dm_zalloc_aux_debug(size_t s, const char *file, int line)
+ __attribute__((__malloc__)) __attribute__((__warn_unused_result__));
+void *dm_realloc_aux(void *p, unsigned int s, const char *file, int line)
+ __attribute__((__warn_unused_result__));
+void dm_free_aux(void *p);
+char *dm_strdup_aux(const char *str, const char *file, int line)
+ __attribute__((__warn_unused_result__));
+int dm_dump_memory_debug(void);
+void dm_bounds_check_debug(void);
+
char *dm_strdup_aux(const char *str, const char *file, int line)
{
char *ret;
@@ -279,3 +295,82 @@ void *dm_zalloc_aux(size_t s, const char *file, int line)
return ptr;
}
+
+#ifdef DEBUG_MEM
+
+/*
+ * Public allocation entry points.  With DEBUG_MEM the wrappers route to
+ * the *_debug tracking implementations; otherwise they fall through to
+ * the plain allocators below.
+ */
+void *dm_malloc_wrapper(size_t s, const char *file, int line)
+{
+	return dm_malloc_aux_debug(s, file, line);
+}
+
+void *dm_zalloc_wrapper(size_t s, const char *file, int line)
+{
+	return dm_zalloc_aux_debug(s, file, line);
+}
+
+char *dm_strdup_wrapper(const char *str, const char *file, int line)
+{
+	return dm_strdup_aux(str, file, line);
+}
+
+void dm_free_wrapper(void *ptr)
+{
+	dm_free_aux(ptr);
+}
+
+void *dm_realloc_wrapper(void *p, unsigned int s, const char *file, int line)
+{
+	return dm_realloc_aux(p, s, file, line);
+}
+
+int dm_dump_memory_wrapper(void)
+{
+	return dm_dump_memory_debug();
+}
+
+void dm_bounds_check_wrapper(void)
+{
+	dm_bounds_check_debug();
+}
+
+#else /* !DEBUG_MEM */
+
+void *dm_malloc_wrapper(size_t s, const char *file, int line)
+{
+	return dm_malloc_aux(s, file, line);
+}
+
+void *dm_zalloc_wrapper(size_t s, const char *file, int line)
+{
+	return dm_zalloc_aux(s, file, line);
+}
+
+/* file/line are only used by the DEBUG_MEM variants */
+char *dm_strdup_wrapper(const char *str,
+			const char *file __attribute__((unused)),
+			int line __attribute__((unused)))
+{
+	return strdup(str);
+}
+
+void dm_free_wrapper(void *ptr)
+{
+	free(ptr);
+}
+
+void *dm_realloc_wrapper(void *p, unsigned int s,
+			 const char *file __attribute__((unused)),
+			 int line __attribute__((unused)))
+{
+	return realloc(p, s);
+}
+
+/* no tracking without DEBUG_MEM: report success unconditionally */
+int dm_dump_memory_wrapper(void)
+{
+	return 1;
+}
+
+void dm_bounds_check_wrapper(void)
+{
+}
+
+#endif /* DEBUG_MEM */
diff --git a/liblvm/Makefile.in b/liblvm/Makefile.in
index d0f8d1b5b..69c12bb48 100644
--- a/liblvm/Makefile.in
+++ b/liblvm/Makefile.in
@@ -81,4 +81,4 @@ liblvm.cflow: $(SOURCES)
cflow: liblvm.cflow
-DISTCLEAN_TARGETS += $(LIB_NAME).pc .exported_symbols_generated
+DISTCLEAN_TARGETS += $(LIB_NAME).pc
diff --git a/liblvm/lvm2app.h b/liblvm/lvm2app.h
index 3692f9a7d..1a5bc0892 100644
--- a/liblvm/lvm2app.h
+++ b/liblvm/lvm2app.h
@@ -231,10 +231,12 @@ typedef struct lvm_property_value {
uint32_t is_string:1;
uint32_t is_integer:1;
uint32_t is_valid:1;
- uint32_t padding:28;
+ uint32_t is_signed:1;
+ uint32_t padding:27;
union {
const char *string;
uint64_t integer;
+ int64_t signed_integer;
} value;
} lvm_property_value_t;
diff --git a/liblvm/lvm_base.c b/liblvm/lvm_base.c
index 31fc0bbf6..5b14c3bdf 100644
--- a/liblvm/lvm_base.c
+++ b/liblvm/lvm_base.c
@@ -45,7 +45,7 @@ static lvm_t _lvm_init(const char *system_dir)
/* create context */
/* FIXME: split create_toolcontext */
/* FIXME: make all globals configurable */
- cmd = create_toolcontext(0, system_dir, 0, 0);
+ cmd = create_toolcontext(0, system_dir, 0, 0, 1, 1);
if (!cmd)
return NULL;
@@ -96,6 +96,7 @@ lvm_t lvm_init(const char *system_dir)
void lvm_quit(lvm_t libh)
{
struct saved_env e = store_user_env((struct cmd_context *)libh);
+ fin_locking();
destroy_toolcontext((struct cmd_context *)libh);
udev_fin_library_context();
restore_user_env(&e);
diff --git a/liblvm/lvm_misc.c b/liblvm/lvm_misc.c
index ff26cdfb2..431d35426 100644
--- a/liblvm/lvm_misc.c
+++ b/liblvm/lvm_misc.c
@@ -88,6 +88,7 @@ struct lvm_property_value get_property(const pv_t pv, const vg_t vg,
v.is_settable = prop.is_settable;
v.is_string = prop.is_string;
v.is_integer = prop.is_integer;
+ v.is_signed = prop.is_signed;
if (v.is_string)
v.value.string = prop.value.string;
if (v.is_integer)
diff --git a/liblvm/lvm_prop.c b/liblvm/lvm_prop.c
index 79ed1132e..4ea868d97 100644
--- a/liblvm/lvm_prop.c
+++ b/liblvm/lvm_prop.c
@@ -41,7 +41,7 @@ SET_PVCREATEPARAMS_NUM_PROPERTY_FN(zero, pvcp->zero)
struct lvm_property_type _lib_properties[] = {
#include "lvm_prop_fields.h"
- { 0, "", 0, 0, 0, { .integer = 0 }, prop_not_implemented_get,
+ { 0, "", 0, 0, 0, 0, { .integer = 0 }, prop_not_implemented_get,
prop_not_implemented_set },
};
diff --git a/liblvm/lvm_pv.c b/liblvm/lvm_pv.c
index 64b01d500..24b27327f 100644
--- a/liblvm/lvm_pv.c
+++ b/liblvm/lvm_pv.c
@@ -16,6 +16,7 @@
#include "lib.h"
#include "metadata.h"
#include "lvm-string.h"
+#include "str_list.h"
#include "lvm_misc.h"
#include "lvm2app.h"
#include "locking.h"
@@ -118,8 +119,14 @@ int lvm_pv_remove(lvm_t libh, const char *pv_name)
int rc = 0;
struct cmd_context *cmd = (struct cmd_context *)libh;
struct saved_env e = store_user_env(cmd);
+ struct dm_list pv_names;
- if (!pvremove_single(cmd, pv_name, NULL, 0, 0))
+ dm_list_init(&pv_names);
+
+ if (!str_list_add(cmd->mem, &pv_names, pv_name))
+ rc = -1;
+
+ if (rc >= 0 && !pvremove_many(cmd, &pv_names, 0, 0))
rc = -1;
restore_user_env(&e);
diff --git a/liblvm/lvm_vg.c b/liblvm/lvm_vg.c
index 76c5c6356..a2d42d23f 100644
--- a/liblvm/lvm_vg.c
+++ b/liblvm/lvm_vg.c
@@ -218,7 +218,7 @@ static vg_t _lvm_vg_open(lvm_t libh, const char *vgname, const char *mode,
return NULL;
}
- vg = vg_read((struct cmd_context *)libh, vgname, NULL, internal_flags);
+ vg = vg_read((struct cmd_context *)libh, vgname, NULL, internal_flags, 0);
if (vg_read_error(vg)) {
/* FIXME: use log_errno either here in inside vg_read */
release_vg(vg);
diff --git a/make.tmpl.in b/make.tmpl.in
index 7efe46b9c..e4f8835fd 100644
--- a/make.tmpl.in
+++ b/make.tmpl.in
@@ -46,14 +46,15 @@ LIBS = @LIBS@
STATIC_LIBS = $(SELINUX_LIBS) $(UDEV_LIBS) $(BLKID_LIBS)
DEFS += @DEFS@
# FIXME set this only where it's needed, not globally?
-CFLAGS += @CFLAGS@
+CFLAGS ?= @COPTIMISE_FLAG@ @CFLAGS@
+LDFLAGS ?= @COPTIMISE_FLAG@ @LDFLAGS@
CLDFLAGS += @CLDFLAGS@
ELDFLAGS += @ELDFLAGS@
LDDEPS += @LDDEPS@
-LDFLAGS += @LDFLAGS@
LIB_SUFFIX = @LIB_SUFFIX@
LVMINTERNAL_LIBS = -llvm-internal $(DAEMON_LIBS) $(UDEV_LIBS) $(DL_LIBS) $(BLKID_LIBS)
DL_LIBS = @DL_LIBS@
+M_LIBS = @M_LIBS@
PTHREAD_LIBS = @PTHREAD_LIBS@
READLINE_LIBS = @READLINE_LIBS@
SELINUX_LIBS = @SELINUX_LIBS@
@@ -61,6 +62,7 @@ UDEV_CFLAGS = @UDEV_CFLAGS@
UDEV_LIBS = @UDEV_LIBS@
BLKID_CFLAGS = @BLKID_CFLAGS@
BLKID_LIBS = @BLKID_LIBS@
+VALGRIND_CFLAGS = @VALGRIND_CFLAGS@
TESTING = @TESTING@
# Setup directory variables
@@ -73,13 +75,15 @@ bindir = $(DESTDIR)@bindir@
confdir = $(DESTDIR)@CONFDIR@/lvm
includedir = $(DESTDIR)@includedir@
libdir = $(DESTDIR)@libdir@
+libexecdir = $(DESTDIR)@libexecdir@
usrlibdir = $(DESTDIR)@usrlibdir@
sbindir = $(DESTDIR)@sbindir@
usrsbindir = $(DESTDIR)@usrsbindir@
datarootdir = @datarootdir@
+datadir = $(DESTDIR)@datadir@
infodir = $(DESTDIR)@infodir@
mandir = $(DESTDIR)@mandir@
-localedir = $(DESTDIR)@LOCALEDIR@
+localedir = $(DESTDIR)@localedir@
staticdir = $(DESTDIR)@STATICDIR@
udevdir = $(DESTDIR)@udevdir@
pkgconfigdir = $(usrlibdir)/pkgconfig
@@ -106,12 +110,14 @@ DEFAULT_MANGLING = @MANGLING@
# Setup vpath search paths for some suffixes
vpath %.c $(srcdir)
+vpath %.cpp $(srcdir)
vpath %.in $(srcdir)
vpath %.po $(srcdir)
vpath %.exported_symbols $(srcdir)
interface = @interface@
interfacebuilddir = $(top_builddir)/libdm/$(interface)
+rpmbuilddir = $(abs_top_builddir)/build
# The number of jobs to run, if blank, defaults to the make standard
ifndef MAKEFLAGS
@@ -136,23 +142,51 @@ INSTALL_ROOT_DIR = $(INSTALL) -m 700 -d
INSTALL_ROOT_DATA = $(INSTALL) -m 600
INSTALL_SCRIPT = $(INSTALL) -p $(M_INSTALL_PROGRAM)
-.SUFFIXES: .c .d .o .so .a .po .pot .mo .dylib
-
-WFLAGS += -Wall -Wundef -Wshadow -Wcast-align -Wwrite-strings \
- -Wmissing-prototypes -Wmissing-declarations -Wnested-externs \
- -Winline -Wmissing-noreturn -Wformat-security -Wredundant-decls \
- -Wpointer-arith -Wuninitialized -Wmissing-include-dirs \
- -Wfloat-equal -Wstrict-prototypes \
- -Wold-style-definition -Wmissing-format-attribute
+.SUFFIXES: .c .cpp .d .o .so .a .po .pot .mo .dylib
+
+WFLAGS +=\
+ -Wall\
+ -Wcast-align\
+ -Wfloat-equal\
+ -Wformat-security\
+ -Winline\
+ -Wmissing-format-attribute\
+ -Wmissing-include-dirs\
+ -Wmissing-noreturn\
+ -Wpointer-arith\
+ -Wredundant-decls\
+ -Wshadow\
+ -Wundef\
+ -Wwrite-strings
+
+WCFLAGS +=\
+ -Wmissing-declarations\
+ -Wmissing-prototypes\
+ -Wnested-externs\
+ -Wold-style-definition\
+ -Wstrict-prototypes\
+ -Wuninitialized
ifeq ("@HAVE_WJUMP@", "yes")
-WFLAGS += -Wjump-misses-init
+WCFLAGS += -Wjump-misses-init
endif
ifeq ("@HAVE_WCLOBBERED@", "yes")
-WFLAGS += -Wclobbered -Wempty-body -Wignored-qualifiers \
- -Wmissing-parameter-type -Wold-style-declaration -Woverride-init \
- -Wtype-limits -Wsync-nand -Wlogical-op
+WFLAGS +=\
+ -Wclobbered\
+ -Wempty-body\
+ -Wignored-qualifiers\
+ -Wlogical-op\
+ -Wtype-limits
+
+WCFLAGS +=\
+ -Wmissing-parameter-type\
+ -Wold-style-declaration\
+ -Woverride-init
+endif
+
+ifeq ("@HAVE_WSYNCNAND@", "yes")
+WFLAGS += -Wsync-nand
endif
ifneq ("@STATIC_LINK@", "yes")
@@ -166,13 +200,18 @@ endif
endif
#WFLAGS += -W -Wno-sign-compare -Wno-unused-parameter -Wno-missing-field-initializers
-#WFLAGS += -Wsign-compare -Wunused-parameter -Wmissing-field-initializers
+#WFLAGS += -Wsign-compare -Wunused-parameter -Wmissing-field-initializers
#WFLAGS += -Wconversion -Wbad-function-cast -Wcast-qual -Waggregate-return -Wpacked
#WFLAGS += -pedantic -std=gnu99
#DEFS += -DDEBUG_CRC32
-CFLAGS += -fPIC @COPTIMISE_FLAG@
-LDFLAGS += @COPTIMISE_FLAG@
+#
+# Avoid recursive extension of CFLAGS
+# by checking whether CFLAGS already has fPIC string
+#
+ifeq (,$(findstring fPIC,$(CFLAGS)))
+
+CFLAGS += -fPIC
ifeq ("@DEBUG@", "yes")
CFLAGS += -g -fno-omit-frame-pointer
@@ -183,8 +222,7 @@ ifeq ("@DEBUG@", "yes")
endif
endif
-ifeq ("@INTL@", "yes")
- DEFS += -DINTL_PACKAGE=\"@INTL_PACKAGE@\" -DLOCALEDIR=\"@LOCALEDIR@\"
+# end of fPIC protection
endif
LDFLAGS += -L$(top_builddir)/libdm -L$(top_builddir)/lib
@@ -199,14 +237,6 @@ ifeq ("@DMEVENTD@", "yes")
CLDFLAGS += -L$(top_builddir)/daemons/dmeventd
endif
-ifeq ("@DM_COMPAT@", "yes")
- DEFS += -DDM_COMPAT
-endif
-
-ifeq ("@DM_IOCTLS@", "yes")
- DEFS += -DDM_IOCTLS
-endif
-
# Combination of DEBUG_POOL and DEBUG_ENFORCE_POOL_LOCKING is not suppored.
#DEFS += -DDEBUG_POOL
# Default pool locking is using the crc checksum. With mprotect memory
@@ -232,14 +262,14 @@ LIB_VERSION_DM := $(shell $(AWK) -F '.' '{printf "%s.%s",$$1,$$2}' $(top_srcdir)
LIB_VERSION_APP := $(shell $(AWK) -F '[(). ]' '{printf "%s.%s",$$1,$$4}' $(top_srcdir)/VERSION)
-INCLUDES += -I. -I$(top_builddir)/include
+INCLUDES += -I$(srcdir) -I$(top_builddir)/include
INC_LNS = $(top_builddir)/include/.symlinks_created
DEPS = $(top_builddir)/make.tmpl $(top_srcdir)/VERSION \
$(top_builddir)/Makefile $(INC_LNS)
-OBJECTS = $(SOURCES:%.c=%.o)
+OBJECTS = $(SOURCES:%.c=%.o) $(CXXSOURCES:%.cpp=%.o)
POTFILES = $(SOURCES:%.c=%.pot)
.PHONY: all pofile distclean clean cleandir cflow device-mapper
@@ -250,6 +280,7 @@ POTFILES = $(SOURCES:%.c=%.pot)
.PHONY: $(SUBDIRS) $(SUBDIRS.install) $(SUBDIRS.clean) $(SUBDIRS.distclean)
.PHONY: $(SUBDIRS.pofile) $(SUBDIRS.install_cluster) $(SUBDIRS.cflow)
.PHONY: $(SUBDIRS.device-mapper) $(SUBDIRS.install-device-mapper)
+.PHONY: $(SUBDIRS.generate) generate
SUBDIRS.device-mapper := $(SUBDIRS:=.device-mapper)
SUBDIRS.install := $(SUBDIRS:=.install)
@@ -310,6 +341,9 @@ $(SUBDIRS.pofile):
$(MAKE) -C $(@:.pofile=) pofile
endif
+$(SUBDIRS.generate):
+ $(MAKE) -C $(@:.generate=) generate
+
ifneq ("$(CFLOW_LIST_TARGET)", "")
CLEAN_CFLOW += $(CFLOW_LIST_TARGET)
$(CFLOW_LIST_TARGET): $(CFLOW_LIST)
@@ -361,13 +395,14 @@ cflow: $(CFLOW_TARGET).cflow $(CFLOW_TARGET).tree $(CFLOW_TARGET).rxref $(CFLOW_
endif
endif
-$(TARGETS): $(OBJECTS)
-
%.o: %.c
- $(CC) -c $(INCLUDES) $(DEFS) $(WFLAGS) $(CFLAGS) $(CFLAGS_$@) $< -o $@
+ $(CC) -c $(INCLUDES) $(DEFS) $(WFLAGS) $(WCFLAGS) $(CFLAGS) $(CFLAGS_$@) $< -o $@
+
+%.o: %.cpp
+ $(CXX) -c $(INCLUDES) $(DEFS) $(WFLAGS) $(CXXFLAGS) $(CXXFLAGS_$@) $< -o $@
%.pot: %.c Makefile
- $(CC) -E $(INCLUDES) -include $(top_builddir)/include/pogen.h \
+ $(CC) -E $(INCLUDES) $(BLKID_CFLAGS) $(VALGRIND_CFLAGS) $(UDEV_CFLAGS) -include $(top_builddir)/include/pogen.h \
$(DEFS) $(WFLAGS) $(CFLAGS) $< > $@
%.so: %.o
@@ -389,12 +424,14 @@ endif
$(LIB_SHARED): $(LIB_SHARED).$(LIB_VERSION)
$(LN_S) -f $(<F) $@
+CLEAN_TARGETS += $(LDDEPS) .exported_symbols_generated
+
install_lib_shared: $(LIB_SHARED)
$(INSTALL_PROGRAM) -D $< $(libdir)/$(<F).$(LIB_VERSION)
$(INSTALL_DIR) $(usrlibdir)
$(LN_S) -f $(USRLIB_RELPATH)$(<F).$(LIB_VERSION) $(usrlibdir)/$(<F)
-# FIXME: plugins are installed to subdirs
+# FIXME: plugins are installed to subdirs
# and for compatibility links in libdir are created
# when the code is fixed links could be removed.
install_dm_plugin: $(LIB_SHARED)
@@ -416,7 +453,7 @@ $(LIB_STATIC): $(OBJECTS)
set -e; \
FILE=`echo $@ | sed 's/\\//\\\\\\//g;s/\\.d//g'`; \
DEPS=`echo $(DEPS) | sed -e 's/\\//\\\\\\//g'`; \
- $(CC) -MM $(INCLUDES) $(BLKID_CFLAGS) $(DEFS) -o $@ $<; \
+ $(CC) -MM $(INCLUDES) $(BLKID_CFLAGS) $(VALGRIND_CFLAGS) $(UDEV_CFLAGS) $(DEFS) -o $@ $<; \
sed -i "s/\(.*\)\.o[ :]*/$$FILE.o $$FILE.d $$FILE.pot: $$DEPS /g" $@; \
DEPLIST=`sed 's/ \\\\//;s/.*://;' < $@`; \
echo $$DEPLIST | fmt -1 | sed 's/ //g;s/\(.*\)/\1:/' >> $@; \
@@ -425,36 +462,58 @@ $(LIB_STATIC): $(OBJECTS)
%.mo: %.po
$(MSGFMT) -o $@ $<
+CLEAN_TARGETS += \
+ $(SOURCES:%.c=%.d) $(SOURCES:%.c=%.gcno) $(SOURCES:%.c=%.gcda) \
+ $(SOURCES2:%.c=%.o) $(SOURCES2:%.c=%.d) $(SOURCES2:%.c=%.gcno) $(SOURCES2:%.c=%.gcda) \
+ $(POTFILES) $(CLEAN_CFLOW)
+
cleandir:
- $(RM) $(OBJECTS) $(TARGETS) $(CLEAN_TARGETS) $(CLEAN_CFLOW) $(LDDEPS) \
- $(POTFILES) $(SOURCES:%.c=%.d) $(SOURCES:%.c=%.gcno) $(SOURCES:%.c=%.gcda) \
- $(SOURCES2:%.c=%.o) $(SOURCES2:%.c=%.d) $(SOURCES2:%.c=%.gcno) $(SOURCES2:%.c=%.gcda) \
- .exported_symbols_generated core
+ifneq (,$(firstword $(CLEAN_DIRS)))
+ $(RM) -r $(CLEAN_DIRS)
+endif
+ $(RM) $(OBJECTS) $(TARGETS) $(CLEAN_TARGETS) core
clean: $(SUBDIRS.clean) cleandir
distclean: cleandir $(SUBDIRS.distclean)
- test -z "$(DISTCLEAN_DIRS)" || $(RM) -r $(DISTCLEAN_DIRS)
- $(RM) $(DISTCLEAN_TARGETS) Makefile
+ifneq (,$(firstword $(DISTCLEAN_DIRS)))
+ $(RM) -r $(DISTCLEAN_DIRS)
+endif
+ $(RM) $(DISTCLEAN_TARGETS) Makefile
-.exported_symbols_generated: $(EXPORTED_HEADER) .exported_symbols
+.exported_symbols_generated: $(EXPORTED_HEADER) .exported_symbols $(DEPS)
set -e; \
( cat $(srcdir)/.exported_symbols; \
- if test x$(EXPORTED_HEADER) != x; then \
+ if test -n "$(EXPORTED_HEADER)"; then \
$(CC) -E -P $(INCLUDES) $(DEFS) $(EXPORTED_HEADER) | \
$(SED) -ne "/^typedef|}/!s/.*[ *]\($(EXPORTED_FN_PREFIX)_[a-z0-9_]*\)(.*/\1/p"; \
fi \
) > $@
-.export.sym: .exported_symbols_generated
- set -e; (echo "Base {"; echo " global:"; \
- sed "s/^/ /;s/$$/;/" < $<; \
+EXPORTED_UC := $(shell echo $(EXPORTED_FN_PREFIX) | tr '[a-z]' '[A-Z]')
+EXPORTED_SYMBOLS := $(wildcard $(srcdir)/.exported_symbols.Base $(srcdir)/.exported_symbols.$(EXPORTED_UC)_[0-9_]*[0-9])
+
+.export.sym: .exported_symbols_generated $(EXPORTED_SYMBOLS)
+ifeq (,$(firstword $(EXPORTED_SYMBOLS)))
+ set -e; (echo "Base {"; echo " global:";\
+ $(SED) "s/^/ /;s/$$/;/" $<;\
echo " local:"; echo " *;"; echo "};") > $@
+else
+ set -e;\
+ R=$$(sort $^ | uniq -u);\
+ test -z "$$R" || { echo "Mismatch between symbols in shared library and lists in .exported_symbols.* files: $$R"; false; } ;\
+ for i in $(EXPORTED_SYMBOLS); do\
+ echo "$${i##*.} {"; echo " global:";\
+ $(SED) "s/^/ /;s/$$/;/" $$i;\
+ test "$$i" = Base && { echo " local:"; echo " *;"; };\
+ echo "};";\
+ done > $@
+endif
ifeq (,$(findstring $(MAKECMDGOALS),cscope.out cflow clean distclean lcov \
- help check check_local check_cluster check_lvmetad))
+ help check check_local check_cluster check_lvmetad check_lvmpolld))
ifdef SOURCES
- -include $(SOURCES:.c=.d)
+ -include $(SOURCES:.c=.d) $(CXXSOURCES:.cpp=.d)
endif
ifdef SOURCES2
-include $(SOURCES2:.c=.d)
diff --git a/man/Makefile.in b/man/Makefile.in
index c42f07a8b..dc16a76de 100644
--- a/man/Makefile.in
+++ b/man/Makefile.in
@@ -40,17 +40,30 @@ else
LVMETAD =
endif
+ifeq ("@BUILD_LVMPOLLD@", "yes")
+LVMPOLLD = lvmpolld.8
+else
+LVMPOLLD =
+endif
+
+ifeq ("@BUILD_LVMLOCKD@", "yes")
+LVMLOCKD = lvmlockd.8
+else
+LVMLOCKD =
+endif
+
MAN5=lvm.conf.5
-MAN7=
-MAN8=lvm-dumpconfig.8 \
- lvchange.8 lvconvert.8 lvcreate.8 lvdisplay.8 lvextend.8 lvm.8 \
- lvmchange.8 lvmconf.8 lvmdiskscan.8 lvmdump.8 lvmsadc.8 lvmsar.8 \
+MAN7=lvmsystemid.7
+MAN8=lvm-config.8 lvm-dumpconfig.8 lvm-lvpoll.8 \
+ lvchange.8 lvmconfig.8 lvconvert.8 lvcreate.8 lvdisplay.8 lvextend.8 \
+ lvm.8 lvmchange.8 lvmconf.8 lvmdiskscan.8 lvmdump.8 lvmsadc.8 lvmsar.8 \
lvreduce.8 lvremove.8 lvrename.8 lvresize.8 lvs.8 \
lvscan.8 pvchange.8 pvck.8 pvcreate.8 pvdisplay.8 pvmove.8 pvremove.8 \
pvresize.8 pvs.8 pvscan.8 vgcfgbackup.8 vgcfgrestore.8 vgchange.8 \
vgck.8 vgcreate.8 vgconvert.8 vgdisplay.8 vgexport.8 vgextend.8 \
vgimport.8 vgimportclone.8 vgmerge.8 vgmknodes.8 vgreduce.8 vgremove.8 \
- vgrename.8 vgs.8 vgscan.8 vgsplit.8 $(FSADMMAN) $(BLKDEACTIVATEMAN) $(LVMETAD)
+ vgrename.8 vgs.8 vgscan.8 vgsplit.8 $(FSADMMAN) $(LVMETAD) $(LVMPOLLD) \
+ $(LVMLOCKD)
ifneq ("@CLVMD@", "none")
MAN8CLUSTER=clvmd.8
@@ -70,7 +83,7 @@ ifneq ("@THIN@", "none")
MAN7+=lvmthin.7
endif
-MAN8DM=dmsetup.8 $(DMEVENTDMAN)
+MAN8DM=dmsetup.8 dmstats.8 $(DMEVENTDMAN) $(BLKDEACTIVATEMAN)
MAN5DIR=$(mandir)/man5
MAN7DIR=$(mandir)/man7
MAN8DIR=$(mandir)/man8
diff --git a/man/blkdeactivate.8.in b/man/blkdeactivate.8.in
index ebeaeaabe..990be4aac 100644
--- a/man/blkdeactivate.8.in
+++ b/man/blkdeactivate.8.in
@@ -9,9 +9,7 @@ blkdeactivate \(em utility to deactivate block devices
.RB [ \-l \ \fIlvm_options\fP ]
.RB [ \-u ]
.RB [ \-v ]
-.RS
.RI [ device ]
-.RE
.SH DESCRIPTION
blkdeactivate utility deactivates block devices. If a device
is mounted, the utility can unmount it automatically before
@@ -57,7 +55,7 @@ Unmount a mounted device before trying to deactivate it.
Without this option used, a device that is mounted is not deactivated.
.TP
.BR \-v ", " \-\-verbose
-Run in verbose mode.
+Run in verbose mode. Use \-\-vv for even more verbose mode.
.SH EXAMPLES
.sp
Deactivate all supported block devices found in the system. If a device
diff --git a/man/clvmd.8.in b/man/clvmd.8.in
index 9b2bd4e1e..2c7330b0b 100644
--- a/man/clvmd.8.in
+++ b/man/clvmd.8.in
@@ -118,7 +118,7 @@ Display the version of the cluster LVM daemon.
.SH ENVIRONMENT VARIABLES
.TP
.B LVM_CLVMD_BINARY
-The CLVMD binary to use when \fBclmvd\fP restart is requested.
+The CLVMD binary to use when \fBclvmd\fP restart is requested.
Defaults to #CLVMD_PATH#.
.TP
.B LVM_BINARY
diff --git a/man/cmirrord.8.in b/man/cmirrord.8.in
index 8ef67940c..383db65ed 100644
--- a/man/cmirrord.8.in
+++ b/man/cmirrord.8.in
@@ -3,7 +3,7 @@
cmirrord \(em cluster mirror log daemon
.SH SYNOPSIS
-.B cmirrord
+\fBcmirrord\fR [\fB\-f\fR] [\fB\-h\fR]
.SH DESCRIPTION
cmirrord is the daemon that tracks mirror log information in a cluster.
@@ -24,6 +24,12 @@ there are still active cluster mirrors, however, the signal will be
ignored. Active cluster mirrors should be shutdown before stopping the
cluster mirror log daemon.
+.SH OPTIONS
+.IP "\fB\-f\fR, \fB\-\-foreground\fR" 4
+Do not fork and log to the terminal.
+.IP "\fB\-h\fR, \fB\-\-help\fR" 4
+Print usage.
+
.SH SEE ALSO
.BR lvm (8)
.BR clvmd (8)
diff --git a/man/dmsetup.8.in b/man/dmsetup.8.in
index 57fbdf9bd..ded7ca1e0 100644
--- a/man/dmsetup.8.in
+++ b/man/dmsetup.8.in
@@ -32,6 +32,7 @@ dmsetup \(em low level logical volume management
.br
.B dmsetup info
.BR \-c | \-C | \-\-columns
+.RB [ \-\-nameprefixes ]
.RB [ \-\-noheadings ]
.RB [ \-\-separator
.IR separator ]
@@ -42,6 +43,10 @@ dmsetup \(em low level logical volume management
.IR sort_fields ]
.RB [ \-S | \-\-select
.IR Selection ]
+.RB [ \-\-interval
+.IR seconds ]
+.RB [ \-\-count
+.IR count ]
.RI [ device_name ]
.RE
.br
@@ -187,6 +192,10 @@ In some cases these checks may slow down operations noticeably.
.BR \-c | \-C | \-\-columns
Display output in columns rather than as Field: Value lines.
.TP
+.B \-\-count \fIcount
+Specify the number of times to repeat a report. Set this to zero
+to continue until interrupted. The default interval is one second.
+.TP
.BR \-h | \-\-help
Outputs a summary of the commands available, optionally including
the list of report fields (synonym with \fBhelp\fP command).
@@ -196,6 +205,12 @@ When returning any table information from the kernel report on the
inactive table instead of the live table.
Requires kernel driver version 4.16.0 or above.
.TP
+.B \-\-interval \fIseconds
+Specify the interval in seconds between successive iterations for
+repeating reports. If \-\-interval is specified but \-\-count is not,
+reports will continue to repeat until interrupted.
+The default interval is one second.
+.TP
.IR \fB\-\-manglename \ { none | hex | auto }
Mangle any character not on a whitelist using mangling_mode when
processing device-mapper device names and UUIDs. The names and UUIDs
@@ -222,6 +237,10 @@ Specify the minor number.
.BR \-n | \-\-notable
When creating a device, don't load any table.
.TP
+.BR \-\-nameprefixes
+Add a "DM_" prefix plus the field name to the output. Useful with \-\-noheadings to produce a list of
+field=value pairs that can be used to set environment variables (for example, in udev(7) rules).
+.TP
.BR \-\-noheadings
Suppress the headings line when using columnar output.
.TP
@@ -355,6 +374,10 @@ Outputs some brief information about the device in the form:
.IR fields ]
.RB [ \-O | \-\-sort
.IR sort_fields ]
+.RB [ \-\-interval
+.IR seconds ]
+.RB [ \-\-count
+.IR count ]
.RI [ device_name ]
.br
Output you can customise.
diff --git a/man/dmstats.8.in b/man/dmstats.8.in
new file mode 100644
index 000000000..bdcf74200
--- /dev/null
+++ b/man/dmstats.8.in
@@ -0,0 +1,715 @@
+.TH DMSTATS 8 "Jul 25 2015" "Linux" "MAINTENANCE COMMANDS"
+.SH NAME
+dmstats \(em device-mapper statistics management
+.SH SYNOPSIS
+.ad l
+.B dmsetup stats
+.I command
+.RB [ options ]
+.br
+
+.B dmstats <command>
+.RB [[
+.IR device_name ]
+.RB |[ \-\-uuid
+.IR uuid ]
+.RB |[ \-\-major
+.IR major
+.RB \-\-minor
+.IR minor ]]
+.br
+
+.B dmstats clear
+.I device_name
+.RB [ \-\-allregions
+.RB | \-\-regionid
+.IR id ]
+.br
+.B dmstats create
+.I device_name
+.RB [ \-\-alldevices ]
+.RB [[ \-\-areas
+.IR nr_areas ]
+.RB |[ \-\-areasize
+.IR area_size ]]
+.RB [[ \-\-start
+.IR start_sector ]
+.RB [ \-\-length
+.IR length ]
+.RB |[ \-\-segments ]]
+.RB [ \-\-auxdata
+.IR data ]
+.RB [ \-\-programid
+.IR id ]
+.br
+.B dmstats delete
+.I device_name
+.RB [ \-\-alldevices ]
+.RB [ \-\-allregions
+.RB | \-\-regionid
+.IR id ]
+.RB [ \-\-allprograms
+.RB | \-\-programid
+.IR id ]
+.br
+.B dmstats help
+.RB [ \-c | \-C | \-\-columns ]
+.br
+.B dmstats list
+.RI [ device_name ]
+.RB [ \-\-allprograms
+.RB | \-\-programid
+.IR id ]
+.RB [ \-\-units
+.IR units ]
+.RB [ \-\-nosuffix ]
+.RB [ \-\-nosuffix ]
+.RB [ \-v | \-\-verbose \ [ \-v | \-\-verbose ]]
+.br
+.B dmstats print
+.RI [ device_name ]
+.RB [ \-\-clear ]
+.RB [ \-\-allprograms
+.RB | \-\-programid
+.IR id ]
+.RB [ \-\-allregions
+.RB | \-\-regionid
+.IR id ]
+.br
+.B dmstats report
+.RI [ device_name ]
+.RB [ \-\-interval
+.IR seconds ]
+.RB [ \-\-count
+.IR count ]
+.RB [ \-\-units
+.IR units ]
+.RB [ \-\-allprograms ]
+.RB [ \-\-programid
+.IR id ]
+.RB [ \-\-regionid
+.IR id ]
+.RB [ \-O | \-\-sort
+.IR sort_fields ]
+.RB [ \-S | \-\-select
+.IR Selection ]
+.RB [ \-\-units
+.IR units ]
+.RB [ \-\-nosuffix ]
+.br
+.ad b
+.SH DESCRIPTION
+The dmstats program manages IO statistics regions for devices that use
+the device-mapper driver. Statistics regions may be created, deleted,
+listed and reported on using the tool.
+
+The first argument to dmstats is a command.
+
+The second argument is the device name, uuid, or major and minor
+numbers.
+
+Further options permit the selection of regions, output format
+control, and reporting behaviour.
+
+When the program is run using the 'dmstats' alias, the command
+\fBmust\fP be the first argument and any switches and options should be
+specified following the command itself. This limitation is not present
+when run as 'dmsetup stats'.
+
+When no device argument is given dmstats will by default operate on all
+device-mapper devices present. The \fBcreate\fP and \fBdelete\fP
+commands require the use of \fB--alldevices\fP when used in this way.
+
+.SH OPTIONS
+.TP
+.B \-\-alldevices
+If no device arguments are given allow operation on all devices when
+creating or deleting regions.
+.TP
+.B \-\-allprograms
+Include regions from all program IDs for list and report operations.
+.TP
+.B \-\-allregions
+Include all present regions for commands that normally accept a single
+region identifier.
+.TP
+.B \-\-areas \fInr_areas
+Specify the number of statistics areas to create within a new region.
+.TP
+.B \-\-areasize \fIarea_size
+Specify the size of areas into which a new region should be divided. An
+optional suffix selects units of bBsSkKmMgGtTpPeE: (b)ytes,
+(s)ectors, (k)ilobytes, (m)egabytes, (g)igabytes, (t)erabytes,
+(p)etabytes, (e)xabytes. Capitalise to use multiples of 1000 (S.I.)
+instead of 1024.
+.TP
+.B \-\-auxdata \fIaux_data
+Specify auxiliary data (a string) to be stored with a new region.
+.TP
+.B \-\-clear
+When printing statistics counters, also atomically reset them to zero.
+.TP
+.B \-\-count \fIcount
+Specify the iteration count for repeating reports. If the count
+argument is zero reports will continue to repeat until interrupted.
+.TP
+.B \-\-interval \fIseconds
+Specify the interval in seconds between successive iterations for
+repeating reports. If \-\-interval is specified but \-\-count is not,
+reports will continue to repeat until interrupted.
+.TP
+.B \-\-length \fIlength
+Specify the length of a new statistics region in sectors. An optional
+suffix selects units of bBsSkKmMgGtTpPeE: (b)ytes, (s)ectors,
+(k)ilobytes, (m)egabytes, (g)igabytes, (t)erabytes, (p)etabytes,
+(e)xabytes. Capitalise to use multiples of 1000 (S.I.) instead of 1024.
+.TP
+.BR \-j | \-\-major\ \fImajor
+Specify the major number.
+.TP
+.BR \-m | \-\-minor\ \fIminor
+Specify the minor number.
+.TP
+.B \-\-nosuffix
+Suppress the suffix on output sizes. Use with \fB\-\-units\fP
+(except h and H) if processing the output.
+.TP
+.BR \-o | \-\-options
+Specify which report fields to display.
+.TP
+.BR \-O | \-\-sort\ \fIsort_fields
+Sort output according to the list of fields given. Precede any
+sort_field with - for a reverse sort on that column.
+.TP
+.B \-\-programid \fIid
+Specify a program ID string. When creating new statistics regions this
+string is stored with the region. Subsequent operations may supply a
+program ID in order to select only regions with a matching value. The
+default program ID for dmstats-managed regions is "dmstats".
+.TP
+.BR \-S | \-\-select \ \fIselection
+Display only rows that match selection criteria. All rows with the
+additional "selected" column (-o selected) showing 1 if the row matches
+the selection and 0 otherwise. The selection criteria are defined by
+specifying column names and their valid values while making use of
+supported comparison operators.
+.TP
+.B \-\-start \fIstart
+Specify the start offset of a new statistics region in sectors. An
+optional suffix selects units of bBsSkKmMgGtTpPeE: (b)ytes,
+(s)ectors, (k)ilobytes, (m)egabytes, (g)igabytes, (t)erabytes,
+(p)etabytes, (e)xabytes. Capitalise to use multiples of 1000 (S.I.)
+instead of 1024.
+.TP
+.B \-\-segments
+Create a new statistics region for each target contained in the target
+device. This causes a separate region to be allocated for each segment
+of the device.
+.TP
+.BR \-\-units \ hHbBsSkKmMgGtTpPeE
+Set the display units for report output. All sizes are output in these
+units: (h)uman-readable, (b)ytes, (s)ectors, (k)ilobytes, (m)egabytes,
+(g)igabytes, (t)erabytes, (p)etabytes, (e)xabytes. Capitalise to use
+multiples of 1000 (S.I.) instead of 1024. Can also specify custom units
+e.g. \fB\-\-units 3M\fP
+.TP
+.BR \-u | \-\-uuid
+Specify the uuid.
+.TP
+.BR \-v | \-\-verbose \ [ \-v | \-\-verbose ]
+Produce additional output.
+.br
+.SH COMMANDS
+.TP
+.B clear
+.I device_name
+.RB [ \-\-allregions
+.RB | \-\-regionid
+.IR id ]
+.RB [ \-\-allprograms
+.RB | \-\-programid
+.IR id ]
+.br
+Instructs the kernel to clear statistics counters for the specified
+regions (with the exception of in-flight IO counters).
+.br
+.TP
+.B create
+.I device_name
+.RB [ \-\-areas
+.IR nr_areas ]
+.RB [ \-\-areasize
+.IR area_size ]
+.RB [[ \-\-start
+.IR start_sector ]
+.RB [ \-\-length
+.IR length ]
+.RB |[ \-\-segments ]]
+.RB [ \-\-auxdata
+.IR data ]
+.RB [ \-\-programid
+.IR id ]
+.br
+Creates one or more new statistics regions on the specified device(s).
+
+The region will span the entire device unless \fB\-\-start\fP and
+\fB\-\-length\fP or \fB\-\-target\fP are given. The \fB\-\-start\fP and
+\fB\-\-length\fP options allow a region of arbitrary length to be placed
+at an arbitrary offset into the device. The \fB\-\-segments\fP option
+causes a new region to be created for each target in the corresponding
+device-mapper device's table.
+
+An optional \fBprogram_id\fP or \fBaux_data\fP string may be associated
+with the region. A \fBprogram_id\fP may then be used to select regions
+for subsequent list, print, and report operations. The \fBaux_data\fP
+stores an arbitrary string and is not used by dmstats or the
+device-mapper kernel statistics subsystem.
+
+By default dmstats creates regions with a \fBprogram_id\fP of
+"dmstats".
+
+On success the \fBregion_id\fP of the newly created region is printed to
+stdout.
+.br
+.TP
+.B delete
+.I [ device_name ]
+.RB [ \-\-alldevices ]
+.RB [ \-\-allregions
+.RB | \-\-regionid
+.IR id ]
+.RB [ \-\-allprograms
+.RB | \-\-programid
+.IR id ]
+.br
+Delete the specified statistics region. All counters and resources used
+by the region are released and the region will not appear in the output
+of subsequent list, print, or report operations.
+
+All regions registered on a device may be removed using
+\fB\-\-allregions\fP.
+
+To remove all regions on all devices both \fB--allregions\fP and
+\fB\-\-alldevices\fP must be used.
+.br
+.TP
+.B help
+.RB [ \-c | \-C | \-\-columns ]
+.br
+Outputs a summary of the commands available, optionally including
+the list of report fields.
+.br
+.TP
+.B list
+.RI [ device_name ]
+.RB [ \-\-allprograms ]
+.RB [ \-\-programid
+.RB [ \-v | \-\-verbose \ [ \-v | \-\-verbose ]]]
+.IR id ]
+.br
+List the statistics regions registered on the device. If the
+\fB\-\-allprograms\fP switch is given all regions will be listed
+regardless of region program ID values.
+
+If \fB\-v\fP or \fB\-\-verbose\fP is given the report will include
+a row of information for each area contained in each region displayed.
+.br
+.TP
+.B print
+.RB [ \-\-clear ]
+.IR
+.RB [ \-\-allregions
+.RB | \-\-regionid
+.IR id ]
+.RB [ \-\-allprograms
+.RB | \-\-programid
+.IR id ]
+.br
+Print raw statistics counters for the specified region or for all
+present regions.
+.br
+.TP
+.B report
+.RB [ \-\-allprograms ]
+.RB [ \-\-interval
+.IR seconds ]
+.RB [ \-\-count
+.IR count ]
+.RB [ \-\-units
+.IR unit ]
+.RB [ \-\-regionid
+.IR id ]
+.RB [ \-\-programid
+.IR id ]
+.RB [ \-O | \-\-sort
+.IR sort_fields ]
+.RB [ \-S | \-\-select
+.IR Selection ]
+.RB [ \-\-units
+.IR units ]
+.br
+Start a report for the specified region or for all present regions. If
+the count argument is specified, the report will repeat at a fixed
+interval set by the \fB\-\-interval\fP option. The default interval is
+one second.
+
+If the \fB\-\-allprograms\fP switch is given, all regions will be
+listed, regardless of region program ID values.
+.br
+.SH REGIONS AND AREAS
+The device-mapper statistics facility allows separate performance
+counters to be maintained for arbitrary regions of devices. A region may
+span any range: from a single sector to the whole device. A region may
+be further sub-divided into a number of distinct areas (one or more),
+each with its own counter set.
+
+By default new regions span the entire device. The \fB\-\-start\fP and
+\fB\-\-length\fP options allows a region of any size to be placed at any
+location on the device.
+
+A region may be either divided into the specified number of equal-sized
+areas, or into areas of the given size by specifying one of
+\fB\-\-areas\fP or \fB\-\-areasize\fP when creating a region with the
+\fBcreate\fP command. Depending on the size of the areas and the device
+region the final area within the region may be smaller than requested.
+
+.SS Region identifiers
+Each region is assigned an identifier when it is created that is used to
+reference the region in subsequent operations. Region identifiers are
+unique within a given device (including across different \fBprogram_id\fP
+values).
+.br
+Depending on the sequence of create and delete operations, gaps may
+exist in the sequence of \fBregion_id\fP values for a particular device.
+
+.SH REPORT FIELDS
+The dmstats report provides several types of field that may be added to
+the default field set, or used to create custom reports.
+.br
+All performance counters and metrics are calculated per-area.
+.br
+.SS Derived metrics
+A number of metrics fields are included that provide high level
+performance indicators. These are based on the fields provided by the
+conventional Linux iostat program and are derived from the basic counter
+values provided by the kernel for each area.
+.br
+.HP
+.B rrqm
+.br
+Read requests merged per second.
+.HP
+.B wrqm
+.br
+Write requests merged per second.
+.HP
+.B rs
+.br
+Read requests per second.
+.HP
+.B ws
+.br
+Write requests per second.
+.HP
+.B rsec
+.br
+Sectors read per second.
+.HP
+.B wsec
+.br
+Sectors written per second.
+.HP
+.B arqsz
+.br
+The average size of requests submitted to the area.
+.HP
+.B qusz
+.br
+The average queue length.
+.HP
+.B await
+.br
+The average wait time for read and write requests.
+.HP
+.B r_await
+.br
+The average wait time for read requests.
+.HP
+.B w_await
+.br
+The average wait time for write requests.
+.HP
+.B tput
+.br
+The device throughput in requests per second.
+.HP
+.B svctm
+.br
+The average service time (in milliseconds) for I/O requests that
+were issued to the device.
+.HP
+.B util
+.br
+Percentage of CPU time during which I/O requests were issued to the
+device (bandwidth utilization for the device). Device saturation occurs
+when this value is close to 100%.
+.br
+.SS Region and area meta fields
+Meta fields provide information about the region or area that the
+statistics values relate to. This includes the region and area
+identifier, start, length, and counts, as well as the program ID and
+auxiliary data values.
+.br
+.HP
+.B region_id
+.br
+Region identifier. This is a non-negative integer returned by the kernel
+when a statistics region is created.
+.HP
+.B region_start
+.br
+.br
+The region start sector in units of 512 byte sectors.
+.HP
+.B region_len
+.br
+The length of the region in units of 512 byte sectors.
+.HP
+.B area_id
+.br
+Area identifier. Area identifiers are assigned by the device-mapper
+statistics library and uniquely identify each area within a region. Each
+ID corresponds to a distinct set of performance counters for that area
+of the statistics region. Area identifiers are always monotonically
+increasing within a region so that higher ID values correspond to
+greater sector addresses within the region and no gaps in the sequence
+of identifiers exist. Sorting a report by device, region start, and area
+ID (the default) will then produce rows in order of ascending region and
+area address.
+.HP
+.B area_start
+.br
+The area start sector in units of 512 byte sectors.
+.HP
+.B area_len
+.br
+The length of the area in units of 512 byte sectors.
+.HP
+.B area_count
+.br
+The number of areas in this region.
+.HP
+.B program_id
+.br
+The program ID value associated with this region.
+.HP
+.B aux_data
+.br
+The auxiliary data value associated with this region.
+.br
+.HP
+.B interval_ns
+.br
+The estimated interval over which the current counter values have
+accumulated. The value is reported as an integer expressed in units
+of nanoseconds.
+.br
+.HP
+.B interval
+.br
+The estimated interval over which the current counter values have
+accumulated. The value is reported as a real number in units of
+seconds.
+.br
+.SS Basic counters
+Basic counters provide access to the raw counter data from the kernel,
+allowing further processing to be carried out by another program.
+
+The kernel provides thirteen separate counters for each statistics
+area. The first eleven of these match the counters provided in
+/proc/diskstats or /sys/block/*/*/stat. The final pair provide separate
+counters for read and write time.
+.P
+.HP
+.B reads
+.br
+The number of reads successfully completed this interval.
+.HP
+.B read_merges
+.br
+The number of read requests merged this interval. This field is
+incremented every time a pair of requests are merged to create a single
+request to be issued to the device.
+.HP
+.B read_sectors
+.br
+The number of 512 byte sectors read this interval.
+.HP
+.B read_nsecs
+.br
+The number of nanoseconds spent reading during this interval.
+.HP
+.B writes
+.br
+The number of writes successfully completed this interval.
+.HP
+.B write_merges
+.br
+The number of write requests merged this interval. This field is
+incremented every time a pair of requests are merged to create a single
+request to be issued to the device.
+.HP
+.B write_sectors
+.br
+The number of 512 byte sectors written this interval.
+.HP
+.B write_nsecs
+.br
+The number of nanoseconds spent writing during this interval.
+.HP
+.B in_progress
+.br
+The number of reads and writes currently in progress.
+.HP
+.B io_nsecs
+.br
+The number of nanoseconds spent reading and writing.
+.HP
+.B weighted_io_nsecs
+.br
+This field is incremented at each I/O start, I/O completion, I/O merge,
+or read of these stats by the number of I/Os in progress multiplied by
+the number of milliseconds spent doing I/O since the last update of this
+field. This can provide an easy measure of both I/O completion time and
+the backlog that may be accumulating.
+.br
+.br
+.P
+.SH EXAMPLES
+Create a whole-device region with one area on vg00/lvol1
+.br
+.br
+# dmstats create vg00/lvol1
+.br
+vg00/lvol1: Created new region with 1 area(s) as region ID 0
+.br
+.br
+
+
+Create a 32M region 1G into device d0
+.br
+.br
+# dmstats create --start 1G --length 32M d0
+.br
+d0: Created new region with 1 area(s) as region ID 0
+.br
+
+
+Create a whole-device region with 8 areas on every device
+.br
+.br
+# dmstats create --areas 8
+.br
+vg00/lvol1: Created new region with 8 area(s) as region ID 0
+.br
+vg00/lvol2: Created new region with 8 area(s) as region ID 0
+.br
+vg00/lvol3: Created new region with 8 area(s) as region ID 0
+.br
+vg01/lvol0: Created new region with 8 area(s) as region ID 2
+.br
+vg01/lvol1: Created new region with 8 area(s) as region ID 0
+.br
+vg00/lvol2: Created new region with 8 area(s) as region ID 1
+.br
+.br
+
+Delete all regions on all devices
+.br
+.br
+# dmstats delete --alldevices --allregions
+.br
+.br
+
+Create a whole-device region with areas 10GiB in size on vg00/lvol1
+using dmsetup
+.br
+.br
+# dmsetup stats create --areasize 10G vg00/lvol1
+.br
+vg00/lvol1: Created new region with 5 area(s) as region ID 1
+.br
+.br
+
+Create a 1GiB region with 16 areas at the start of vg00/lvol1
+.br
+# dmstats create --start 0 --len 1G --areas=16 vg00/lvol1
+.br
+vg00/lvol1: Created new region with 16 area(s) as region ID 0
+.br
+.br
+
+List the statistics regions registered on vg00/lvol1
+.br
+# dmstats list vg00/lvol1
+.br
+Name RgID RStart RSize #Areas ASize ProgID
+.br
+vg00-lvol1 0 0 61.00g 1 61.00g dmstats
+.br
+vg00-lvol1 1 61.00g 19.20g 1 19.20g dmstats
+.br
+vg00-lvol1 2 80.20g 2.14g 1 2.14g dmstats
+.br
+.br
+
+Display five statistics reports for vg00/lvol1 at an interval of one second
+.br
+.br
+# dmstats report --interval 1 --count 5 vg00/lvol1
+.br
+# dmstats report
+.br
+Name RgID ArID AStart ASize RRqM/s WRqM/s R/s W/s RSz/s WSz/s AvRqSz QSize Util% AWait RdAWa WrAWa
+.br
+vg_hex-lv_home 0 0 0 61.00g 0.00 0.00 0.00 218.00 0 1.04m 4.50k 2.97 81.70 13.62 0.00 13.62
+.br
+vg_hex-lv_home 1 0 61.00g 19.20g 0.00 0.00 0.00 5.00 0 548.00k 109.50k 0.14 11.00 27.40 0.00 27.40
+.br
+vg_hex-lv_home 2 0 80.20g 2.14g 0.00 0.00 0.00 14.00 0 1.15m 84.00k 0.39 18.70 27.71 0.00 27.71
+.br
+.br
+
+Create one region for each target contained in device vg00/lvol1
+.br
+.br
+# dmstats create --segments vg00/lvol1
+.br
+Created new region with 1 area(s) as region ID 0
+.br
+Created new region with 1 area(s) as region ID 1
+.br
+Created new region with 1 area(s) as region ID 2
+.br
+.br
+
+Print raw counters for region 4 on device d0
+.br
+.br
+# dmstats print --regionid 4 d0
+.br
+2097152+65536 0 0 0 0 29 0 264 701 0 41 701 0 41
+.br
+.br
+.SH AUTHORS
+Bryn M. Reeves <bmr@redhat.com>
+
+.SH SEE ALSO
+LVM2 resource page https://www.sourceware.org/lvm2/
+.br
+Device-mapper resource page: http://sources.redhat.com/dm/
+.br
+
+Device-mapper statistics kernel documentation
+.br
+ Documentation/device-mapper/statistics.txt
diff --git a/man/lvchange.8.in b/man/lvchange.8.in
index 4577cce7a..9199b49be 100644
--- a/man/lvchange.8.in
+++ b/man/lvchange.8.in
@@ -8,7 +8,7 @@ lvchange \(em change attributes of a logical volume
.RB [ \-A | \-\-autobackup
.RI { y | n }]
.RB [ \-a | \-\-activate
-.RI [ a | e | l ]{ y | n }]
+.RI [ a | e | s | l ]{ y | n }]
.RB [ \-\-activationmode
.RI { complete | degraded | partial }]
.RB [ \-k | \-\-setactivationskip
@@ -31,6 +31,8 @@ lvchange \(em change attributes of a logical volume
.RB [ \-\-detachprofile ]
.RB [ \-\-discards
.RI { ignore | nopassdown | passdown }]
+.RB [ \-\-errorwhenfull
+.RI { y | n }]
.RB [ \-\-resync ]
.RB [ \-h | \-? | \-\-help ]
.RB [ \-\-ignorelockingfailure ]
@@ -66,11 +68,12 @@ lvchange \(em change attributes of a logical volume
.RB [ \-r | \-\-readahead
.RI { ReadAheadSectors | auto | none }]
.RB [ \-\-refresh ]
+.RB [ \-S | \-\-select
+.IR Selection ]
.RB [ \-t | \-\-test ]
.RB [ \-v | \-\-verbose ]
.RB [ \-Z | \-\-zero
.RI { y | n }]
-.I LogicalVolumePath
.RI [ LogicalVolumePath ...]
.SH DESCRIPTION
lvchange allows you to change the attributes of a logical volume
@@ -78,19 +81,19 @@ including making them known to the kernel ready for use.
.SH OPTIONS
See \fBlvm\fP(8) for common options.
.TP
-.BR \-a ", " \-\-activate " [" \fIa | \fIe | \fIl ]{ \fIy | \fIn }
+.BR \-a ", " \-\-activate " [" \fIa | \fIe | \fIs | \fIl ]{ \fIy | \fIn }
Controls the availability of the logical volumes for use.
Communicates with the kernel device-mapper driver via
libdevmapper to activate (\-ay) or deactivate (\-an) the
-logical volumes.
+logical volumes.
.IP
Activation of a logical volume creates a symbolic link
/dev/VolumeGroupName/LogicalVolumeName pointing to the device node.
This link is removed on deactivation.
All software and scripts should access the device through
this symbolic link and present this as the name of the device.
-The location and name of the underlying device node may depend on
-the distribution and configuration (e.g. udev) and might change
+The location and name of the underlying device node may depend on
+the distribution and configuration (e.g. udev) and might change
from release to release.
.IP
If autoactivation option is used (\-aay),
@@ -101,11 +104,36 @@ activation. The \-aay option should be also used during system
boot so it's possible to select which volumes to activate using
the activation/auto_activation_volume_list setting.
.IP
-If clustered locking is enabled, -aey will activate exclusively
-on one node and -aly will activate only on the local node.
-To deactivate only on the local node use -aln.
-Logical volumes with single-host snapshots are always activated
-exclusively because they can only be used on one node at once.
+In a clustered VG, clvmd is used for activation, and the
+following options are possible:
+
+With \-aey, clvmd activates the LV in exclusive mode
+(with an exclusive lock), allowing a single node to activate the LV.
+
+With \-asy, clvmd activates the LV in shared mode
+(with a shared lock), allowing multiple nodes to activate the LV concurrently.
+If the LV type prohibits shared access, such as an LV with a snapshot,
+the 's' option is ignored and an exclusive lock is used.
+
+With \-ay (no mode specified), clvmd activates the LV in shared mode
+if the LV type allows concurrent access, such as a linear LV.
+Otherwise, clvmd activates the LV in exclusive mode.
+
+With \-aey, \-asy, and \-ay, clvmd attempts to activate the LV
+on all nodes. If exclusive mode is used, then only one of the
+nodes will be successful.
+
+With \-an, clvmd attempts to deactivate the LV on all nodes.
+
+With \-aly, clvmd activates the LV only on the local node, and \-aln
+deactivates only on the local node. If the LV type allows concurrent
+access, then shared mode is used, otherwise exclusive.
+
+LVs with snapshots are always activated exclusively because they can only
+be used on one node at once.
+
+For local VGs, \-ay, \-aey, and \-asy are all equivalent.
+
.TP
.BR \-\-activationmode " {" \fIcomplete | \fIdegraded | \fIpartial }
The activation mode determines whether logical volumes are allowed to
@@ -156,9 +184,14 @@ Set this to \fIignore\fP to ignore any discards received by a
thin pool Logical Volume. Set to \fInopassdown\fP to process such
discards within the thin pool itself and allow the no-longer-needed
extents to be overwritten by new data. Set to \fIpassdown\fP (the
-default) to process them both within the thin pool itself and to
+default) to process them both within the thin pool itself and to
pass them down the underlying device.
.TP
+.BR \-\-errorwhenfull " {" \fIy | \fIn }
+Sets thin pool behavior when data space is exhausted. See
+.BR lvcreate (8)
+for information.
+.TP
.B \-\-resync
Forces the complete resynchronization of a mirror. In normal
circumstances you should not need this option because synchronization
@@ -271,6 +304,7 @@ Do not use this if dmeventd is already monitoring a device.
.TP
.BR \-M ", " \-\-persistent " {" \fIy | \fIn }
Set to y to make the minor number specified persistent.
+Change of persistent numbers is not supported for pool volumes.
.TP
.BR \-p ", " \-\-permission " {" \fIr | \fIrw }
Change access permission to read-only or read/write.
diff --git a/man/lvconvert.8.in b/man/lvconvert.8.in
index 7dd105129..c2ee2b2bc 100644
--- a/man/lvconvert.8.in
+++ b/man/lvconvert.8.in
@@ -162,6 +162,10 @@ lvconvert \(em convert a logical volume from linear to mirror or snapshot
.IR ChunkSize [ bBsSkKmMgG ]]
.RB [ \-\-cachemode
.RI { writeback | writethrough }]
+.RB [ \-\-cachepolicy
+.IR policy ]
+.RB [ \-\-cachesettings
+.IR key=value ]
.RB [ \-\-poolmetadata
.IR CachePoolMetadataLogicalVolume { Name | Path }
|
@@ -220,11 +224,22 @@ Run the daemon in the background.
.BR \-H ", " \-\-cache ", " \-\-type\ \fIcache
Converts logical volume to a cached LV with the use of cache pool
specified with \fB\-\-cachepool\fP.
-For more information on cache pool LVs and cache LVs, see \fBlvmcache\fP(8).
+For more information on cache pool LVs and cache LVs, see \fBlvmcache\fP(7).
+.TP
+.B \-\-cachepolicy \fIpolicy
+Only applicable to cached LVs; see also \fBlvmcache(7)\fP. Sets
+the cache policy. \fImq\fP is the basic policy name. \fIsmq\fP is a more
+advanced version available in newer kernels.
.TP
.BR \-\-cachepool " " \fICachePoolLV
This argument is necessary when converting a logical volume to a cache LV.
-For more information on cache pool LVs and cache LVs, see \fBlvmcache\fP(8).
+For more information on cache pool LVs and cache LVs, see \fBlvmcache\fP(7).
+.TP
+.BR \-\-cachesettings " " \fIkey=value
+Only applicable to cached LVs; see also \fBlvmcache(7)\fP. Sets
+the cache tunable settings. In most use-cases, default values should be adequate.
+Special string value \fIdefault\fP switches setting back to its default kernel value
+and removes it from the list of settings stored in lvm2 metadata.
.TP
.BR \-m ", " \-\-mirrors " " \fIMirrors
Specifies the degree of the mirror you wish to create.
@@ -496,7 +511,7 @@ See \fBlvmthin\fP(7) for more info about thin provisioning support.
Uncaches \fICacheLogicalVolume\fP.
Before the volume becomes uncached, cache is flushed.
Unlike with \fB\-\-splitcache\fP the cache pool volume is removed.
-This option could seen as an inverse of \fB\-\-cache\fP.
+This option could be seen as an inverse of \fB\-\-cache\fP.
.SH Examples
Converts the linear logical volume "vg00/lvol1" to a two-way mirror
diff --git a/man/lvcreate.8.in b/man/lvcreate.8.in
index cc5154632..ee10392e0 100644
--- a/man/lvcreate.8.in
+++ b/man/lvcreate.8.in
@@ -13,9 +13,13 @@ lvcreate \- create a logical volume in an existing volume group
.RI { y | n }]
.RB [ \-H | \-\-cache ]
.RB [ \-\-cachemode
-.RI { writeback | writethrough }]
+.RI { passthrough | writeback | writethrough }]
+.RB [ \-\-cachepolicy
+.IR policy ]
.RB [ \-\-cachepool
.IR CachePoolLogicalVolume { Name | Path }
+.RB [ \-\-cachesettings
+.IR key=value ]
.RB [ \-c | \-\-chunksize
.IR ChunkSize [ bBsSkKmMgG ]]
.RB [ \-\-commandprofile
@@ -25,6 +29,8 @@ lvcreate \- create a logical volume in an existing volume group
.RB [ \-d | \-\-debug ]
.RB [ \-\-discards
.RI { ignore | nopassdown | passdown }]
+.RB [ \-\-errorwhenfull
+.RI { y | n }]
.RB [{ \-l | \-\-extents
.IR LogicalExtentsNumber [ % { FREE | PVS | VG }]
|
@@ -85,7 +91,8 @@ lvcreate \- create a logical volume in an existing volume group
.RB [ \-\-type
.IR SegmentType ]
.RB [ \-v | \-\-verbose ]
-.RB [ \-W | \-\-wipesignatures ]
+.RB [ \-W | \-\-wipesignatures
+.RI { y | n }]
.RB [ \-Z | \-\-zero
.RI { y | n }]
.RI [ VolumeGroup { Name | Path }
@@ -181,7 +188,7 @@ See \fBlvmcache\fP(7) for more info about caching support.
Note that the cache segment type requires a dm-cache kernel module version
1.3.0 or greater.
.TP
-.IR \fB\-\-cachemode " {" writeback | writethrough }
+.IR \fB\-\-cachemode " {" passthrough | writeback | writethrough }
Specifying a cache mode determines when the writes to a cache LV
are considered complete. When \fIwriteback\fP is specified, a write is
considered complete as soon as it is stored in the cache pool LV.
@@ -191,10 +198,21 @@ While \fIwritethrough\fP may be slower for writes, it is more
resilient if something should happen to a device associated with the
cache pool LV.
.TP
+.B \-\-cachepolicy \fIpolicy
+Only applicable to cached LVs; see also \fBlvmcache(7)\fP. Sets
+the cache policy. \fImq\fP is the basic policy name. \fIsmq\fP is a more
+advanced version available in newer kernels.
+.TP
.IR \fB\-\-cachepool " " CachePoolLogicalVolume { Name | Path }
Specifies the name of cache pool volume name. The other way to specify pool name
is to append name to Volume group name argument.
.TP
+.BR \-\-cachesettings " " \fIkey=value
+Only applicable to cached LVs; see also \fBlvmcache(7)\fP. Sets
+the cache tunable settings. In most use-cases, default values should be adequate.
+Special string value \fIdefault\fP switches setting back to its default kernel value
+and removes it from the list of settings stored in lvm2 metadata.
+.TP
.BR \-c ", " \-\-chunksize " " \fIChunkSize [ \fIbBsSkKmMgG ]
Gives the size of chunk for snapshot, cache pool and thin pool logical volumes.
Default unit is in kilobytes.
@@ -230,6 +248,15 @@ This is shortcut for option \fB\-\-mirrorlog\fP \fIcore\fP.
Sets discards behavior for thin pool.
Default is \fIpassdown\fP.
.TP
+.BR \-\-errorwhenfull " {" \fIy |\fIn }
+Configures thin pool behaviour when data space is exhausted.
+Default is \fIn\fPo.
+Device will queue I/O operations until target timeout
+(see dm-thin-pool kernel module option \fIno_space_timeout\fP)
+expires. Thus configured system has a time to i.e. extend
+the size of thin pool data device.
+When set to \fIy\fPes, the I/O operation is immediately errored.
+.TP
.BR \-K ", " \-\-ignoreactivationskip
Ignore the flag to skip Logical Volumes during activation.
Use \fB\-\-setactivationskip\fP option to set or reset
diff --git a/man/lvdisplay.8.in b/man/lvdisplay.8.in
index 3232853fc..ed1d3f7ae 100644
--- a/man/lvdisplay.8.in
+++ b/man/lvdisplay.8.in
@@ -14,12 +14,13 @@ lvdisplay \(em display attributes of a logical volume
.RB [ \-\-maps ]
.RB [ \-\-nosuffix ]
.RB [ \-P | \-\-partial ]
+.RB [ \-S | \-\-select
+.IR Selection ]
.RB [ \-\-units
.IR hHbBsSkKmMgGtTpPeE ]
.RB [ \-v | \-\-verbose ]
.RB [ \-\-version ]
-.RI [ VolumeGroupName | LogicalVolume { Name | Path }
-.RI [ VolumeGroupName | LogicalVolume { Name | Path }\ ...]]
+.RI [ VolumeGroupName | LogicalVolume { Name | Path }\ ...]
.br
.B lvdisplay
@@ -50,8 +51,7 @@ lvdisplay \(em display attributes of a logical volume
.IR hHbBsSkKmMgGtTpPeE ]
.RB [ \-v | \-\-verbose ]
.RB [ \-\-version ]
-.RI [ VolumeGroupName | LogicalVolume { Name | Path }
-.RI [ VolumeGroupName | LogicalVolume { Name | Path }\ ...]]
+.RI [ VolumeGroupName | LogicalVolume { Name | Path }\ ...]
.SH DESCRIPTION
lvdisplay allows you to see the attributes of a logical volume
like size, read/write status, snapshot information etc.
diff --git a/man/lvm-config.8.in b/man/lvm-config.8.in
new file mode 100644
index 000000000..5050d7770
--- /dev/null
+++ b/man/lvm-config.8.in
@@ -0,0 +1 @@
+.so man8/lvmconfig.8
diff --git a/man/lvm-dumpconfig.8.in b/man/lvm-dumpconfig.8.in
index 844f8ac81..5050d7770 100644
--- a/man/lvm-dumpconfig.8.in
+++ b/man/lvm-dumpconfig.8.in
@@ -1,149 +1 @@
-.TH "LVM-DUMPCONFIG" "8" "LVM TOOLS #VERSION#" "Red Hat, Inc" "\""
-.SH "NAME"
-lvm-dumpconfig \(em dump LVM configuration
-.SH SYNOPSIS
-.B lvm dumpconfig
-.RB [ \-f | \-\-file
-.IR filename ]
-.RB [ \-\-type
-.RI { current | default | diff | missing | new | profilable | profilable-command | profilable-metadata }
-.RB [ \-\-atversion
-.IR version ]
-.RB [ \-\-ignoreadvanced ]
-.RB [ \-\-ignoreunsupported ]
-.RB [ \-\-config
-.IR ConfigurationString ]
-.RB [ \-\-commandprofile
-.IR ProfileName ]
-.RB [ \-\-profile
-.IR ProfileName ]
-.RB [ \-\-metadataprofile
-.IR ProfileName ]
-.RB [ \-\-mergedconfig ]
-.RB [ \-\-validate ]
-.RB [ \-\-withcomments ]
-.RB [ \-\-withversions ]
-.RB [ ConfigurationNode... ]
-
-.SH DESCRIPTION
-lvm dumpconfig produces formatted output with LVM configuration tree.
-
-.SH OPTIONS
-.TP
-.BR \-f ", " \-\-file " \fIfilename"
-Dump configuration to a file named 'filename'.
-
-.TP
-.IR \fB\-\-type " {" current | default | diff | missing | new | profilable }
-Select the type of configuration to dump. The configuration settings dumped
-have either default values or currently used values assigned based on the
-type selected (if no type is selected, \fB\-\-type current\fP is used
-by default). Whenever a configuration setting with a default value is
-commented out, it means the setting does not have any concrete default
-value defined. All output can be saved and reused as proper \fBlvm.conf\fP(5)
-file.
-.RS
-.IP current 3
-Dump current \fBlvm.conf\fP configuration merged with any \fBtag config\fP
-if used. See also \fBlvm.conf\fP(5) for more info about LVM configuration methods.
-.IP default 3
-Dump all possible configuration settings with default values assigned.
-.IP diff 3
-Dump all configuration settings for which the values used differ from defaults.
-The value assigned for each configuration setting is the value currently used.
-This is actually minimal LVM configuration which can be used without
-a change to current configured behaviour.
-.IP missing 3
-Dump all configuration settings with default values assigned which are
-missing in the configuration currently used and for which LVM automatically
-fallbacks to using these default values.
-.IP new 3
-Dump all new configuration settings introduced in current LVM version
-or specific version as defined by \fB\-\-atversion\fP option.
-.IP profilable 3
-Dump all profilable configuration settings with default values assigned.
-See \fBlvm.conf\fP(5) for more info about \fBprofile config\fP method.
-.IP profilable-command 3
-Dump all profilable configuration settings with default values assigned
-that can be used in command profile. This is a subset of settings dumped
-by \fB\-\-type \-\-profilable\fP.
-.IP profilable-metadata 3
-Dump all profilable configuration settings with default values assigned
-that can be used in metadata profile. This is a subset of settings dumped
-by \fB\-\-type \-\-profilable\fP.
-.RE
-
-.TP
-.BI \-\-atversion " version"
-Specify an LVM version in x.y.z format where x is the major version,
-the y is the minor version and z is the patchlevel (e.g. 2.2.106).
-When configuration is dumped, the configuration settings recognized
-at this LVM version will be considered only. This can be used
-to dump a configuration that certain LVM version understands and
-which does not contain any newer settings for which LVM would
-issue a warning message when checking the configuration.
-
-.TP
-.B \-\-ignoreadvanced
-Ignore advanced configuration settings on dump.
-
-.TP
-.B \-\-ignoreunsupported
-Ignore unsupported configuration settings on dump. These settings are
-either used for debugging purposes only or their support is not yet
-complete and they are not meant to be used in production.
-
-.TP
-.BI \-\-config " ConfigurationString"
-Use \fBConfigurationString\fP to override existing configuration.
-This configuration is then applied for dumpconfig command itself.
-See also \fBlvm.conf\fP(5) for more info about \fBconfig cascade\fP.
-
-.TP
-.BI \-\-commandprofile " ProfileName"
-Use \fBProfileName\fP to override existing configuration.
-This configuration is then applied for dumpconfig command itself.
-See also \fBlvm.conf\fP(5) for more info about \fBconfig cascade\fP.
-
-.TP
-.BI \-\-profile " ProfileName"
-The same as using \fB\-\-commandprofile\fP but the configuration is not
-applied for dumpconfig command itself.
-
-.TP
-.BI \-\-metadataprofile " ProfileName"
-Use \fBProfileName\fP to override existing configuration.
-The configuration defined in metadata profile has no effect for
-dumpconfig command itself, the dumpconfig dumps the configuration only.
-See also \fBlvm.conf\fP(5) for more info about \fBconfig cascade\fP.
-
-.TP
-.B \-\-mergedconfig
-When the dumpconfig command is run with the \fB\-\-config\fP option
-and/or \fB\-\-commandprofile\fP, \fB\-\-profile\fP, \fB\-\-metadataprofile\fP
-option, merge all the contents of the \fBconfig cascade\fP before dumping it.
-Without the \fB\-\-mergeconfig\fP option used, only the configuration at
-the front of the cascade is dumped. See also \fBlvm.conf\fP(5) for more
-info about \fBconfig cascade\fP.
-
-.TP
-.B \-\-validate
-Validate current configuration used and exit with appropriate
-return code. The validation is done only for the configuration
-at the front of the \fBconfig cascade\fP. To validate the whole
-merged configuration tree, use also the \fB\-\-mergedconfig\fP option.
-The validation is done even if \fBconfig/checks\fP \fBlvm.conf\fP(5)
-option is disabled.
-
-.TP
-.B \-\-withcomments
-Also dump comments for each configuration node.
-
-.TP
-.B \-\-withversions
-Also dump a comment containing the version of introduction for
-each configuration node.
-
-.SH SEE ALSO
-.BR lvm (8)
-.BR lvm.conf (5)
+.so man8/lvmconfig.8
diff --git a/man/lvm-lvpoll.8.in b/man/lvm-lvpoll.8.in
new file mode 100644
index 000000000..8ffbdd4d6
--- /dev/null
+++ b/man/lvm-lvpoll.8.in
@@ -0,0 +1,89 @@
+.TH "LVPOLL" "8" "LVM TOOLS #VERSION#" "Red Hat, Inc" \" -*- nroff -*-
+.SH NAME
+lvpoll \(em Internal command used by lvmpolld to complete some Logical Volume operations.
+
+.SH SYNOPSIS
+.B lvm lvpoll
+.BR \-\-polloperation
+.RI { pvmove | convert | merge | merge_thin }
+.RB [ \-\-abort ]
+.RB [ \-A | \-\-autobackup
+.RI { y | n }]
+.RB [ \-\-commandprofile
+.IR ProfileName ]
+.RB [ \-d | \-\-debug ]
+.RB [ \-h | \-? | \-\-help ]
+.RB [ \-\-handlemissingpvs ]
+.RB [ \-i | \-\-interval
+.IR Seconds ]
+.RB [ \-t | \-\-test ]
+.RB [ \-v | \-\-verbose ]
+.RB [ \-\-version ]
+.IR LogicalVolume [ Path ]
+.SH DESCRIPTION
+\fBlvpoll\fP is an internal command used by \fBlvmpolld\fP(8) to monitor and
+complete \fBlvconvert\fP(8) and \fBpvmove\fP(8) operations.
+\fBlvpoll\fP itself does not initiate these operations and
+you should never normally need to invoke it directly.
+
+.I LogicalVolume
+The Logical Volume undergoing conversion or, in the case of pvmove, the name of
+the internal pvmove Logical Volume (see \fBEXAMPLES\fP).
+.SH OPTIONS
+See \fBlvm\fP(8) for common options.
+.TP
+.BR \-\-polloperation " {" \fIconvert | \fImerge | \fImerge_thin | \fIpvmove }
+Mandatory option.
+\fIpvmove\fP refers to a pvmove operation that is moving data.
+\fIconvert\fP refers to an operation that is increasing the number of redundant copies of data maintained by a mirror.
+\fImerge\fP indicates a merge operation that doesn't involve thin volumes.
+\fImerge_thin\fP indicates a merge operation involving thin snapshots.
+\fBpvmove\fP(8) and \fBlvconvert\fP(8) describe how to initiate these operations.
+.TP
+.B \-\-abort
+Abort pvmove in progress. See \fBpvmove\fP(8).
+.TP
+.B \-\-handlemissingpvs
+Used when the polling operation needs to handle missing PVs to be able to
+continue. This can happen when \fBlvconvert\fP(8) is repairing a mirror
+with one or more faulty devices.
+.TP
+.BR \-i ", " \-\-interval " "\fISeconds
+Report progress at regular intervals
+
+.SH EXAMPLES
+Resume polling of a pvmove operation identified by the Logical Volume vg00/pvmove0:
+.sp
+.B lvm lvpoll --polloperation pvmove vg00/pvmove0
+.P
+Abort the same pvmove operation:
+.sp
+.B lvm lvpoll --polloperation pvmove --abort vg00/pvmove0
+.P
+To find out the name of the pvmove Logical Volume resulting from an original
+\fBpvmove /dev/sda1\fP command you may use the following \fBlvs\fP command.
+(Remove the parentheses from the LV name.)
+.sp
+.B lvs -a -S move_pv=/dev/sda1
+.P
+Resume polling of mirror conversion vg00/lvmirror:
+.sp
+.B lvm lvpoll --polloperation convert vg00/lvmirror
+.P
+Complete mirror repair:
+.sp
+.B lvm lvpoll --polloperation convert vg/damaged_mirror --handlemissingpvs
+.P
+Process snapshot merge:
+.sp
+.B lvm lvpoll --polloperation merge vg/snapshot_old
+.P
+Finish thin snapshot merge:
+.sp
+.B lvm lvpoll --polloperation merge_thin vg/thin_snapshot
+.SH SEE ALSO
+.BR lvconvert (8),
+.BR lvm (8),
+.BR lvmpolld (8),
+.BR lvs (8),
+.BR pvmove (8)
diff --git a/man/lvm.8.in b/man/lvm.8.in
index 5347e949e..c8e6484eb 100644
--- a/man/lvm.8.in
+++ b/man/lvm.8.in
@@ -43,19 +43,24 @@ path of \fBlvm\fP.
The following commands are built into lvm without links normally
being created in the filesystem for them.
.TP
-\fBdumpconfig\fP \(em Display the configuration information after
-loading \fBlvm.conf\fP(5) and any other configuration files.
+\fBconfig\fP \(em The same as \fBlvmconfig\fP(8) below.
.TP
\fBdevtypes\fP \(em Display the recognised built-in block device types.
.TP
+\fBdumpconfig\fP \(em The same as \fBlvmconfig\fP(8) below.
+.TP
\fBformats\fP \(em Display recognised metadata formats.
.TP
\fBhelp\fP \(em Display the help text.
.TP
+\fBlvpoll\fP \(em Internal command used by lvmpolld to complete some Logical Volume operations.
+.TP
\fBpvdata\fP \(em Not implemented in LVM2.
.TP
\fBsegtypes\fP \(em Display recognised Logical Volume segment types.
.TP
+\fBsystemid\fP \(em Display the system ID, if any, currently set on this host.
+.TP
\fBtags\fP \(em Display any tags defined on this host.
.TP
\fBversion\fP \(em Display version information.
@@ -134,6 +139,9 @@ Volumes.
.TP
\fBlvmchange\fP \(em Change attributes of the Logical Volume Manager.
.TP
+\fBlvmconfig\fP \(em Display the configuration information after
+loading \fBlvm.conf\fP(5) and any other configuration files.
+.TP
\fBlvmdiskscan\fP \(em Scan for all devices visible to LVM2.
.TP
\fBlvmdump\fP \(em Create lvm2 information dumps for diagnostic purposes.
@@ -155,6 +163,10 @@ The following commands are not implemented in LVM2 but might be in the future: l
The following options are available for many of the commands.
They are implemented generically and documented here rather
than repeated on individual manual pages.
+
+Additional hyphens within option names are ignored. For example,
+\-\-readonly and \-\-read\-only are both accepted.
+
.TP
.BR \-h ", " \-? ", " \-\-help
Display the help text.
@@ -210,8 +222,9 @@ match the original followed by \fBvgcfgrestore\fP(8).
.BR \-S ", " \-\-select " " \fISelection
For reporting commands, display only rows that match selection criteria.
All rows are displayed with the additional "selected" column (\fB-o selected\fP)
-showing 1 if the row matches the Selection and 0 otherwise.
-See \fBSELECTION CRITERIA\fP section of this man page for more
+showing 1 if the row matches the Selection and 0 otherwise. For non-reporting
+commands which process LVM entities, the selection can be used to match items
+to process. See \fBSELECTION CRITERIA\fP section of this man page for more
information about the way the selection criteria are constructed.
.TP
.BR \-M ", " \-\-metadatatype " " \fIType
@@ -242,6 +255,17 @@ will be made to communicate with the device-mapper kernel driver, so
this option is unable to report whether or not Logical Volumes are
actually in use.
.TP
+.B \-\-foreign
+Cause the command to access foreign VGs, that would otherwise be skipped.
+It can be used to report or display a VG that is owned by another host.
+This option can cause a command to perform poorly because lvmetad caching
+is not used and metadata is read from disks.
+.TP
+.B \-\-shared
+Cause the command to access shared VGs, that would otherwise be skipped
+when lvmlockd is not being used. It can be used to report or display a
+lockd VG without locking.
+.TP
.B \-\-addtag \fITag
Add the tag \fITag\fP to a PV, VG or LV.
Supply this argument multiple times to add more than one tag at once.
@@ -291,7 +315,9 @@ placing two stripes on the same Physical Volume.
.IR \fB\-\-commandprofile " " \fIProfileName
Selects the command configuration profile to use when processing an LVM command.
See also \fBlvm.conf\fP(5) for more information about \fBcommand profile config\fP and
-the way it fits with other LVM configuration methods.
+the way it fits with other LVM configuration methods. Using \fB\-\-commandprofile\fP
+option overrides any command profile specified via \fBLVM_COMMAND_PROFILE\fP
+environment variable.
.TP
.IR \fB\-\-metadataprofile " " \fIProfileName
Selects the metadata configuration profile to use when processing an LVM command.
@@ -305,8 +331,8 @@ way it fits with other LVM configuration methods.
.IR \fB\-\-profile " " \fIProfileName
A short form of \fB\-\-metadataprofile\fP for \fBvgcreate\fP, \fBlvcreate\fP,
\fBvgchange\fP and \fBlvchange\fP command and a short form of \fB\-\-commandprofile\fP
-for any other command (with the exception of \fBdumpconfig\fP command where the
-\-\-profile has special meaning, see \fBlvm dumpconfig\fP(8) for more information).
+for any other command (with the exception of \fBlvmconfig\fP command where the
+\-\-profile has special meaning, see \fBlvmconfig\fP(8) for more information).
.TP
.IR \fB\-\-config " " \fIConfigurationString
Uses the ConfigurationString as direct string representation of the configuration
@@ -459,6 +485,10 @@ that can be used in selection, see the output of \fB<lvm reporting command> -S h
\fB[\fP \(em List start
.IP
\fB]\fP \(em List end
+.IP
+\fB{\fP \(em List subset start
+.IP
+\fB}\fP \(em List subset end
.LP
\fBInformal grammar specification\fP:
.IP
@@ -466,7 +496,11 @@ that can be used in selection, see the output of \fB<lvm reporting command> -S h
.IP
.BR VALUE " = " [VALUE " log_op " VALUE]
.br
-For list-based types: string list. The log_op must always be of one type within the whole list value.
+For list-based types: string list. Matches strictly. The log_op must always be of one type within the whole list value.
+.IP
+.BR VALUE " = " {VALUE " log_op " VALUE}
+.br
+For list-based types: string list. Matches a subset. The log_op must always be of one type within the whole list value.
.IP
.BR VALUE " = " value
.br
@@ -480,13 +514,16 @@ All tools return a status code of zero on success or non-zero on failure.
Directory containing \fI.lvm_history\fP if the internal readline
shell is invoked.
.TP
+.B LVM_COMMAND_PROFILE
+Name of default command profile to use for LVM commands. This profile
+is overridden by direct use of \fB\-\-commandprofile\fP command line option.
+.TP
.B LVM_SYSTEM_DIR
Directory containing \fBlvm.conf\fP(5) and other LVM system files.
Defaults to "#DEFAULT_SYS_DIR#".
.TP
.B LVM_SUPPRESS_FD_WARNINGS
-Suppress warnings about openned file descriptors, when lvm command
-is executed.
+Suppress warnings about unexpected file descriptors passed into LVM.
.TP
.B LVM_VG_NAME
The Volume Group name that is assumed for
@@ -494,10 +531,40 @@ any reference to a Logical Volume that doesn't specify a path.
Not set by default.
.TP
.B LVM_LVMETAD_PIDFILE
-Path for the lvmetad pid file.
+Path to the file that stores the lvmetad process ID.
.TP
.B LVM_LVMETAD_SOCKET
-Path for the lvmetad socket file.
+Path to the socket used to communicate with lvmetad.
+.TP
+.B LVM_LVMPOLLD_PIDFILE
+Path to the file that stores the lvmpolld process ID.
+.TP
+.B LVM_LVMPOLLD_SOCKET
+Path to the socket used to communicate with lvmpolld.
+.TP
+.B LVM_LOG_FILE_EPOCH
+A string of up to 32 letters appended to the log filename and
+followed by the process ID and a timestamp. When set, each process logs to a
+separate file.
+.TP
+.B LVM_EXPECTED_EXIT_STATUS
+The status anticipated when the process exits. Use ">N" to match any
+status greater than N. If the actual exit status matches and a log
+file got produced, it is deleted.
+
+LVM_LOG_FILE_EPOCH and LVM_EXPECTED_EXIT_STATUS together allow
+automated test scripts to discard uninteresting log data.
+.TP
+.B LVM_SUPPRESS_LOCKING_FAILURE_MESSAGES
+Used to suppress warning messages when the configured locking is known
+to be unavailable.
+.TP
+.B DM_ABORT_ON_INTERNAL_ERRORS
+Abort processing if the code detects a non-fatal internal error.
+.TP
+.B DM_DISABLE_UDEV
+Avoid interaction with udev. LVM will manage the relevant nodes in /dev
+directly.
.SH FILES
.I #DEFAULT_SYS_DIR#/lvm.conf
.br
@@ -506,7 +573,6 @@ Path for the lvmetad socket file.
.BR lvm.conf (5),
.BR lvmcache (7),
.BR lvmthin (7),
-.BR lvm\ dumpconfig (8),
.BR clvmd (8),
.BR dmsetup (8),
.BR lvchange (8),
@@ -514,6 +580,7 @@ Path for the lvmetad socket file.
.BR lvdisplay (8),
.BR lvextend (8),
.BR lvmchange (8),
+.BR lvmconfig (8),
.BR lvmdiskscan (8),
.BR lvreduce (8),
.BR lvremove (8),
diff --git a/man/lvm.conf.5.in b/man/lvm.conf.5.in
index 288bc42ef..ae884beec 100644
--- a/man/lvm.conf.5.in
+++ b/man/lvm.conf.5.in
@@ -65,8 +65,8 @@ For this purpose, there's the \fBcommand_profile_template.profile\fP
(for metadata profiles) which contain all settings that are customizable
by profiles of certain type. Users are encouraged to copy these template
profiles and edit them as needed. Alternatively, the
-\fBlvm dumpconfig \-\-file <ProfileName.profile> \-\-type profilable-command <section>\fP
-or \fBlvm dumpconfig \-\-file <ProfileName.profile> \-\-type profilable-metadata <section>\fP
+\fBlvmconfig \-\-file <ProfileName.profile> \-\-type profilable-command <section>\fP
+or \fBlvmconfig \-\-file <ProfileName.profile> \-\-type profilable-metadata <section>\fP
can be used to generate a configuration with profilable settings in either
of the type for given section and save it to new ProfileName.profile
(if the section is not specified, all profilable settings are reported).
@@ -87,11 +87,11 @@ When several configuration methods are used at the same time
and when LVM looks for the value of a particular setting, it traverses
this \fBconfig cascade\fP from left to right:
-\fBdirect config override on command line\fP -> \fBcommand profile config\fP -> \fBmetadata profile config\fP -> \fBtag config\fP -> \fBlvm.conf\fP
+\fBdirect config override on command line\fP -> \fBcommand profile config\fP -> \fBmetadata profile config\fP -> \fBtag config\fP -> \fBlvmlocal.conf\fP -> \fBlvm.conf\fP
No part of this cascade is compulsory. If there's no setting value found at
the end of the cascade, a default value is used for that setting.
-Use \fBlvm dumpconfig\fP to check what settings are in use and what
+Use \fBlvmconfig\fP to check what settings are in use and what
the default values are.
.SH SYNTAX
.LP
@@ -155,502 +155,48 @@ An empty array is acceptable.
Strings with spaces must be enclosed in double quotes, single words that start
with a letter can be left unquoted.
-.SH SECTIONS
-.LP
-The sections that may be present in the file are:
-.TP
-\fBdevices\fP \(em Device settings
-.IP
-\fBdir\fP \(em Directory in which to create volume group device nodes.
-Defaults to "/dev". Commands also accept this as a prefix on volume
-group names.
-.IP
-\fBscan\fP \(em List of directories to scan recursively for
-LVM physical volumes.
-Devices in directories outside this hierarchy will be ignored.
-Defaults to "/dev".
-.IP
-\fBpreferred_names\fP \(em List of patterns compared in turn against
-all the pathnames referencing the same device in in the scanned directories.
-The pathname that matches the earliest pattern in the list is the
-one used in any output. As an example, if device-mapper multipathing
-is used, the following will select multipath device names:
+.SH SETTINGS
+
+The
+.B lvmconfig
+command prints the LVM configuration settings in various ways.
+See the man page
+.BR lvmconfig (8).
+
+Command to print a list of all possible config settings, with their
+default values:
.br
-\fBdevices { preferred_names = [ "^/dev/mapper/mpath" ] }\fP
-.IP
-\fBfilter\fP \(em List of patterns to apply to devices found by a scan.
-Patterns are regular expressions delimited by any character and preceded
-by \fBa\fP (for accept) or \fBr\fP (for reject). The list is traversed
-in order, and the first regex that matches determines if the device
-will be accepted or rejected (ignored). Devices that don't match
-any patterns are accepted. If you want to reject patterns that
-don't match, end the list with "r/.*/".
-If there are several names for the same device (e.g. symbolic links
-in /dev), if the first matching pattern in the list for any of the names is an
-\fBa\fP pattern, the device is accepted; otherwise if the first matching
-pattern in the list for any of the names is an \fBr\fP pattern it is rejected;
-otherwise it is accepted. As an example, to ignore /dev/cdrom you could use:
+.B lvmconfig \-\-type default
+
+Command to print a list of all possible config settings, with their
+default values, and a full description of each as a comment:
.br
-\fBdevices { filter=["r|cdrom|"] }\fP
-.IP
-\fBglobal_filter\fP \(em Since "filter" might get overridden from the command line, it
-is not suitable for system-wide device filtering (udev rules, lvmetad). To hide
-devices from LVM-specific udev processing and/or from lvmetad, you need to set
-global_filter. The syntax is the same as for normal "filter" above. Devices that
-fail the global_filter are not even opened by LVM.
-.IP
-\fBcache_dir\fP \(em Persistent filter cache file directory.
-Defaults to "#DEFAULT_CACHE_DIR#".
-.IP
-\fBwrite_cache_state\fP \(em Set to 0 to disable the writing out of the
-persistent filter cache file when \fBlvm\fP exits.
-Defaults to 1.
-.IP
-\fBtypes\fP \(em List of pairs of additional acceptable block device types
-found in /proc/devices together with maximum (non-zero) number of
-partitions (normally 16). By default, LVM2 supports ide, sd, md, loop,
-dasd, dac960, nbd, ida, cciss, ubd, ataraid, drbd, power2, i2o_block
-and iseries/vd. Block devices with major
-numbers of different types are ignored by LVM2.
-Example: \fBtypes = ["fd", 16]\fP.
-To create physical volumes on device-mapper volumes
-created outside LVM2, perhaps encrypted ones from \fBcryptsetup\fP,
-you'll need \fBtypes = ["device-mapper", 16]\fP. But if you do this,
-be careful to avoid recursion within LVM2. The figure for number
-of partitions is not currently used in LVM2 - and might never be.
-.IP
-\fBsysfs_scan\fP \(em If set to 1 and your kernel supports sysfs and
-it is mounted, sysfs will be used as a quick way of filtering out
-block devices that are not present.
-.IP
-\fBmd_component_detection\fP \(em If set to 1, LVM2 will ignore devices
-used as components of software RAID (md) devices by looking for md
-superblocks. This doesn't always work satisfactorily e.g. if a device
-has been reused without wiping the md superblocks first.
-.IP
-\fBmd_chunk_alignment\fP \(em If set to 1, and a Physical Volume is placed
-directly upon an md device, LVM2 will align its data blocks with the
-md device's stripe-width.
-.IP
-\fBdata_alignment_detection\fP \(em If set to 1, and your kernel provides
-topology information in sysfs for the Physical Volume, the start of data
-area will be aligned on a multiple of the ’minimum_io_size’ or
-’optimal_io_size’ exposed in sysfs. minimum_io_size is the smallest
-request the device can perform without incurring a read-modify-write
-penalty (e.g. MD's chunk size). optimal_io_size is the device's
-preferred unit of receiving I/O (e.g. MD's stripe width). minimum_io_size
-is used if optimal_io_size is undefined (0). If both \fBmd_chunk_alignment\fP
-and \fBdata_alignment_detection\fP are enabled the result of
-\fBdata_alignment_detection\fP is used.
-.IP
-\fBdata_alignment\fP \(em Default alignment (in KB) of start of data area
-when creating a new Physical Volume using the \fBlvm2\fP format.
-If a Physical Volume is placed directly upon an md device and
-\fBmd_chunk_alignment\fP or \fBdata_alignment_detection\fP is enabled
-this parameter is ignored. Set to 0 to use the default alignment of
-64KB or the page size, if larger.
-.IP
-\fBdata_alignment_offset_detection\fP \(em If set to 1, and your kernel
-provides topology information in sysfs for the Physical Volume, the
-start of the aligned data area of the Physical Volume will be shifted
-by the alignment_offset exposed in sysfs.
-.sp
-To see the location of the first Physical Extent of an existing Physical Volume
-use \fBpvs \-o +pe_start\fP . It will be a multiple of the requested
-\fBdata_alignment\fP plus the alignment_offset from
-\fBdata_alignment_offset_detection\fP (if enabled) or the pvcreate
-commandline.
-.IP
-\fBdisable_after_error_count\fP \(em During each LVM operation errors received
-from each device are counted. If the counter of a particular device exceeds
-the limit set here, no further I/O is sent to that device for the remainder of
-the respective operation. Setting the parameter to 0 disables the counters
-altogether.
-.IP
-\fBpv_min_size\fP \(em
-Minimal size (in KB) of the block device which can be used as a PV.
-In clustered environment all nodes have to use the same value.
-Any value smaller than 512KB is ignored. Up to and include version 2.02.84
-the default was 512KB. From 2.02.85 onwards it was changed to 2MB to
-avoid floppy drives by default.
-.IP
-\fBissue_discards\fP \(em
-Issue discards to a logical volumes's underlying physical volume(s) when the
-logical volume is no longer using the physical volumes' space (e.g. lvremove,
-lvreduce, etc). Discards inform the storage that a region is no longer in use.
-Storage that supports discards advertise the protocol specific way discards
-should be issued by the kernel (TRIM, UNMAP, or WRITE SAME with UNMAP bit set).
-Not all storage will support or benefit from discards but SSDs and thinly
-provisioned LUNs generally do. If set to 1, discards will only be issued if
-both the storage and kernel provide support.
-.IP
-.TP
-\fBallocation\fP \(em Space allocation policies
-.IP
-\fBcling_tag_list\fP \(em List of PV tags matched by the \fBcling\fP allocation policy.
-.IP
-When searching for free space to extend an LV, the \fBcling\fP
-allocation policy will choose space on the same PVs as the last
-segment of the existing LV. If there is insufficient space and a
-list of tags is defined here, it will check whether any of them are
-attached to the PVs concerned and then seek to match those PV tags
-between existing extents and new extents.
-.IP
-The @ prefix for tags is required.
-Use the special tag "@*" as a wildcard to match any PV tag and so use
-all PV tags for this purpose.
-.IP
-For example, LVs are mirrored between two sites within a single VG.
-PVs are tagged with either @site1 or @site2 to indicate where
-they are situated and these two PV tags are selected for use with this
-allocation policy:
-.IP
-cling_tag_list = [ "@site1", "@site2" ]
-.IP
-\fBcache_pool_cachemode\fP \(em Cache mode for new cache pools.
-.IP
-This is the default cache mode a new cache pool will be given.
-Valid cache modes are:
-\fBwritethrough\fP - Data blocks are immediately written from the
-cache to disk.
-\fBwriteback\fP - Data blocks are written from the cache
-back to disk after some delay to improve performance.
-.TP
-\fBlog\fP \(em Default log settings
-.IP
-\fBfile\fP \(em Location of log file. If this entry is not present, no
-log file is written.
-.IP
-\fBoverwrite\fP \(em Set to 1 to overwrite the log file each time a tool
-is invoked. By default tools append messages to the log file.
-.IP
-\fBlevel\fP \(em Log level (0-9) of messages to write to the file.
-9 is the most verbose; 0 should produce no output.
-.IP
-\fBverbose\fP \(em Default level (0-3) of messages sent to stdout or stderr.
-3 is the most verbose; 0 should produce the least output.
-.IP
-\fBsilent\fP \(em Set to 1 to suppress all non-essential tool output.
-When set, display and reporting tools will still write the requested
-device properties to standard output, but messages confirming that
-something was or wasn't changed will be reduced to the 'verbose' level
-and not appear unless \-v is supplied.
-.IP
-\fBsyslog\fP \(em Set to 1 (the default) to send log messages through syslog.
-Turn off by setting to 0. If you set to an integer greater than one,
-this is used - unvalidated - as the facility. The default is LOG_USER.
-See /usr/include/sys/syslog.h for safe facility values to use.
-For example, LOG_LOCAL0 might be 128.
-.IP
-\fBindent\fP \(em When set to 1 (the default) messages are indented
-according to their severity, two spaces per level.
-Set to 0 to turn off indentation.
-.IP
-\fBcommand_names\fP \(em When set to 1, the command name is used as a
-prefix for each message.
-Default is 0 (off).
-.IP
-\fBprefix\fP \(em Prefix used for all messages (after the command name).
-Default is two spaces.
-.IP
-\fBactivation\fP \(em Set to 1 to log messages while
-devices are suspended during activation.
-Only set this temporarily while debugging a problem because
-in low memory situations this setting can cause your machine to lock up.
-.TP
-\fBbackup\fP \(em Configuration for metadata backups.
-.IP
-\fBarchive_dir\fP \(em Directory used for automatic metadata archives.
-Backup copies of former metadata for each volume group are archived here.
-Defaults to "#DEFAULT_ARCHIVE_DIR#".
-.IP
-\fBbackup_dir\fP \(em Directory used for automatic metadata backups.
-A single backup copy of the current metadata for each volume group
-is stored here.
-Defaults to "#DEFAULT_BACKUP_DIR#".
-.IP
-\fBarchive\fP \(em Whether or not tools automatically archive existing
-metadata into \fBarchive_dir\fP before making changes to it.
-Default is 1 (automatic archives enabled).
-Set to 0 to disable.
-Disabling this might make metadata recovery difficult or impossible
-if something goes wrong.
-.IP
-\fBbackup\fP \(em Whether or not tools make an automatic backup
-into \fBbackup_dir\fP after changing metadata.
-Default is 1 (automatic backups enabled). Set to 0 to disable.
-Disabling this might make metadata recovery difficult or impossible
-if something goes wrong.
-.IP
-\fBretain_min\fP \(em Minimum number of archives to keep.
-Defaults to 10.
-.IP
-\fBretain_days\fP \(em Minimum number of days to keep archive files.
-Defaults to 30.
-.TP
-\fBshell\fP \(em LVM2 built-in readline shell settings
-.IP
-\fBhistory_size\fP \(em Maximum number of lines of shell history to retain (default 100) in $HOME/.lvm_history
-.TP
-\fBglobal\fP \(em Global settings
-.IP
-\fBtest\fP \(em If set to 1, run tools in test mode i.e. no changes to
-the on-disk metadata will get made. It's equivalent to having the
--t option on every command.
-.IP
-\fBactivation\fP \(em Set to 0 to turn off all communication with
-the device-mapper driver. Useful if you want to manipulate logical
-volumes while device-mapper is not present in your kernel.
-.IP
-\fBproc\fP \(em Mount point of proc filesystem.
-Defaults to /proc.
-.IP
-\fBumask\fP \(em File creation mask for any files and directories created.
-Interpreted as octal if the first digit is zero.
-Defaults to 077.
-Use 022 to allow other users to read the files by default.
-.IP
-\fBformat\fP \(em The default value of \fB\-\-metadatatype\fP used
-to determine which format of metadata to use when creating new
-physical volumes and volume groups. \fBlvm1\fP or \fBlvm2\fP.
-.IP
-\fBfallback_to_lvm1\fP \(em Set this to 1 if you need to
-be able to switch between 2.4 kernels using LVM1 and kernels
-including device-mapper.
-The LVM2 tools should be installed as normal and
-the LVM1 tools should be installed with a .lvm1 suffix e.g.
-vgscan.lvm1.
-If an LVM2 tool is then run but unable to communicate
-with device-mapper, it will automatically invoke the equivalent LVM1
-version of the tool. Note that for LVM1 tools to
-manipulate physical volumes and volume groups created by LVM2 you
-must use \fB\-\-metadataformat lvm1\fP when creating them.
-.IP
-\fBlibrary_dir\fP \(em A directory searched for LVM2's shared libraries
-ahead of the places \fBdlopen\fP (3) searches.
-.IP
-\fBformat_libraries\fP \(em A list of shared libraries to load that contain
-code to process different formats of metadata. For example, liblvm2formatpool.so
-is needed to read GFS pool metadata if LVM2 was configured \fB\-\-with-pool=shared\fP.
-.IP
-\fBlocking_type\fP \(em What type of locking to use.
-1 is the default, which use flocks on files in \fBlocking_dir\fP
-(see below) to
-avoid conflicting LVM2 commands running concurrently on a single
-machine. 0 disables locking and risks corrupting your metadata.
-If set to 2, the tools will load the external \fBlocking_library\fP
-(see below).
-If the tools were configured \fB\-\-with-cluster=internal\fP
-(the default) then 3 means to use built-in cluster-wide locking.
-Type 4 enforces read-only metadata and forbids any operations that
-might want to modify Volume Group metadata.
-All changes to logical volumes and their states are communicated
-using locks.
-.IP
-\fBwait_for_locks\fP \(em When set to 1, the default, the tools
-wait if a lock request cannot be satisfied immediately.
-When set to 0, the operation is aborted instead.
-.IP
-\fBlocking_dir\fP \(em The directory LVM2 places its file locks
-if \fBlocking_type\fP is set to 1. The default is \fB/var/lock/lvm\fP.
-.IP
-\fBlocking_library\fP \(em The name of the external locking
-library to load if \fBlocking_type\fP is set to 2.
-The default is \fBliblvm2clusterlock.so\fP. If you need to write
-such a library, look at the lib/locking source code directory.
-.IP
-\fBuse_lvmetad\fP \(em Whether to use (trust) a running instance of lvmetad. If
-this is set to 0, all commands fall back to the usual scanning mechanisms. When
-set to 1 \fBand\fP when lvmetad is running (it is not auto-started), the volume
-group metadata and PV state flags are obtained from the lvmetad instance and no
-scanning is done by the individual commands. In a setup with lvmetad, lvmetad
-udev rules \fBmust\fP be set up for LVM to work correctly. Without proper udev
-rules, all changes in block device configuration will be \fBignored\fP until a
-manual 'pvscan \-\-cache' is performed.
+.B lvmconfig \-\-type default \-\-withcomments
+
+Command to print a list of all possible config settings, with their
+current values (configured, non-default values are shown):
.br
-If lvmetad has been running while use_lvmetad was 0, it \fBMUST\fP be stopped before
-changing use_lvmetad to 1 and started again afterwards.
-.TP
-\fBtags\fP \(em Host tag settings
-.IP
-\fBhosttags\fP \(em If set to 1, create a host tag with the machine name.
-Setting this to 0 does nothing, neither creating nor destroying any tag.
-The machine name used is the nodename as returned by \fBuname\fP (2).
-.IP
-Additional host tags to be set can be listed here as subsections.
-The @ prefix for tags is optional.
-Each of these host tag subsections can contain a \fBhost_list\fP
-array of host names. If any one of these entries matches the machine
-name exactly then the host tag gets defined on this particular host,
-otherwise it doesn't.
-.IP
-After lvm.conf has been processed, LVM2 works through each host
-tag that has been defined in turn, and if there is a configuration
-file called lvm_\fB<host_tag>\fP.conf it attempts to load it.
-The activation/volume_list, devices/filter and devices/types settings are merged
-(these all are lists), otherwise any settings read in override settings found in
-earlier files. Any additional host tags defined get appended to the search list,
-so in turn they can lead to further configuration files being processed.
-Use \fBlvm dumpconfig\fP to check the result of config
-file processing.
-.IP
-The following example always sets host tags \fBtag1\fP and
-sets \fBtag2\fP on machines fs1 and fs2:
-.IP
-tags { tag1 { } tag2 { host_list = [ "fs1", "fs2" ] } }
-.IP
-These options are useful if you are replicating configuration files
-around a cluster. Use of \fBhosttags = 1\fP means every machine
-can have static and identical local configuration files yet use
-different settings and activate different logical volumes by
-default. See also \fBvolume_list\fP below and \fB\-\-addtag\fP
-in \fBlvm\fP (8).
-.TP
-\fBactivation\fP \(em Settings affecting device-mapper activation
-.IP
-\fBmissing_stripe_filler\fP \(em When activating an incomplete logical
-volume in partial mode, this option dictates how the missing data is
-replaced. A value of "error" will cause activation to create error
-mappings for the missing data, meaning that read access to missing
-portions of the volume will result in I/O errors. You can instead also
-use a device path, and in that case this device will be used in place of
-missing stripes. However, note that using anything other than
-"error" with mirrored or snapshotted volumes is likely to result in data
-corruption. For instructions on how to create a device that always
-returns zeros, see \fBlvcreate\fP (8).
-.IP
-\fBmirror_region_size\fP \(em Unit size in KB for copy operations
-when mirroring.
-.IP
-\fBreadahead\fP \(em Used when there is no readahead value stored
-in the volume group metadata. Set to \fBnone\fP to disable
-readahead in these circumstances or \fBauto\fP to use the default
-value chosen by the kernel.
-.IP
-\fBreserved_memory\fP, \fBreserved_stack\fP \(em How many KB to reserve
-for LVM2 to use while logical volumes are suspended. If insufficient
-memory is reserved before suspension, there is a risk of machine deadlock.
-.IP
-\fBprocess_priority\fP \(em The nice value to use while devices are
-suspended. This is set to a high priority so that logical volumes
-are suspended (with I/O generated by other processes to those
-logical volumes getting queued) for the shortest possible time.
-.IP
-\fBvolume_list\fP \(em This acts as a filter through which
-all requests to activate a logical volume on this machine
-are passed. A logical volume is only activated if it matches
-an item in the list. Tags must be preceded by @ and are checked
-against all tags defined in the logical volume and volume group
-metadata for a match.
-@* is short-hand to check every tag set on the host machine (see
-\fBtags\fP above).
-Logical volume and volume groups can also be included in the list
-by name e.g. vg00, vg00/lvol1.
-If this setting is not present but at least one host tag is defined
-then a default single-entry list containing @* is assumed.
-.IP
-\fBauto_activation_volume_list\fP \(em This acts as a filter through
-which all requests to autoactivate a logical volume on this machine
-are passed. A logical volume is autoactivated if it matches
-an item in the list. Volumes must also pass the \fBvolume_list\fP
-filter, if present. Tags must be preceded by @ and are checked against
-all tags defined in the logical volume and volume group metadata for
-a match. @* is short-hand to check every tag set on the host machine
-(see \fBtags\fP above).
-Logical volume and volume groups can also be included in the list
-by name e.g. vg00, vg00/lvol1.
-.IP
-\fBread_only_volume_list\fP \(em This acts as a filter through
-which all requests to activate a logical volume on this machine
-are passed. A logical volume is activated in read-only mode (instead
-of read-write) if it matches an item in the list. Volumes must first
-pass the \fBvolume_list\fP filter, if present. Tags must be preceded
-by @ and are checked against all tags defined in the logical volume
-and volume group metadata for a match.
-@* is short-hand to check every tag set on the host machine (see
-\fBtags\fP above).
-Logical volume and volume groups can also be included in the list
-by name e.g. vg00, vg00/lvol1.
-.TP
-\fBmetadata\fP \(em Advanced metadata settings
-.IP
-\fBpvmetadatacopies\fP \(em When creating a physical volume using the
-LVM2 metadata format, this is the default number of copies of metadata
-to store on each physical volume.
-Currently it can be set to 0, 1 or 2. The default is 1.
-If set to 2, one copy is placed at the beginning of the disk
-and the other is placed at the end.
-It can be overridden on the command line with \fB\-\-pvmetadatacopies\fP
-(see \fBpvcreate\fP).
-If creating a volume group with just one physical volume, it's a
-good idea to have 2 copies. If creating a large volume group with
-many physical volumes, you may decide that 3 copies of the metadata
-is sufficient, i.e. setting it to 1 on three of the physical volumes,
-and 0 on the rest. Every volume group must contain at least one
-physical volume with at least 1 copy of the metadata (unless using
-the text files described below). The disadvantage of having lots
-of copies is that every time the tools access the volume group, every
-copy of the metadata has to be accessed, and this slows down the
-tools.
-.IP
-\fBpvmetadatasize\fP \(em Approximate number of sectors to set aside
-for each copy of the metadata. Volume groups with large numbers of
-physical or logical volumes, or volumes groups containing complex
-logical volume structures will need additional space for their metadata.
-The metadata areas are treated as circular buffers, so
-unused space becomes filled with an archive of the most recent
-previous versions of the metadata.
-.IP
-\fBpvmetadataignore\fP When creating a physical volume using the LVM2
-metadata format, this states whether metadata areas should be ignored.
-The default is "n". If metadata areas on a physical volume are ignored,
-LVM will not not store metadata in the metadata areas present on newly
-created Physical Volumes. The option can be overridden on the command
-line with \fB\-\-metadataignore\fP (See \fBpvcreate\fP and \fBpvchange\fP).
-Metadata areas cannot be created or extended after Logical Volumes have
-been allocated on the device.
-If you do not want to store metadata on this device, it is still wise
-always to allocate a metadata area (use a non-zero value for
-\fB\-\-pvmetadatacopies\fP) in case you need it in the future and to use
-this option to instruct LVM2 to ignore it.
-.IP
-\fBvgmetadatacopies\fP \(em When creating a volume group using the
-LVM2 metadata format, this is the default number of copies of metadata
-desired across all the physical volumes in the volume group. If set to
-a non-zero value, LVM will automatically set or clear the metadataignore
-flag on the physical volumes (see \fBpvcreate\fP and \fBpvchange\fP
-\fB\-\-metadataignore\fP) in order to achieve the desired number of metadata
-copies. An LVM command that adds or removes physical volumes (for example,
-\fBvgextend\fP, \fBvgreduce\fP, \fBvgsplit\fP, or \fBvgmerge\fP), may cause
-LVM to automatically set or clear the metadataignore flags. Also, if
-physical volumes go missing or reappear, or a new number of copies is
-explicitly set (see \fBvgchange \-\-vgmetadatacopies\fP), LVM may adjust
-the metadataignore flags.
-Set \fBvgmetadatacopies\fP to 0 instructs LVM not to set or clear the
-metadataignore flags automatically. You may set a value larger than the
-sum of all metadata areas on all physical volumes. The value can
-be overridden on the command line with \fB\-\-vgmetadatacopies\fP for various
-commands (for example, \fBvgcreate\fP and \fBvgchange\fP), and can be
-queryied with the \fBvg_mda_copies\fP field of \fBvgs\fP. This option
-is useful for volume groups containing large numbers of physical volumes
-with metadata as it may be used to minimize metadata read and write overhead.
-.IP
-\fBdirs\fP \(em List of directories holding live copies of LVM2
-metadata as text files. These directories must not be on logical
-volumes. It is possible to use LVM2 with a couple of directories
-here, preferably on different (non-logical-volume) filesystems
-and with no other on-disk metadata, \fBpvmetadatacopies = 0\fP.
-Alternatively these directories can be in addition to the
-on-disk metadata areas. This feature was created during the
-development of the LVM2 metadata before the new on-disk metadata
-areas were designed and no longer gets tested.
-It is not supported under low-memory conditions, and it is
-important never to edit these metadata files unless you fully
-understand how things work: to make changes you should always use
-the tools as normal, or else vgcfgbackup, edit backup, vgcfgrestore.
+.B lvmconfig \-\-type current
+
+Command to print all config settings that have been configured with a
+different value than the default (configured, non-default values are
+shown):
+.br
+.B lvmconfig \-\-type diff
+
+Command to print a single config setting, with its default value,
+and a full description, where "Section" refers to the config section,
+e.g. global, and "Setting" refers to the name of the specific setting,
+e.g. umask:
+.br
+.B lvmconfig \-\-type default \-\-withcomments Section/Setting
+
+
.SH FILES
.I #DEFAULT_SYS_DIR#/lvm.conf
.br
+.I #DEFAULT_SYS_DIR#/lvmlocal.conf
+.br
.I #DEFAULT_ARCHIVE_DIR#
.br
.I #DEFAULT_BACKUP_DIR#
@@ -658,10 +204,10 @@ the tools as normal, or else vgcfgbackup, edit backup, vgcfgrestore.
.I #DEFAULT_CACHE_DIR#/.cache
.br
.I #DEFAULT_LOCK_DIR#
+.br
+.I #DEFAULT_PROFILE_DIR#
+
.SH SEE ALSO
-.BR lvm (8),
-.BR umask (2),
-.BR uname (2),
-.BR dlopen (3),
-.BR syslog (3),
-.BR syslog.conf (5)
+.BR lvm (8),
+.BR lvmconfig (8)
+
diff --git a/man/lvmcache.7.in b/man/lvmcache.7.in
index 24ce5b4ed..725738634 100644
--- a/man/lvmcache.7.in
+++ b/man/lvmcache.7.in
@@ -16,7 +16,7 @@ origin LV to increase speed. The cache metadata LV holds the
accounting information that specifies where data blocks are stored (e.g.
on the origin LV or on the cache data LV). Users should be familiar with
these LVs if they wish to create the best and most robust cached
-logical volumes.
+logical volumes. All of these associated LVs must be in the same VG.
.SH Cache Terms
.nf
@@ -254,6 +254,43 @@ defines the default cache mode.
# lvconvert \-\-type cache \-\-cachepool vg/cache1 vg/lv1
.fi
+The cache mode can be changed on an existing LV with the command:
+
+.B lvconvert \-\-cachemode writethrough|writeback VG/CacheLV
+
+
+.SS Cache policy & policy settings
+
+\&
+
+The cache subsystem has an additional per-LV parameter, namely the cache policy
+to use, and possibly the tunable parameters of the said cache policy. In the
+current implementation, two policies are available, "mq" which is the default
+policy and "cleaner" which is used to force the cache to write back (flush) all
+cached writes to the origin LV. Moreover, the "mq" policy has a number of
+tunable parameters: the defaults are chosen to be suitable for the vast
+majority of systems. However, under special circumstances, changing the tunable
+settings of the cache policy can improve performance.
+
+On an existing cache LV, the policy can be set (to "mq") and the cache settings
+can be changed using commands like these:
+
+.I Example
+
+.nf
+# lvchange \-\-cachepolicy mq vg/lv1
+# lvchange \-\-cachesettings \(aqmigration_threshold=2048 random_threshold=4\(aq \\
+ vg/lv1
+.fi
+
+Both commands can be combined, setting both cache policy and its settings
+together. Moreover, when creating a cache LV for the first time (using
+lvcreate), the \-\-cachepolicy and \-\-cachesettings parameters can be used as
+well. The current policy and the policy settings can be listed using the lvs
+command, using 'cache_policy' and 'cache_settings' fields:
+
+# lvs \-o +cache_policy,cache_settings
+
.SS Spare metadata LV
\&
diff --git a/man/lvmconf.8.in b/man/lvmconf.8.in
index bfd539a70..1ce706139 100644
--- a/man/lvmconf.8.in
+++ b/man/lvmconf.8.in
@@ -6,16 +6,23 @@ lvmconf \(em LVM configuration modifier
.B lvmconf
.RB [ \-\-disable-cluster ]
.RB [ \-\-enable-cluster ]
+.RB [ \-\-enable-halvm ]
+.RB [ \-\-disable-halvm ]
.RB [ \-\-file
.RI < configfile >]
.RB [ \-\-lockinglib
.RI < lib >]
.RB [ \-\-lockinglibdir
.RI < dir >]
+.RB [ \-\-services ]
+.RB [ \-\-mirrorservice ]
+.RB [ \-\-startstopservices ]
.SH "DESCRIPTION"
lvmconf is a script that modifies the locking configuration in
-an lvm configuration file. See \fBlvm.conf\fP(5).
+an lvm configuration file. See \fBlvm.conf\fP(5). In addition
+to that, it can also set Systemd or SysV services according to
+changes in the lvm configuration if needed.
.SH "OPTIONS"
.TP
@@ -27,6 +34,14 @@ lvmetad use to its default.
Set \fBlocking_type\fR to the default clustered type on this system.
Also disable lvmetad use as it is not yet supported in clustered environment.
.TP
+.BR \-\-disable-halvm
+Set \fBlocking_type\fR to the default non-clustered type. Also reset
+lvmetad use to its default.
+.TP
+.BR \-\-enable-halvm
+Set \fBlocking_type\fR suitable for HA LVM use.
+Also disable lvmetad use as it is not yet supported in HA LVM environment.
+.TP
.BR \-\-file " <" \fIconfigfile >
Apply the changes to \fIconfigfile\fP instead of the default
\fI#DEFAULT_SYS_DIR#/lvm.conf\fP.
@@ -35,6 +50,18 @@ Apply the changes to \fIconfigfile\fP instead of the default
Set external \fBlocking_library\fR locking library to load if an external locking type is used.
.TP
.BR \-\-lockinglibdir " <" \fIdir >
+.TP
+.BR \-\-services
+In addition to setting the lvm configuration, also enable or disable related Systemd or SysV
+clvmd and lvmetad services. This script does not configure services provided by cluster resource
+agents.
+.TP
+.BR \-\-mirrorservice
+Also enable or disable optional cmirrord service when handling services (applicable only with \-\-services).
+.TP
+.BR \-\-startstopservices
+In addition to enabling or disabling related services, start or stop them immediately
+(applicable only with \-\-services).
.SH FILES
.I #DEFAULT_SYS_DIR#/lvm.conf
diff --git a/man/lvmconfig.8.in b/man/lvmconfig.8.in
new file mode 100644
index 000000000..bbbeb6da3
--- /dev/null
+++ b/man/lvmconfig.8.in
@@ -0,0 +1,211 @@
+.TH "LVMCONFIG" "8" "LVM TOOLS #VERSION#" "Red Hat, Inc" "\""
+.SH "NAME"
+lvmconfig, lvm dumpconfig, lvm config \(em Display LVM configuration
+.SH SYNOPSIS
+.B lvmconfig
+.RB [ \-f | \-\-file
+.IR filename ]
+.RB [ \-\-type
+.RI { current | default | diff | full | list | missing | new | profilable | profilable-command | profilable-metadata }]
+.RB [ \-\-atversion
+.IR version ]
+.RB [ \-\-ignoreadvanced ]
+.RB [ \-\-ignoreunsupported ]
+.RB [ \-\-ignorelocal ]
+.RB [ \-l | \-\-list ]
+.RB [ \-\-config
+.IR ConfigurationString ]
+.RB [ \-\-commandprofile
+.IR ProfileName ]
+.RB [ \-\-profile
+.IR ProfileName ]
+.RB [ \-\-metadataprofile
+.IR ProfileName ]
+.RB [ \-\-mergedconfig ]
+.RB [ \-\-showdeprecated ]
+.RB [ \-\-showunsupported ]
+.RB [ \-\-validate ]
+.RB [ \-\-withsummary ]
+.RB [ \-\-withcomments ]
+.RB [ \-\-withspaces ]
+.RB [ \-\-withversions ]
+.RB [ ConfigurationNode... ]
+
+.SH DESCRIPTION
+lvmconfig produces formatted output from the LVM configuration tree.
+The command was added in release 2.02.119 and has an identical longer form
+\fBlvm dumpconfig\fP.
+
+.SH OPTIONS
+.TP
+.BR \-f ", " \-\-file " \fIfilename"
+Send output to a file named 'filename'.
+
+.TP
+.BR \-l ", " \-\-list
+List configuration settings with summarizing comment. This is the same as using
+\fBlvmconfig \-\-type list \-\-withsummary\fP.
+
+.TP
+.IR \fB\-\-type " {" current | default | diff | full | list | missing | new | profilable | profilable-command | profilable-metadata }
+Select the type of configuration to display. The configuration settings
+displayed have either default values or currently-used values assigned based on
+the type selected. If no type is selected, \fB\-\-type current\fP is used
+by default. Whenever a configuration setting with a default value is
+commented out, it means the setting does not have any concrete default
+value defined. Output can be saved and used as a proper \fBlvm.conf\fP(5)
+file.
+.RS
+.IP current 3
+Display the current \fBlvm.conf\fP configuration merged with any \fBtag
+config\fP if used. See also \fBlvm.conf\fP(5) for more info about LVM
+configuration methods.
+.IP default 3
+Display all possible configuration settings with default values assigned.
+.IP diff 3
+Display all configuration settings for which the values used differ from defaults.
+The value assigned for each configuration setting is the value currently used.
+This is effectively a minimal LVM configuration which can be used without
+changing the currently configured behaviour.
+.IP full 3
+Display full configuration tree - a combination of current configuration tree
+(\fB\-\-type current\fP) and tree of settings for which default values are
+used (\fB\-\-type missing\fP). This is exactly the configuration tree that
+LVM2 uses during command execution. Using this type also implies
+the use of \fB\-\-mergedconfig\fP option. If comments are displayed
+(see \fB\-\-withcomments\fP and \fB\-\-withsummary\fP options), then
+for each setting found in existing configuration and for which defaults
+are not used, there's an extra comment line printed to denote this.
+.IP list 3
+Display plain list of configuration settings.
+.IP missing 3
+Display all configuration settings with default values assigned which are
+missing in the configuration currently used and for which LVM automatically
+falls back to using these default values.
+.IP new 3
+Display all new configuration settings introduced in current LVM version
+or specific version as defined by \fB\-\-atversion\fP option.
+.IP profilable 3
+Display all profilable configuration settings with default values assigned.
+See \fBlvm.conf\fP(5) for more info about \fBprofile config\fP method.
+.IP profilable-command 3
+Display all profilable configuration settings with default values assigned
+that can be used in command profile. This is a subset of settings displayed
+by \fB\-\-type profilable\fP.
+.IP profilable-metadata 3
+Display all profilable configuration settings with default values assigned
+that can be used in metadata profile. This is a subset of settings displayed
+by \fB\-\-type profilable\fP.
+.RE
+
+.TP
+.BI \-\-atversion " version"
+Specify an LVM version in x.y.z format where x is the major version,
+y is the minor version and z is the patchlevel (e.g. 2.2.106).
+When configuration is displayed, the configuration settings recognized
+at this LVM version will be considered only. This can be used
+to display a configuration that a certain LVM version understands and
+which does not contain any newer settings for which LVM would
+issue a warning message when checking the configuration.
+
+.TP
+.B \-\-ignoreadvanced
+Exclude advanced configuration settings from the output.
+
+.TP
+.B \-\-ignoreunsupported
+Exclude unsupported configuration settings from the output. These settings are
+either used for debugging and development purposes only or their support is not
+yet complete and they are not meant to be used in production. The \fBcurrent\fP
+and \fBdiff\fP types include unsupported settings in their output by default,
+all the other types ignore unsupported settings.
+
+.TP
+.B \-\-ignorelocal
+Ignore local section.
+
+.TP
+.BI \-\-config " ConfigurationString"
+Use \fBConfigurationString\fP to override existing configuration.
+This configuration is then applied for the lvmconfig command itself.
+See also \fBlvm.conf\fP(5) for more info about \fBconfig cascade\fP.
+
+.TP
+.BI \-\-commandprofile " ProfileName"
+Use \fBProfileName\fP to override existing configuration.
+This configuration is then applied for the lvmconfig command itself.
+See also \fB\-\-mergedconfig\fP option and \fBlvm.conf\fP(5) for
+more info about \fBconfig cascade\fP.
+
+.TP
+.BI \-\-profile " ProfileName"
+The same as using \fB\-\-commandprofile\fP but the configuration is not
+applied for the lvmconfig command itself.
+
+.TP
+.BI \-\-metadataprofile " ProfileName"
+Use \fBProfileName\fP to override existing configuration.
+The configuration defined in metadata profile has no effect for
+the lvmconfig command itself. lvmconfig displays the configuration only.
+See also \fB\-\-mergedconfig\fP option and \fBlvm.conf\fP(5) for more
+info about \fBconfig cascade\fP.
+
+.TP
+.B \-\-mergedconfig
+When the lvmconfig command is run with the \fB\-\-config\fP option
+and/or \fB\-\-commandprofile\fP (or using \fBLVM_COMMAND_PROFILE\fP
+environment variable), \fB\-\-profile\fP, \fB\-\-metadataprofile\fP
+option, merge all the contents of the \fBconfig cascade\fP before displaying it.
+Without the \fB\-\-mergedconfig\fP option used, only the configuration at
+the front of the cascade is displayed. See also \fBlvm.conf\fP(5) for more
+info about \fBconfig cascade\fP.
+
+.TP
+.B \-\-showdeprecated
+Include deprecated configuration settings in the output. These settings
+are always deprecated since certain version. If concrete version is specified
+with \fB\-\-atversion\fP option, deprecated settings are automatically included
+if specified version is lower than the version in which the settings were
+deprecated. The \fBcurrent\fP and \fBdiff\fP types include deprecated settings
+in their output by default, all the other types ignore deprecated settings.
+
+.TP
+.B \-\-showunsupported
+Include unsupported configuration settings in the output. These settings
+are either used for debugging or development purposes only or their support
+is not yet complete and they are not meant to be used in production. The
+\fBcurrent\fP and \fBdiff\fP types include unsupported settings in their
+output by default, all the other types ignore unsupported settings.
+
+.TP
+.B \-\-validate
+Validate current configuration used and exit with appropriate
+return code. The validation is done only for the configuration
+at the front of the \fBconfig cascade\fP. To validate the whole
+merged configuration tree, use also the \fB\-\-mergedconfig\fP option.
+The validation is done even if \fBconfig/checks\fP \fBlvm.conf\fP(5)
+option is disabled.
+
+.TP
+.B \-\-withsummary
+Display a one line comment for each configuration node.
+
+.TP
+.B \-\-withcomments
+Display a full comment for each configuration node. For deprecated
+settings, also display comments about deprecation in addition.
+
+.TP
+.B \-\-withspaces
+Where appropriate, add more spaces in output for better readability.
+
+.TP
+.B \-\-withversions
+Also display a comment containing the version of introduction for
+each configuration node. If the setting is deprecated, also display
+the version since which it is deprecated.
+
+.SH SEE ALSO
+.BR lvm (8)
+.BR lvmconf (8)
+.BR lvm.conf (5)
diff --git a/man/lvmdump.8.in b/man/lvmdump.8.in
index 9a8c46970..f9c22846f 100644
--- a/man/lvmdump.8.in
+++ b/man/lvmdump.8.in
@@ -10,6 +10,7 @@ lvmdump \(em create lvm2 information dumps for diagnostic purposes
.RB [ \-h ]
.RB [ \-l ]
.RB [ \-m ]
+.RB [ \-p ]
.RB [ \-s ]
.RB [ \-u ]
.SH DESCRIPTION
@@ -41,6 +42,8 @@ The content of the tarball is as follows:
.br
- if enabled with \-l, lvmetad state if running
.br
+- if enabled with \-p, lvmpolld state if running
+.br
- if enabled with \-s, system info and context
.br
- if enabled with \-u, udev info and context
@@ -75,6 +78,12 @@ This option generates a 1:1 dump of the metadata area from all PVs visible
to the system, which can cause the dump to increase in size considerably.
However, the metadata dump may represent a valuable diagnostic resource.
.TP
+.B \-p
+Include \fBlvmpolld\fP(8) daemon dump if it is running. The dump contains
+all in-progress operations currently monitored by the daemon, and a partial
+history of all yet-uncollected results of polling operations that have
+already finished, including the reason for completion.
+.TP
.B \-s
Gather system info and context. Currently, this encompasses systemd info
and context only: overall state of systemd units present in the system,
diff --git a/man/lvmetad.8.in b/man/lvmetad.8.in
index 4f602496d..cdaf198ca 100644
--- a/man/lvmetad.8.in
+++ b/man/lvmetad.8.in
@@ -1,6 +1,7 @@
.TH LVMETAD 8 "LVM TOOLS #VERSION#" "Red Hat Inc" \" -*- nroff -*-
.SH NAME
lvmetad \(em LVM metadata cache daemon
+
.SH SYNOPSIS
.B lvmetad
.RB [ \-l
@@ -13,16 +14,56 @@ lvmetad \(em LVM metadata cache daemon
.RB [ \-h ]
.RB [ \-V ]
.RB [ \-? ]
+
.SH DESCRIPTION
-lvmetad is a metadata caching daemon for LVM. The daemon receives notifications
-from udev rules (which must be installed for LVM to work correctly when lvmetad
-is in use). Through these notifications, lvmetad has an up-to-date and
-consistent image of the volume groups available in the system.
-
-lvmetad is used by LVM only if it is enabled in \fBlvm.conf\fP(5) by specifying the
-\fBglobal/use_lvmetad\fP setting. If this is not defined in the LVM configuration
-explicitly then default setting is used instead (see the output of
-\fBlvm dumpconfig \-\-type default global/use_lvmetad\fP command).
+
+The lvmetad daemon caches LVM metadata, so that LVM commands can read
+metadata without scanning disks.
+
+Metadata caching can be an advantage because scanning disks is time
+consuming and may interfere with the normal work of the system and disks.
+
+lvmetad does not read metadata from disks itself. The 'pvscan \-\-cache'
+command scans disks, reads the LVM metadata and sends it to lvmetad.
+
+New LVM disks that appear on the system must be scanned by pvscan before
+lvmetad knows about them. If lvmetad does not know about a disk, then LVM
+commands using lvmetad will also not know about it. When disks are added
+or removed from the system, lvmetad must be updated.
+
+lvmetad is usually combined with event-based system services that
+automatically run pvscan \-\-cache on new disks. This way, the lvmetad
+cache is automatically updated with metadata from new disks when they
+appear. LVM udev rules and systemd services implement this automation.
+Automatic scanning is usually combined with automatic activation. For
+more information, see
+.BR pvscan (8).
+
+If lvmetad is started or restarted after disks have been added to the
+system, or if the global_filter has changed, the cache must be updated by
+running 'pvscan \-\-cache'.
+
+When lvmetad is not used, LVM commands revert to scanning disks for LVM
+metadata.
+
+Use of lvmetad is enabled/disabled by:
+.br
+.BR lvm.conf (5)
+.B global/use_lvmetad
+
+For more information on this setting, see:
+.br
+.B lvmconfig \-\-withcomments global/use_lvmetad
+
+To make LVM at the system level (e.g. lvmetad, pvscan) ignore disks, use:
+.br
+.BR lvm.conf (5)
+.B devices/global_filter
+
+For more information on this setting, see
+.br
+.B lvmconfig \-\-withcomments devices/global_filter
+
.SH OPTIONS
To run the daemon in a test environment both the pidfile_path and the
@@ -68,4 +109,6 @@ Path for the socket file.
.SH SEE ALSO
.BR lvm (8),
-.BR lvm.conf (5)
+.BR lvmconfig (8),
+.BR lvm.conf (5),
+.BR pvscan (8)
diff --git a/man/lvmlockd.8.in b/man/lvmlockd.8.in
new file mode 100644
index 000000000..1daea180b
--- /dev/null
+++ b/man/lvmlockd.8.in
@@ -0,0 +1,781 @@
+.TH "LVMLOCKD" "8" "LVM TOOLS #VERSION#" "Red Hat, Inc" "\""
+
+.SH NAME
+lvmlockd \(em LVM locking daemon
+
+.SH DESCRIPTION
+LVM commands use lvmlockd to coordinate access to shared storage.
+.br
+When LVM is used on devices shared by multiple hosts, locks will:
+
+\[bu]
+coordinate reading and writing of LVM metadata
+.br
+\[bu]
+validate caching of LVM metadata
+.br
+\[bu]
+prevent concurrent activation of logical volumes
+.br
+
+lvmlockd uses an external lock manager to perform basic locking.
+.br
+Lock manager (lock type) options are:
+
+\[bu]
+sanlock: places locks on disk within LVM storage.
+.br
+\[bu]
+dlm: uses network communication and a cluster manager.
+.br
+
+.SH OPTIONS
+
+lvmlockd [options]
+
+For default settings, see lvmlockd \-h.
+
+.B \-\-help | \-h
+ Show this help information.
+
+.B \-\-version | \-V
+ Show version of lvmlockd.
+
+.B \-\-test | \-T
+ Test mode, do not call lock manager.
+
+.B \-\-foreground | \-f
+ Don't fork.
+
+.B \-\-daemon\-debug | \-D
+ Don't fork and print debugging to stdout.
+
+.B \-\-pid\-file | \-p
+.I path
+ Set path to the pid file.
+
+.B \-\-socket\-path | \-s
+.I path
+ Set path to the socket to listen on.
+
+.B \-\-syslog\-priority | \-S err|warning|debug
+ Write log messages from this level up to syslog.
+
+.B \-\-gl\-type | \-g sanlock|dlm
+ Set global lock type to be sanlock or dlm.
+
+.B \-\-host\-id | \-i
+.I num
+ Set the local sanlock host id.
+
+.B \-\-host\-id\-file | \-F
+.I path
+ A file containing the local sanlock host_id.
+
+.B \-\-sanlock\-timeout | \-o
+.I seconds
+ Override the default sanlock I/O timeout.
+
+.B \-\-adopt | \-A 0|1
+ Adopt locks from a previous instance of lvmlockd.
+
+
+.SH USAGE
+
+.SS Initial set up
+
+Using LVM with lvmlockd for the first time includes some one\-time set up
+steps:
+
+.SS 1. choose a lock manager
+
+.I dlm
+.br
+If dlm (or corosync) are already being used by other cluster
+software, then select dlm. dlm uses corosync which requires additional
+configuration beyond the scope of this document. See corosync and dlm
+documentation for instructions on configuration, setup and usage.
+
+.I sanlock
+.br
+Choose sanlock if dlm/corosync are not otherwise required.
+sanlock does not depend on any clustering software or configuration.
+
+.SS 2. configure hosts to use lvmlockd
+
+On all hosts running lvmlockd, configure lvm.conf:
+.nf
+locking_type = 1
+use_lvmlockd = 1
+use_lvmetad = 1
+.fi
+
+.I sanlock
+.br
+Assign each host a unique host_id in the range 1\-2000 by setting
+.br
+/etc/lvm/lvmlocal.conf local/host_id
+
+.SS 3. start lvmlockd
+
+Use a service/init file if available, or just run "lvmlockd".
+
+.SS 4. start lock manager
+
+.I sanlock
+.br
+systemctl start wdmd sanlock
+
+.I dlm
+.br
+Follow external clustering documentation when applicable, otherwise:
+.br
+systemctl start corosync dlm
+
+.SS 5. create VG on shared devices
+
+vgcreate \-\-shared <vgname> <devices>
+
+The shared option sets the VG lock type to sanlock or dlm depending on
+which lock manager is running. LVM commands will perform locking for the
+VG using lvmlockd.
+
+.SS 6. start VG on all hosts
+
+vgchange \-\-lock\-start
+
+lvmlockd requires shared VGs to be started before they are used. This is
+a lock manager operation to start (join) the VG lockspace, and it may take
+some time. Until the start completes, locks for the VG are not available.
+LVM commands are allowed to read the VG while start is in progress. (An
+init/unit file can also be used to start VGs.)
+
+.SS 7. create and activate LVs
+
+Standard lvcreate and lvchange commands are used to create and activate
+LVs in a shared VG.
+
+An LV activated exclusively on one host cannot be activated on another.
+When multiple hosts need to use the same LV concurrently, the LV can be
+activated with a shared lock (see lvchange options \-aey vs \-asy.)
+(Shared locks are disallowed for certain LV types that cannot be used from
+multiple hosts.)
+
+
+.SS Normal start up and shut down
+
+After initial set up, start up and shut down include the following general
+steps. They can be performed manually or using the system service
+manager.
+
+\[bu]
+start lvmetad
+.br
+\[bu]
+start lvmlockd
+.br
+\[bu]
+start lock manager
+.br
+\[bu]
+vgchange \-\-lock\-start
+.br
+\[bu]
+activate LVs in shared VGs
+.br
+
+The shut down sequence is the reverse:
+
+\[bu]
+deactivate LVs in shared VGs
+.br
+\[bu]
+vgchange \-\-lock\-stop
+.br
+\[bu]
+stop lock manager
+.br
+\[bu]
+stop lvmlockd
+.br
+\[bu]
+stop lvmetad
+.br
+
+.P
+
+.SH TOPICS
+
+.SS VG access control
+
+The following terms are used to describe different forms of VG access
+control.
+
+.I "lockd VG"
+
+A "lockd VG" is a shared VG that has a "lock type" of dlm or sanlock.
+Using it requires lvmlockd. These VGs exist on shared storage that is
+visible to multiple hosts. LVM commands use lvmlockd to perform locking
+for these VGs when they are used.
+
+If the lock manager for the lock type is not available (e.g. not started
+or failed), lvmlockd is unable to acquire locks for LVM commands. LVM
+commands that only read the VG will generally be allowed to continue
+without locks in this case (with a warning). Commands to modify or
+activate the VG will fail without the necessary locks. Maintaining a
+properly running lock manager requires knowledge covered in separate
+documentation.
+
+.I "local VG"
+
+A "local VG" is meant to be used by a single host. It has no lock type or
+lock type "none". LVM commands and lvmlockd do not perform locking for
+these VGs. A local VG typically exists on local (non\-shared) devices and
+cannot be used concurrently from different hosts.
+
+If a local VG does exist on shared devices, it should be owned by a single
+host by having its system ID set, see
+.BR lvmsystemid (7).
+Only the host with a matching system ID can use the local VG. A VG
+with no lock type and no system ID should be excluded from all but one
+host using lvm.conf filters. Without any of these protections, a local VG
+on shared devices can be easily damaged or destroyed.
+
+.I "clvm VG"
+
+A "clvm VG" is a VG on shared storage (like a lockd VG) that requires
+clvmd for clustering. See below for converting a clvm VG to a lockd VG.
+
+
+.SS lockd VGs from hosts not using lvmlockd
+
+Only hosts that use lockd VGs should be configured to run lvmlockd.
+However, devices with lockd VGs may be visible from hosts not using
+lvmlockd. From a host not using lvmlockd, visible lockd VGs are ignored
+in the same way as foreign VGs, i.e. those with a foreign system ID, see
+.BR lvmsystemid (7).
+
+The \-\-shared option for reporting and display commands causes lockd VGs
+to be displayed on a host not using lvmlockd, like the \-\-foreign option
+does for foreign VGs.
+
+
+.SS vgcreate comparison
+
+The type of VG access control is specified in the vgcreate command.
+See
+.BR vgcreate (8)
+for all vgcreate options.
+
+.B vgcreate <vgname> <devices>
+
+.IP \[bu] 2
+Creates a local VG with the local system ID when neither lvmlockd nor clvm are configured.
+.IP \[bu] 2
+Creates a local VG with the local system ID when lvmlockd is configured.
+.IP \[bu] 2
+Creates a clvm VG when clvm is configured.
+
+.P
+
+.B vgcreate \-\-shared <vgname> <devices>
+.IP \[bu] 2
+Requires lvmlockd to be configured and running.
+.IP \[bu] 2
+Creates a lockd VG with lock type sanlock|dlm depending on which lock
+manager is running.
+.IP \[bu] 2
+LVM commands request locks from lvmlockd to use the VG.
+.IP \[bu] 2
+lvmlockd obtains locks from the selected lock manager.
+
+.P
+
+.B vgcreate \-c|\-\-clustered y <vgname> <devices>
+.IP \[bu] 2
+Requires clvm to be configured and running.
+.IP \[bu] 2
+Creates a clvm VG with the "clustered" flag.
+.IP \[bu] 2
+LVM commands request locks from clvmd to use the VG.
+
+.P
+
+.SS using lockd VGs
+
+There are some special considerations to be aware of when using lockd VGs.
+
+When use_lvmlockd is first enabled, and before the first lockd VG is
+created, no global lock will exist. In this initial state, LVM commands
+try and fail to acquire the global lock, producing a warning, and some
+commands are disallowed. Once the first lockd VG is created, the global
+lock will be available, and LVM will be fully operational.
+
+When a new lockd VG is created, its lockspace is automatically started on
+the host that creates it. Other hosts need to run 'vgchange
+\-\-lock\-start' to start the new VG before they can use it.
+
+From the 'vgs' command, lockd VGs are indicated by "s" (for shared) in the
+sixth attr field. The specific lock type and lock args for a lockd VG can
+be displayed with 'vgs \-o+locktype,lockargs'.
+
+lockd VGs need to be "started" and "stopped", unlike other types of VGs.
+See the following section for a full description of starting and stopping.
+
+
+.SS starting and stopping VGs
+
+Starting a lockd VG (vgchange \-\-lock\-start) causes the lock manager to
+start (join) the lockspace for the VG on the host where it is run. This
+makes locks for the VG available to LVM commands on the host. Before a VG
+is started, only LVM commands that read/display the VG without locks are
+allowed.
+
+Stopping a lockd VG (vgchange \-\-lock\-stop) causes the lock manager to
+stop (leave) the lockspace for the VG on the host where it is run. This
+makes locks for the VG inaccessible to the host. A VG cannot be stopped
+while it has active LVs.
+
+When using the lock type sanlock, starting a VG can take a long time
+(potentially minutes if the host was previously shut down without cleanly
+stopping the VG.)
+
+A lockd VG can be started after all the following are true:
+.br
+\[bu]
+lvmlockd is running
+.br
+\[bu]
+the lock manager is running
+.br
+\[bu]
+the VG is visible to the system
+.br
+
+A lockd VG can be stopped if all LVs are deactivated.
+
+All lockd VGs can be started/stopped using:
+.br
+vgchange \-\-lock-start
+.br
+vgchange \-\-lock-stop
+
+
+Individual VGs can be started/stopped using:
+.br
+vgchange \-\-lock\-start <vgname> ...
+.br
+vgchange \-\-lock\-stop <vgname> ...
+
+To make vgchange not wait for start to complete:
+.br
+vgchange \-\-lock\-start \-\-lock\-opt nowait
+.br
+vgchange \-\-lock\-start \-\-lock\-opt nowait <vgname>
+
+To stop all lockspaces and wait for all to complete:
+.br
+lvmlockctl \-\-stop\-lockspaces \-\-wait
+
+To start only selected lockd VGs, use the lvm.conf
+activation/lock_start_list. When defined, only VG names in this list are
+started by vgchange. If the list is not defined (the default), all
+visible lockd VGs are started. To start only "vg1", use the following
+lvm.conf configuration:
+
+.nf
+activation {
+ lock_start_list = [ "vg1" ]
+ ...
+}
+.fi
+
+
+.SS automatic starting and automatic activation
+
+Scripts or programs on a host that automatically start VGs will use the
+"auto" option to indicate that the command is being run automatically by
+the system:
+
+vgchange \-\-lock\-start \-\-lock\-opt auto [<vgname> ...]
+
+Without any additional configuration, including the "auto" option has no
+effect; all VGs are started unless restricted by lock_start_list.
+
+However, when the lvm.conf activation/auto_lock_start_list is defined, the
+auto start command performs an additional filtering phase to all VGs being
+started, testing each VG name against the auto_lock_start_list. The
+auto_lock_start_list defines lockd VGs that will be started by the auto
+start command. Visible lockd VGs not included in the list are ignored by
+the auto start command. If the list is undefined, all VG names pass this
+filter. (The lock_start_list is also still used to filter all VGs.)
+
+The auto_lock_start_list allows a user to select certain lockd VGs that
+should be automatically started by the system (or indirectly, those that
+should not).
+
+To use auto activation of lockd LVs (see auto_activation_volume_list),
+auto starting of the corresponding lockd VGs is necessary.
+
+
+.SS internal command locking
+
+To optimize the use of LVM with lvmlockd, be aware of the three kinds of
+locks and when they are used:
+
+.I GL lock
+
+The global lock (GL lock) is associated with global information, which is
+information not isolated to a single VG. This includes:
+
+\[bu]
+The global VG namespace.
+.br
+\[bu]
+The set of orphan PVs and unused devices.
+.br
+\[bu]
+The properties of orphan PVs, e.g. PV size.
+.br
+
+The global lock is used in shared mode by commands that read this
+information, or in exclusive mode by commands that change it.
+
+The command 'vgs' acquires the global lock in shared mode because it
+reports the list of all VG names.
+
+The vgcreate command acquires the global lock in exclusive mode because it
+creates a new VG name, and it takes a PV from the list of unused PVs.
+
+When an LVM command is given a tag argument, or uses select, it must read
+all VGs to match the tag or selection, which causes the global lock to be
+acquired.
+
+.I VG lock
+
+A VG lock is associated with each VG. The VG lock is acquired in shared
+mode to read the VG and in exclusive mode to change the VG (modify the VG
+metadata). This lock serializes modifications to a VG with all other LVM
+commands accessing the VG from all hosts.
+
+The command 'vgs' will not only acquire the GL lock to read the list of
+all VG names, but will acquire the VG lock for each VG prior to reading
+it.
+
+The command 'vgs <vgname>' does not acquire the GL lock (it does not need
+the list of all VG names), but will acquire the VG lock on each VG name
+argument.
+
+.I LV lock
+
+An LV lock is acquired before the LV is activated, and is released after
+the LV is deactivated. If the LV lock cannot be acquired, the LV is not
+activated. LV locks are persistent and remain in place after the
+activation command is done. GL and VG locks are transient, and are held
+only while an LVM command is running.
+
+.I lock retries
+
+If a request for a GL or VG lock fails due to a lock conflict with another
+host, lvmlockd automatically retries for a short time before returning a
+failure to the LVM command. If those retries are insufficient, the LVM
+command will retry the entire lock request a number of times specified by
+global/lvmlockd_lock_retries before failing. If a request for an LV lock
+fails due to a lock conflict, the command fails immediately.
+
+
+.SS sanlock global lock
+
+There are some special cases related to the global lock in sanlock VGs.
+
+The global lock exists in one of the sanlock VGs. The first sanlock VG
+created will contain the global lock. Subsequent sanlock VGs will each
+contain disabled global locks that can be enabled later if necessary.
+
+The VG containing the global lock must be visible to all hosts using
+sanlock VGs. This can be a reason to create a small sanlock VG, visible
+to all hosts, and dedicated to just holding the global lock. While not
+required, this strategy can help to avoid difficulty in the future if VGs
+are moved or removed.
+
+The vgcreate command typically acquires the global lock, but in the case
+of the first sanlock VG, there will be no global lock to acquire until the
+first vgcreate is complete. So, creating the first sanlock VG is a
+special case that skips the global lock.
+
+vgcreate for a sanlock VG determines it is the first one to exist if no
+other sanlock VGs are visible. It is possible that other sanlock VGs do
+exist but are not visible on the host running vgcreate. In this case,
+vgcreate would create a new sanlock VG with the global lock enabled. When
+the other VG containing a global lock appears, lvmlockd will see more than
+one VG with a global lock enabled, and LVM commands will report that there
+are duplicate global locks.
+
+If the situation arises where more than one sanlock VG contains a global
+lock, the global lock should be manually disabled in all but one of them
+with the command:
+
+lvmlockctl \-\-gl\-disable <vgname>
+
+(The one VG with the global lock enabled must be visible to all hosts.)
+
+An opposite problem can occur if the VG holding the global lock is
+removed. In this case, no global lock will exist following the vgremove,
+and subsequent LVM commands will fail to acquire it. In this case, the
+global lock needs to be manually enabled in one of the remaining sanlock
+VGs with the command:
+
+lvmlockctl \-\-gl\-enable <vgname>
+
+A small sanlock VG dedicated to holding the global lock can avoid the case
+where the GL lock must be manually enabled after a vgremove.
+
+
+.SS sanlock VG usage
+
+There are some special cases related to using a sanlock VG.
+
+vgremove of a sanlock VG will fail if other hosts have the VG started.
+Run vgchange \-\-lock-stop <vgname> on all other hosts before vgremove.
+
+(It may take several seconds before vgremove recognizes that all hosts
+have stopped.)
+
+A sanlock VG contains a hidden LV called "lvmlock" that holds the sanlock
+locks. vgreduce cannot yet remove the PV holding the lvmlock LV.
+
+To place the lvmlock LV on a specific device, create the VG with only that
+device, then use vgextend to add other devices.
+
+
+.SS shared LVs
+
+When an LV is used concurrently from multiple hosts (e.g. by a
+multi\-host/cluster application or file system), the LV can be activated
+on multiple hosts concurrently using a shared lock.
+
+To activate the LV with a shared lock: lvchange \-asy vg/lv.
+
+With lvmlockd, an unspecified activation mode is always exclusive, i.e.
+\-ay defaults to \-aey.
+
+If the LV type does not allow the LV to be used concurrently from multiple
+hosts, then a shared activation lock is not allowed and the lvchange
+command will report an error. LV types that cannot be used concurrently
+from multiple hosts include thin, cache, raid, mirror, and snapshot.
+
+lvextend on LV with shared locks is not yet allowed. The LV must be
+deactivated, or activated exclusively to run lvextend.
+
+
+.SS recover from lost PV holding sanlock locks
+
+A number of special manual steps must be performed to restore sanlock
+locks if the PV holding the locks is lost. Contact the LVM group for
+help with this process.
+
+
+.SS locking system failures
+
+.B lvmlockd failure
+
+If lvmlockd fails or is killed while holding locks, the locks are orphaned
+in the lock manager. lvmlockd can be restarted with an option to adopt
+locks in the lock manager that had been held by the previous instance.
+
+.B dlm/corosync failure
+
+If dlm or corosync fail, the clustering system will fence the host using a
+method configured within the dlm/corosync clustering environment.
+
+LVM commands on other hosts will be blocked from acquiring any locks until
+the dlm/corosync recovery process is complete.
+
+.B sanlock lease storage failure
+
+If a host loses access to the device holding a VG's locks, sanlock cannot
+renew the VG's lockspace lease for those locks. After some time, the
+lease will expire, and locks held by the host can be acquired by other
+hosts.
+
+If no LVs are active in the VG, the lockspace with an expiring lease will
+be shut down, and errors will be reported when trying to use the VG. Use
+the lvmlockctl \-\-drop command to clear the stale lockspace from
+lvmlockd.
+
+If the VG has active LVs, the LVs must be quickly deactivated before the
+lockspace lease expires. After all LVs are deactivated, run lvmlockctl
+\-\-drop <vgname> to clear the expiring lockspace from lvmlockd. If all
+LVs in the VG are not deactivated within about 40 seconds, sanlock will
+reset the host using the local watchdog. The host reset is ultimately a
+severe form of "deactivating" LVs before they can be activated on other
+hosts. The reset is considered a better alternative than having LVs used
+by multiple hosts at once, which could easily damage or destroy their
+content. A future enhancement may automatically attempt to deactivate LVs
+before the lockspace lease expires.
+
+.B sanlock daemon failure
+
+If the sanlock daemon fails or exits while a lockspace is started, the
+local watchdog will reset the host.
+
+
+.SS changing dlm cluster name
+
+When a dlm VG is created, the cluster name is saved in the VG metadata for
+the new VG. To use the VG, a host must be in the named cluster. If the
+cluster name is changed, or the VG is moved to a different cluster, the
+cluster name for the dlm VG must be changed. To do this:
+
+1. Ensure the VG is not being used by any hosts.
+
+2. The new cluster must be active on the node making the change.
+.br
+ The current dlm cluster name can be seen by:
+.br
+ cat /sys/kernel/config/dlm/cluster/cluster_name
+
+3. Change the VG lock type to none:
+.br
+ vgchange \-\-lock\-type none \-\-force <vgname>
+
+4. Change the VG lock type back to dlm which sets the new cluster name:
+.br
+ vgchange \-\-lock\-type dlm <vgname>
+
+
+.SS changing a local VG to a lockd VG
+
+All LVs must be inactive to change the lock type.
+
+lvmlockd must be configured and running as described in USAGE.
+
+Change a local VG to a lockd VG with the command:
+.br
+vgchange \-\-lock\-type sanlock|dlm <vgname>
+
+Start the VG on any hosts that need to use it:
+.br
+vgchange \-\-lock\-start <vgname>
+
+
+.SS changing a clvm VG to a lockd VG
+
+All LVs must be inactive to change the lock type.
+
+First change the clvm VG to a local VG. Within a running clvm cluster,
+change a clvm VG to a local VG with the command:
+
+vgchange \-cn <vgname>
+
+If the clvm cluster is no longer running on any nodes, then extra options
+can be used to forcibly make the VG local. Caution: this is only safe if
+all nodes have stopped using the VG:
+
+vgchange \-\-config 'global/locking_type=0 global/use_lvmlockd=0'
+.RS
+\-cn <vgname>
+.RE
+
+After the VG is local, follow the steps described in "changing a local VG
+to a lockd VG".
+
+
+.SS limitations of lockd VGs
+
+lvmlockd currently requires using lvmetad and lvmpolld.
+
+Things that do not yet work in lockd VGs:
+.br
+\[bu]
+creating a new thin pool and a new thin LV in a single command
+.br
+\[bu]
+using lvcreate to create cache pools or cache LVs (use lvconvert)
+.br
+\[bu]
+using external origins for thin LVs
+.br
+\[bu]
+splitting mirrors and snapshots from LVs
+.br
+\[bu]
+vgsplit
+.br
+\[bu]
+vgmerge
+.br
+\[bu]
+resizing an LV that is active in the shared mode on multiple hosts
+
+
+.SS lvmlockd changes from clvmd
+
+(See above for converting an existing clvm VG to a lockd VG.)
+
+While lvmlockd and clvmd are entirely different systems, LVM command usage
+remains similar. Differences are more notable when using lvmlockd's
+sanlock option.
+
+Visible usage differences between lockd VGs with lvmlockd and clvm VGs
+with clvmd:
+
+.IP \[bu] 2
+lvm.conf must be configured to use either lvmlockd (use_lvmlockd=1) or
+clvmd (locking_type=3), but not both.
+
+.IP \[bu] 2
+vgcreate \-\-shared creates a lockd VG, and vgcreate \-\-clustered y
+creates a clvm VG.
+
+.IP \[bu] 2
+lvmlockd adds the option of using sanlock for locking, avoiding the
+need for network clustering.
+
+.IP \[bu] 2
+lvmlockd defaults to the exclusive activation mode whenever the activation
+mode is unspecified, i.e. \-ay means \-aey, not \-asy.
+
+.IP \[bu] 2
+lvmlockd commands always apply to the local host, and never have an effect
+on a remote host. (The activation option 'l' is not used.)
+
+.IP \[bu] 2
+lvmlockd works with thin and cache pools and LVs.
+
+.IP \[bu] 2
+lvmlockd saves the cluster name for a lockd VG using dlm. Only hosts in
+the matching cluster can use the VG.
+
+.IP \[bu] 2
+lvmlockd requires starting/stopping lockd VGs with vgchange \-\-lock-start
+and \-\-lock-stop.
+
+.IP \[bu] 2
+vgremove of a sanlock VG may fail indicating that all hosts have not
+stopped the VG lockspace. Stop the VG on all hosts using vgchange
+\-\-lock-stop.
+
+.IP \[bu] 2
+vgreduce of a PV in a sanlock VG may fail if it holds the internal
+"lvmlock" LV that holds the sanlock locks.
+
+.IP \[bu] 2
+lvmlockd uses lock retries instead of lock queueing, so high lock
+contention may require increasing global/lvmlockd_lock_retries to
+avoid transient lock failures.
+
+.IP \[bu] 2
+lvmlockd includes VG reporting options lock_type and lock_args, and LV
+reporting option lock_args to view the corresponding metadata fields.
+
+.IP \[bu] 2
+In the 'vgs' command's sixth VG attr field, "s" for "shared" is displayed
+for lockd VGs.
+
+.IP \[bu] 2
+If lvmlockd fails or is killed while in use, locks it held remain but are
+orphaned in the lock manager. lvmlockd can be restarted with an option to
+adopt the orphan locks from the previous instance of lvmlockd.
+
+.P
diff --git a/man/lvmpolld.8.in b/man/lvmpolld.8.in
new file mode 100644
index 000000000..5906f4299
--- /dev/null
+++ b/man/lvmpolld.8.in
@@ -0,0 +1,90 @@
+.TH LVMPOLLD 8 "LVM TOOLS #VERSION#" "Red Hat Inc" \" -*- nroff -*-
+.SH NAME
+lvmpolld \(em LVM poll daemon
+.SH SYNOPSIS
+.B lvmpolld
+.RB [ \-l | \-\-log
+.RI { all | wire | debug }]
+.RB [ \-p | \-\-pidfile
+.IR pidfile_path ]
+.RB [ \-s | \-\-socket
+.IR socket_path ]
+.RB [ \-B | \-\-binary
+.IR lvm_binary_path ]
+.RB [ \-t | \-\-timeout
+.IR timeout_value ]
+.RB [ \-f | \-\-foreground ]
+.RB [ \-h | \-\-help ]
+.RB [ \-V | \-\-version ]
+
+.B lvmpolld
+.RB [ \-\-dump ]
+.SH DESCRIPTION
+lvmpolld is a polling daemon for LVM. The daemon receives requests for polling
+of already initialised operations originating in an LVM2 command line tool.
+The requests for polling originate in the \fBlvconvert\fP, \fBpvmove\fP,
+\fBlvchange\fP or \fBvgchange\fP LVM2 commands.
+
+The purpose of lvmpolld is to reduce the number of spawned background processes
+per otherwise unique polling operation. There should be only one. It also
+eliminates the possibility of unsolicited termination of background process by
+external factors.
+
+lvmpolld is used by LVM only if it is enabled in \fBlvm.conf\fP(5) by
+specifying the \fBglobal/use_lvmpolld\fP setting. If this is not defined in the
+LVM configuration explicitly then default setting is used instead (see the
+output of \fBlvmconfig \-\-type default global/use_lvmpolld\fP command).
+.SH OPTIONS
+
+To run the daemon in a test environment both the pidfile_path and the
+socket_path should be changed from the defaults.
+.TP
+.BR \-f ", " \-\-foreground
+Don't fork, but run in the foreground.
+.TP
+.BR \-h ", " \-\-help
+Show help information.
+.TP
+.IR \fB\-l\fP ", " \fB\-\-log\fP " {" all | wire | debug }
+Select the type of log messages to generate.
+Messages are logged by syslog.
+Additionally, when \-f is given they are also sent to standard error.
+There are two classes of messages: wire and debug. Selecting 'all' supplies both
+and is equivalent to a comma-separated list \-l wire,debug.
+.TP
+.BR \-p ", " \-\-pidfile \fIpidfile_path
+Path to the pidfile. This overrides both the built-in default
+(#DEFAULT_PID_DIR#/lvmpolld.pid) and the environment variable
+\fBLVM_LVMPOLLD_PIDFILE\fP. This file is used to prevent more
+than one instance of the daemon running simultaneously.
+.TP
+.BR \-s ", " \-\-socket " " \fIsocket_path
+Path to the socket file. This overrides both the built-in default
+(#DEFAULT_RUN_DIR#/lvmpolld.socket) and the environment variable
+\fBLVM_LVMPOLLD_SOCKET\fP.
+.TP
+.BR \-t ", " \-\-timeout " " \fItimeout_value
+The daemon may shut down after being idle for the given time (in seconds). When the
+option is omitted or the value given is zero the daemon never shuts down when idle.
+.TP
+.BR \-B ", " \-\-binary " " \fIlvm_binary_path
+Optional path to alternative LVM binary (default: #LVM_PATH#). Use for
+testing purposes only.
+.TP
+.BR \-V ", " \-\-version
+Display the version of lvmpolld daemon.
+.TP
+.B \-\-dump
+Contact the running lvmpolld daemon to obtain the complete state and print it
+out in a raw format.
+.SH ENVIRONMENT VARIABLES
+.TP
+.B LVM_LVMPOLLD_PIDFILE
+Path for the pid file.
+.TP
+.B LVM_LVMPOLLD_SOCKET
+Path for the socket file.
+
+.SH SEE ALSO
+.BR lvm (8),
+.BR lvm.conf (5)
diff --git a/man/lvmsystemid.7.in b/man/lvmsystemid.7.in
new file mode 100644
index 000000000..37a01aff2
--- /dev/null
+++ b/man/lvmsystemid.7.in
@@ -0,0 +1,352 @@
+.TH "LVMSYSTEMID" "7" "LVM TOOLS #VERSION#" "Red Hat, Inc" "\""
+
+.SH NAME
+lvmsystemid \(em LVM system ID
+
+.SH DESCRIPTION
+
+Local VGs may exist on shared storage where they are visible to multiple
+hosts. These VGs are intended to be used by only a single machine, even
+though they are visible to many. A system_id identifying a single host
+can be assigned to a VG to indicate the VGs owner. The VG owner can use
+the VG as usual, and all other hosts will ignore it. This protects the VG
+from accidental use by other hosts.
+
+The system_id is not a dynamic property, and can only be changed in very
+limited circumstances (see vgexport and vgimport). Even limited changes
+to the VG system_id are not perfectly reflected across hosts. A more
+coherent view of shared storage requires using an inter-host locking
+system to coordinate access and update caches.
+
+The system_id is a string uniquely identifying a host. It can be manually
+set to a custom value or it can be assigned automatically by lvm using a
+unique identifier already available on the host, e.g. machine-id or uname.
+
+In vgcreate, the local system_id is saved in the new VG metadata. The
+local host owns the new VG, and other hosts cannot use it.
+
+A VG without a system_id can be used by any host, and a VG with a
+system_id can only be used by a host with a matching system_id. A
+.B foreign VG
+is a VG with a system_id as viewed by a host with a system_id
+that does not match the VGs system_id. (Or from a host without a
+system_id.)
+
+Valid system_id characters are the same as valid VG name characters. If a
+system_id contains invalid characters, those characters are omitted and
+remaining characters are used. If a system_id is longer than the maximum
+name length, the characters up to the maximum length are used. The
+maximum length of a system_id is 128 characters.
+
+.SS Limitations and warnings
+
+To benefit fully from system_id, all hosts must have system_id set, and
+VGs must have system_id set. A VG on shared storage can be damaged or
+destroyed in some cases which the user must be careful to avoid.
+
+.IP \[bu] 2
+A VG without a system_id can be used without restriction from any host,
+even from hosts that have a system_id. Many VGs will not have a system_id
+and are unprotected. Verify that a VG has a system_id by running the
+command 'vgs -o+systemid'
+
+A VG will not have a system_id if it was created before this feature was
+added to lvm, or if it was created by a host that did not have a system_id
+defined. A system_id can be assigned to these VGs by using vgchange
+--systemid (see below).
+
+.IP \[bu] 2
+Two hosts should not be assigned the same system_id. Doing so defeats
+the purpose of the system_id which is to distinguish different hosts.
+
+.IP \[bu] 2
+Orphan PVs (or unused devices) on shared storage are completely
+unprotected by the system_id feature. Commands that use these PVs, such
+as vgcreate or vgextend, are not prevented from performing conflicting
+operations and corrupting the PVs. See the
+.B orphans
+section for more information.
+
+.IP \[bu] 2
+A host using an old version of lvm without the system_id feature will not
+recognize a new system_id in VGs from other hosts. Even though the old
+version of lvm is not blocked from reading a VG with a system_id, it is
+blocked from writing to the VG (or its LVs). The new system_id changes
+the write mode of a VG, making it appear read-only to previous lvm
+versions.
+
+This also means that if a host downgrades its version of lvm, it would
+lose access to any VGs it had created with a system_id. To avoid this,
+the system_id should be removed from VGs before downgrading to an lvm
+version without the system_id feature.
+
+.P
+
+.SS Types of VG access
+
+A local VG is meant to be used by a single host.
+.br
+A shared or clustered VG is meant to be used by multiple hosts.
+.br
+These can be further distinguished as:
+
+.B Unrestricted:
+A local VG that has no system_id. This VG type is unprotected and
+accessible to any host.
+
+.B Owned:
+A local VG that has a system_id set, as viewed from the one host with a
+matching system_id (the owner). This VG type is by definition accessible.
+
+.B Foreign:
+A local VG that has a system_id set, as viewed from any host with an
+unmatching system_id (or no system_id). It is owned by another host.
+This VG type is by definition not accessible.
+
+.B Exported:
+A local VG that has been exported with vgexport and has no system_id.
+This VG type can only be accessed by vgimport which will change it to
+owned.
+
+.B Shared:
+A shared or "lockd" VG has lock_type set and no system_id.
+A shared VG is meant to be used on shared storage from multiple hosts,
+and is only accessible to hosts using lvmlockd.
+
+.B Clustered:
+A clustered or "clvm" VG has the clustered flag set and no system_id.
+A clustered VG is meant to be used on shared storage from multiple hosts,
+and is only accessible to hosts using clvmd.
+
+.SS system_id_source
+
+A host's own system_id can be defined in a number of ways. lvm.conf
+global/system_id_source defines the method lvm will use to find the local
+system_id:
+
+.TP
+.B none
+.br
+
+lvm will not use a system_id. lvm is allowed to access VGs without a
+system_id, and will create new VGs without a system_id. An undefined
+system_id_source is equivalent to none.
+
+.I lvm.conf
+.nf
+global {
+ system_id_source = "none"
+}
+.fi
+
+.TP
+.B machineid
+.br
+
+The content of /etc/machine-id is used as the system_id if available.
+See
+.BR machine-id (5)
+and
+.BR systemd-machine-id-setup (1)
+to check if machine-id is available on the host.
+
+.I lvm.conf
+.nf
+global {
+ system_id_source = "machineid"
+}
+.fi
+
+.TP
+.B uname
+.br
+
+The string utsname.nodename from
+.BR uname (2)
+is used as the system_id. A uname beginning with "localhost"
+is ignored and equivalent to none.
+
+.I lvm.conf
+.nf
+global {
+ system_id_source = "uname"
+}
+.fi
+
+.TP
+.B lvmlocal
+.br
+
+The system_id is defined in lvmlocal.conf local/system_id.
+
+.I lvm.conf
+.nf
+global {
+ system_id_source = "lvmlocal"
+}
+.fi
+
+.I lvmlocal.conf
+.nf
+local {
+ system_id = "example_name"
+}
+.fi
+
+.TP
+.B file
+.br
+
+The system_id is defined in a file specified by lvm.conf
+global/system_id_file.
+
+.I lvm.conf
+.nf
+global {
+ system_id_source = "file"
+ system_id_file = "/path/to/file"
+}
+.fi
+
+.LP
+
+Changing system_id_source will often cause the system_id to change, which
+may prevent the host from using VGs that it previously used (see
+extra_system_ids below to handle this.)
+
+If a system_id_source other than none fails to resolve a system_id, the
+host will be allowed to access VGs with no system_id, but will not be
+allowed to access VGs with a defined system_id.
+
+.SS extra_system_ids
+
+In some cases, it may be useful for a host to access VGs with different
+system_id's, e.g. if a host's system_id changes, and it wants to use VGs
+that it created with its old system_id. To allow a host to access VGs
+with other system_id's, those other system_id's can be listed in
+lvmlocal.conf local/extra_system_ids.
+
+.I lvmlocal.conf
+.nf
+local {
+ extra_system_ids = [ "my_other_name" ]
+}
+.fi
+
+.SS vgcreate
+
+In vgcreate, the host running the command assigns its own system_id to the
+new VG. To override this and set another system_id:
+
+.B vgcreate --systemid
+.I SystemID VG Devices
+
+Overriding the system_id makes it possible for a host to create a VG that
+it may not be able to use. Another host with a system_id matching the one
+specified may not recognize the new VG without manually rescanning
+devices.
+
+If the --systemid argument is an empty string (""), the VG is created with
+no system_id, making it accessible to other hosts (see warnings above.)
+
+.SS report/display
+
+The system_id of a VG is displayed with the "systemid" reporting option.
+
+Report/display commands ignore foreign VGs by default. To report foreign
+VGs, the --foreign option can be used. This causes the VGs to be read
+from disk. Because lvmetad caching is not used, this option can cause
+poor performance.
+
+.B vgs --foreign -o+systemid
+
+When a host with no system_id sees foreign VGs, it warns about them as
+they are skipped. The host should be assigned a system_id, after which
+standard reporting commands will silently ignore foreign VGs.
+
+.SS vgexport/vgimport
+
+vgexport clears the system_id.
+
+Other hosts will continue to see a newly exported VG as foreign because of
+local caching (when lvmetad is used). Manually updating the local lvmetad
+cache with pvscan --cache will allow a host to recognize the newly
+exported VG.
+
+vgimport sets the VG system_id to the local system_id as determined by
+lvm.conf system_id_source. vgimport automatically scans storage for
+newly exported VGs.
+
+After vgimport, the exporting host will continue to see the VG as
+exported, and not owned by the new host. Manually updating the local
+cache with pvscan --cache will allow a host to recognize the newly
+imported VG as foreign.
+
+.SS vgchange
+
+A host can change the system_id of its own VGs, but the command requires
+confirmation because the host may lose access to the VG being changed:
+
+.B vgchange --systemid
+.I SystemID VG
+
+The system_id can be removed from a VG by specifying an empty string ("")
+as the new system_id. This makes the VG accessible to other hosts (see
+warnings above.)
+
+A host cannot directly change the system_id of a foreign VG.
+
+To move a VG from one host to another, vgexport and vgimport should be
+used.
+
+To forcibly gain ownership of a foreign VG, a host can add the foreign
+system_id to its extra_system_ids list, change the system_id of the foreign
+VG to its own, and remove the foreign system_id from its extra_system_ids
+list.
+
+.SS shared VGs
+
+A shared/lockd VG has no system_id set, allowing multiple hosts to
+use it via lvmlockd. Changing a VG to a lockd type will clear the
+existing system_id.
+
+.SS clustered VGs
+
+A clustered/clvm VG has no system_id set, allowing multiple hosts to
+use it via clvmd. Changing a VG to clustered will clear the existing
+system_id. Changing a VG to not clustered will set the system_id to the
+host running the vgchange command.
+
+.SS creation_host
+
+In vgcreate, the VG metadata field creation_host is set by default to the
+host's uname. The creation_host cannot be changed, and is not used to
+control access. When system_id_source is "uname", the system_id and
+creation_host will be the same.
+
+.SS orphans
+
+Orphan PVs are unused devices; they are not currently used in any VG.
+Because of this, they are not protected by a system_id, and any host can
+use them. Coordination of changes to orphan PVs is beyond the scope of
+system_id. The same is true of any block device that is not a PV.
+
+The effects of this are especially evident when lvm uses lvmetad caching.
+For example, if multiple hosts see an orphan PV, and one host creates a VG
+using the orphan, the other hosts will continue to report the PV as an
+orphan. Nothing would automatically prevent the other hosts from using
+the newly allocated PV and corrupting it. If the other hosts run a
+command to rescan devices, and update lvmetad, they would then recognize
+the PV has been used by another host. A command that rescans devices
+could be pvscan --cache, or vgs --foreign.
+
+.SH SEE ALSO
+.BR vgcreate (8),
+.BR vgchange (8),
+.BR vgimport (8),
+.BR vgexport (8),
+.BR lvm.conf (5),
+.BR machine-id (5),
+.BR uname (2),
+.BR vgs (8)
+
diff --git a/man/lvmthin.7.in b/man/lvmthin.7.in
index 84a313853..eb791f089 100644
--- a/man/lvmthin.7.in
+++ b/man/lvmthin.7.in
@@ -254,6 +254,8 @@ or vgchange to activate thin snapshots with the "k" attribute.
.br
.B Metadata space exhaustion
.br
+.B Automatic extend settings
+.br
.B Zeroing
.br
.B Discard
@@ -398,10 +400,10 @@ explicitly.
# lvconvert \-\-type thin\-pool \-\-poolmetadata vg/pool0meta vg/pool0
# lvs \-a
- [lvol0_pmspare] vg ewi------- 10.00g
- pool0 vg twi---tz-- 10.00g
- [pool0_tdata] vg Twi------- 10.00g
- [pool0_tmeta] vg ewi------- 1.00g
+ [lvol0_pmspare] vg ewi-------
+ pool0 vg twi---tz--
+ [pool0_tdata] vg Twi-------
+ [pool0_tmeta] vg ewi-------
.fi
The "Metadata check and repair" section describes the use of
@@ -431,8 +433,13 @@ This is not recommended.
controls the command options used for the thin_check command.
If the thin_check command finds a problem with the metadata,
-the thin pool LV is not activated, and the thin pool metadata should
-be repaired.
+the thin pool LV is not activated, and the thin pool metadata needs
+to be repaired.
+
+Simple repair commands are not always successful. Advanced repair may
+require editing thin pool metadata and lvm metadata. Newer versions of
+the kernel and lvm tools may be more successful at repair. Report the
+details of damaged thin metadata to get the best advice on recovery.
Command to repair a thin pool:
.br
@@ -673,35 +680,67 @@ space in a thin pool.
\&
-An lvm daemon (dmeventd) will by default monitor the data usage of
-thin pool LVs and extend them when the usage reaches a certain level.
-The necessary free space must exist in the VG to extend the thin pool
-LVs.
+The lvm daemon dmeventd (lvm2-monitor) monitors the data usage of thin
+pool LVs and extends them when the usage reaches a certain level. The
+necessary free space must exist in the VG to extend thin pool LVs.
+Monitoring and extension of thin pool LVs are controlled independently.
+
+.I monitoring
-Command to enable or disable the monitoring and automatic extension
-of an existing thin pool LV:
+When a thin pool LV is activated, dmeventd will begin monitoring it by
+default.
+Command to start or stop dmeventd monitoring a thin pool LV:
+.br
.B lvchange \-\-monitor {y|n} VG/ThinPoolLV
+The current dmeventd monitoring status of a thin pool LV can be displayed
+with the command lvs -o+seg_monitor.
+
+.I autoextend
+
+dmeventd should be configured to extend thin pool LVs before all data
+space is used. Warnings are emitted through syslog when the use of a thin
+pool reaches 80%, 85%, 90% and 95%. (See the section "Data space
+exhaustion" for the effects of not extending a thin pool LV.) The point
+at which dmeventd extends thin pool LVs, and the amount are controlled
+with two configuration settings:
+
.BR lvm.conf (5)
.B thin_pool_autoextend_threshold
.br
+is a percentage full value that defines when the thin pool LV should be
+extended. Setting this to 100 disables automatic extension. The minimum
+value is 50.
+
.BR lvm.conf (5)
.B thin_pool_autoextend_percent
.br
-control the default autoextend behavior.
+defines how much extra data space should be added to the thin pool LV from
+the VG, in percent of its current size.
+
+.I disabling
+
+There are multiple ways that extension of thin pools could be prevented:
+
+.IP \[bu] 2
+If the dmeventd daemon is not running, no monitoring or automatic
+extension will occur.
-thin_pool_autoextend_threshold
-is a percentage value that defines when
-the thin pool LV should be extended. Setting this to 100 disables
-automatic extention. The minimum value is 50.
+.IP \[bu]
+Even when dmeventd is running, all monitoring can be disabled with the
+lvm.conf monitoring setting.
-thin_pool_autoextend_percent
-defines how much extra data space should
-be added to the thin pool, in percent of its current size.
+.IP \[bu]
+To activate or create a thin pool LV without interacting with dmeventd,
+the --ignoremonitoring option can be used. With this option, the command
+will not ask dmeventd to monitor the thin pool LV.
-Warnings are emitted through syslog when the use of a pool reaches 80%,
-85%, 90% and 95%.
+.IP \[bu]
+Setting thin_pool_autoextend_threshold to 100 disables automatic
+extension of thin pool LVs, even if they are being monitored by dmeventd.
+
+.P
.I Example
.br
@@ -715,8 +754,63 @@ For a 1G pool, using 700M will trigger a resize to 1.2G. When the usage exceeds
\&
-If thin pool data space is exhausted, writes to thin LVs will be queued
-until the the data space is extended. Reading is still possible.
+When properly managed, thin pool data space should be extended before it
+is all used (see the section "Automatically extend thin pool LV"). If
+thin pool data space is already exhausted, it can still be extended (see
+the section "Manually manage free data space of thin pool LV".)
+
+The behavior of a full thin pool is configurable with the --errorwhenfull
+y|n option to lvcreate or lvchange. The errorwhenfull setting applies
+only to writes; reading thin LVs can continue even when data space is
+exhausted.
+
+Command to change the handling of a full thin pool:
+.br
+.B lvchange --errorwhenfull {y|n} VG/ThinPoolLV
+
+.BR lvm.conf (5)
+.B error_when_full
+.br
+controls the default error when full behavior.
+
+The current setting of a thin pool LV can be displayed with the command:
+lvs -o+lv_when_full.
+
+The errorwhenfull setting does not affect the monitoring and autoextend
+settings, and the monitoring/autoextend settings do not affect the
+errorwhenfull setting. It is only when monitoring/autoextend are not
+effective that the thin pool becomes full and the errorwhenfull setting is
+applied.
+
+.I errorwhenfull n
+
+This is the default. Writes to thin LVs are accepted and queued, with the
+expectation that pool data space will be extended soon. Once data space
+is extended, the queued writes will be processed, and the thin pool will
+return to normal operation.
+
+While waiting to be extended, the thin pool will queue writes for up to 60
+seconds (the default). If data space has not been extended after this
+time, the queued writes will return an error to the caller, e.g. the file
+system. This can result in file system corruption for non-journaled file
+systems that may require fsck. When a thin pool returns errors for writes
+to a thin LV, any file system is subject to losing unsynced user data.
+
+The 60 second timeout can be changed or disabled with the dm\-thin\-pool
+kernel module option
+.B no_space_timeout.
+This option sets the number of seconds that thin pools will queue writes.
+If set to 0, writes will not time out. Disabling timeouts can result in
+the system running out of resources, memory exhaustion, hung tasks, and
+deadlocks. (The timeout applies to all thin pools on the system.)
+
+.I errorwhenfull y
+
+Writes to thin LVs immediately return an error, and no writes are queued.
+In the case of a file system, this can result in corruption that may
+require fsck (the specific consequences depend on the thin LV user.)
+
+.I data percent
When data space is exhausted, the lvs command displays 100 under Data% for
the thin pool LV:
@@ -727,25 +821,28 @@ the thin pool LV:
pool0 vg twi-a-tz-- 512.00m 100.00
.fi
-A thin pool can run out of data blocks for any of the following reasons:
+.I causes
-1. Automatic extension of the thin pool is disabled, and the thin pool is
-not manually extended. (Disabling automatic extension is not
-recommended.)
+A thin pool may run out of data space for any of the following reasons:
-2. The dmeventd daemon is not running and the thin pool is not manually
-extended. (Disabling dmeventd is not recommended.)
+.IP \[bu] 2
+Automatic extension of the thin pool is disabled, and the thin pool is not
+manually extended. (Disabling automatic extension is not recommended.)
-3. Automatic extension of the thin pool is too slow given the rate of
-writes to thin LVs in the pool. (This can be addressed by tuning the
-thin_pool_autoextend_threshold and thin_pool_autoextend_percent.)
+.IP \[bu]
+The dmeventd daemon is not running and the thin pool is not manually
+extended. (Disabling dmeventd is not recommended.)
-4. The VG does not have enough free blocks to extend the thin pool.
+.IP \[bu]
+Automatic extension of the thin pool is too slow given the rate of writes
+to thin LVs in the pool. (This can be addressed by tuning the
+thin_pool_autoextend_threshold and thin_pool_autoextend_percent.
+See "Automatic extend settings".)
-The response to data space exhaustion is to extend the thin pool. This is
-described in the section "Manually manage free data space of thin pool
-LV".
+.IP \[bu]
+The VG does not have enough free blocks to extend the thin pool.
+.P
.SS Metadata space exhaustion
@@ -783,6 +880,117 @@ repair.
4. Check and repair file system with fsck.
+.SS Automatic extend settings
+
+\&
+
+Thin pool LVs can be extended according to preset values. The presets
+determine if the LV should be extended based on how full it is, and if so
+by how much. When dmeventd monitors thin pool LVs, it uses lvextend with
+these presets. (See "Automatically extend thin pool LV".)
+
+Command to extend a thin pool data LV using presets:
+.br
+.B lvextend \-\-use\-policies VG/ThinPoolLV
+
+The command uses these settings:
+
+.BR lvm.conf (5)
+.B thin_pool_autoextend_threshold
+.br
+autoextend the LV when its usage exceeds this percent.
+
+.BR lvm.conf (5)
+.B thin_pool_autoextend_percent
+.br
+autoextend the LV by this much additional space.
+
+To see the default values of these settings, run:
+
+.B lvmconfig \-\-type default \-\-withcomment
+.RS
+.B activation/thin_pool_autoextend_threshold
+.RE
+
+.B lvmconfig \-\-type default \-\-withcomment
+.RS
+.B activation/thin_pool_autoextend_percent
+.RE
+
+To change these values globally, edit
+.BR lvm.conf (5).
+
+To change these values on a per-VG or per-LV basis, attach a "profile" to
+the VG or LV. A profile is a collection of config settings, saved in a
+local text file (using the lvm.conf format). lvm looks for profiles in
+the profile_dir directory, e.g. /etc/lvm/profile/. Once attached to a VG
+or LV, lvm will process the VG or LV using the settings from the attached
+profile. A profile is named and referenced by its file name.
+
+To use a profile to customize the lvextend settings for an LV:
+
+.IP \[bu] 2
+Create a file containing settings, saved in profile_dir.
+For the profile_dir location, run:
+.br
+.B lvmconfig config/profile_dir
+
+.IP \[bu] 2
+Attach the profile to an LV, using the command:
+.br
+.B lvchange \-\-metadataprofile ProfileName VG/ThinPoolLV
+
+.IP \[bu] 2
+Extend the LV using the profile settings:
+.br
+.B lvextend \-\-use\-policies VG/ThinPoolLV
+
+.P
+
+.I Example
+.br
+.nf
+# lvmconfig config/profile_dir
+profile_dir="/etc/lvm/profile"
+
+# cat /etc/lvm/profile/pool0extend.profile
+activation {
+ thin_pool_autoextend_threshold=50
+ thin_pool_autoextend_percent=10
+}
+
+# lvchange \-\-metadataprofile pool0extend vg/pool0
+
+# lvextend \-\-use\-policies vg/pool0
+.fi
+
+.I Notes
+.IP \[bu] 2
+A profile is attached to a VG or LV by name, where the name references a
+local file in profile_dir. If the VG is moved to another machine, the
+file with the profile also needs to be moved.
+
+.IP \[bu] 2
+Only certain settings can be used in a VG or LV profile, see:
+.br
+.B lvmconfig \-\-type profilable\-metadata.
+
+.IP \[bu] 2
+An LV without a profile of its own will inherit the VG profile.
+
+.IP \[bu] 2
+Remove a profile from an LV using the command:
+.br
+.B lvchange \-\-detachprofile VG/ThinPoolLV.
+
+.IP \[bu] 2
+Commands can also have profiles applied to them. The settings that can be
+applied to a command are different than the settings that can be applied
+to a VG or LV. See lvmconfig \-\-type profilable\-command. To apply a
+profile to a command, write a profile, save it in the profile directory,
+and run the command using the option: \-\-commandprofile ProfileName.
+
+
.SS Zeroing
\&
@@ -868,14 +1076,13 @@ controls the default discards mode used when creating a thin pool.
\&
-The size of data blocks managed by a thin pool can be specified with
-the \-\-chunksize option when the thin pool LV is created. The default
-unit is kilobytes and the default value is 64KiB. The value must be a
-power of two between 4KiB and 1GiB.
+The size of data blocks managed by a thin pool can be specified with the
+\-\-chunksize option when the thin pool LV is created. The default unit
+is KiB. The value must be a multiple of 64KiB between 64KiB and 1GiB.
-When a thin pool is used primarily for the thin provisioning feature,
-a larger value is optimal. To optimize for a lot of snapshotting,
-a smaller value reduces copying time and consumes less space.
+When a thin pool is used primarily for the thin provisioning feature, a
+larger value is optimal. To optimize for many snapshots, a smaller value
+reduces copying time and consumes less space.
Command to display the thin pool LV chunk size:
.br
@@ -893,25 +1100,32 @@ Command to display the thin pool LV chunk size:
.br
controls the default chunk size used when creating a thin pool.
+The default value is shown by:
+.br
+.B lvmconfig \-\-type default allocation/thin_pool_chunk_size
+
.SS Size of pool metadata LV
\&
-The amount of thin metadata depends on how many blocks are shared
-between thin LVs (i.e. through snapshots). A thin pool with many
-snapshots may need a larger metadata LV.
+The amount of thin metadata depends on how many blocks are shared between
+thin LVs (i.e. through snapshots). A thin pool with many snapshots may
+need a larger metadata LV. Thin pool metadata LV sizes can be from 2MiB
+to 16GiB.
-The range of supported metadata LV sizes is 2MiB to 16GiB.
-.br
-The default size is estimated with the formula:
-.br
-ThinPoolLVSize / ThinPoolLVChunkSize * 64b.
+When using lvcreate to create what will become a thin metadata LV, the
+size is specified with the \-L|\-\-size option.
-When creating a thin metadata LV explicitly, the size is specified
-in the lvcreate command. When a command automatically creates a
-thin metadata LV, the \-\-poolmetadatasize option can be used specify
-a non-default size. The default unit is megabytes.
+When an LVM command automatically creates a thin metadata LV, the size is
+specified with the \-\-poolmetadatasize option. When this option is not
+given, LVM automatically chooses a size based on the data size and chunk
+size.
+
+It can be hard to predict the amount of metadata space that will be
+needed, so it is recommended to start with a size of 1GiB which should be
+enough for all practical purposes. A thin pool metadata LV can later be
+manually or automatically extended if needed.
.SS Create a thin snapshot of an external, read only LV
@@ -1148,10 +1362,10 @@ skipped while mounting readonly:
mount /dev/VG/SnapLV /mnt \-o ro,nouuid,norecovery
-
.SH SEE ALSO
.BR lvm (8),
.BR lvm.conf (5),
+.BR lvmconfig (8),
.BR lvcreate (8),
.BR lvconvert (8),
.BR lvchange (8),
diff --git a/man/lvremove.8.in b/man/lvremove.8.in
index 8426cac5a..8cf1bf348 100644
--- a/man/lvremove.8.in
+++ b/man/lvremove.8.in
@@ -9,12 +9,13 @@ lvremove \(em remove a logical volume
.IR ProfileName ]
.RB [ \-d | \-\-debug ]
.RB [ \-h | \-\-help ]
+.RB [ \-S | \-\-select
+.IR Selection ]
.RB [ \-t | \-\-test ]
.RB [ \-v | \-\-verbose ]
.RB [ \-\-version ]
.RB [ \-f | \-\-force ]
.RB [ \-\-noudevsync ]
-.IR LogicalVolume { Name | Path }
.RI [ LogicalVolume { Name | Path }...]
.SH DESCRIPTION
lvremove removes one or more logical volumes.
diff --git a/man/lvs.8.in b/man/lvs.8.in
index f2685848b..3cb2ec0eb 100644
--- a/man/lvs.8.in
+++ b/man/lvs.8.in
@@ -134,7 +134,6 @@ stripe_size,
sync_percent,
thin_count,
transaction_id,
-writebehind,
zero.
.IP
With \fB\-\-segments\fP, any "seg_" prefixes are optional;
diff --git a/man/pvchange.8.in b/man/pvchange.8.in
index 606b1dd40..5c5b809c5 100644
--- a/man/pvchange.8.in
+++ b/man/pvchange.8.in
@@ -16,6 +16,8 @@ pvchange \(em change attributes of a physical volume
.RB [ \-\-metadataignore
.RI { y | n }]
.RB [ \-h | \-? | \-\-help ]
+.RB [ \-S | \-\-select
+.IR Selection ]
.RB [ \-t | \-\-test ]
.RB [ \-v | \-\-verbose ]
.RB [ \-a | \-\-all ]
diff --git a/man/pvdisplay.8.in b/man/pvdisplay.8.in
index 70c9bfc7a..c1232d671 100644
--- a/man/pvdisplay.8.in
+++ b/man/pvdisplay.8.in
@@ -13,13 +13,14 @@ pvdisplay \- display attributes of a physical volume
.RB [ \-\-maps ]
.RB [ \-\-nosuffix ]
.RB [ \-s | \-\-short ]
+.RB [ \-S | \-\-select
+.IR Selection ]
.RB [ \-\-units
.IR hsbkmgtHKMGT ]
.RB [ \-v [ v ]| \-\-verbose
.RB [ \-\-verbose ]]
.RB [ \-\-version ]
-.RI [ PhysicalVolumePath
-.RI [ PhysicalVolumePath ...]]
+.RI [ PhysicalVolumePath ...]
.br
.br
@@ -50,8 +51,7 @@ pvdisplay \- display attributes of a physical volume
.RB [ \-v [ v ]| \-\-verbose
.RB [ \-\-verbose ]]
.RB [ \-\-version ]
-.RI [ PhysicalVolumePath
-.RI [ PhysicalVolumePath ...]]
+.RI [ PhysicalVolumePath ...]
.SH DESCRIPTION
pvdisplay allows you to see the attributes of one or more physical volumes
like size, physical extent size, space used for the volume group descriptor
diff --git a/man/pvscan.8.in b/man/pvscan.8.in
index c1187ac8c..010c091c0 100644
--- a/man/pvscan.8.in
+++ b/man/pvscan.8.in
@@ -1,6 +1,7 @@
.TH PVSCAN 8 "LVM TOOLS #VERSION#" "Sistina Software UK" \" -*- nroff -*-
.SH NAME
pvscan \(em scan all disks for physical volumes
+
.SH SYNOPSIS
.B pvscan
.RB [ \-\-commandprofile
@@ -30,9 +31,118 @@ pvscan \(em scan all disks for physical volumes
.IR DevicePath
|
.IR major:minor ]...
+
.SH DESCRIPTION
-pvscan scans all supported LVM block devices in the system for
-physical volumes.
+pvscan scans all supported LVM block devices in the system for physical
+volumes.
+
+.SS Scanning with lvmetad
+
+pvscan operates differently when used with the
+.BR lvmetad (8)
+daemon.
+
+Scanning disks is required to read LVM metadata and identify LVM PVs.
+Once read, lvmetad caches the metadata so that LVM commands can read it
+without repeatedly scanning disks. This is helpful because scanning disks
+is time consuming, and frequent scanning may interfere with the normal
+work of the system and disks.
+
+When lvmetad is not used, LVM commands revert to scanning disks to read
+metadata. Any LVM command that needs metadata will scan disks for it;
+running the pvscan command is not necessary for the sake of other LVM
+commands.
+
+When lvmetad is used, LVM commands avoid scanning disks by reading
+metadata from lvmetad. When new disks appear, they must be scanned so
+their metadata can be cached in lvmetad. This is done by the command
+pvscan \-\-cache, which scans disks and passes the metadata to lvmetad.
+
+The pvscan \-\-cache command is typically run automatically by system
+services when a new device appears. Users do not generally need to run
+this command if the system and lvmetad are running properly.
+
+Many scripts contain unnecessary pvscan (or vgscan) commands for
+historical reasons. To avoid disrupting the system with extraneous disk
+scanning, an ordinary pvscan (without \-\-cache) will simply read metadata
+from lvmetad like other LVM commands. It does not do anything beyond
+displaying the current state of the cache.
+
+.I Notes
+
+.IP \[bu] 2
+When given specific device name arguments, pvscan \-\-cache will only
+read the named devices.
+
+.IP \[bu] 2
+LVM udev rules and systemd services are used to initiate automatic device
+scanning.
+
+.IP \[bu] 2
+To prevent devices from being scanned by pvscan \-\-cache, add them
+to
+.BR lvm.conf (5)
+.B devices/global_filter.
+The devices/filter setting does not
+apply to system level scanning.
+For more information, see:
+.br
+.B lvmconfig \-\-withcomments devices/global_filter
+
+.IP \[bu] 2
+If lvmetad is started or restarted after devices are visible, or
+if the global_filter has changed, then all devices must be rescanned
+for metadata with the command pvscan \-\-cache.
+
+.IP \[bu] 2
+lvmetad ignores older metadata formats, e.g. lvm1, and should not be
+used if they exist.
+
+.IP \[bu] 2
+To notify lvmetad about a device that is no longer present, the major and
+minor numbers must be given, not the path.
+
+.SS Automatic activation
+
+When event-driven system services detect a new LVM device, the first step
+is to automatically scan and cache the metadata from the device. This is
+done by pvscan \-\-cache. A second step is to automatically activate LVs
+that are present on the new device. This auto-activation is done by the
+same pvscan \-\-cache command when the option '\-a|\-\-activate ay' is
+included.
+
+Auto-activation of VGs or LVs can be enabled/disabled using:
+.br
+.BR lvm.conf (5)
+.B activation/auto_activation_volume_list
+
+For more information, see:
+.br
+.B lvmconfig \-\-withcomments activation/auto_activation_volume_list
+
+When this setting is undefined, all LVs are auto-activated (when lvm is
+fully integrated with the event-driven system services).
+
+When a VG or LV is not auto-activated, traditional activation using
+vgchange or lvchange \-a|\-\-activate is needed.
+
+.I Notes
+
+.IP \[bu] 2
+pvscan auto-activation can only be done in combination with \-\-cache.
+
+.IP \[bu] 2
+Auto-activation is designated by the "a" argument in '\-a|\-\-activate ay'.
+This is meant to distinguish system generated commands from explicit user
+commands, although it can be used in any activation command. Whenever it
+is used, the auto_activation_volume_list is applied.
+
+.IP \[bu] 2
+Auto-activation is not yet supported for LVs that are part of partial or
+clustered volume groups.
+
+.P
+
.SH OPTIONS
See \fBlvm\fP(8) for common options.
.TP
@@ -46,33 +156,19 @@ Only show physical volumes not belonging to any volume group.
Short listing format.
.TP
.BR \-u ", " \-\-uuid
-Show UUIDs (Uniform Unique Identifiers) in addition to device special names.
+Show UUIDs in addition to device names.
.TP
.BR \-a ", " \-\-activate " " \fIay
-Together with the information already cached in lvmetad, automatically activate
-any logical volumes that become activatable after the scan done on one or more devices.
-The logical volume to autoactivate is matched against the
-activation/auto_activation_volume_list set in lvm.conf. If this list is not set, then
-all volumes are considered for autoactivation. The autoactivation is not yet
-supported for logical volumes that are part of partial or clustered volume groups.
+Automatically activate any logical volumes that are possible to activate
+with the addition of the new devices.
.TP
.BR \-b ", " \-\-background
Run the command in the background.
.TP
.BR \-\-cache " [" \-\-major " " \fImajor " " \-\-minor " " \fIminor " | " \fIDevicePath " | " \fImajor:minor " ]..."
-Scan one or more devices and instruct the lvmetad daemon to update its cached
-state accordingly. Called internally by udev rules.
-All devices listed explicitly are processed \fBregardless\fP of any device
-filters set using \fBdevices/filter\fP configuration setting. To filter
-devices even in this case, the \fBdevices/global_filter\fP must be used.
-If lvmetad has not yet cached any metadata or the filters have recently been
-changed, then all devices may be scanned, effectively ignoring the rest of
-the command line. Otherwise, if all the devices referenced on the command line
-contain metadata in the default lvm2 format, other devices are not accessed.
-If metadata written using the obsolete GFS pool format is encountered, this is
-ignored and so lvmetad should not be used.
+Scan one or more devices and send the metadata to lvmetad.
+
.SH SEE ALSO
.BR lvm (8),
-.BR lvmetad (8),
-.BR pvcreate (8),
-.BR pvdisplay (8)
+.BR lvmconfig (8),
+.BR lvmetad (8)
diff --git a/man/vgchange.8.in b/man/vgchange.8.in
index dcd1faf0c..77ab70e28 100644
--- a/man/vgchange.8.in
+++ b/man/vgchange.8.in
@@ -10,7 +10,7 @@ vgchange \(em change attributes of a volume group
.RB [ \-A | \-\-autobackup
.RI { y | n }]
.RB [ \-a | \-\-activate
-.RI [ a | e | l ]
+.RI [ a | e | s | l ]
.RI { y | n }]
.RB [ \-\-activationmode
.IR { complete | degraded | partial } ]
@@ -34,6 +34,10 @@ vgchange \(em change attributes of a volume group
.RB [ \-\-ignoreskippedcluster ]
.RB [ \-\-sysinit ]
.RB [ \-\-noudevsync ]
+.RB [ \-\-lock\-start ]
+.RB [ \-\-lock\-stop ]
+.RB [ \-\-lock\-type
+.IR LockType ]
.RB [ \-l | \-\-logicalvolume
.IR MaxLogicalVolumes ]
.RB [ \-p | \-\-maxphysicalvolumes
@@ -45,6 +49,10 @@ vgchange \(em change attributes of a volume group
.RB [ \-P | \-\-partial ]
.RB [ \-s | \-\-physicalextentsize
.IR PhysicalExtentSize [ bBsSkKmMgGtTpPeE ]]
+.RB [ \-S | \-\-select
+.IR Selection ]
+.RB [ \-\-systemid
+.IR SystemID ]
.RB [ \-\-refresh ]
.RB [ \-t | \-\-test ]
.RB [ \-v | \-\-verbose ]
@@ -73,7 +81,7 @@ Controls automatic backup of metadata after the change. See
.BR vgcfgbackup (8).
Default is yes.
.TP
-.BR \-a ", " \-\-activate " [" \fIa | \fIe | \fIl ]{ \fIy | \fIn }
+.BR \-a ", " \-\-activate " [" \fIa | \fIe | \fIs | \fIl ]{ \fIy | \fIn }
Controls the availability of the logical volumes in the volume
group for input/output.
In other words, makes the logical volumes known/unknown to the kernel.
@@ -94,24 +102,73 @@ The location and name of the underlying device node may depend on
the distribution and configuration (e.g. udev) and might change
from release to release.
.IP
-If clustered locking is enabled, add 'e' to activate/deactivate
-exclusively on one node or 'l' to activate/deactivate only
-on the local node.
-Logical volumes with single-host snapshots are always activated
-exclusively because they can only be used on one node at once.
+In a clustered VG, clvmd is used for activation, and the
+following options are possible:
+
+With \-aey, clvmd activates the LV in exclusive mode
+(with an exclusive lock), allowing a single node to activate the LV.
+
+With \-asy, clvmd activates the LV in shared mode
+(with a shared lock), allowing multiple nodes to activate the LV concurrently.
+If the LV type prohibits shared access, such as an LV with a snapshot,
+the 's' option is ignored and an exclusive lock is used.
+
+With \-ay (no mode specified), clvmd activates the LV in shared mode
+if the LV type allows concurrent access, such as a linear LV.
+Otherwise, clvmd activates the LV in exclusive mode.
+
+With \-aey, \-asy, and \-ay, clvmd attempts to activate the LV
+on all nodes. If exclusive mode is used, then only one of the
+nodes will be successful.
+
+With \-an, clvmd attempts to deactivate the LV on all nodes.
+
+With \-aly, clvmd activates the LV only on the local node, and \-aln
+deactivates only on the local node. If the LV type allows concurrent
+access, then shared mode is used, otherwise exclusive.
+
+LVs with snapshots are always activated exclusively because they can only
+be used on one node at once.
+
+For local VGs, \-ay, \-aey, and \-asy are all equivalent.
+.IP
+In a shared VG, lvmlockd is used for locking, and the following options
+are possible:
+
+With \-aey, the command activates the LV in exclusive mode, allowing a
+single host to activate the LV (the host running the command). Before
+activating the LV, the command uses lvmlockd to acquire an exclusive lock
+on the LV. If the lock cannot be acquired, the LV is not activated and an
+error is reported. This would happen if the LV is active on another host.
+
+With \-asy, the command activates the LV in shared mode, allowing multiple
+hosts to activate the LV concurrently. Before activating the LV, the
+command uses lvmlockd to acquire a shared lock on the LV. If the lock
+cannot be acquired, the LV is not activated and an error is reported.
+This would happen if the LV is active exclusively on another host. If the
+LV type prohibits shared access, such as a snapshot, the command will
+report an error and fail.
+
+With \-an, the command deactivates the LV on the host running the command.
+After deactivating the LV, the command uses lvmlockd to release the
+current lock on the LV.
+
+With lvmlockd, an unspecified mode is always exclusive, \-ay defaults to
+\-aey.
+
.TP
.BR \-\-activationmode " {" \fIcomplete | \fIdegraded | \fIpartial }
The activation mode determines whether logical volumes are allowed to
activate when there are physical volumes missing (e.g. due to a device
-failure). \fIcomplete is the most restrictive; allowing only those
+failure). \fIcomplete\fP is the most restrictive; allowing only those
logical volumes to be activated that are not affected by the missing
-PVs. \fIdegraded allows RAID logical volumes to be activated even if
+PVs. \fIdegraded\fP allows RAID logical volumes to be activated even if
they have PVs missing. (Note that the "mirror" segment type is not
considered a RAID logical volume. The "raid1" segment type should
-be used instead.) Finally, \fIpartial allows any logical volume to
+be used instead.) Finally, \fIpartial\fP allows any logical volume to
be activated even if portions are missing due to a missing or failed
PV. This last option should only be used when performing recovery or
-repair operations. \fIdegraded is the default mode. To change it, modify
+repair operations. \fIdegraded\fP is the default mode. To change it, modify
.B activation_mode
in
.BR lvm.conf (5).
@@ -184,6 +241,20 @@ Make no attempt to interact with dmeventd unless
is specified.
Do not use this if dmeventd is already monitoring a device.
.TP
+.BR \-\-lock\-start
+Start the lockspace of a shared VG in lvmlockd. lvmlockd locks become
+available for the VG, allowing LVM to use the VG. See
+.BR lvmlockd (8).
+.TP
+.BR \-\-lock\-stop
+Stop the lockspace of a shared VG in lvmlockd. lvmlockd locks become
+unavailable for the VG, preventing LVM from using the VG. See
+.BR lvmlockd (8).
+.TP
+.BR \-\-lock\-type " " \fILockType
+Change the VG lock type to or from a shared lock type used with lvmlockd. See
+.BR lvmlockd (8).
+.TP
.BR \-l ", " \-\-logicalvolume " " \fIMaxLogicalVolumes
Changes the maximum logical volume number of an existing inactive
volume group.
@@ -241,6 +312,12 @@ impact on I/O performance to the logical volume. The smallest PE is 1KiB.
The 2.4 kernel has a limitation of 2TiB per block device.
.TP
+.BR \-\-systemid " " \fISystemID
+Changes the system ID of the VG. Using this option requires caution
+because the VG may become foreign to the host running the command,
+leaving the host unable to access it. See
+.BR lvmsystemid (7).
+.TP
.BR \-\-refresh
If any logical volume in the volume group is active, reload its metadata.
This is not necessary in normal operation, but may be useful
diff --git a/man/vgcreate.8.in b/man/vgcreate.8.in
index d850659fe..92ef06c2f 100644
--- a/man/vgcreate.8.in
+++ b/man/vgcreate.8.in
@@ -27,6 +27,9 @@ vgcreate \(em create a volume group
.IR NumberOfCopies | unmanaged | all ]
.RB [ \-s | \-\-physicalextentsize
.IR PhysicalExtentSize [ bBsSkKmMgGtTpPeE ]]
+.RB [ \-\-shared ]
+.RB [ \-\-systemid
+.IR SystemID ]
.RB [ \-t | \-\-test ]
.RB [ \-v | \-\-verbose ]
.RB [ \-\-version ]
@@ -110,7 +113,6 @@ power of 2 of at least 1 sector (where the sector size is the largest sector
size of the PVs currently used in the VG) or, if not a power of 2, at least
128KiB. For the older LVM1 format, it must be a power of 2 of at least 8KiB.
The default is 4 MiB.
-
Once this value has been set, it is difficult to change it without recreating
the volume group which would involve backing up and restoring data on any
logical volumes. However, if no extents need moving for the new
@@ -126,6 +128,23 @@ impact on I/O performance to the logical volume. The smallest PE is 1KiB
The 2.4 kernel has a limitation of 2TiB per block device.
+.TP
+.B \-\-shared
+Create a shared VG using lvmlockd. lvmlockd will select lock type sanlock
+or dlm depending on which lock manager is running. This allows multiple
+hosts to share a VG on shared devices. See
+.BR lvmlockd (8).
+
+.TP
+.BR \-\-systemid " " \fISystemID
+Specifies the system ID that will be given to the new VG, overriding the
+system ID of the host running the command. A VG is normally created
+without this option, in which case the new VG is given the system ID of
+the host creating it. Using this option requires caution because the
+system ID of the new VG may not match the system ID of the host running
+the command, leaving the VG inaccessible to the host. See
+.BR lvmsystemid (7).
+
.SH PHYSICAL DEVICE OPTIONS
The following options are available for initializing physical devices in the
volume group. These options are further described in the \fBpvcreate\fP(8)
diff --git a/man/vgdisplay.8.in b/man/vgdisplay.8.in
index 1e472d3ba..172dba472 100644
--- a/man/vgdisplay.8.in
+++ b/man/vgdisplay.8.in
@@ -8,6 +8,8 @@ vgdisplay \(em display attributes of volume groups
.RB [ \-\-commandprofile
.IR ProfileName ]
.RB [ \-s | \-\-short ]
+.RB [ \-S | \-\-select
+.IR Selection ]
.RB [ \-v | \-\-verbose ]
.RB [ \-d | \-\-debug ]
.RB [ \-h | \-\-help ]
@@ -18,8 +20,7 @@ vgdisplay \(em display attributes of volume groups
.RB [ \-\-units
.IR hHbBsSkKmMgGtTpPeE ]
.RB [ \-\-version ]
-.RI [ VolumeGroupName
-.RI [ VolumeGroupName ...]]
+.RI [ VolumeGroupName ...]
.br
.br
@@ -49,8 +50,7 @@ vgdisplay \(em display attributes of volume groups
.IR hHbBsSkKmMgGtTpPeE ]
.RB [ \-v | \-\-verbose ]
.RB [ \-\-version ]
-.RI [ VolumeGroupName
-.RI [ VolumeGroupName ...]]
+.RI [ VolumeGroupName ...]
.SH DESCRIPTION
vgdisplay allows you to see the attributes of
.I VolumeGroupName
diff --git a/man/vgexport.8.in b/man/vgexport.8.in
index 2b3161818..08c1d780e 100644
--- a/man/vgexport.8.in
+++ b/man/vgexport.8.in
@@ -8,8 +8,9 @@ vgexport \- make volume groups unknown to the system
.IR ProfileName ]
.RB [ \-d | \-\-debug ]
.RB [ \-h | \-? | \-\-help ]
+.RB [ \-S | \-\-select
+.IR Selection ]
.RB [ \-v | \-\-verbose ]
-.I VolumeGroupName
.RI [ VolumeGroupName ...]
.SH DESCRIPTION
vgexport allows you to make the inactive
@@ -19,6 +20,8 @@ You can then move all the Physical Volumes in that Volume Group to
a different system for later
.BR vgimport (8).
Most LVM2 tools ignore exported Volume Groups.
+vgexport clears the VG system ID, and vgimport sets the VG system ID
+to match the host running vgimport (if the host has a system ID).
.SH OPTIONS
See \fBlvm\fP(8) for common options.
.TP
@@ -28,4 +31,5 @@ Export all inactive Volume Groups.
.BR lvm (8),
.BR pvscan (8),
.BR vgimport (8),
-.BR vgscan (8)
+.BR vgscan (8),
+.BR lvmsystemid (7)
diff --git a/man/vgimport.8.in b/man/vgimport.8.in
index 31378f433..cd3404968 100644
--- a/man/vgimport.8.in
+++ b/man/vgimport.8.in
@@ -6,8 +6,9 @@ vgimport \(em make exported volume groups known to the system
.RB [ \-a | \-\-all ]
.RB [ \-d | \-\-debug ]
.RB [ \-h | \-? | \-\-help ]
+.RB [ \-S | \-\-select
+.IR Selection ]
.RB [ \-v | \-\-verbose ]
-.I VolumeGroupName
.RI [ VolumeGroupName ...]
.SH DESCRIPTION
vgimport allows you to make a Volume Group that was previously
@@ -15,6 +16,8 @@ exported using
.BR vgexport (8)
known to the system again, perhaps after moving its Physical Volumes
from a different machine.
+vgexport clears the VG system ID, and vgimport sets the VG system ID
+to match the host running vgimport (if the host has a system ID).
.SH OPTIONS
See \fBlvm\fP(8) for common options.
.TP
@@ -29,4 +32,5 @@ failed and they cannot be restored.
.BR lvm (8),
.BR pvscan (8),
.BR vgexport (8),
-.BR vgscan (8)
+.BR vgscan (8),
+.BR lvmsystemid (7)
diff --git a/man/vgremove.8.in b/man/vgremove.8.in
index 10a431f25..082c1f3b4 100644
--- a/man/vgremove.8.in
+++ b/man/vgremove.8.in
@@ -9,9 +9,10 @@ vgremove \(em remove a volume group
.RB [ \-f | \-\-force ]
.RB [ \-h | \-? | \-\-help ]
.RB [ \-\-noudevsync ]
+.RB [ \-S | \-\-select
+.IR Selection ]
.RB [ \-t | \-\-test ]
.RB [ \-v | \-\-verbose ]
-.I VolumeGroupName
.RI [ VolumeGroupName ...]
.SH DESCRIPTION
vgremove allows you to remove one or more volume groups.
diff --git a/man/vgs.8.in b/man/vgs.8.in
index 6dd52f158..654e8bbd0 100644
--- a/man/vgs.8.in
+++ b/man/vgs.8.in
@@ -93,7 +93,7 @@ are missing from the system
.IP 5 3
Allocation policy: (c)ontiguous, c(l)ing, (n)ormal, (a)nywhere
.IP 6 3
-(c)lustered
+(c)lustered, (s)hared
.RE
.TP
.BR \-O ", " \-\-sort
diff --git a/nix/default.nix b/nix/default.nix
index abd3d01f0..abfbcd9f4 100644
--- a/nix/default.nix
+++ b/nix/default.nix
@@ -1,48 +1,225 @@
+# -*- mode: nix; indent-tabs-mode: nil -*-
{ nixpkgs ? <nixpkgs>, lvm2Src, release ? false,
rawhide32 ? "" , rawhide64 ? "" ,
+ fc20_32_updates ? "", fc20_64_updates ? "",
fc19_32_updates ? "", fc19_64_updates ? "",
fc18_32_updates ? "", fc18_64_updates ? "",
- lvm2Nix ? lvm2Src, T ? "" }:
+ T ? "", ENV ? "", timeout ? 60,
+ overrides ? { pkgs }: { install_rpms = {}; distros = {}; configs = {}; } }:
let
pkgs = import nixpkgs {};
- mkVM = { VM, extras ? [], diskFun, kernel }:
- VM rec {
- inherit kernel;
- name = "lvm2";
- fullName = "LVM2";
- src = jobs.tarball;
+ lib = pkgs.lib;
+ over = overrides { inherit pkgs; };
+ install_lcov = ''
+ rpm -Uv ${pkgs.fetchurl {
+ url = "http://archives.fedoraproject.org/pub/archive/fedora/linux/updates/16/i386/lcov-1.9-2.fc16.noarch.rpm";
+ sha256 = "0ycdh5mb7p5ll76mqk0p6gpnjskvxxgh3a3bfr1crh94nvpwhp4z"; }}
+ '';
+
+ mkTest = args: pkgs.stdenv.mkDerivation rec {
+ name = "lvm2-test-${(args.diskFun {}).name}";
+
+ builder = pkgs.writeScript "lvm2-collect-results" ''
+ #!${pkgs.bash}/bin/bash
+ . $stdenv/setup
+ mkdir -p $out/test-results
+ for i in ${lib.concatStringsSep " " buildInputs}; do
+ cat $i/test-results/list >> $out/test-results/list
+ cp $i/test-results'/'*.txt $out/test-results/ || true
+ done
+ mkdir -p $out/nix-support
+ grep '\<failed\>' $out/test-results/list && touch $out/nix-support/failed || true
+ '';
+
+ buildInputs = map (x: runTest (args // { flavour = x; }))
+ [ "ndev-vanilla" "ndev-lvmetad" "ndev-cluster" "udev-vanilla" "udev-lvmetad" "udev-cluster" ];
+ };
+
+ runTest = { build, diskFun, extras ? [], kernel, vmtools, flavour, ... }: pkgs.stdenv.mkDerivation rec {
diskImage = diskFun { extraPackages = extras; };
- memSize = 2047;
-
- # fc16 lcov is broken and el6 has none... be creative
- prepareImagePhase = ''
- rpm -Uv ${pkgs.fetchurl {
- url = "http://archives.fedoraproject.org/pub/archive/fedora/linux/updates/16/i386/lcov-1.9-2.fc16.noarch.rpm";
- sha256 = "0ycdh5mb7p5ll76mqk0p6gpnjskvxxgh3a3bfr1crh94nvpwhp4z"; }}
- dmesg -n 1 # avoid spilling dmesg into the main log, we capture it in harness
+ name = "lvm2-test-${diskImage.name}-${flavour}";
+
+ # this is the builder that runs in the guest
+ origBuilder = pkgs.writeScript "vm-test-guest" ''
+ #!/bin/bash
+ export PATH=/usr/bin:/bin:/usr/sbin:/sbin
+
+ # we always run in a fresh image, so need to install everything again
+ ls ${build}/rpms/*/*.rpm | grep -v sysvinit | xargs rpm -Uv --oldpackage # */
+ ${install_lcov}
+
+ mkdir -p /xchg/results
+ touch /xchg/booted
+
+ dmsetup targets
+
+ export LVM_TEST_BACKING_DEVICE=/dev/sdb
+ ulimit -c unlimited
+
+ watch=
+ if echo ${flavour} | grep -q udev; then
+ (/usr/lib/systemd/systemd-udevd || /usr/lib/udev/udevd || /sbin/udevd || \
+ find / -xdev -name \*udevd) >> /xchg/udevd.log 2>&1 &
+ watch="--watch /xchg/udevd.log"
+ fi
+
+ export ${ENV}
+ lvm2-testsuite --batch --outdir /xchg/results --continue \
+ --timeout ${toString timeout} --fatal-timeouts --heartbeat /xchg/heartbeat \
+ --flavours ${flavour} $watch --kmsg ${if lib.eqStrings T "" then "" else "--only ${T}"}
+
+ # TODO: coverage reports
+ # make lcov || true
+ # cp -R lcov_reports $out/coverage && \
+ # echo "report coverage $out/coverage" >> $out/nix-support/hydra-build-products || \
+ # true # not really fatal, although kinda disappointing
'';
- postBuild = ''
- mkdir -p $out/nix-support
- cd `cat /tmp/build-location`
- mv test/results/list test/results/list-rpm
- ls /tmp/rpmout/RPMS/*/*.rpm | grep -v sysvinit | xargs rpm -Uvh # */
- (/usr/lib/systemd/systemd-udevd || /usr/lib/udev/udevd || /sbin/udevd || find / -xdev -name \*udevd) &
- make check_system QUIET=1 T=${T} || touch $out/nix-support/failed
- mv test/results/list test/results/list-system
- cat test/results/list-* > test/results/list
- cp -R test/results $out/test-results && \
- echo "report tests $out/test-results" >> $out/nix-support/hydra-build-products || \
- true
- make lcov || true
- cp -R lcov_reports $out/coverage && \
- echo "report coverage $out/coverage" >> $out/nix-support/hydra-build-products || \
- true # not really fatal, although kinda disappointing
+ buildInputs = [ pkgs.coreutils pkgs.bash pkgs.utillinux ];
+
+ # make a qcow copy of the main image
+ preVM = ''
+ diskImage=$(pwd)/disk-image.qcow2
+ origImage=${diskImage}
+ if test -d "$origImage"; then origImage="$origImage/disk-image.qcow2"; fi
+ ${vmtools.qemu}/bin/qemu-img create -b "$origImage" -f qcow2 $diskImage
+ '';
+
+ builder = pkgs.writeScript "vm-test" ''
+ #!${pkgs.bash}/bin/bash
+ . $stdenv/setup
+
+ export QEMU_OPTS="-drive file=/dev/shm/testdisk.img,if=ide -m 256M"
+ export QEMU_DRIVE_OPTS=",if=ide"
+ export KERNEL_OPTS="log_buf_len=131072 loglevel=1"
+ export mountDisk=1
+
+ mkdir -p $out/test-results $out/nix-support
+ touch $out/nix-support/failed
+
+ monitor() {
+ set +e
+ counter=0
+ rm -f j.current j.last t.current t.last
+ while true; do
+ if ! test -f pid; then
+ counter=0
+ sleep 60
+ continue
+ fi
+
+ cat xchg/results/journal > j.current 2> /dev/null
+ cat xchg/heartbeat > hb.current 2> /dev/null
+ if diff j.current j.last >& /dev/null; then
+ counter=$(($counter + 1));
+ else
+ counter=0
+ fi
+ if test $counter -eq 10 || test $(wc -c <hb.current) -eq $(wc -c <hb.last); then
+ echo
+ echo "VM got stuck; heartbeat: $(wc -c <hb.current) $(wc -c <hb.last), counter = $counter."
+ echo "last journal entry: $(tail -n 1 j.current), previously $(tail -n 1 j.last)"
+ kill -- -$(cat pid)
+ fi
+ sleep 60
+ mv j.current j.last >& /dev/null
+ mv hb.current hb.last >& /dev/null
+ done
+ }
+
+ monitor &
+
+ for i in `seq 1 20`; do # we allow up to 20 VM restarts
+ rm -f xchg/booted
+ ${vmtools.qemu}/bin/qemu-img create -f qcow2 /dev/shm/testdisk.img 4G
+ setsid bash -e ${vmtools.vmRunCommand (vmtools.qemuCommandLinux kernel)} &
+ pid=$!
+
+ # give the VM some time to get up and running
+ slept=0
+ while test $slept -le 180 && test ! -e xchg/booted; do
+ sleep 10
+ slept=$(($slept + 10))
+ done
+ echo $pid > pid # monitor go
+ wait $pid || true
+ rm -f pid # disarm the monitor process
+
+ # if we have any new results, stash them
+ mv xchg/results'/'*.txt $out/test-results/ || true
+
+ if test -n "$(cat xchg/in-vm-exit)"; then # the VM is done
+ test 0 -eq "$(cat xchg/in-vm-exit)" && rm -f $out/nix-support/failed
+ break
+ fi
+
+ sleep 10 # wait for the VM to clean up before starting up a new one
+ done
+
+ cat xchg/results/list > $out/test-results/list || true
'';
+ };
+
+ mkTarball = profiling: pkgs.releaseTools.sourceTarball rec {
+ name = "lvm2-tarball";
+ versionSuffix = if lvm2Src ? revCount
+ then ".pre${toString lvm2Src.revCount}"
+ else "";
+ src = lvm2Src;
+ autoconfPhase = ":";
+ distPhase = ''
+ make distclean
+
+ version=`cat VERSION | cut "-d(" -f1`${versionSuffix}
+ version_dm=`cat VERSION_DM | cut "-d-" -f1`${versionSuffix}
+
+ chmod u+w *
+
+ # set up versions
+ sed -e s,-git,${versionSuffix}, -i VERSION VERSION_DM
+ sed -e "s,\(device_mapper_version\) [0-9.]*$,\1 $version_dm," \
+ -e "s,^\(Version:[^0-9%]*\)[0-9.]*$,\1 $version," \
+ -e "s,^\(Release:[^0-9%]*\)[0-9.]\+,\1 0.HYDRA," \
+ -i spec/source.inc
+
+ # tweak RPM configuration
+ echo "%define enable_profiling ${profiling}" >> spec/source.inc
+ echo "%define enable_testsuite 1" >> spec/source.inc
+ sed -e "s:%with clvmd corosync:%with clvmd corosync,singlenode:" -i spec/source.inc
+
+ # synthesize a changelog
+ sed -e '/^%changelog/,$d' -i spec/lvm2.spec
+ (echo "%changelog";
+ echo "* `date +"%a %b %d %Y"` Petr Rockai <prockai@redhat.com> - $version";
+ echo "- AUTOMATED BUILD BY Hydra") >> spec/lvm2.spec
+
+ cp spec/* . # */ # RPM needs the spec file in the source root
+
+ # make a tarball
+ mkdir ../LVM2.$version
+ mv * ../LVM2.$version
+ ensureDir $out/tarballs
+ cd ..
+ tar cvzf $out/tarballs/LVM2.$version.tgz LVM2.$version
+ '';
+ };
+
+ mkBuild = { src, VM, extras ? [], diskFun, ... }:
+ VM rec {
+ name = "lvm2-build-${diskImage.name}";
+ fullName = "lvm2-build-${diskImage.name}";
+
+ inherit src;
+ diskImage = diskFun { extraPackages = extras; };
+ memSize = 512;
+ checkPhase = ":";
+
+ preConfigure = install_lcov;
postInstall = ''
- for i in $out/rpms/*/*.rpm; do
+ mkdir -p $out/nix-support
+ for i in $out/rpms/*/*.rpm; do # */
if echo $i | grep -vq "\.src\.rpm$"; then
echo "file rpm $i" >> $out/nix-support/hydra-build-products
else
@@ -53,16 +230,18 @@ let
};
rootmods = [ "virtio_pci" "virtio_blk" "virtio_balloon" "ext4" "unix"
- "cifs" "virtio_net" "unix" "hmac" "md4" "ecb" "des_generic" "sha256" ];
+ "cifs" "virtio_net" "unix" "hmac" "md4" "ecb" "des_generic" "sha256"
+ "ata_piix" "sd_mod" ];
- centos_url = ver: arch: if ver == "6.5"
+ centos_url = ver: arch: if ver == "6.6" || ver == "7"
then "http://ftp.fi.muni.cz/pub/linux/centos/${ver}/os/${arch}/"
else "http://vault.centos.org/${ver}/os/${arch}/";
- fedora_url = ver: arch: if pkgs.lib.eqStrings ver "rawhide" || pkgs.lib.eqStrings ver "19"
+ fedora_url = ver: arch: if lib.eqStrings ver "rawhide" || lib.eqStrings ver "19"
then "ftp://ftp.fi.muni.cz/pub/linux/fedora/linux/development/${ver}/${arch}/os/"
else "mirror://fedora/linux/releases/${ver}/Everything/${arch}/os/";
fedora_update_url = ver: arch: "mirror://fedora/linux/updates/${ver}/${arch}";
- extra_distros = with pkgs.lib; let
+
+ distros = with lib; let
centos = { version, sha, arch }: {
name = "centos-${version}-${arch}";
fullName = "CentOS ${version} (${arch})";
@@ -72,8 +251,7 @@ let
};
urlPrefix = centos_url version arch;
archs = ["noarch" arch] ++ (if eqStrings arch "i386" then ["i586" "i686"] else []);
- packages = filter (n: !(eqStrings n "fedora-release")) pkgs.vmTools.commonFedoraPackages ++
- [ "centos-release" ];
+ packages = pkgs.vmTools.commonCentOSPackages;
};
fedora = { version, sha, arch }: rec {
name = "fedora-${version}-${arch}";
@@ -105,6 +283,8 @@ let
in {
rawhidex86_64 = rawhide "rawhide" "x86_64" rawhide64;
rawhidei386 = rawhide "rawhide" "i386" rawhide32;
+ fedora20ux86_64 = update "20" "x86_64" fc20_64_updates pkgs.vmTools.rpmDistros.fedora20x86_64;
+ fedora20ui386 = update "20" "i386" fc20_32_updates pkgs.vmTools.rpmDistros.fedora20i386;
fedora19ux86_64 = update "19" "x86_64" fc19_64_updates pkgs.vmTools.rpmDistros.fedora19x86_64;
fedora19ui386 = update "19" "i386" fc19_32_updates pkgs.vmTools.rpmDistros.fedora19i386;
fedora18ux86_64 = update "18" "x86_64" fc18_64_updates pkgs.vmTools.rpmDistros.fedora18x86_64;
@@ -139,115 +319,125 @@ let
version="6.5"; arch="x86_64";
sha="3353e378f5cb4bb6c3b3dd2ca266c6d68a1e29c36cf99f76aea3d8e158626024";
};
- };
- vm = pkgs: xmods: with pkgs.lib; rec {
+ centos66i386 = centos {
+ version="6.6"; arch="i386";
+ sha="a8b935fcac1c8515c6d8dab3c43c53b3e461f89eb7a93b1914303784e28fcd17";
+ };
+
+ centos66x86_64 = centos {
+ version="6.6"; arch="x86_64";
+ sha="7651b16a9a2a8a5fbd0ad3ff8bbbe6f2409a64850ccfd83a6a3f874f13d8622f";
+ };
+
+ centos70x86_64 = centos {
+ version="7"; arch="x86_64";
+ sha="1a7dd0d315b39ad504f54ea88676ab502a48064cb2d875ae3ae29431e175861c";
+ };
+ } // over.distros;
+
+ vm = { pkgs, xmods, dmmods ? false }: with lib; rec {
tools = import "${nixpkgs}/pkgs/build-support/vm/default.nix" {
inherit pkgs; rootModules = rootmods ++ xmods ++
- [ "loop" "dm_mod" "dm_snapshot" "dm_mirror" "dm_zero" "dm_raid" "dm_thin_pool" ]; };
+ (if dmmods then [ "loop" "dm_mod" "dm_snapshot" "dm_mirror" "dm_zero" "dm_raid" "dm_thin_pool" ]
+ else []); };
release = import "${nixpkgs}/pkgs/build-support/release/default.nix" {
pkgs = pkgs // { vmTools = tools; }; };
imgs = tools.diskImageFuns //
- mapAttrs (n: a: b: pkgs.vmTools.makeImageFromRPMDist (a // b)) extra_distros;
- rpmdistros = tools.rpmDistros // extra_distros;
+ mapAttrs (n: a: b: pkgs.vmTools.makeImageFromRPMDist (a // b)) distros;
+ rpmdistros = tools.rpmDistros // distros;
rpmbuild = tools.buildRPM;
};
- extra_rpms = rec {
+ install_rpms = rec {
common = [ "libselinux-devel" "libsepol-devel" "ncurses-devel" "readline-devel"
- "corosynclib-devel"
+ "valgrind" "valgrind-devel" "gdb" "strace"
"redhat-rpm-config" # needed for rpmbuild of lvm
"which" "e2fsprogs" # needed for fsadm
+ "e2fsprogs-libs" "e2fsprogs-devel"
"perl-GD" # for lcov
+ "mdadm" # for tests with lvm2 and mdadm
+ "device-mapper-persistent-data" # thin and cache
+ "pkgconfig" # better support for config
+ "kernel"
];
- centos63 = [ "clusterlib-devel" "openaislib-devel" "cman" "libudev-devel" ];
- centos64 = centos63;
+ centos63 = [ "clusterlib-devel" "openaislib-devel" "cman" "libudev-devel" "procps" "nc" ];
+ centos64 = centos63 ++ [ "corosynclib-devel" ];
centos65 = centos64;
- fedora16 = [ "clusterlib-devel" "openaislib-devel" "cman" "systemd-devel" "libudev-devel" ];
- fedora17 = [ "dlm-devel" "corosynclib-devel" "device-mapper-persistent-data"
- "dlm" "systemd-devel" "perl-Digest-MD5" "libudev-devel" ];
- fedora18 = [ "dlm-devel" "corosynclib-devel" "device-mapper-persistent-data"
- "dlm" "systemd-devel" "perl-Digest-MD5" ];
+ centos66 = centos65;
+ centos70 = [ "dlm-devel" "dlm" "corosynclib-devel" "perl-Digest-MD5" "systemd-devel"
+ "socat" # used by test suite lvmpolld
+ # "sanlock" # used by lvmlockd. Required version present in 7.2 only
+ "procps-ng" ];
+
+ fedora17_18 = [ "dlm-devel" "corosynclib-devel" "libblkid" "libblkid-devel"
+ "dlm" "systemd-devel" "perl-Digest-MD5" "kernel-modules-extra" ];
+ fedora17 = fedora17_18 ++ [ "libudev-devel" "nc" ];
+
+ fedora18 = fedora17_18 ++ [ "socat" ];
fedora18u = fedora18;
- fedora19 = [ "dlm-devel" "dlm" "corosynclib-devel" "perl-Digest-MD5" "systemd-devel" "procps-ng" ];
+
+ fedora19 = centos70 ++ [ "kernel-modules-extra" ];
fedora19u = fedora19;
- rawhide = fedora19;
- };
- mkRPM = { arch, image }: with pkgs.lib;
- let use = vm (if eqStrings arch "i386" then pkgs.pkgsi686Linux else pkgs)
- (if image == "centos64" || image == "centos65" then [] else [ "9p" "9pnet_virtio" ]);
- in mkVM {
+ fedora20 = fedora19;
+ fedora20u = fedora20;
+
+ rawhide = fedora20;
+ } // over.install_rpms;
+
+ wrapper = fun: { arch, image, build ? {}, istest ? false, src ? jobs.tarball }: with lib;
+ let use = vm { pkgs = if eqStrings arch "i386" then pkgs.pkgsi686Linux else pkgs;
+ xmods = if istest && (image == "centos64" || image == "centos65")
+ then [] else [ "9p" "9pnet_virtio" ];
+ dmmods = istest; };
+ in fun {
+ inherit build istest src;
VM = use.rpmbuild;
diskFun = builtins.getAttr "${image}${arch}" use.imgs;
- extras = extra_rpms.common ++ builtins.getAttr image extra_rpms;
+ extras = install_rpms.common ++ builtins.getAttr image install_rpms;
+ vmtools = use.tools;
kernel = use.tools.makeKernelFromRPMDist (builtins.getAttr "${image}${arch}" use.rpmdistros);
};
- jobs = rec {
- tarball = pkgs.releaseTools.sourceTarball rec {
- name = "lvm2-tarball";
- versionSuffix = if lvm2Src ? revCount
- then ".pre${toString lvm2Src.revCount}"
- else "";
- src = lvm2Src;
- autoconfPhase = ":";
- distPhase = ''
- set -x
- make distclean
- version=`cat VERSION | cut "-d(" -f1`${versionSuffix}
- version_dm=`cat VERSION_DM | cut "-d-" -f1`${versionSuffix}
- sed -e s,-git,${versionSuffix}, -i VERSION VERSION_DM
- rm -rf spec; cp -R ${lvm2Nix}/spec/* .
- chmod u+w *
- (echo "%define enable_profiling 1";
- echo "%define check_commands \\";
- echo "make lcov-reset \\";
- echo "dmsetup targets\\";
- echo "mkdir -p \$out/nix-support \\";
- echo "make check QUIET=1 T=${T} || touch \$out/nix-support/failed \\"
- echo "pwd > /tmp/build-location \\"
- echo "touch rpm-no-clean") >> source.inc
- sed -e "s,\(device_mapper_version\) [0-9.]*$,\1 $version_dm," \
- -e "s,^\(Version:[^0-9%]*\)[0-9.]*$,\1 $version," \
- -e "s,^\(Release:[^0-9%]*\)[0-9.]\+,\1 0.HYDRA," \
- -e "s:%with clvmd corosync:%with clvmd corosync,singlenode:" \
- -i source.inc
- sed -e '/^%changelog/,$d' \
- -i lvm2.spec
- echo "%changelog" >> lvm2.spec;
- echo "* `date +"%a %b %d %Y"` Petr Rockai <prockai@redhat.com> - $version" >> lvm2.spec;
- echo "- AUTOMATED BUILD BY Hydra" >> lvm2.spec
- mkdir ../LVM2.$version
- mv * ../LVM2.$version
- ensureDir $out/tarballs
- cd ..
- tar cvzf $out/tarballs/LVM2.$version.tgz LVM2.$version
- '';
- };
-
- fc19_x86_64 = mkRPM { arch = "x86_64"; image = "fedora19"; };
- fc19_i386 = mkRPM { arch = "i386" ; image = "fedora19"; };
- fc18_x86_64 = mkRPM { arch = "x86_64"; image = "fedora18"; };
- fc18_i386 = mkRPM { arch = "i386" ; image = "fedora18"; };
- fc17_x86_64 = mkRPM { arch = "x86_64"; image = "fedora17"; };
- fc17_i386 = mkRPM { arch = "i386" ; image = "fedora17"; };
- fc16_x86_64 = mkRPM { arch = "x86_64"; image = "fedora16"; };
- fc16_i386 = mkRPM { arch = "i386" ; image = "fedora16"; };
-
- fc18u_x86_64 = mkRPM { arch = "x86_64"; image = "fedora18u"; };
- fc18u_i386 = mkRPM { arch = "i386"; image = "fedora18u"; };
- fc19u_x86_64 = mkRPM { arch = "x86_64"; image = "fedora19u"; };
- fc19u_i386 = mkRPM { arch = "i386"; image = "fedora19u"; };
-
- #centos63_i386 = mkRPM { arch = "i386" ; image = "centos63"; };
- #centos63_x86_64 = mkRPM { arch = "x86_64" ; image = "centos63"; };
- centos64_i386 = mkRPM { arch = "i386" ; image = "centos64"; };
- centos64_x86_64 = mkRPM { arch = "x86_64" ; image = "centos64"; };
- centos65_i386 = mkRPM { arch = "i386" ; image = "centos65"; };
- centos65_x86_64 = mkRPM { arch = "x86_64" ; image = "centos65"; };
-
- rawhide_i386 = mkRPM { arch = "i386" ; image = "rawhide"; };
- rawhide_x86_64 = mkRPM { arch = "x86_64" ; image = "rawhide"; };
+ configs = {
+ fc20p_x86_64 = { arch = "x86_64"; image = "fedora20"; src = jobs.tarball_prof; };
+ fc20p_i386 = { arch = "i386" ; image = "fedora20"; src = jobs.tarball_prof; };
+ fc20_x86_64 = { arch = "x86_64"; image = "fedora20"; };
+ fc20_i386 = { arch = "i386" ; image = "fedora20"; };
+ fc19_x86_64 = { arch = "x86_64"; image = "fedora19"; };
+ fc19_i386 = { arch = "i386" ; image = "fedora19"; };
+ fc18_x86_64 = { arch = "x86_64"; image = "fedora18"; };
+ fc18_i386 = { arch = "i386" ; image = "fedora18"; };
+ fc17_x86_64 = { arch = "x86_64"; image = "fedora17"; };
+ fc17_i386 = { arch = "i386" ; image = "fedora17"; };
+
+ fc18u_x86_64 = { arch = "x86_64"; image = "fedora18u"; };
+ fc18u_i386 = { arch = "i386"; image = "fedora18u"; };
+ fc19u_x86_64 = { arch = "x86_64"; image = "fedora19u"; };
+ fc19u_i386 = { arch = "i386"; image = "fedora19u"; };
+
+ #centos63_i386 = { arch = "i386" ; image = "centos63"; };
+ #centos63_x86_64 = { arch = "x86_64" ; image = "centos63"; };
+ centos64_i386 = { arch = "i386" ; image = "centos64"; };
+ centos64_x86_64 = { arch = "x86_64" ; image = "centos64"; };
+ centos65_i386 = { arch = "i386" ; image = "centos65"; };
+ centos65_x86_64 = { arch = "x86_64" ; image = "centos65"; };
+ centos66_i386 = { arch = "i386" ; image = "centos66"; };
+ centos66_x86_64 = { arch = "x86_64" ; image = "centos66"; };
+
+ centos70_x86_64 = { arch = "x86_64" ; image = "centos70"; };
+
+ rawhide_i386 = { arch = "i386" ; image = "rawhide"; };
+ rawhide_x86_64 = { arch = "x86_64" ; image = "rawhide"; };
+ } // over.configs;
+
+ rpms = lib.mapAttrs (n: v: wrapper mkBuild v) configs;
+ tests = let make = n: v: wrapper mkTest (v // { build = builtins.getAttr n rpms; istest = true; });
+ in lib.mapAttrs make configs;
+
+ jobs = tests // {
+ tarball_prof = mkTarball "1";
+ tarball = mkTarball "0";
};
in jobs
diff --git a/po/Makefile.in b/po/Makefile.in
index ff5db17fd..96e932b45 100644
--- a/po/Makefile.in
+++ b/po/Makefile.in
@@ -17,7 +17,7 @@ top_builddir = @top_builddir@
LANGS=de
-TARGETS=$(LANGS:%=lvm2_%.mo) $(LANGS:%=dm_%.mo)
+#TARGETS=$(LANGS:%=lvm2_%.mo) $(LANGS:%=dm_%.mo)
DM_POSOURCES = $(top_builddir)/tools/dmsetup.pot $(top_builddir)/libdm/*.pot \
$(top_builddir)/libdm/*/*.pot
diff --git a/python/.gitignore b/python/.gitignore
new file mode 100644
index 000000000..c87e43ef9
--- /dev/null
+++ b/python/.gitignore
@@ -0,0 +1,2 @@
+.liblvm_built
+setup.py
diff --git a/python/example.py b/python/example.py
index 5c14ee1a3..07ebbf2ef 100644
--- a/python/example.py
+++ b/python/example.py
@@ -27,7 +27,7 @@ import lvm
#Dump information about PV
def print_pv(pv):
- print 'PV name: ', pv.getName(), ' ID: ', pv.getUuid(), 'Size: ', pv.getSize()
+ print('PV name: ', pv.getName(), ' ID: ', pv.getUuid(), 'Size: ', pv.getSize())
#Dump some information about a specific volume group
@@ -35,7 +35,7 @@ def print_vg(vg_name):
#Open read only
vg = lvm.vgOpen(vg_name, 'r')
- print 'Volume group:', vg_name, 'Size: ', vg.getSize()
+ print('Volume group:', vg_name, 'Size: ', vg.getSize())
#Retrieve a list of Physical volumes for this volume group
pv_list = vg.listPVs()
@@ -48,9 +48,9 @@ def print_vg(vg_name):
lv_list = vg.listLVs()
if len(lv_list):
for l in lv_list:
- print 'LV name: ', l.getName(), ' ID: ', l.getUuid()
+ print('LV name: ', l.getName(), ' ID: ', l.getUuid())
else:
- print 'No logical volumes present!'
+ print('No logical volumes present!')
vg.close()
@@ -75,14 +75,14 @@ def find_vg_with_free_space():
def create_delete_logical_volume():
vg_name = find_vg_with_free_space()
- print 'Using volume group ', vg_name, ' for example'
+ print('Using volume group ', vg_name, ' for example')
if vg_name:
vg = lvm.vgOpen(vg_name, 'w')
lv = vg.createLvLinear('python_lvm_ok_to_delete', vg.getFreeSize())
if lv:
- print 'New lv, id= ', lv.getUuid()
+ print('New lv, id= ', lv.getUuid())
#Create a tag
lv.addTag('Demo_tag')
@@ -97,16 +97,16 @@ def create_delete_logical_volume():
#Try to rename
lv.rename("python_lvm_renamed")
- print 'LV name= ', lv.getName()
+ print('LV name= ', lv.getName())
lv.remove()
vg.close()
else:
- print 'No free space available to create demo lv!'
+ print('No free space available to create demo lv!')
if __name__ == '__main__':
#What version
- print 'lvm version=', lvm.getVersion()
+ print('lvm version=', lvm.getVersion())
#Get a list of volume group names
vg_names = lvm.listVgNames()
diff --git a/python/liblvm.c b/python/liblvm.c
index 3828f2707..089abb367 100644
--- a/python/liblvm.c
+++ b/python/liblvm.c
@@ -25,6 +25,23 @@
#include "lvm2app.h"
#include "defaults.h"
+#if PY_MAJOR_VERSION >= 3
+#define IS_PY3K
+#define PYINTTYPE_CHECK PyLong_Check
+#define PYINTTYPE_ASLONG PyLong_AsLong
+#define PYINTTYPE_FROMLONG PyLong_FromLong
+#define PYSTRYPE_CHECK PyUnicode_Check
+#define PYSTRTYPE_ASSTRING PyUnicode_AsUTF8
+#define PYSTRTYPE_FROMSTRING PyUnicode_FromString
+#else
+#define PYINTTYPE_CHECK PyInt_Check
+#define PYINTTYPE_ASLONG PyInt_AsLong
+#define PYINTTYPE_FROMLONG PyInt_FromLong
+#define PYSTRYPE_CHECK PyString_Check
+#define PYSTRTYPE_ASSTRING PyString_AsString
+#define PYSTRTYPE_FROMSTRING PyString_FromString
+#endif
+
static lvm_t _libh;
@@ -148,10 +165,10 @@ static PyObject *_liblvm_get_last_error(void)
if (!(info = PyTuple_New(2)))
return NULL;
- PyTuple_SetItem(info, 0, PyInt_FromLong((long) lvm_errno(_libh)));
+ PyTuple_SetItem(info, 0, PYINTTYPE_FROMLONG((long) lvm_errno(_libh)));
msg = lvm_errmsg(_libh);
- PyTuple_SetItem(info, 1, ((msg) ? PyString_FromString(msg) :
- PyString_FromString("Memory error while retrieving error message")));
+ PyTuple_SetItem(info, 1, ((msg) ? PYSTRTYPE_FROMSTRING(msg) :
+ PYSTRTYPE_FROMSTRING("Memory error while retrieving error message")));
return info;
}
@@ -199,7 +216,7 @@ static PyObject *_liblvm_lvm_list_vg_names(void)
return NULL;
dm_list_iterate_items(strl, vgnames) {
- PyTuple_SET_ITEM(pytuple, i, PyString_FromString(strl->str));
+ PyTuple_SET_ITEM(pytuple, i, PYSTRTYPE_FROMSTRING(strl->str));
i++;
}
@@ -224,7 +241,7 @@ static PyObject *_liblvm_lvm_list_vg_uuids(void)
return NULL;
dm_list_iterate_items(strl, uuids) {
- PyTuple_SET_ITEM(pytuple, i, PyString_FromString(strl->str));
+ PyTuple_SET_ITEM(pytuple, i, PYSTRTYPE_FROMSTRING(strl->str));
i++;
}
@@ -853,10 +870,19 @@ static PyObject *get_property(struct lvm_property_value *prop)
if (!(pytuple = PyTuple_New(2)))
return NULL;
- if (prop->is_integer)
- PyTuple_SET_ITEM(pytuple, 0, Py_BuildValue("K", prop->value.integer));
- else
- PyTuple_SET_ITEM(pytuple, 0, PyString_FromString(prop->value.string));
+ if (prop->is_integer) {
+ if (prop->is_signed) {
+ PyTuple_SET_ITEM(pytuple, 0, Py_BuildValue("L", prop->value.signed_integer));
+ } else {
+ PyTuple_SET_ITEM(pytuple, 0, Py_BuildValue("K", prop->value.integer));
+ }
+ } else {
+ if ( prop->value.string ) {
+ PyTuple_SET_ITEM(pytuple, 0, PYSTRTYPE_FROMSTRING(prop->value.string));
+ } else {
+ PyTuple_SET_ITEM(pytuple, 0, Py_None);
+ }
+ }
if (prop->is_settable)
setable = Py_True;
@@ -904,14 +930,14 @@ static PyObject *_liblvm_lvm_vg_set_property(vgobject *self, PyObject *args)
if (!lvm_property.is_valid)
goto lvmerror;
- if (PyObject_IsInstance(variant_type_arg, (PyObject*)&PyString_Type)) {
+ if (PYSTRYPE_CHECK(variant_type_arg)) {
if (!lvm_property.is_string) {
PyErr_Format(PyExc_ValueError, "Property requires string value");
goto bail;
}
- if (!(string_value = PyString_AsString(variant_type_arg))) {
+ if (!(string_value = PYSTRTYPE_ASSTRING(variant_type_arg))) {
PyErr_NoMemory();
goto bail;
}
@@ -924,8 +950,8 @@ static PyObject *_liblvm_lvm_vg_set_property(vgobject *self, PyObject *args)
goto bail;
}
- if (PyObject_IsInstance(variant_type_arg, (PyObject*)&PyInt_Type)) {
- temp_py_int = PyInt_AsLong(variant_type_arg);
+ if (PYINTTYPE_CHECK(variant_type_arg)) {
+ temp_py_int = PYINTTYPE_ASLONG(variant_type_arg);
/* -1 could be valid, need to see if an exception was gen. */
if (temp_py_int == -1 && PyErr_Occurred())
@@ -1056,7 +1082,7 @@ static PyObject *_liblvm_lvm_vg_get_tags(vgobject *self)
return NULL;
dm_list_iterate_items(strl, tagsl) {
- PyTuple_SET_ITEM(pytuple, i, PyString_FromString(strl->str));
+ PyTuple_SET_ITEM(pytuple, i, PYSTRTYPE_FROMSTRING(strl->str));
i++;
}
@@ -1444,13 +1470,18 @@ static PyObject *_liblvm_lvm_lv_add_tag(lvobject *self, PyObject *args)
if (!PyArg_ParseTuple(args, "s", &tag))
return NULL;
- if (lvm_lv_add_tag(self->lv, tag) == -1) {
- PyErr_SetObject(_LibLVMError, _liblvm_get_last_error());
- return NULL;
- }
+ if (lvm_lv_add_tag(self->lv, tag) == -1)
+ goto error;
+
+ if (lvm_vg_write(self->parent_vgobj->vg) == -1)
+ goto error;
Py_INCREF(Py_None);
return Py_None;
+
+error:
+ PyErr_SetObject(_LibLVMError, _liblvm_get_last_error());
+ return NULL;
}
static PyObject *_liblvm_lvm_lv_remove_tag(lvobject *self, PyObject *args)
@@ -1462,14 +1493,18 @@ static PyObject *_liblvm_lvm_lv_remove_tag(lvobject *self, PyObject *args)
if (!PyArg_ParseTuple(args, "s", &tag))
return NULL;
- if (lvm_lv_remove_tag(self->lv, tag) == -1) {
- PyErr_SetObject(_LibLVMError, _liblvm_get_last_error());
- return NULL;
- }
+ if (lvm_lv_remove_tag(self->lv, tag) == -1)
+ goto error;
- Py_INCREF(Py_None);
+ if (lvm_vg_write(self->parent_vgobj->vg) == -1)
+ goto error;
+ Py_INCREF(Py_None);
return Py_None;
+
+error:
+ PyErr_SetObject(_LibLVMError, _liblvm_get_last_error());
+ return NULL;
}
static PyObject *_liblvm_lvm_lv_get_tags(lvobject *self)
@@ -1490,7 +1525,7 @@ static PyObject *_liblvm_lvm_lv_get_tags(lvobject *self)
return NULL;
dm_list_iterate_items(strl, tagsl) {
- PyTuple_SET_ITEM(pytuple, i, PyString_FromString(strl->str));
+ PyTuple_SET_ITEM(pytuple, i, PYSTRTYPE_FROMSTRING(strl->str));
i++;
}
@@ -1914,7 +1949,7 @@ static PyMethodDef _liblvm_pvseg_methods[] = {
};
static PyTypeObject _LibLVMvgType = {
- PyObject_HEAD_INIT(&PyType_Type)
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
.tp_name = "lvm.Liblvm_vg",
.tp_basicsize = sizeof(vgobject),
.tp_new = PyType_GenericNew,
@@ -1925,7 +1960,7 @@ static PyTypeObject _LibLVMvgType = {
};
static PyTypeObject _LibLVMlvType = {
- PyObject_HEAD_INIT(&PyType_Type)
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
.tp_name = "lvm.Liblvm_lv",
.tp_basicsize = sizeof(lvobject),
.tp_new = PyType_GenericNew,
@@ -1936,7 +1971,7 @@ static PyTypeObject _LibLVMlvType = {
};
static PyTypeObject _LibLVMpvlistType = {
- PyObject_HEAD_INIT(&PyType_Type)
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
.tp_name = "lvm.Liblvm_pvlist",
.tp_basicsize = sizeof(pvslistobject),
.tp_new = PyType_GenericNew,
@@ -1947,7 +1982,7 @@ static PyTypeObject _LibLVMpvlistType = {
};
static PyTypeObject _LibLVMpvType = {
- PyObject_HEAD_INIT(&PyType_Type)
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
.tp_name = "lvm.Liblvm_pv",
.tp_basicsize = sizeof(pvobject),
.tp_new = PyType_GenericNew,
@@ -1958,7 +1993,7 @@ static PyTypeObject _LibLVMpvType = {
};
static PyTypeObject _LibLVMlvsegType = {
- PyObject_HEAD_INIT(&PyType_Type)
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
.tp_name = "lvm.Liblvm_lvseg",
.tp_basicsize = sizeof(lvsegobject),
.tp_new = PyType_GenericNew,
@@ -1969,7 +2004,7 @@ static PyTypeObject _LibLVMlvsegType = {
};
static PyTypeObject _LibLVMpvsegType = {
- PyObject_HEAD_INIT(&PyType_Type)
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
.tp_name = "lvm.Liblvm_pvseg",
.tp_basicsize = sizeof(pvsegobject),
.tp_new = PyType_GenericNew,
@@ -1987,40 +2022,65 @@ static void _liblvm_cleanup(void)
}
}
+#ifdef IS_PY3K
+static struct PyModuleDef moduledef = {
+ PyModuleDef_HEAD_INIT,
+ "lvm",
+ "Liblvm module",
+ -1,
+ _Liblvm_methods,
+ NULL,
+ NULL,
+ NULL,
+ NULL
+};
+
+#define MODINITERROR return NULL
+PyObject *
+PyInit_lvm(void)
+
+#else
+#define MODINITERROR return
PyMODINIT_FUNC initlvm(void);
PyMODINIT_FUNC initlvm(void)
+#endif
{
PyObject *m;
_libh = lvm_init(NULL);
if (PyType_Ready(&_LibLVMvgType) < 0)
- return;
+ MODINITERROR;
if (PyType_Ready(&_LibLVMlvType) < 0)
- return;
+ MODINITERROR;
if (PyType_Ready(&_LibLVMpvType) < 0)
- return;
+ MODINITERROR;
if (PyType_Ready(&_LibLVMlvsegType) < 0)
- return;
+ MODINITERROR;
if (PyType_Ready(&_LibLVMpvsegType) < 0)
- return;
+ MODINITERROR;
if (PyType_Ready(&_LibLVMpvlistType) < 0)
- return;
+ MODINITERROR;
- if (!(m = Py_InitModule3("lvm", _Liblvm_methods, "Liblvm module")))
- return;
+#ifdef IS_PY3K
+ m = PyModule_Create(&moduledef);
+#else
+ m = Py_InitModule3("lvm", _Liblvm_methods, "Liblvm module");
+#endif
+ if (m == NULL)
+ MODINITERROR;
if (PyModule_AddIntConstant(m, "THIN_DISCARDS_IGNORE",
LVM_THIN_DISCARDS_IGNORE) < 0)
- return;
+ MODINITERROR;
if (PyModule_AddIntConstant(m, "THIN_DISCARDS_NO_PASSDOWN",
LVM_THIN_DISCARDS_NO_PASSDOWN) < 0)
- return;
+ MODINITERROR;
if (PyModule_AddIntConstant(m, "THIN_DISCARDS_PASSDOWN",
LVM_THIN_DISCARDS_PASSDOWN) < 0)
- return;
+ MODINITERROR;
if ((_LibLVMError = PyErr_NewException((char*)"lvm.LibLVMError", NULL, NULL))) {
/* Each call to PyModule_AddObject decrefs it; compensate: */
@@ -2031,4 +2091,7 @@ PyMODINIT_FUNC initlvm(void)
}
Py_AtExit(_liblvm_cleanup);
+#ifdef IS_PY3K
+ return m;
+#endif
}
diff --git a/scripts/.gitignore b/scripts/.gitignore
index 6fd8a2db8..3a668ae88 100644
--- a/scripts/.gitignore
+++ b/scripts/.gitignore
@@ -1,9 +1,24 @@
+blk_availability_init_red_hat
+blk_availability_systemd_red_hat.service
+blkdeactivate.sh
clvmd_init_red_hat
cmirrord_init_red_hat
dm_event_systemd_red_hat.service
+dm_event_systemd_red_hat.socket
+lvm2_activation_generator_systemd_red_hat
+lvm2_cluster_activation_red_hat.sh
+lvm2_cluster_activation_systemd_red_hat.service
+lvm2_clvmd_systemd_red_hat.service
+lvm2_cmirrord_systemd_red_hat.service
lvm2_lvmetad_init_red_hat
lvm2_lvmetad_systemd_red_hat.service
lvm2_lvmetad_systemd_red_hat.socket
+lvm2_lvmlockd_systemd_red_hat.service
+lvm2_lvmlocking_systemd_red_hat.service
+lvm2_lvmpolld_init_red_hat
+lvm2_lvmpolld_systemd_red_hat.service
+lvm2_lvmpolld_systemd_red_hat.socket
lvm2_monitoring_init_red_hat
lvm2_monitoring_systemd_red_hat.service
+lvm2_pvscan_systemd_red_hat@.service
lvm2_tmpfiles_red_hat.conf
diff --git a/scripts/Makefile.in b/scripts/Makefile.in
index 19fd5d84e..2ae532583 100644
--- a/scripts/Makefile.in
+++ b/scripts/Makefile.in
@@ -33,14 +33,15 @@ ifeq ("@DMEVENTD@", "yes")
LVMLIBS += -ldevmapper-event
endif
-SCRIPTS = lvmdump.sh lvmconf.sh vgimportclone.sh
+LVM_SCRIPTS = lvmdump.sh lvmconf.sh vgimportclone.sh
+DM_SCRIPTS =
ifeq ("@FSADM@", "yes")
- SCRIPTS += fsadm.sh
+ LVM_SCRIPTS += fsadm.sh
endif
ifeq ("@BLKDEACTIVATE@", "yes")
- SCRIPTS += blkdeactivate.sh
+ DM_SCRIPTS += blkdeactivate.sh
endif
OCF_SCRIPTS =
@@ -58,11 +59,12 @@ vpath %.ocf $(srcdir)
$(INSTALL_DIR) $(ocf_scriptdir)
$(INSTALL_SCRIPT) $< $(ocf_scriptdir)/$(basename $(<F))
-install_lvm2: $(SCRIPTS:.sh=_install)
+install_lvm2: $(LVM_SCRIPTS:.sh=_install)
+install_device-mapper: $(DM_SCRIPTS:.sh=_install)
install_ocf: $(OCF_SCRIPTS:.ocf=_install)
-install: install_lvm2 install_ocf
+install: install_lvm2 install_ocf install_device-mapper
# FIXME Customise for other distributions
install_initscripts:
@@ -73,6 +75,9 @@ endif
ifeq ("@BUILD_LVMETAD@", "yes")
$(INSTALL_SCRIPT) lvm2_lvmetad_init_red_hat $(initdir)/lvm2-lvmetad
endif
+ifeq ("@BUILD_LVMPOLLD@", "yes")
+ $(INSTALL_SCRIPT) lvm2_lvmpolld_init_red_hat $(initdir)/lvm2-lvmpolld
+endif
ifneq ("@CLVMD@", "none")
$(INSTALL_SCRIPT) clvmd_init_red_hat $(initdir)/clvmd
endif
@@ -112,6 +117,14 @@ ifeq ("@BUILD_LVMETAD@", "yes")
$(INSTALL_DATA) lvm2_lvmetad_systemd_red_hat.service $(systemd_unit_dir)/lvm2-lvmetad.service
$(INSTALL_DATA) lvm2_pvscan_systemd_red_hat@.service $(systemd_unit_dir)/lvm2-pvscan@.service
endif
+ifeq ("@BUILD_LVMPOLLD@", "yes")
+ $(INSTALL_DATA) lvm2_lvmpolld_systemd_red_hat.socket $(systemd_unit_dir)/lvm2-lvmpolld.socket
+ $(INSTALL_DATA) lvm2_lvmpolld_systemd_red_hat.service $(systemd_unit_dir)/lvm2-lvmpolld.service
+endif
+ifeq ("@BUILD_LVMLOCKD@", "yes")
+ $(INSTALL_DATA) lvm2_lvmlockd_systemd_red_hat.service $(systemd_unit_dir)/lvm2-lvmlockd.service
+ $(INSTALL_DATA) lvm2_lvmlocking_systemd_red_hat.service $(systemd_unit_dir)/lvm2-lvmlocking.service
+endif
ifneq ("@CLVMD@", "none")
$(INSTALL_DATA) lvm2_clvmd_systemd_red_hat.service $(systemd_unit_dir)/lvm2-clvmd.service
$(INSTALL_DATA) lvm2_cluster_activation_systemd_red_hat.service $(systemd_unit_dir)/lvm2-cluster-activation.service
@@ -140,6 +153,10 @@ DISTCLEAN_TARGETS += \
lvm2_lvmetad_init_red_hat \
lvm2_lvmetad_systemd_red_hat.service \
lvm2_lvmetad_systemd_red_hat.socket \
+ lvm2_lvmpolld_systemd_red_hat.service \
+ lvm2_lvmpolld_systemd_red_hat.socket \
+ lvm2_lvmlockd_systemd_red_hat.service \
+ lvm2_lvmlocking_systemd_red_hat.service \
lvm2_monitoring_init_red_hat \
lvm2_monitoring_systemd_red_hat.service \
lvm2_pvscan_systemd_red_hat@.service \
diff --git a/scripts/blk_availability_systemd_red_hat.service.in b/scripts/blk_availability_systemd_red_hat.service.in
index 9c1cb7834..3506738a4 100644
--- a/scripts/blk_availability_systemd_red_hat.service.in
+++ b/scripts/blk_availability_systemd_red_hat.service.in
@@ -1,6 +1,6 @@
[Unit]
Description=Availability of block devices
-After=lvm2-activation.service lvm2-lvmetad.service iscsi.service iscsid.service fcoe.service
+After=lvm2-activation.service lvm2-lvmetad.service iscsi-shutdown.service iscsi.service iscsid.service fcoe.service
DefaultDependencies=no
Conflicts=shutdown.target
diff --git a/scripts/blkdeactivate.sh.in b/scripts/blkdeactivate.sh.in
index f45415434..993f15130 100644
--- a/scripts/blkdeactivate.sh.in
+++ b/scripts/blkdeactivate.sh.in
@@ -1,6 +1,6 @@
#!/bin/bash
#
-# Copyright (C) 2012-2013 Red Hat, Inc. All rights reserved.
+# Copyright (C) 2012-2015 Red Hat, Inc. All rights reserved.
#
# This file is part of LVM2.
#
@@ -18,8 +18,10 @@
#
# Requires:
# bash >= 4.0 (associative array support)
-# lsblk >= 2.22 (lsblk -s support)
-# umount
+# util-linux {
+# lsblk >= 2.22 (lsblk -s support)
+# umount
+# }
# dmsetup >= 1.02.68 (--retry option support)
# lvm >= 2.2.89 (activation/retry_deactivation config support)
#
@@ -212,6 +214,11 @@ deactivate_lvm () {
test -z ${SKIP_VG_LIST["$DM_VG_NAME"]} || return 1
if test $LVM_DO_WHOLE_VG -eq 0; then
+ # Skip LVM device deactivation if LVM tools missing.
+ test $LVM_AVAILABLE -eq 0 && {
+ add_device_to_skip_list
+ return 1
+ }
# Deactivating only the LV specified
deactivate_holders "$DEV_DIR/$DM_VG_NAME/$DM_LV_NAME" || {
add_device_to_skip_list
@@ -227,6 +234,11 @@ deactivate_lvm () {
fi
else
+ # Skip LVM VG deactivation if LVM tools missing.
+ test $LVM_AVAILABLE -eq 0 && {
+ add_vg_to_skip_list
+ return 1
+ }
# Deactivating the whole VG the LV is part of
lv_list=$(eval $LVM vgs --config "$LVM_CONFIG" --noheadings --rows -o lv_name $DM_VG_NAME $ERR)
for lv in $lv_list; do
@@ -383,6 +395,12 @@ set_env() {
else
OUT="1>$DEV_DIR/null"
fi
+
+ if test -f $LVM; then
+ LVM_AVAILABLE=1
+ else
+ LVM_AVAILABLE=0
+ fi
}
while test $# -ne 0; do
diff --git a/scripts/clvmd_init_red_hat.in b/scripts/clvmd_init_red_hat.in
index 8c46c0665..d7f339240 100644
--- a/scripts/clvmd_init_red_hat.in
+++ b/scripts/clvmd_init_red_hat.in
@@ -24,9 +24,9 @@ exec_prefix=@exec_prefix@
sbindir=@sbindir@
lvm_vgchange=${sbindir}/vgchange
-lvm_vgdisplay=${sbindir}/vgdisplay
+lvm_vgs=${sbindir}/vgs
lvm_vgscan=${sbindir}/vgscan
-lvm_lvdisplay=${sbindir}/lvdisplay
+lvm_lvs=${sbindir}/lvs
CLVMDOPTS="-T30"
@@ -42,17 +42,12 @@ CLVMDOPTS="-T30"
LOCK_FILE="/var/lock/subsys/$DAEMON"
-# NOTE: replace this with vgs, once display filter per attr is implemented.
clustered_vgs() {
- ${lvm_vgdisplay} 2>/dev/null | \
- awk 'BEGIN {RS="VG Name"} {if (/Clustered/) print $1;}'
+ ${lvm_vgs} --noheadings -o vg_name -S 'vg_clustered=1' 2>/dev/null
}
clustered_active_lvs() {
- for i in $(clustered_vgs); do
- ${lvm_lvdisplay} $i 2>/dev/null | \
- awk 'BEGIN {RS="LV Name"} {if (/[^N^O^T] available/) print $1;}'
- done
+ ${lvm_lvs} --noheadings -o lv_name -S 'vg_clustered=1 && lv_active!=""' 2>/dev/null
}
rh_status() {
diff --git a/scripts/lvm2_activation_generator_systemd_red_hat.c b/scripts/lvm2_activation_generator_systemd_red_hat.c
index 6a003f587..cd71b9ce5 100644
--- a/scripts/lvm2_activation_generator_systemd_red_hat.c
+++ b/scripts/lvm2_activation_generator_systemd_red_hat.c
@@ -24,8 +24,9 @@
#include "lvm2app.h"
#include "configure.h" /* for LVM_PATH */
-#define KMSG_DEV_PATH "/dev/kmsg"
-#define LVM_CONF_USE_LVMETAD "global/use_lvmetad"
+#define KMSG_DEV_PATH "/dev/kmsg"
+#define LVM_CONF_USE_LVMETAD "global/use_lvmetad"
+#define LVM_CONF_USE_LVMPOLLD "global/use_lvmpolld"
#define UNIT_TARGET_LOCAL_FS "local-fs.target"
#define UNIT_TARGET_REMOTE_FS "remote-fs.target"
@@ -63,22 +64,21 @@ static void kmsg(int log_level, const char *format, ...)
return;
/* The n+4: +3 for "<n>" prefix and +1 for '\0' suffix */
- (void) write(kmsg_fd, message, n + 4);
+ if (write(kmsg_fd, message, n + 4)) { /* Ignore result code */; }
}
-static int lvm_uses_lvmetad(void)
+static void lvm_get_use_lvmetad_and_lvmpolld(int *use_lvmetad, int *use_lvmpolld)
{
lvm_t lvm;
- int r;
+ *use_lvmetad = *use_lvmpolld = 0;
if (!(lvm = lvm_init(NULL))) {
kmsg(LOG_ERR, "LVM: Failed to initialize library context for activation generator.\n");
- return 0;
+ return;
}
- r = lvm_config_find_bool(lvm, LVM_CONF_USE_LVMETAD, 0);
+ *use_lvmetad = lvm_config_find_bool(lvm, LVM_CONF_USE_LVMETAD, 0);
+ *use_lvmpolld = lvm_config_find_bool(lvm, LVM_CONF_USE_LVMPOLLD, 0);
lvm_quit(lvm);
-
- return r;
}
static int register_unit_with_target(const char *dir, const char *unit, const char *target)
@@ -107,7 +107,7 @@ out:
return r;
}
-static int generate_unit(const char *dir, int unit)
+static int generate_unit(const char *dir, int unit, int sysinit_needed)
{
FILE *f;
const char *unit_name = unit_names[unit];
@@ -150,8 +150,10 @@ static int generate_unit(const char *dir, int unit)
"[Service]\n", f);
}
- fputs("ExecStart=" LVM_PATH " vgchange -aay --sysinit --ignoreskippedcluster\n"
- "Type=oneshot\n", f);
+ fputs("ExecStart=" LVM_PATH " vgchange -aay --ignoreskippedcluster", f);
+ if (sysinit_needed)
+ fputs (" --sysinit", f);
+ fputs("\nType=oneshot\n", f);
if (fclose(f) < 0) {
kmsg(LOG_ERR, "LVM: Failed to write unit file %s: %m.\n", unit_name);
@@ -168,6 +170,7 @@ static int generate_unit(const char *dir, int unit)
int main(int argc, char *argv[])
{
+ int use_lvmetad, use_lvmpolld, sysinit_needed;
const char *dir;
int r = EXIT_SUCCESS;
mode_t old_mask;
@@ -180,16 +183,20 @@ int main(int argc, char *argv[])
}
/* If lvmetad used, rely on autoactivation instead of direct activation. */
- if (lvm_uses_lvmetad())
+ lvm_get_use_lvmetad_and_lvmpolld(&use_lvmetad, &use_lvmpolld);
+ if (use_lvmetad)
goto out;
dir = argv[1];
/* mark lvm2-activation.*.service as world-accessible */
old_mask = umask(0022);
- if (!generate_unit(dir, UNIT_EARLY) ||
- !generate_unit(dir, UNIT_MAIN) ||
- !generate_unit(dir, UNIT_NET))
+
+ sysinit_needed = !use_lvmpolld;
+
+ if (!generate_unit(dir, UNIT_EARLY, sysinit_needed) ||
+ !generate_unit(dir, UNIT_MAIN, sysinit_needed) ||
+ !generate_unit(dir, UNIT_NET, sysinit_needed))
r = EXIT_FAILURE;
umask(old_mask);
out:
diff --git a/scripts/lvm2_cluster_activation_red_hat.sh.in b/scripts/lvm2_cluster_activation_red_hat.sh.in
index 0d4676c41..abea02682 100644
--- a/scripts/lvm2_cluster_activation_red_hat.sh.in
+++ b/scripts/lvm2_cluster_activation_red_hat.sh.in
@@ -7,16 +7,8 @@ lvm_vgscan=${sbindir}/vgscan
lvm_vgs=${sbindir}/vgs
lvm_lvm=${sbindir}/lvm
-parse_clustered_vgs() {
- while read -r name attrs;
- do
- test "${attrs:5:1}" == 'c' && echo -n "$name "
- done
-}
-
-# NOTE: replace this with vgs, once display filter per attr is implemented.
clustered_vgs() {
- ${lvm_vgs} -o vg_name,vg_attr --noheadings | parse_clustered_vgs
+ ${lvm_vgs} --noheadings -o vg_name -S 'vg_clustered=1' 2>/dev/null
}
activate() {
diff --git a/scripts/lvm2_lvmlockd_systemd_red_hat.service.in b/scripts/lvm2_lvmlockd_systemd_red_hat.service.in
new file mode 100644
index 000000000..17c7dbf91
--- /dev/null
+++ b/scripts/lvm2_lvmlockd_systemd_red_hat.service.in
@@ -0,0 +1,16 @@
+[Unit]
+Description=LVM2 lock daemon
+Documentation=man:lvmlockd(8)
+After=lvm2-lvmetad.service
+
+[Service]
+Type=simple
+NonBlocking=true
+ExecStart=@sbindir@/lvmlockd -f
+Environment=SD_ACTIVATION=1
+PIDFile=@LVMLOCKD_PIDFILE@
+SendSIGKILL=no
+
+[Install]
+WantedBy=multi-user.target
+
diff --git a/scripts/lvm2_lvmlocking_systemd_red_hat.service.in b/scripts/lvm2_lvmlocking_systemd_red_hat.service.in
new file mode 100644
index 000000000..bfac578a7
--- /dev/null
+++ b/scripts/lvm2_lvmlocking_systemd_red_hat.service.in
@@ -0,0 +1,24 @@
+[Unit]
+Description=Availability of lockspaces in lvmlockd
+Documentation=man:lvmlockd(8)
+After=lvm2-lvmlockd.service sanlock.service dlm.service
+
+[Service]
+Type=oneshot
+RemainAfterExit=yes
+
+# start lockspaces and wait for them to finish starting
+ExecStart=@sbindir@/vgchange --lock-start --lock-opt autowait
+
+# auto activate LVs in the newly started lockd VGs
+ExecStart=@sbindir@/vgchange -aay -S 'locktype=sanlock || locktype=dlm'
+
+# deactivate LVs in lockd VGs
+ExecStop=@sbindir@/vgchange -an -S 'locktype=sanlock || locktype=dlm'
+
+# stop lockspaces and wait for them to finish stopping
+ExecStop=@sbindir@/lvmlockctl --stop-lockspaces --wait 1
+
+[Install]
+WantedBy=multi-user.target
+
diff --git a/scripts/lvm2_lvmpolld_init_red_hat.in b/scripts/lvm2_lvmpolld_init_red_hat.in
new file mode 100644
index 000000000..0a03f01c7
--- /dev/null
+++ b/scripts/lvm2_lvmpolld_init_red_hat.in
@@ -0,0 +1,114 @@
+#!/bin/bash
+#
+# Copyright (C) 2015 Red Hat, Inc. All rights reserved.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU General Public License v.2.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# This file is part of LVM2.
+# It is required for the proper handling of failures of LVM2 mirror
+# devices that were created using the -m option of lvcreate.
+#
+#
+# chkconfig: 12345 02 99
+# description: Starts and stops LVM poll daemon
+#
+# For Red-Hat-based distributions such as Fedora, RHEL, CentOS.
+#
+### BEGIN INIT INFO
+# Provides: lvm2-lvmpolld
+# Required-Start: $local_fs
+# Required-Stop: $local_fs
+# Default-Start: 1 2 3 4 5
+# Default-Stop: 0 6
+# Short-Description: A daemon that is responsible for monitoring in-progress
+# and possibly longer term operations on logical volumes.
+# It helps to reduce the number of spawned processes if same
+# logical volume is requested to get monitored multiple times.
+# Also avoids unsolicited termination due to external factors.
+### END INIT INFO
+
+. /etc/init.d/functions
+
+DAEMON=lvmpolld
+
+exec_prefix=@exec_prefix@
+sbindir=@sbindir@
+
+LOCK_FILE="/var/lock/subsys/$DAEMON"
+PID_FILE="@LVMPOLLD_PIDFILE@"
+
+rh_status() {
+ status -p $PID_FILE $DAEMON
+}
+
+rh_status_q() {
+ rh_status >/dev/null 2>&1
+}
+
+start()
+{
+ ret=0
+ action "Starting LVM poll daemon:" $DAEMON || ret=$?
+ return $ret
+}
+
+
+stop()
+{
+ ret=0
+ action "Signaling LVM poll daemon to exit:" killproc -p $PID_FILE $DAEMON -TERM || ret=$?
+ return $ret
+}
+
+rtrn=1
+
+# See how we were called.
+case "$1" in
+ start)
+ rh_status_q && exit 0
+ start
+ rtrn=$?
+ [ $rtrn = 0 ] && touch $LOCK_FILE
+ ;;
+
+ stop|force-stop)
+ rh_status_q || exit 0
+ stop
+ rtrn=$?
+ [ $rtrn = 0 ] && rm -f $LOCK_FILE
+ ;;
+
+ restart)
+ if stop
+ then
+ start
+ fi
+ rtrn=$?
+ ;;
+
+ condrestart|try-restart)
+ rh_status_q || exit 0
+ if stop
+ then
+ start
+ fi
+ rtrn=$?
+ ;;
+
+ status)
+ rh_status
+ rtrn=$?
+ ;;
+
+ *)
+ echo $"Usage: $0 {start|stop|force-stop|restart|condrestart|try-restart|status}"
+ ;;
+esac
+
+exit $rtrn
diff --git a/scripts/lvm2_lvmpolld_systemd_red_hat.service.in b/scripts/lvm2_lvmpolld_systemd_red_hat.service.in
new file mode 100644
index 000000000..d7ff492ae
--- /dev/null
+++ b/scripts/lvm2_lvmpolld_systemd_red_hat.service.in
@@ -0,0 +1,17 @@
+[Unit]
+Description=LVM2 poll daemon
+Documentation=man:lvmpolld(8)
+Requires=lvm2-lvmpolld.socket
+After=lvm2-lvmpolld.socket
+DefaultDependencies=no
+Conflicts=shutdown.target
+
+[Service]
+Type=simple
+NonBlocking=true
+ExecStart=@sbindir@/lvmpolld -t 60 -f
+Environment=SD_ACTIVATION=1
+PIDFile=@LVMPOLLD_PIDFILE@
+
+[Install]
+WantedBy=sysinit.target
diff --git a/scripts/lvm2_lvmpolld_systemd_red_hat.socket.in b/scripts/lvm2_lvmpolld_systemd_red_hat.socket.in
new file mode 100644
index 000000000..ca9f1237f
--- /dev/null
+++ b/scripts/lvm2_lvmpolld_systemd_red_hat.socket.in
@@ -0,0 +1,12 @@
+[Unit]
+Description=LVM2 poll daemon socket
+Documentation=man:lvmpolld(8)
+DefaultDependencies=no
+
+[Socket]
+ListenStream=@DEFAULT_RUN_DIR@/lvmpolld.socket
+SocketMode=0600
+RemoveOnStop=true
+
+[Install]
+WantedBy=sysinit.target
diff --git a/scripts/lvm2_monitoring_init_red_hat.in b/scripts/lvm2_monitoring_init_red_hat.in
index 44de07f1b..7c5e80597 100644
--- a/scripts/lvm2_monitoring_init_red_hat.in
+++ b/scripts/lvm2_monitoring_init_red_hat.in
@@ -32,18 +32,28 @@
. /etc/init.d/functions
DAEMON=lvm2-monitor
+DMEVENTD_DAEMON=dmeventd
exec_prefix=@exec_prefix@
sbindir=@sbindir@
VGCHANGE=${sbindir}/vgchange
VGS=${sbindir}/vgs
+LVS=${sbindir}/lvs
LOCK_FILE="/var/lock/subsys/$DAEMON"
+PID_FILE="@DMEVENTD_PIDFILE@"
WARN=1
export LVM_SUPPRESS_LOCKING_FAILURE_MESSAGES=1
+rh_status() {
+ status -p $PID_FILE $DMEVENTD_DAEMON
+}
+
+rh_status_q() {
+ rh_status >/dev/null 2>&1
+}
start()
{
ret=0
@@ -79,12 +89,14 @@ rtrn=1
# See how we were called.
case "$1" in
start)
+ rh_status_q && exit 0
start
rtrn=$?
[ $rtrn = 0 ] && touch $LOCK_FILE
;;
force-stop)
+ rh_status_q || exit 0
WARN=0
stop
rtrn=$?
@@ -92,6 +104,7 @@ case "$1" in
;;
stop)
+ rh_status_q || exit 0
test "$runlevel" = "0" && WARN=0
test "$runlevel" = "6" && WARN=0
stop
@@ -109,7 +122,9 @@ case "$1" in
;;
status)
- # TODO anyone with an idea how to dump monitored volumes?
+ rh_status
+ rtrn=$?
+ [ $rtrn = 0 ] && $LVS -S 'seg_monitor=monitored' -o lv_full_name,seg_monitor
;;
*)
diff --git a/scripts/lvm2_monitoring_systemd_red_hat.service.in b/scripts/lvm2_monitoring_systemd_red_hat.service.in
index bb52f0d56..6aacdc270 100644
--- a/scripts/lvm2_monitoring_systemd_red_hat.service.in
+++ b/scripts/lvm2_monitoring_systemd_red_hat.service.in
@@ -2,7 +2,7 @@
Description=Monitoring of LVM2 mirrors, snapshots etc. using dmeventd or progress polling
Documentation=man:dmeventd(8) man:lvcreate(8) man:lvchange(8) man:vgchange(8)
Requires=dm-event.socket lvm2-lvmetad.socket
-After=dm-event.socket lvm2-lvmetad.socket lvm2-activation.service lvm2-lvmetad.service
+After=dm-event.socket dm-event.service lvm2-lvmetad.socket lvm2-activation.service lvm2-lvmetad.service
Before=local-fs.target
DefaultDependencies=no
Conflicts=shutdown.target
diff --git a/scripts/lvm2_pvscan_systemd_red_hat@.service.in b/scripts/lvm2_pvscan_systemd_red_hat@.service.in
index 7d3b291a2..03651d576 100644
--- a/scripts/lvm2_pvscan_systemd_red_hat@.service.in
+++ b/scripts/lvm2_pvscan_systemd_red_hat@.service.in
@@ -4,7 +4,7 @@ Documentation=man:pvscan(8)
DefaultDependencies=no
BindsTo=dev-block-%i.device
Requires=lvm2-lvmetad.socket
-After=lvm2-lvmetad.socket
+After=lvm2-lvmetad.socket lvm2-lvmetad.service
Before=shutdown.target
Conflicts=shutdown.target
diff --git a/scripts/lvmconf.sh b/scripts/lvmconf.sh
index bfe518e82..c3c170036 100644
--- a/scripts/lvmconf.sh
+++ b/scripts/lvmconf.sh
@@ -16,22 +16,48 @@
# Edit an lvm.conf file to adjust various properties
#
-DEFAULT_USE_LVMETAD=0
+# cluster with clvmd and/or locking lib?
+HANDLE_CLUSTER=0
+
+# cluster without clvmd?
+HANDLE_HALVM=0
+
+# also enable services appropriately (lvmetad, clvmd)?
+HANDLE_SERVICES=0
+
+# also enable cmirrord service in addition?
+HANDLE_MIRROR_SERVICE=0
+
+# also start/start services in addition to enabling/disabling them?
+START_STOP_SERVICES=0
function usage
{
- echo "usage: $0 <command>"
+ echo "Usage: $0 <command>"
echo ""
echo "Commands:"
echo "Enable clvm: --enable-cluster [--lockinglibdir <dir>] [--lockinglib <lib>]"
echo "Disable clvm: --disable-cluster"
+ echo "Enable halvm: --enable-halvm"
+ echo "Disable halvm: --disable-halvm"
echo "Set locking library: --lockinglibdir <dir> [--lockinglib <lib>]"
echo ""
echo "Global options:"
echo "Config file location: --file <configfile>"
+ echo "Set services: --services [--mirrorservice] [--startstopservices]"
echo ""
+ echo "Use the separate command 'lvmconfig' to display configuration information"
}
+function set_default_use_lvmetad_var
+{
+ eval $(lvm dumpconfig --type default global/use_lvmetad 2>/dev/null)
+ if [ "$?" != "0" ]; then
+ USE_LVMETAD=0
+ else
+ USE_LVMETAD=$use_lvmetad
+ fi
+}
function parse_args
{
@@ -40,13 +66,27 @@ function parse_args
--enable-cluster)
LOCKING_TYPE=3
USE_LVMETAD=0
+ HANDLE_CLUSTER=1
shift
;;
--disable-cluster)
LOCKING_TYPE=1
- USE_LVMETAD=$DEFAULT_USE_LVMETAD
+ set_default_use_lvmetad_var
+ HANDLE_CLUSTER=1
shift
;;
+ --enable-halvm)
+ LOCKING_TYPE=1
+ USE_LVMETAD=0
+ HANDLE_HALVM=1
+ shift
+ ;;
+ --disable-halvm)
+ LOCKING_TYPE=1
+ set_default_use_lvmetad_var
+ HANDLE_HALVM=1
+ shift
+ ;;
--lockinglibdir)
if [ -n "$2" ]; then
LOCKINGLIBDIR=$2
@@ -55,6 +95,7 @@ function parse_args
usage
exit 1
fi
+ HANDLE_CLUSTER=1
;;
--lockinglib)
if [ -n "$2" ]; then
@@ -64,6 +105,7 @@ function parse_args
usage
exit 1
fi
+ HANDLE_CLUSTER=1
;;
--file)
if [ -n "$2" ]; then
@@ -74,6 +116,18 @@ function parse_args
exit 1
fi
;;
+ --services)
+ HANDLE_SERVICES=1
+ shift
+ ;;
+ --mirrorservice)
+ HANDLE_MIRROR_SERVICE=1
+ shift
+ ;;
+ --startstopservices)
+ START_STOP_SERVICES=1
+ shift
+ ;;
*)
usage
exit 1
@@ -91,6 +145,22 @@ function validate_args
exit 10
fi
+ if [ "$HANDLE_CLUSTER" = "1" -a "$HANDLE_HALVM" = "1" ]; then
+ echo "Either HA LVM or cluster method may be used at one time"
+ exit 18
+ fi
+
+ if [ "$HANDLE_SERVICES" = "0" ]; then
+ if [ "$HANDLE_MIRROR_SERVICE" = "1" ]; then
+ echo "--mirrorservice may be used only with --services"
+ exit 19
+ fi
+ if [ "$START_STOP_SERVICES" = "1" ]; then
+ echo "--startstopservices may be used only with --services"
+ exit 19
+ fi
+ fi
+
if [ -z "$LOCKING_TYPE" ] && [ -z "$LOCKINGLIBDIR" ]; then
usage
exit 1
@@ -277,3 +347,104 @@ if [ $? != 0 ]
fi
rm -f $SCRIPTFILE $TMPFILE
+
+function set_service {
+ local type="$1"
+ local action="$2"
+ shift 2
+
+ if [ "$type" = "systemd" ]; then
+ if [ "$action" = "activate" ]; then
+ for i in $@; do
+ eval $($SYSTEMCTL_BIN show $i -p LoadState)
+ test "$LoadState" = "loaded" || continue
+ $SYSTEMCTL_BIN enable $i
+ if [ "$START_STOP_SERVICES" = "1" ]; then
+ $SYSTEMCTL_BIN start $i
+ fi
+ done
+ elif [ "$action" = "deactivate" ]; then
+ for i in $@; do
+ eval $($SYSTEMCTL_BIN show $i -p LoadState)
+ test "$LoadState" = "loaded" || continue
+ $SYSTEMCTL_BIN disable $i
+ if [ "$START_STOP_SERVICES" = "1" ]; then
+ $SYSTEMCTL_BIN stop $i
+ fi
+ done
+ fi
+ elif [ "$type" = "sysv" ]; then
+ if [ "$action" = "activate" ]; then
+ for i in $@; do
+ $CHKCONFIG_BIN --list $i > /dev/null || continue
+ $CHKCONFIG_BIN $i on
+ if [ "$START_STOP_SERVICES" = "1" ]; then
+ $SERVICE_BIN $i start
+ fi
+ done
+ elif [ "$action" = "deactivate" ]; then
+ for i in $@; do
+ $CHKCONFIG_BIN --list $i > /dev/null || continue
+ if [ "$START_STOP_SERVICES" = "1" ]; then
+ $SERVICE_BIN $i stop
+ fi
+ $CHKCONFIG_BIN $i off
+ done
+ fi
+ fi
+}
+
+# Start/stop and enable/disable services if needed.
+
+if [ "$HANDLE_SERVICES" == "1" ]; then
+
+ SYSTEMCTL_BIN=$(which systemctl 2>/dev/null)
+ CHKCONFIG_BIN=$(which chkconfig 2>/dev/null)
+ SERVICE_BIN=$(which service 2>/dev/null)
+
+ # Systemd services
+ if [ -n "$SYSTEMCTL_BIN" ]; then
+ if [ "$USE_LVMETAD" = "0" ]; then
+ set_service systemd deactivate lvm2-lvmetad.service lvm2-lvmetad.socket
+ else
+ set_service systemd activate lvm2-lvmetad.socket
+ fi
+
+ if [ "$LOCKING_TYPE" = "3" ]; then
+ set_service systemd activate lvm2-cluster-activation.service
+ if [ "$HANDLE_MIRROR_SERVICE" = "1" ]; then
+ set_service activate lvm2-cmirrord.service
+ fi
+ else
+ set_service systemd deactivate lvm2-cluster-activation.service
+ if [ "$HANDLE_MIRROR_SERVICE" = "1" ]; then
+ set_service systemd deactivate lvm2-cmirrord.service
+ fi
+ fi
+
+ # System V init scripts
+ elif [ -n "$SERVICE_BIN" -a -n "$CHKCONFIG_BIN" ]; then
+ if [ "$USE_LVMETAD" = "0" ]; then
+ set_service sysv deactivate lvm2-lvmetad
+ else
+ set_service sysv activate lvm2-lvmetad
+ fi
+
+ if [ "$LOCKING_TYPE" = "3" ]; then
+ set_service sysv activate clvmd
+ if [ "$HANDLE_MIRROR_SERVICE" = "1" ]; then
+ set_service sysv activate cmirrord
+ fi
+ else
+ set_service sysv deactivate clvmd
+ if [ "$HANDLE_MIRROR_SERVICE" = "1" ]; then
+ set_service sysv deactivate cmirrord
+ fi
+ fi
+
+ # None of the service tools found, error out
+ else
+ echo "Missing tools to handle services"
+ exit 20
+ fi
+fi
diff --git a/scripts/lvmdump.sh b/scripts/lvmdump.sh
index 9229912ee..f88f31fcd 100755
--- a/scripts/lvmdump.sh
+++ b/scripts/lvmdump.sh
@@ -1,7 +1,7 @@
#!/bin/bash
# We use some bash-isms (getopts?)
-# Copyright (C) 2007-2010 Red Hat, Inc. All rights reserved.
+# Copyright (C) 2007-2015 Red Hat, Inc. All rights reserved.
#
# This file is part of LVM2.
#
@@ -41,6 +41,7 @@ NC=nc
LVM=${LVM_BINARY-lvm}
DMSETUP=${DMSETUP_BINARY-dmsetup}
LVMETAD_SOCKET=${LVM_LVMETAD_SOCKET-/var/run/lvm/lvmetad.socket}
+LVMPOLLD_SOCKET=${LVM_LVMPOLLD_SOCKET-/var/run/lvm/lvmpolld.socket}
die() {
code=$1; shift
@@ -59,6 +60,7 @@ function usage {
echo " -c if running clvmd, gather cluster data as well"
echo " -d <directory> dump into a directory instead of tarball"
echo " -l gather lvmetad state if running"
+ echo " -p gather lvmpolld state if running"
echo " -m gather LVM metadata from the PVs"
echo " -s gather system info and context"
echo " -u gather udev info and context"
@@ -72,13 +74,14 @@ clustered=0
metadata=0
sysreport=0
udev=0
-while getopts :acd:hlmus opt; do
+while getopts :acd:hlpmus opt; do
case $opt in
a) advanced=1 ;;
c) clustered=1 ;;
d) userdir=$OPTARG ;;
h) usage ;;
l) lvmetad=1 ;;
+ p) lvmpolld=1 ;;
m) metadata=1 ;;
s) sysreport=1 ;;
u) udev=1 ;;
@@ -255,6 +258,7 @@ if (( $sysreport )); then
log "$SYSTEMCTL status -l --no-pager -n $log_lines -o short-precise dm-event.socket dm-event.service \
lvm2-monitor.service \
lvm2-lvmetad.socket lvm2-lvmetad.service \
+ lvm2-lvmpolld.socket lvm2-lvmpolld.service \
lvm2-cluster-activation.service \
lvm2-clvmd.service \
lvm2-cmirrord.service \
@@ -296,6 +300,22 @@ if (( $lvmetad )); then
} > "$dir/lvmetad.txt"
fi
+if (( $lvmpolld )); then
+ (echo 'request="dump"'; echo '##') | {
+ if type -p $SOCAT >& /dev/null; then
+ echo "$SOCAT unix-connect:$LVMPOLLD_SOCKET -" >> "$log"
+ $SOCAT "unix-connect:$LVMPOLLD_SOCKET" - 2>> "$log"
+ elif echo | $NC -U "$LVMPOLLD_SOCKET"; then
+ echo "$NC -U $LVMPOLLD_SOCKET" >> "$log"
+ $NC -U "$LVMPOLLD_SOCKET" 2>> "$log"
+ else
+ myecho "WARNING: Neither socat nor nc -U seems to be available." 1>&2
+ echo "# DUMP FAILED"
+ return 1
+ fi
+ } > "$dir/lvmpolld.txt"
+fi
+
if test -z "$userdir"; then
lvm_dump="$dirbase.tgz"
myecho "Creating report tarball in $HOME/$lvm_dump..."
diff --git a/scripts/vgimportclone.sh b/scripts/vgimportclone.sh
index 719cd8dbb..388d14c87 100755
--- a/scripts/vgimportclone.sh
+++ b/scripts/vgimportclone.sh
@@ -204,11 +204,6 @@ for ARG
do
if [ -b "$ARG" ]
then
- PVS_OUT=`"${LVM}" pvs ${LVM_OPTS} --noheadings -o vg_name "$ARG"`
- checkvalue $? "$ARG could not be verified to be a PV without errors."
- PV_VGNAME=$(echo $PVS_OUT | $GREP -v '[[:space:]]+$')
- [ -z "$PV_VGNAME" ] && die 3 "$ARG is not in a VG."
-
ln -s "$ARG" ${TMP_LVM_SYSTEM_DIR}/vgimport${DEVNO}
DISKS="${DISKS} ${TMP_LVM_SYSTEM_DIR}/vgimport${DEVNO}"
DEVNO=$((${DEVNO}+1))
@@ -369,6 +364,7 @@ then
if [ "$use_lvmetad" = "1" ]
then
echo "Notifying lvmetad about changes since it was disabled temporarily."
+ echo "(This resolves any WARNING message about restarting lvmetad that appears above.)"
LVM_OPTS="${LVM_OPTS} --cache"
fi
diff --git a/spec/build.inc b/spec/build.inc
index 95c9b8357..94d5e06d3 100644
--- a/spec/build.inc
+++ b/spec/build.inc
@@ -4,6 +4,13 @@
%enableif %{enable_profiling} profiling
%global enable_lvmetad %(if echo %{services} | grep -q lvmetad; then echo 1; else echo 0; fi)
%enableif %{enable_lvmetad} lvmetad
+%global enable_lvmpolld %(if echo %{services} | grep -q lvmpolld; then echo 1; else echo 0; fi)
+%enableif %{enable_lvmpolld} lvmpolld
+%global enable_lvmlockd %(if echo %{services} | grep -q lvmlockd; then echo 1; else echo 0; fi)
+%if %{enable_lvmlockd}
+%enableif %{enable_lockd_dlm} lockd-dlm
+%enableif %{enable_lockd_sanlock} lockd-sanlock
+%endif
%build
%configure \
@@ -24,6 +31,7 @@
--enable-applib \
--enable-cmdlib \
--enable-dmeventd \
+ --enable-write_install \
%{configure_flags}
make %{?_smp_mflags}
@@ -32,10 +40,14 @@ make %{?_smp_mflags}
%install
make install DESTDIR=$RPM_BUILD_ROOT
make install_system_dirs DESTDIR=$RPM_BUILD_ROOT
-make install_initscripts DESTDIR=$RPM_BUILD_ROOT
%if %{enable_systemd}
make install_systemd_units DESTDIR=$RPM_BUILD_ROOT
make install_tmpfiles_configuration DESTDIR=$RPM_BUILD_ROOT
+%else
+make install_initscripts DESTDIR=$RPM_BUILD_ROOT
+%endif
+%if %{enable_testsuite}
+make -C test install DESTDIR=$RPM_BUILD_ROOT
%endif
# when building an src.rpm from freestanding specfiles
diff --git a/spec/packages.inc b/spec/packages.inc
index f0b7cfdd5..fc683350b 100644
--- a/spec/packages.inc
+++ b/spec/packages.inc
@@ -77,17 +77,22 @@ fi
%{_sbindir}/vgs
%{_sbindir}/vgscan
%{_sbindir}/vgsplit
+%{_sbindir}/lvmconfig
%{_sbindir}/lvmconf
%{_sbindir}/blkdeactivate
%if %{have_service lvmetad}
%{_sbindir}/lvmetad
%endif
+%if %{have_service lvmpolld}
+ %{_sbindir}/lvmpolld
+%endif
%if %{have_with cache}
%{_mandir}/man7/lvmcache.7.gz
%endif
%if %{have_with thin}
%{_mandir}/man7/lvmthin.7.gz
%endif
+%{_mandir}/man7/lvmsystemid.7.gz
%{_mandir}/man5/lvm.conf.5.gz
%{_mandir}/man8/fsadm.8.gz
%{_mandir}/man8/lvchange.8.gz
@@ -138,6 +143,8 @@ fi
%{_mandir}/man8/vgsplit.8.gz
%{_mandir}/man8/blkdeactivate.8.gz
%{_mandir}/man8/lvm-dumpconfig.8.gz
+%{_mandir}/man8/lvm-config.8.gz
+%{_mandir}/man8/lvmconfig.8.gz
%if %{enable_udev}
%{_udevdir}/11-dm-lvm.rules
%if %{have_service lvmetad}
@@ -145,17 +152,23 @@ fi
%{_udevdir}/69-dm-lvm-metad.rules
%endif
%endif
+%if %{have_service lvmpolld}
+ %{_mandir}/man8/lvmpolld.8.gz
+ %{_mandir}/man8/lvm-lvpoll.8.gz
+%endif
%dir %{_sysconfdir}/lvm
%ghost %{_sysconfdir}/lvm/cache/.cache
%config(noreplace) %verify(not md5 mtime size) %{_sysconfdir}/lvm/lvm.conf
+%config(noreplace) %verify(not md5 mtime size) %{_sysconfdir}/lvm/lvmlocal.conf
%config(noreplace) %verify(not md5 mtime size) %{_sysconfdir}/lvm/profile/command_profile_template.profile
%config(noreplace) %verify(not md5 mtime size) %{_sysconfdir}/lvm/profile/metadata_profile_template.profile
%config(noreplace) %verify(not md5 mtime size) %{_sysconfdir}/lvm/profile/thin-generic.profile
%config(noreplace) %verify(not md5 mtime size) %{_sysconfdir}/lvm/profile/thin-performance.profile
+%config(noreplace) %verify(not md5 mtime size) %{_sysconfdir}/lvm/profile/cache-mq.profile
+%config(noreplace) %verify(not md5 mtime size) %{_sysconfdir}/lvm/profile/cache-smq.profile
%dir %{_sysconfdir}/lvm/backup
%dir %{_sysconfdir}/lvm/cache
%dir %{_sysconfdir}/lvm/archive
-%dir %{_default_locking_dir}
%dir %{_default_run_dir}
%if %{enable_systemd}
%config(noreplace) %{_prefix}/lib/tmpfiles.d/%{name}.conf
@@ -166,12 +179,19 @@ fi
%{_unitdir}/lvm2-pvscan@.service
%{_unitdir}/blk-availability.service
%endif
+ %if %{have_service lvmpolld}
+ %{_unitdir}/lvm2-lvmpolld.service
+ %{_unitdir}/lvm2-lvmpolld.socket
+ %endif
%else
%{_sysconfdir}/rc.d/init.d/lvm2-monitor
%{_sysconfdir}/rc.d/init.d/blk-availability
%if %{have_service lvmetad}
%{_sysconfdir}/rc.d/init.d/lvm2-lvmetad
%endif
+ %if %{have_service lvmpolld}
+ %{_sysconfdir}/rc.d/init.d/lvm2-lvmpolld
+ %endif
%endif
##############################################################################
@@ -229,6 +249,46 @@ This package contains shared lvm2 libraries for applications.
%{_libdir}/libdevmapper-event-lvm2snapshot.so
%{_libdir}/libdevmapper-event-lvm2raid.so
+
+##############################################################################
+# LVM locking daemon
+##############################################################################
+%if %{have_service lvmlockd}
+%package lockd
+Summary: LVM locking daemon
+Group: System Environment/Base
+Requires: lvm2 = %{version}-%{release}
+%if %{enable_lockd_dlm}
+Requires: dlm-lib >= %{dlm_version}
+%endif
+%if %{enable_lockd_sanlock}
+Requires: sanlock-lib >= %{sanlock_version}
+%endif
+Requires(post): systemd-units
+Requires(preun): systemd-units
+Requires(postun): systemd-units
+
+%description lockd
+LVM commands use lvmlockd to coordinate access to shared storage.
+
+%post lockd
+%systemd_post lvm2-lvmlockd.service lvm2-lvmlocking.service
+
+%preun lockd
+%systemd_preun lvm2-lvmlockd.service lvm2-lvmlocking.service
+
+%postun lockd
+%systemd_postun lvm2-lvmlockd.service lvm2-lvmlocking.service
+
+%files lockd
+%{_sbindir}/lvmlockd
+%{_sbindir}/lvmlockctl
+%{_mandir}/man8/lvmlockd.8.gz
+%dir %{_default_locking_dir}
+%{_unitdir}/lvm2-lvmlockd.service
+%{_unitdir}/lvm2-lvmlocking.service
+%endif
+
##############################################################################
# Cluster subpackage
##############################################################################
@@ -267,11 +327,12 @@ fi
%defattr(-,root,root,-)
%attr(755,root,root) /usr/sbin/clvmd
%{_mandir}/man8/clvmd.8.gz
-%{_sysconfdir}/rc.d/init.d/clvmd
%if %{enable_systemd}
%{_unitdir}/lvm2-clvmd.service
%{_unitdir}/lvm2-cluster-activation.service
%{_unitdir}/../lvm2-cluster-activation
+%else
+ %{_sysconfdir}/rc.d/init.d/clvmd
%endif
%endif
@@ -304,33 +365,10 @@ fi
%defattr(-,root,root,-)
%attr(755,root,root) /usr/sbin/cmirrord
%{_mandir}/man8/cmirrord.8.gz
-%{_sysconfdir}/rc.d/init.d/cmirrord
%if %{enable_systemd}
%{_unitdir}/lvm2-cmirrord.service
-%endif
-
-%endif
-
-##############################################################################
-# Legacy SysV init subpackage
-##############################################################################
-%if %{enable_systemd}
-
-%package sysvinit
-Summary: SysV style init script for LVM2.
-Group: System Environment/Base
-Requires: %{name} = %{version}-%{release}
-Requires: initscripts
-
-%description sysvinit
-SysV style init script for LVM2. It needs to be installed only if systemd
-is not used as the system init process.
-
-%files sysvinit
-%{_sysconfdir}/rc.d/init.d/lvm2-monitor
-%{_sysconfdir}/rc.d/init.d/blk-availability
-%if %{have_service lvmetad}
-%{_sysconfdir}/rc.d/init.d/lvm2-lvmetad
+%else
+ %{_sysconfdir}/rc.d/init.d/cmirrord
%endif
%endif
@@ -362,7 +400,9 @@ for the kernel device-mapper.
%defattr(-,root,root,-)
%doc COPYING COPYING.LIB WHATS_NEW_DM VERSION_DM README INSTALL
%attr(755,root,root) %{_sbindir}/dmsetup
+%{_sbindir}/dmstats
%{_mandir}/man8/dmsetup.8.gz
+%{_mandir}/man8/dmstats.8.gz
%if %{enable_udev}
%doc udev/12-dm-permissions.rules
%dir %{_udevbasedir}
@@ -494,3 +534,18 @@ the device-mapper event library.
%{_includedir}/libdevmapper-event.h
%{_libdir}/pkgconfig/devmapper-event.pc
+%if %{enable_testsuite}
+%package testsuite
+Summary: LVM2 Testsuite
+License: LGPLv2
+Group: Development
+
+%description testsuite
+An extensive functional testsuite for LVM2.
+
+%files testsuite
+%defattr(-,root,root,-)
+%{_datadir}/lvm2-testsuite/
+%{_libexecdir}/lvm2-testsuite/
+%{_bindir}/lvm2-testsuite
+%endif
diff --git a/spec/source.inc b/spec/source.inc
index 6ef479130..2c17dbd74 100644
--- a/spec/source.inc
+++ b/spec/source.inc
@@ -4,9 +4,13 @@
# Defaults (rawhide)...
%global enable_profiling 0
+%global enable_testsuite 0
%global enable_udev 1
%global enable_systemd 1
%global enable_cmirror 1
+#%global enable_lvmlockd 0
+#%global enable_lockd_dlm 0
+#%global enable_lockd_sanlock 0
%global buildreq_cluster corosync-devel >= 1.99.9-1, dlm-devel >= 3.99.1-1
%global req_cluster corosync >= 1.99.9-1, dlm >= 3.99.2-1
@@ -24,6 +28,27 @@
%service lvmetad 1
+%service lvmpolld 1
+
+%if %{fedora} >= 22 || %{rhel} >= 7
+ %service lvmlockd 1
+ %define sanlock_version 3.2.4-1
+ %define enable_lockd_dlm 1
+ %define enable_lockd_sanlock 1
+ %if %{rhel}
+ %ifarch i686 x86_64 s390x
+ %global buildreq_lockd_dlm dlm-devel >= %{dlm_version}
+ %else
+ %define enable_lockd_dlm 0
+ %endif
+ %ifarch x86_64 ppc64le ppc64 aarch64
+ %global buildreq_lockd_sanlock sanlock-devel >= %{sanlock_version}
+ %else
+ %define enable_lockd_sanlock 0
+ %endif
+ %endif
+%endif
+
##############################################################
%if %{fedora} == 16 || %{rhel} == 6
@@ -71,12 +96,12 @@
# Do not reset Release to 1 unless both lvm2 and device-mapper
# versions are increased together.
-%define device_mapper_version 1.02.75
+%define device_mapper_version 1.02.97
Summary: Userland logical volume management tools
Name: lvm2
-Version: 2.02.96
-Release: 4%{?dist}
+Version: 2.02.120
+Release: 1%{?dist}
License: GPLv2
Group: System Environment/Base
URL: http://sources.redhat.com/lvm2
@@ -102,6 +127,8 @@ BuildRequires: pkgconfig
%maybe BuildRequires: %{?buildreq_udev}
%maybe BuildRequires: %{?buildreq_cluster}
+%maybe BuildRequires: %{?buildreq_lockd_dlm}
+%maybe BuildRequires: %{?buildreq_lockd_sanlock}
%description
LVM2 includes all of the support for handling read/write operations on
diff --git a/test/Makefile.in b/test/Makefile.in
index 1aec88b4b..211950954 100644
--- a/test/Makefile.in
+++ b/test/Makefile.in
@@ -1,4 +1,4 @@
-# Copyright (C) 2007-2013 Red Hat, Inc. All rights reserved.
+# Copyright (C) 2007-2015 Red Hat, Inc. All rights reserved.
#
# This file is part of LVM2.
#
@@ -23,60 +23,67 @@ abs_srcdir = @abs_srcdir@
abs_builddir = @abs_builddir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
+datarootdir = @datarootdir@
LVM_TEST_RESULTS ?= results
-export LVM_TEST_THIN_CHECK_CMD?=@THIN_CHECK_CMD@
-export LVM_TEST_THIN_DUMP_CMD?=@THIN_DUMP_CMD@
-export LVM_TEST_THIN_REPAIR_CMD?=@THIN_REPAIR_CMD@
-export LVM_TEST_THIN_RESTORE_CMD?=@THIN_RESTORE_CMD@
-
-export LVM_TEST_CACHE_CHECK_CMD?=@CACHE_CHECK_CMD@
-export LVM_TEST_CACHE_DUMP_CMD?=@CACHE_DUMP_CMD@
-export LVM_TEST_CACHE_REPAIR_CMD?=@CACHE_REPAIR_CMD@
-export LVM_TEST_CACHE_RESTORE_CMD?=@CACHE_RESTORE_CMD@
SUBDIRS = api unit
SOURCES = lib/not.c lib/harness.c
+CXXSOURCES = lib/runner.cpp
include $(top_builddir)/make.tmpl
T ?= .
S ?= @ # never match anything by default
VERBOSE ?= 0
-ALL = $(shell find $(srcdir) \( -path \*/shell/\*.sh -or -path \*/api/\*.sh \) | sort)
+ALL := $(shell find -L $(srcdir) \( -path \*/shell/\*.sh -or -path \*/api/\*.sh \) | sort)
comma = ,
-RUN = $(shell find $(srcdir) -regextype posix-egrep \( -path \*/shell/\*.sh -or -path \*/api/\*.sh \) -and -regex "$(srcdir)/.*($(subst $(comma),|,$(T))).*" -and -not -regex "$(srcdir)/.*($(subst $(comma),|,$(S))).*" | sort)
+RUN := $(shell find -L $(srcdir) -regextype posix-egrep \( -path \*/shell/\*.sh -or -path \*/api/\*.sh \) -and -regex "$(srcdir)/.*($(subst $(comma),|,$(T))).*" -and -not -regex "$(srcdir)/.*($(subst $(comma),|,$(S))).*" | sort)
RUN_BASE = $(subst $(srcdir)/,,$(RUN))
ifeq ("@BUILD_LVMETAD@", "yes")
LVMETAD_RUN_BASE = $(RUN_BASE)
+LVMETAD_NDEV_FLAVOUR = ,ndev-lvmetad
+LVMETAD_UDEV_FLAVOUR = ,udev-lvmetad
endif
+ifeq ("@BUILD_LVMPOLLD@", "yes")
+LVMPOLLD_RUN_BASE = $(RUN_BASE)
+LVMPOLLD_NDEV_FLAVOUR = ,ndev-lvmpolld,ndev-cluster-lvmpolld,ndev-lvmetad-lvmpolld
+LVMPOLLD_UDEV_FLAVOUR = ,udev-lvmpolld,udev-cluster-lvmpolld,udev-lvmetad-lvmpolld
+endif
# Shell quote;
-SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH))
+SHELL_PATH_SQ := $(subst ','\'',$(SHELL_PATH))
ifeq ("@UDEV_SYNC@", "yes")
dm_udev_synchronisation = 1
endif
-all: check
+all: .tests-stamp
help:
@echo -e "\nAvailable targets:"
@echo " all Default target, run check."
@echo " check Run all tests."
+ @echo " check_system Run all tests using udev."
@echo " check_local Run tests without clvmd and lvmetad."
@echo " check_cluster Run tests with cluster daemon."
@echo " check_lvmetad Run tests with lvmetad daemon."
+ @echo " check_lvmpolld Run tests with lvmpolld daemon."
+ @echo " check_lvmlockd_sanlock Run tests with lvmlockd and sanlock."
+ @echo " check_lvmlockd_dlm Run tests with lvmlockd and dlm."
@echo " clean Clean dir."
@echo " help Display callable targets."
@echo -e "\nSupported variables:"
+ @echo " LVM_TEST_BACKING_DEVICE Set device used for testing (see also LVM_TEST_DIR)."
@echo " LVM_TEST_CAN_CLOBBER_DMESG Allow to clobber dmesg buffer without /dev/kmsg. (1)"
@echo " LVM_TEST_DEVDIR Set to '/dev' to run on real /dev."
@echo " LVM_TEST_DIR Where to create test files [$(LVM_TEST_DIR)]."
@echo " LVM_TEST_LOCKING Normal (1), Cluster (3)."
@echo " LVM_TEST_LVMETAD Start lvmetad (1)."
+ @echo " LVM_TEST_LVMETAD_DEBUG_OPTS Allows to override debug opts [-l all]."
+ @echo " LVM_TEST_LVMPOLLD Start lvmpolld"
@echo " LVM_TEST_NODEBUG Do not debug lvm commands."
	@echo "  LVM_TEST_PARALLEL           May skip aggressive wipe of LVMTEST resources."
@echo " LVM_TEST_RESULTS Where to create result files [results]."
@@ -89,57 +96,131 @@ help:
@echo " LVM_TEST_CACHE_REPAIR_CMD Command for cache_repair [$(LVM_TEST_CACHE_REPAIR_CMD)]."
@echo " LVM_TEST_CACHE_RESTORE_CMD Command for cache_restore [$(LVM_TEST_CACHE_RESTORE_CMD)]."
@echo " LVM_TEST_UNLIMITED Set to get unlimited test log (>32MB)"
- @echo " LVM_VALGRIND Enable valgrind testing (1,2,3) execs $$"VALGRIND
+ @echo " LVM_VALGRIND Enable valgrind testing, execs $$"VALGRIND.
@echo " LVM_VALGRIND_CLVMD Enable valgrind testing of clvmd (1)."
@echo " LVM_VALGRIND_DMEVENTD Enable valgrind testing of dmeventd (1)."
@echo " LVM_VALGRIND_LVMETAD Enable valgrind testing of lvmetad (1)."
+ @echo " LVM_STRACE Enable strace logging."
+	@echo "  LVM_DEBUG_LEVEL             Sets debugging level for valgrind/strace (use > 0)."
@echo " LVM_VERIFY_UDEV Default verify state for lvm.conf."
@echo " S Skip given test (regex)."
@echo " T Run given test (regex)."
@echo " VERBOSE Verbose output (1), timing (2)."
check: .tests-stamp
- VERBOSE=$(VERBOSE) \
- cluster_LVM_TEST_LOCKING=3 \
- lvmetad_LVM_TEST_LVMETAD=1 \
- ./lib/harness $(patsubst %,normal:%,$(RUN_BASE)) \
- $(patsubst %,cluster:%,$(RUN_BASE)) \
- $(patsubst %,lvmetad:%,$(LVMETAD_RUN_BASE))
+ VERBOSE=$(VERBOSE) ./lib/runner \
+ --testdir . --outdir $(LVM_TEST_RESULTS) \
+ --flavours ndev-vanilla,ndev-cluster$(LVMETAD_NDEV_FLAVOUR)$(LVMPOLLD_NDEV_FLAVOUR) --only $(T) --skip $(S)
check_system: .tests-stamp
- VERBOSE=$(VERBOSE) \
- scluster_LVM_TEST_LOCKING=3 \
- slvmetad_LVM_TEST_LVMETAD=1 \
- snormal_LVM_TEST_DEVDIR=/dev \
- scluster_LVM_TEST_DEVDIR=/dev \
- slvmetad_LVM_TEST_DEVDIR=/dev \
- ./lib/harness $(patsubst %,snormal:%,$(RUN_BASE)) \
- $(patsubst %,scluster:%,$(RUN_BASE)) \
- $(patsubst %,slvmetad:%,$(LVMETAD_RUN_BASE))
+ VERBOSE=$(VERBOSE) ./lib/runner \
+ --testdir . --outdir $(LVM_TEST_RESULTS) \
+ --flavours udev-vanilla,udev-cluster$(LVMETAD_UDEV_FLAVOUR)$(LVMPOLLD_UDEV_FLAVOUR) --only $(T) --skip $(S)
check_cluster: .tests-stamp
- @echo Testing with locking_type 3
- VERBOSE=$(VERBOSE) LVM_TEST_LOCKING=3 ./lib/harness $(RUN_BASE)
+ VERBOSE=$(VERBOSE) ./lib/runner \
+ --testdir . --outdir $(LVM_TEST_RESULTS) \
+ --flavours ndev-cluster --only $(T) --skip $(S)
check_local: .tests-stamp
- @echo Testing with locking_type 1
- VERBOSE=$(VERBOSE) LVM_TEST_LOCKING=1 ./lib/harness $(RUN_BASE)
+ VERBOSE=$(VERBOSE) ./lib/runner \
+ --testdir . --outdir $(LVM_TEST_RESULTS) \
+ --flavours ndev-vanilla --only $(T) --skip $(S)
ifeq ("@BUILD_LVMETAD@", "yes")
check_lvmetad: .tests-stamp
- @echo Testing with lvmetad on
- VERBOSE=$(VERBOSE) LVM_TEST_LVMETAD=1 ./lib/harness $(RUN_BASE)
+ VERBOSE=$(VERBOSE) ./lib/runner \
+ --testdir . --outdir $(LVM_TEST_RESULTS) \
+ --flavours ndev-lvmetad --only $(T) --skip $(S)
+endif
+
+ifeq ("@BUILD_LVMPOLLD@", "yes")
+check_lvmpolld: .tests-stamp
+ VERBOSE=$(VERBOSE) ./lib/runner \
+ --testdir . --outdir results \
+ --flavours ndev-lvmpolld,ndev-cluster-lvmpolld,ndev-lvmetad-lvmpolld --only $(T) --skip $(S)
+endif
+
+ifeq ("@BUILD_LVMLOCKD@", "yes")
+check_lvmlockd_sanlock: .tests-stamp
+ VERBOSE=$(VERBOSE) ./lib/runner \
+ --testdir . --outdir results \
+ --flavours udev-lvmlockd-sanlock --only shell/sanlock-prepare.sh
+ VERBOSE=$(VERBOSE) ./lib/runner \
+ --testdir . --outdir results \
+ --flavours udev-lvmlockd-sanlock --only $(T) --skip $(S)
+ VERBOSE=$(VERBOSE) ./lib/runner \
+ --testdir . --outdir results \
+ --flavours udev-lvmlockd-sanlock --only shell/sanlock-remove.sh
+endif
+
+ifeq ("@BUILD_LVMLOCKD@", "yes")
+check_lvmlockd_dlm: .tests-stamp
+ VERBOSE=$(VERBOSE) ./lib/runner \
+ --testdir . --outdir results \
+ --flavours udev-lvmlockd-dlm --only shell/dlm-prepare.sh
+ VERBOSE=$(VERBOSE) ./lib/runner \
+ --testdir . --outdir results \
+ --flavours udev-lvmlockd-dlm --only $(T) --skip $(S)
+ VERBOSE=$(VERBOSE) ./lib/runner \
+ --testdir . --outdir results \
+ --flavours udev-lvmlockd-dlm --only shell/dlm-remove.sh
endif
+DATADIR = $(datadir)/lvm2-testsuite
+EXECDIR = $(libexecdir)/lvm2-testsuite
+
+LIB_FLAVOURS = \
+ lib/flavour-ndev-cluster-lvmpolld\
+ lib/flavour-ndev-cluster\
+ lib/flavour-ndev-lvmetad-lvmpolld\
+ lib/flavour-ndev-lvmetad\
+ lib/flavour-ndev-lvmpolld\
+ lib/flavour-ndev-vanilla\
+ lib/flavour-udev-cluster-lvmpolld\
+ lib/flavour-udev-cluster\
+ lib/flavour-udev-lvmetad-lvmpolld\
+ lib/flavour-udev-lvmetad\
+ lib/flavour-udev-lvmpolld\
+ lib/flavour-udev-lvmlockd-sanlock\
+ lib/flavour-udev-lvmlockd-dlm\
+ lib/flavour-udev-vanilla
+
+LIB_LOCAL = lib/paths lib/runner
+LIB_EXEC = lib/not lib/invalid lib/fail lib/should
+LIB_SHARED = lib/check lib/aux lib/inittest lib/utils lib/get lib/lvm-wrapper
+
+install: .tests-stamp lib/paths-installed
+ @echo $(srcdir)
+ @echo $(LIB_FLAVOURS)
+ $(INSTALL_DIR) $(DATADIR)/{shell,api,lib}
+ $(INSTALL_DATA) shell/*.sh $(DATADIR)/shell/
+ $(INSTALL_DATA) api/*.sh $(DATADIR)/api/
+ $(INSTALL_PROGRAM) api/*.{t,py} $(DATADIR)/api/
+ $(INSTALL_DATA) lib/paths-installed $(DATADIR)/lib/paths
+ $(INSTALL_DATA) $(LIB_FLAVOURS) $(DATADIR)/lib/
+ for i in cache-mq cache-smq thin-performance ; do \
+ $(INSTALL_DATA) $(abs_top_srcdir)/conf/$$i.profile $(DATADIR)/lib/$$i.profile; done
+ $(INSTALL_SCRIPT) $(LIB_SHARED) $(DATADIR)/lib/
+ for i in $(CMDS); do (cd $(DATADIR)/lib && $(LN_S) -f lvm-wrapper $$i); done
+
+ $(INSTALL_DIR) $(EXECDIR)
+ $(INSTALL_PROGRAM) $(LIB_EXEC) $(EXECDIR)
+ $(INSTALL_PROGRAM) -D lib/runner $(bindir)/lvm2-testsuite
+
lib/should: lib/not
- ln -sf not lib/should
+ $(LN_S) -f not lib/should
lib/invalid: lib/not
- ln -sf not lib/invalid
+ $(LN_S) -f not lib/invalid
lib/fail: lib/not
- ln -sf not lib/fail
+ $(LN_S) -f not lib/fail
+
+lib/runner: lib/runner.o .lib-dir-stamp
+ $(CXX) $(LDFLAGS) -o $@ $<
+lib/runner.o: $(wildcard $(srcdir)/lib/*.h)
lib/%: lib/%.o .lib-dir-stamp
$(CC) $(LDFLAGS) -o $@ $<
@@ -148,42 +229,54 @@ lib/%: $(srcdir)/lib/%.sh .lib-dir-stamp
cp $< $@
chmod +x $@
-lib/paths: $(srcdir)/Makefile.in .lib-dir-stamp Makefile
- $(RM) $@-t
- echo 'top_srcdir="$(top_srcdir)"' >> $@-t
- echo 'abs_top_builddir="$(abs_top_builddir)"' >> $@-t
- echo 'abs_top_srcdir="$(abs_top_srcdir)"' >> $@-t
- echo 'abs_srcdir="$(abs_srcdir)"' >> $@-t
- echo 'abs_builddir="$(abs_builddir)"' >> $@-t
+lib/flavour-%: $(srcdir)/lib/flavour-%.sh .lib-dir-stamp
+ cp $< $@
+
+lib/paths-common: $(srcdir)/Makefile.in .lib-dir-stamp Makefile
echo 'export DM_UDEV_SYNCHRONISATION=$(dm_udev_synchronisation)' >> $@-t
echo 'export THIN=@THIN@' >> $@-t
echo 'export RAID=@RAID@' >> $@-t
echo 'export CACHE=@CACHE@' >> $@-t
echo 'export LVMETAD_PIDFILE="@LVMETAD_PIDFILE@"' >> $@-t
+ echo 'export LVMPOLLD_PIDFILE="@LVMPOLLD_PIDFILE@"' >> $@-t
echo 'export DMEVENTD_PIDFILE="@DMEVENTD_PIDFILE@"' >> $@-t
echo 'export CLVMD_PIDFILE="@CLVMD_PIDFILE@"' >> $@-t
- echo 'export LVM_TEST_THIN_CHECK_CMD=$${LVM_TEST_THIN_CHECK_CMD:-@THIN_CHECK_CMD@}' >> $@-t
- echo 'export LVM_TEST_THIN_DUMP_CMD=$${LVM_TEST_THIN_DUMP_CMD:-@THIN_DUMP_CMD@}' >> $@-t
- echo 'export LVM_TEST_THIN_REPAIR_CMD=$${LVM_TEST_THIN_REPAIR_CMD:-@THIN_REPAIR_CMD@}' >> $@-t
- echo 'export LVM_TEST_THIN_RESTORE_CMD=$${LVM_TEST_THIN_RESTORE_CMD:-@THIN_RESTORE_CMD@}' >> $@-t
- echo 'export LVM_TEST_CACHE_CHECK_CMD=$${LVM_TEST_CACHE_CHECK_CMD:-@CACHE_CHECK_CMD@}' >> $@-t
- echo 'export LVM_TEST_CACHE_DUMP_CMD=$${LVM_TEST_CACHE_DUMP_CMD:-@CACHE_DUMP_CMD@}' >> $@-t
- echo 'export LVM_TEST_CACHE_REPAIR_CMD=$${LVM_TEST_CACHE_REPAIR_CMD:-@CACHE_REPAIR_CMD@}' >> $@-t
- echo 'export LVM_TEST_CACHE_RESTORE_CMD=$${LVM_TEST_CACHE_RESTORE_CMD:-@CACHE_RESTORE_CMD@}' >> $@-t
+ echo 'export LVM_TEST_THIN_CHECK_CMD=$${LVM_TEST_THIN_CHECK_CMD-@THIN_CHECK_CMD@}' >> $@-t
+ echo 'export LVM_TEST_THIN_DUMP_CMD=$${LVM_TEST_THIN_DUMP_CMD-@THIN_DUMP_CMD@}' >> $@-t
+ echo 'export LVM_TEST_THIN_REPAIR_CMD=$${LVM_TEST_THIN_REPAIR_CMD-@THIN_REPAIR_CMD@}' >> $@-t
+ echo 'export LVM_TEST_THIN_RESTORE_CMD=$${LVM_TEST_THIN_RESTORE_CMD-@THIN_RESTORE_CMD@}' >> $@-t
+ echo 'export LVM_TEST_CACHE_CHECK_CMD=$${LVM_TEST_CACHE_CHECK_CMD-@CACHE_CHECK_CMD@}' >> $@-t
+ echo 'export LVM_TEST_CACHE_DUMP_CMD=$${LVM_TEST_CACHE_DUMP_CMD-@CACHE_DUMP_CMD@}' >> $@-t
+ echo 'export LVM_TEST_CACHE_REPAIR_CMD=$${LVM_TEST_CACHE_REPAIR_CMD-@CACHE_REPAIR_CMD@}' >> $@-t
+ echo 'export LVM_TEST_CACHE_RESTORE_CMD=$${LVM_TEST_CACHE_RESTORE_CMD-@CACHE_RESTORE_CMD@}' >> $@-t
mv $@-t $@
-LIB = lib/not lib/invalid lib/fail lib/should lib/harness \
- lib/check lib/aux lib/inittest lib/utils lib/get lib/lvm-wrapper \
- lib/paths
+lib/paths-installed: lib/paths-common
+ $(RM) $@-t
+ cat lib/paths-common > $@-t
+ echo 'installed_testsuite=1' >> $@-t
+ echo 'export PATH=@libexecdir@/lvm2-testsuite:@datadir@/lvm2-testsuite/lib:$$PATH' >> $@-t
+ mv $@-t $@
+
+lib/paths: lib/paths-common
+ $(RM) $@-t
+ cat lib/paths-common > $@-t
+ echo 'top_srcdir="$(top_srcdir)"' >> $@-t
+ echo 'abs_top_builddir="$(abs_top_builddir)"' >> $@-t
+ echo 'abs_top_srcdir="$(abs_top_srcdir)"' >> $@-t
+ echo 'abs_srcdir="$(abs_srcdir)"' >> $@-t
+ echo 'abs_builddir="$(abs_builddir)"' >> $@-t
+ mv $@-t $@
CMDS = lvm $(shell cat $(top_builddir)/tools/.commands)
+LIB = $(LIB_SHARED) $(LIB_LOCAL) $(LIB_EXEC) $(LIB_FLAVOURS)
.tests-stamp: $(ALL) $(LIB) $(SUBDIRS)
@if test "$(srcdir)" != . ; then \
echo "Linking tests to builddir."; \
$(MKDIR_P) shell; \
for f in $(subst $(srcdir)/,,$(ALL)); do \
- ln -sf $(abs_top_srcdir)/test/$$f $$f; \
+ $(LN_S) -f $(abs_top_srcdir)/test/$$f $$f; \
done; \
fi
@$(MKDIR_P) -m a=rwx $(LVM_TEST_RESULTS)
@@ -191,23 +284,28 @@ CMDS = lvm $(shell cat $(top_builddir)/tools/.commands)
.lib-dir-stamp:
$(MKDIR_P) lib
- for i in $(CMDS); do ln -fs lvm-wrapper lib/$$i; done
- ln -fs $(abs_top_builddir)/tools/dmsetup lib/dmsetup
- ln -fs $(abs_top_builddir)/daemons/clvmd/clvmd lib/clvmd
- ln -fs $(abs_top_builddir)/daemons/dmeventd/dmeventd lib/dmeventd
- ln -fs $(abs_top_builddir)/daemons/lvmetad/lvmetad lib/lvmetad
- ln -fs $(abs_top_srcdir)/scripts/vgimportclone.sh lib/vgimportclone
- ln -fs $(abs_top_srcdir)/scripts/fsadm.sh lib/fsadm
- ln -fs $(abs_top_srcdir)/conf/thin-performance.profile lib/thin-performance.profile
+ for i in $(CMDS); do $(LN_S) -f lvm-wrapper lib/$$i; done
+ $(LN_S) -f $(abs_top_builddir)/tools/dmsetup lib/dmsetup
+ $(LN_S) -f $(abs_top_builddir)/daemons/clvmd/clvmd lib/clvmd
+ $(LN_S) -f $(abs_top_builddir)/daemons/dmeventd/dmeventd lib/dmeventd
+ $(LN_S) -f $(abs_top_builddir)/daemons/lvmetad/lvmetad lib/lvmetad
+ $(LN_S) -f $(abs_top_builddir)/daemons/lvmpolld/lvmpolld lib/lvmpolld
+ $(LN_S) -f $(abs_top_srcdir)/scripts/vgimportclone.sh lib/vgimportclone
+ $(LN_S) -f $(abs_top_srcdir)/scripts/fsadm.sh lib/fsadm
+ $(LN_S) -f $(abs_top_srcdir)/conf/thin-performance.profile lib/thin-performance.profile
touch $@
-clean:
- test "$(srcdir)" = . || $(RM) $(RUN_BASE)
- $(RM) -r $(LVM_TEST_RESULTS)
+CLEAN_DIRS += $(LVM_TEST_RESULTS)
+ifneq (.,$(firstword $(srcdir)))
+CLEAN_TARGETS += $(RUN_BASE)
+endif
CLEAN_TARGETS += .lib-dir-stamp .tests-stamp $(LIB) $(addprefix lib/,$(CMDS)) \
lib/clvmd lib/dmeventd lib/dmsetup lib/lvmetad lib/fsadm lib/vgimportclone \
- lib/thin-performance.profile
+ lib/thin-performance.profile lib/harness \
+ lib/paths-installed lib/paths-installed-t \
+ lib/paths-common lib/paths-common-t \
+ lib/lvmpolld
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
diff --git a/test/api/percent.sh b/test/api/percent.sh
index a503ca345..a9b7b90a6 100644
--- a/test/api/percent.sh
+++ b/test/api/percent.sh
@@ -13,7 +13,7 @@
. lib/inittest
-kernel_at_least 2 6 33 || skip
+aux kernel_at_least 2 6 33 || skip
aux prepare_pvs 2
@@ -21,9 +21,11 @@ vgcreate -s 4k $vg $(cat DEVICES)
lvcreate -aey -l 5 -n foo $vg
lvcreate -s -n snap $vg/foo -l 3 -c 4k
lvcreate -s -n snap2 $vg/foo -l 6 -c 4k
-dd if=/dev/urandom of="$DM_DEV_DIR/$vg/snap2" count=1 bs=1024
+dd if=/dev/zero of="$DM_DEV_DIR/$vg/snap2" count=1 bs=1024 oflag=direct
+# skip test with broken kernel
+check lv_field $vg/snap2 data_percent "50.00" || skip
lvcreate -aey --type mirror -m 1 -n mirr $vg -l 1 --mirrorlog core
-lvs $vg
+lvs -a $vg
aux apitest percent $vg
vgremove -ff $vg
diff --git a/test/api/pytest.sh b/test/api/pytest.sh
index 36f55fa3c..3b7c5b668 100644
--- a/test/api/pytest.sh
+++ b/test/api/pytest.sh
@@ -1,5 +1,5 @@
#!/bin/sh
-# Copyright (C) 2012-2013 Red Hat, Inc. All rights reserved.
+# Copyright (C) 2012-2015 Red Hat, Inc. All rights reserved.
#
# This file is part of LVM2.
#
@@ -45,42 +45,48 @@ export PY_UNIT_PVS=$(cat DEVICES)
#python_lvm_unit.py -v -f
# Run individual tests for shorter error trace
-python_lvm_unit.py -v TestLvm.test_config_find_bool
-python_lvm_unit.py -v TestLvm.test_config_override
-python_lvm_unit.py -v TestLvm.test_config_reload
-python_lvm_unit.py -v TestLvm.test_dupe_lv_create
-python_lvm_unit.py -v TestLvm.test_get_set_extend_size
-python_lvm_unit.py -v TestLvm.test_lv_active_inactive
-python_lvm_unit.py -v TestLvm.test_lv_property
-python_lvm_unit.py -v TestLvm.test_lv_rename
-python_lvm_unit.py -v TestLvm.test_lv_resize
-python_lvm_unit.py -v TestLvm.test_lv_seg
-python_lvm_unit.py -v TestLvm.test_lv_size
-python_lvm_unit.py -v TestLvm.test_lv_snapshot
-python_lvm_unit.py -v TestLvm.test_lv_suspend
-python_lvm_unit.py -v TestLvm.test_lv_tags
-python_lvm_unit.py -v TestLvm.test_percent_to_float
-python_lvm_unit.py -v TestLvm.test_pv_create
-python_lvm_unit.py -v TestLvm.test_pv_empty_listing
-python_lvm_unit.py -v TestLvm.test_pv_getters
-python_lvm_unit.py -v TestLvm.test_pv_life_cycle
-python_lvm_unit.py -v TestLvm.test_pv_lookup_from_vg
-python_lvm_unit.py -v TestLvm.test_pv_property
-python_lvm_unit.py -v TestLvm.test_pv_resize
-python_lvm_unit.py -v TestLvm.test_pv_segs
-python_lvm_unit.py -v TestLvm.test_scan
-python_lvm_unit.py -v TestLvm.test_version
-python_lvm_unit.py -v TestLvm.test_vg_from_pv_lookups
-python_lvm_unit.py -v TestLvm.test_vg_getters
-python_lvm_unit.py -v TestLvm.test_vg_get_name
-python_lvm_unit.py -v TestLvm.test_vg_get_set_prop
-python_lvm_unit.py -v TestLvm.test_vg_get_uuid
-python_lvm_unit.py -v TestLvm.test_vg_lv_name_validate
-python_lvm_unit.py -v TestLvm.test_vg_names
-python_lvm_unit.py -v TestLvm.test_vg_reduce
-python_lvm_unit.py -v TestLvm.test_vg_remove_restore
-python_lvm_unit.py -v TestLvm.test_vg_tags
-python_lvm_unit.py -v TestLvm.test_vg_uuids
+for i in \
+ lv_persistence \
+ config_find_bool \
+ config_override \
+ config_reload \
+ dupe_lv_create \
+ get_set_extend_size \
+ lv_active_inactive \
+ lv_property \
+ lv_rename \
+ lv_resize \
+ lv_seg \
+ lv_size \
+ lv_snapshot \
+ lv_suspend \
+ lv_tags \
+ percent_to_float \
+ pv_create \
+ pv_empty_listing \
+ pv_getters \
+ pv_life_cycle \
+ pv_lookup_from_vg \
+ pv_property \
+ pv_resize \
+ pv_segs \
+ scan \
+ version \
+ vg_from_pv_lookups \
+ vg_getters \
+ vg_get_name \
+ vg_get_set_prop \
+ vg_get_uuid \
+ vg_lv_name_validate \
+ vg_names \
+ vg_reduce \
+ vg_remove_restore \
+ vg_tags \
+ vg_uuids
+do
+ python_lvm_unit.py -v TestLvm.test_$i
+ rm -f debug.log_DEBUG*
+done
# CHECKME: not for testing?
#python_lvm_unit.py -v TestLvm.test_listing
diff --git a/test/api/python_lvm_unit.py b/test/api/python_lvm_unit.py
index 2f22fae23..6fc196310 100755
--- a/test/api/python_lvm_unit.py
+++ b/test/api/python_lvm_unit.py
@@ -18,6 +18,10 @@ import string
import lvm
import os
import itertools
+import sys
+
+if sys.version_info[0] > 2:
+ long = int
# Set of basic unit tests for the python bindings.
#
@@ -44,8 +48,8 @@ def rs(rand_len=10):
"""
Generate a random string
"""
- return ''.join(random.choice(string.ascii_uppercase)
- for x in range(rand_len))
+ return ''.join(
+ random.choice(string.ascii_uppercase)for x in range(rand_len))
def _get_allowed_devices():
@@ -56,18 +60,6 @@ def _get_allowed_devices():
return rc
-def compare_pv(right, left):
- r_name = right.getName()
- l_name = left.getName()
-
- if r_name > l_name:
- return 1
- elif r_name == l_name:
- return 0
- else:
- return -1
-
-
class AllowedPVS(object):
"""
We are only allowed to muck with certain PV, filter to only
@@ -92,7 +84,7 @@ class AllowedPVS(object):
rc.append(p)
#Sort them consistently
- rc.sort(compare_pv)
+ rc.sort(key=lambda x: x.getName())
return rc
def __exit__(self, t_type, value, traceback):
@@ -131,15 +123,15 @@ class TestLvm(unittest.TestCase):
for d in device_list:
vg.extend(d)
- vg.createLvThinpool(pool_name, vg.getSize()/2, 0, 0,
- lvm.THIN_DISCARDS_PASSDOWN, 1)
+ vg.createLvThinpool(
+ pool_name, vg.getSize() / 2, 0, 0, lvm.THIN_DISCARDS_PASSDOWN, 1)
return vg
@staticmethod
def _create_thin_lv(pv_devices, name):
thin_pool_name = 'thin_vg_pool_' + rs(4)
vg = TestLvm._create_thin_pool(pv_devices, thin_pool_name)
- vg.createLvThin(thin_pool_name, name, vg.getSize()/8)
+ vg.createLvThin(thin_pool_name, name, vg.getSize() / 8)
vg.close()
vg = None
@@ -239,7 +231,7 @@ class TestLvm(unittest.TestCase):
curr_size = pv.getSize()
dev_size = pv.getDevSize()
self.assertTrue(curr_size == dev_size)
- pv.resize(curr_size/2)
+ pv.resize(curr_size / 2)
with AllowedPVS() as pvs:
pv = pvs[0]
resized_size = pv.getSize()
@@ -298,22 +290,30 @@ class TestLvm(unittest.TestCase):
self.assertEqual(type(pv.getUuid()), str)
self.assertTrue(len(pv.getUuid()) > 0)
- self.assertTrue(type(pv.getMdaCount()) == int or
- type(pv.getMdaCount()) == long)
+ self.assertTrue(
+ type(pv.getMdaCount()) == int or
+ type(pv.getMdaCount()) == long)
- self.assertTrue(type(pv.getSize()) == int or
- type(pv.getSize()) == long)
+ self.assertTrue(
+ type(pv.getSize()) == int or
+ type(pv.getSize()) == long)
- self.assertTrue(type(pv.getDevSize()) == int or
- type(pv.getSize()) == long)
+ self.assertTrue(
+ type(pv.getDevSize()) == int or
+ type(pv.getSize()) == long)
- self.assertTrue(type(pv.getFree()) == int or
- type(pv.getFree()) == long)
+ self.assertTrue(
+ type(pv.getFree()) == int or
+ type(pv.getFree()) == long)
def _test_prop(self, prop_obj, prop, var_type, settable):
result = prop_obj.getProperty(prop)
- self.assertEqual(type(result[0]), var_type)
+ #If we have no string value we can get a None type back
+ if result[0] is not None:
+ self.assertEqual(type(result[0]), var_type)
+ else:
+ self.assertTrue(str == var_type)
self.assertEqual(type(result[1]), bool)
self.assertTrue(result[1] == settable)
@@ -336,7 +336,53 @@ class TestLvm(unittest.TestCase):
lv_name = 'lv_test'
TestLvm._create_thin_lv(TestLvm._get_pv_device_names(), lv_name)
lv, vg = TestLvm._get_lv(None, lv_name)
- self._test_prop(lv, 'seg_count', long, False)
+
+ lv_seg_properties = [
+ ('chunk_size', long, False), ('devices', str, False),
+ ('discards', str, False), ('region_size', long, False),
+ ('segtype', str, False), ('seg_pe_ranges', str, False),
+ ('seg_size', long, False), ('seg_size_pe', long, False),
+ ('seg_start', long, False), ('seg_start_pe', long, False),
+ ('seg_tags', str, False), ('stripes', long, False),
+ ('stripe_size', long, False), ('thin_count', long, False),
+ ('transaction_id', long, False), ('zero', long, False)]
+
+ lv_properties = [
+ ('convert_lv', str, False), ('copy_percent', long, False),
+ ('data_lv', str, False), ('lv_attr', str, False),
+ ('lv_host', str, False), ('lv_kernel_major', long, False),
+ ('lv_kernel_minor', long, False),
+ ('lv_kernel_read_ahead', long, False),
+ ('lv_major', long, False), ('lv_minor', long, False),
+ ('lv_name', str, False), ('lv_path', str, False),
+ ('lv_profile', str, False), ('lv_read_ahead', long, False),
+ ('lv_size', long, False), ('lv_tags', str, False),
+ ('lv_time', str, False), ('lv_uuid', str, False),
+ ('metadata_lv', str, False), ('mirror_log', str, False),
+ ('lv_modules', str, False), ('move_pv', str, False),
+ ('origin', str, False), ('origin_size', long, False),
+ ('pool_lv', str, False), ('raid_max_recovery_rate', long, False),
+ ('raid_min_recovery_rate', long, False),
+ ('raid_mismatch_count', long, False),
+ ('raid_sync_action', str, False),
+ ('raid_write_behind', long, False), ('seg_count', long, False),
+ ('snap_percent', long, False), ('sync_percent', long, False)]
+
+ # Generic test case, make sure we get what we expect
+ for t in lv_properties:
+ self._test_prop(lv, *t)
+
+ segments = lv.listLVsegs()
+ if segments and len(segments):
+ for s in segments:
+ for t in lv_seg_properties:
+ self._test_prop(s, *t)
+
+ # Test specific cases
+ tag = 'hello_world'
+ lv.addTag(tag)
+ tags = lv.getProperty('lv_tags')
+ self.assertTrue(tag in tags[0])
vg.close()
def test_lv_tags(self):
@@ -368,6 +414,100 @@ class TestLvm(unittest.TestCase):
lv.rename(current_name)
vg.close()
+ def test_lv_persistence(self):
+ # Make changes to the lv, close the vg and re-open to make sure that
+ # the changes persist
+ lv_name = 'lv_test_persist'
+ TestLvm._create_thick_lv(TestLvm._get_pv_device_names(), lv_name)
+
+ # Test rename
+ lv, vg = TestLvm._get_lv(None, lv_name)
+ current_name = lv.getName()
+ new_name = rs()
+ lv.rename(new_name)
+
+ vg.close()
+ vg = None
+
+ lv, vg = TestLvm._get_lv(None, new_name)
+
+ self.assertTrue(lv is not None)
+
+ if lv and vg:
+ lv.rename(lv_name)
+ vg.close()
+ vg = None
+
+ # Test lv tag add
+ tag = 'hello_world'
+
+ lv, vg = TestLvm._get_lv(None, lv_name)
+ lv.addTag(tag)
+ vg.close()
+ vg = None
+
+ lv, vg = TestLvm._get_lv(None, lv_name)
+ tags = lv.getTags()
+
+ self.assertTrue(tag in tags)
+ vg.close()
+ vg = None
+
+ # Test lv tag delete
+ lv, vg = TestLvm._get_lv(None, lv_name)
+ self.assertTrue(lv is not None and vg is not None)
+
+ if lv and vg:
+ tags = lv.getTags()
+
+ for t in tags:
+ lv.removeTag(t)
+
+ vg.close()
+ vg = None
+
+ lv, vg = TestLvm._get_lv(None, lv_name)
+ self.assertTrue(lv is not None and vg is not None)
+
+ if lv and vg:
+ tags = lv.getTags()
+
+ if tags:
+ self.assertEqual(len(tags), 0)
+ vg.close()
+ vg = None
+
+ # Test lv deactivate
+ lv, vg = TestLvm._get_lv(None, lv_name)
+ self.assertTrue(lv is not None and vg is not None)
+
+ if lv and vg:
+ lv.deactivate()
+ vg.close()
+ vg = None
+
+ lv, vg = TestLvm._get_lv(None, lv_name)
+ self.assertTrue(lv is not None and vg is not None)
+ if lv and vg:
+ self.assertFalse(lv.isActive())
+ vg.close()
+ vg = None
+
+ # Test lv activate
+ lv, vg = TestLvm._get_lv(None, lv_name)
+ self.assertTrue(lv is not None and vg is not None)
+ if lv and vg:
+ lv.activate()
+ vg.close()
+ vg = None
+
+ lv, vg = TestLvm._get_lv(None, lv_name)
+ self.assertTrue(lv is not None and vg is not None)
+ if lv and vg:
+ self.assertTrue(lv.isActive())
+ vg.close()
+ vg = None
+
def test_lv_snapshot(self):
thin_lv = 'thin_lv'
@@ -422,7 +562,7 @@ class TestLvm(unittest.TestCase):
lv, vg = TestLvm._get_lv(None, lv_name)
curr_size = lv.getSize()
- lv.resize(curr_size+(1024*1024))
+ lv.resize(curr_size + (1024 * 1024))
latest = lv.getSize()
self.assertTrue(curr_size != latest)
@@ -448,8 +588,9 @@ class TestLvm(unittest.TestCase):
new_extent = 1024 * 1024 * 4
- self.assertFalse(vg.getExtentSize() != new_extent,
- "Cannot determine if it works if they are the same")
+ self.assertFalse(
+ vg.getExtentSize() != new_extent,
+ "Cannot determine if it works if they are the same")
vg.setExtentSize(new_extent)
self.assertEqual(vg.getExtentSize(), new_extent)
@@ -508,8 +649,8 @@ class TestLvm(unittest.TestCase):
if len(lvs):
lv = lvs[0]
lv_name = lv.getName()
- self.assertRaises(lvm.LibLVMError, vg.createLvLinear, lv_name,
- lv.getSize())
+ self.assertRaises(
+ lvm.LibLVMError, vg.createLvLinear, lv_name, lv.getSize())
vg.close()
def test_vg_uuids(self):
@@ -559,10 +700,10 @@ class TestLvm(unittest.TestCase):
pv_name_lookup = vg.pvFromName(name)
pv_uuid_lookup = vg.pvFromUuid(uuid)
- self.assertTrue(pv_name_lookup.getName() ==
- pv_uuid_lookup.getName())
- self.assertTrue(pv_name_lookup.getUuid() ==
- pv_uuid_lookup.getUuid())
+ self.assertTrue(
+ pv_name_lookup.getName() == pv_uuid_lookup.getName())
+ self.assertTrue(
+ pv_name_lookup.getUuid() == pv_uuid_lookup.getUuid())
self.assertTrue(name == pv_name_lookup.getName())
self.assertTrue(uuid == pv_uuid_lookup.getUuid())
@@ -644,9 +785,10 @@ class TestLvm(unittest.TestCase):
self.assertTrue(len(uuid) > 0)
vg.close()
- RETURN_NUMERIC = ["getSeqno", "getSize", "getFreeSize", "getFreeSize",
- "getExtentSize", "getExtentCount", "getFreeExtentCount",
- "getPvCount", "getMaxPv", "getMaxLv"]
+ RETURN_NUMERIC = [
+ "getSeqno", "getSize", "getFreeSize", "getFreeSize",
+ "getExtentSize", "getExtentCount", "getFreeExtentCount",
+ "getPvCount", "getMaxPv", "getMaxLv"]
def test_vg_getters(self):
device_names = TestLvm._get_pv_device_names()
@@ -710,7 +852,7 @@ class TestLvm(unittest.TestCase):
i = 0
for d in device_names:
if i % 2 == 0:
- TestLvm._create_thin_lv([d], "thin_lv%d" % i)
+ TestLvm._create_thin_lv([d], "thin_lv%d" % i)
else:
TestLvm._create_thick_lv([d], "thick_lv%d" % i)
i += 1
@@ -760,7 +902,7 @@ class TestLvm(unittest.TestCase):
lvm.pvCreate(d)
def test_pv_create(self):
- size = [0, 1024*1024*4]
+ size = [0, 1024 * 1024 * 4]
pvmeta_copies = [0, 1, 2]
pvmeta_size = [0, 255, 512, 1024]
data_alignment = [0, 2048, 4096]
@@ -779,9 +921,9 @@ class TestLvm(unittest.TestCase):
self.assertRaises(lvm.LibLVMError, lvm.pvCreate, '')
self.assertRaises(lvm.LibLVMError, lvm.pvCreate, d, 4)
self.assertRaises(lvm.LibLVMError, lvm.pvCreate, d, 0, 4)
- self.assertRaises(lvm.LibLVMError, lvm.pvCreate, d, 0, 0, 0, 2**34)
- self.assertRaises(lvm.LibLVMError, lvm.pvCreate, d, 0, 0, 0, 4096,
- 2**34)
+ self.assertRaises(lvm.LibLVMError, lvm.pvCreate, d, 0, 0, 0, 2 ** 34)
+ self.assertRaises(
+ lvm.LibLVMError, lvm.pvCreate, d, 0, 0, 0, 4096, 2 ** 34)
#Try a number of combinations and permutations
for s in size:
@@ -797,12 +939,14 @@ class TestLvm(unittest.TestCase):
lvm.pvCreate(d, s, copies, pv_size, align)
lvm.pvRemove(d)
for align_offset in data_alignment_offset:
- lvm.pvCreate(d, s, copies, pv_size, align,
- align * align_offset)
+ lvm.pvCreate(
+ d, s, copies, pv_size, align,
+ align * align_offset)
lvm.pvRemove(d)
for z in zero:
- lvm.pvCreate(d, s, copies, pv_size, align,
- align * align_offset, z)
+ lvm.pvCreate(
+ d, s, copies, pv_size, align,
+ align * align_offset, z)
lvm.pvRemove(d)
#Restore
@@ -866,7 +1010,7 @@ class TestLvm(unittest.TestCase):
method(t)
def _test_bad_names(self, method, dupe_name):
- # Test for duplicate name
+ # Test for duplicate name
self.assertRaises(lvm.LibLVMError, method, dupe_name)
# Test for too long a name
@@ -889,8 +1033,9 @@ class TestLvm(unittest.TestCase):
def _lv_reserved_names(self, method):
prefixes = ['snapshot', 'pvmove']
- reserved = ['_mlog', '_mimage', '_pmspare', '_rimage', '_rmeta',
- '_vorigin', '_tdata', '_tmeta']
+ reserved = [
+ '_mlog', '_mimage', '_pmspare', '_rimage', '_rmeta',
+ '_vorigin', '_tdata', '_tmeta']
for p in prefixes:
self.assertRaises(lvm.LibLVMError, method, p + rs(3))
diff --git a/test/api/thin_percent.sh b/test/api/thin_percent.sh
index f93bd1570..fd64e3bba 100644
--- a/test/api/thin_percent.sh
+++ b/test/api/thin_percent.sh
@@ -11,6 +11,8 @@
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+export LVM_TEST_THIN_REPAIR_CMD=${LVM_TEST_THIN_REPAIR_CMD-/bin/false}
+
. lib/inittest
aux have_thin 1 0 0 || skip
diff --git a/test/lib/aux.sh b/test/lib/aux.sh
index 2fef20715..f870501c3 100644
--- a/test/lib/aux.sh
+++ b/test/lib/aux.sh
@@ -1,5 +1,5 @@
#!/usr/bin/env bash
-# Copyright (C) 2011-2012 Red Hat, Inc. All rights reserved.
+# Copyright (C) 2011-2015 Red Hat, Inc. All rights reserved.
#
# This copyrighted material is made available to anyone wishing to use,
# modify, copy, or redistribute it subject to the terms and conditions
@@ -14,7 +14,7 @@
run_valgrind() {
# Execute script which may use $TESTNAME for creating individual
# log files for each execute command
- exec "${VALGRIND:-valg}" "$@"
+ exec "${VALGRIND:-valgrind}" "$@"
}
expect_failure() {
@@ -22,7 +22,7 @@ expect_failure() {
}
prepare_clvmd() {
- rm -f debug.log
+ rm -f debug.log strace.log
test "${LVM_TEST_LOCKING:-0}" -ne 3 && return # not needed
if pgrep clvmd ; then
@@ -31,47 +31,50 @@ prepare_clvmd() {
fi
# skip if we don't have our own clvmd...
- (which clvmd 2>/dev/null | grep "$abs_builddir") || skip
- # lvs is executed from clvmd - use our version
- export LVM_BINARY=$(which lvm)
+ if test -z "${installed_testsuite+varset}"; then
+ (which clvmd 2>/dev/null | grep -q "$abs_builddir") || skip
+ fi
- test -e "$DM_DEV_DIR/control" || dmsetup table # create control node
+ test -e "$DM_DEV_DIR/control" || dmsetup table >/dev/null # create control node
# skip if singlenode is not compiled in
- (clvmd --help 2>&1 | grep "Available cluster managers" | grep "singlenode") || skip
+ (clvmd --help 2>&1 | grep "Available cluster managers" | grep -q "singlenode") || skip
# lvmconf "activation/monitoring = 1"
local run_valgrind=
test "${LVM_VALGRIND_CLVMD:-0}" -eq 0 || run_valgrind="run_valgrind"
rm -f "$CLVMD_PIDFILE"
- $run_valgrind lib/clvmd -Isinglenode -d 1 -f &
+ echo "<======== Starting CLVMD ========>"
+ # lvs is executed from clvmd - use our version
+ LVM_LOG_FILE_EPOCH=CLVMD LVM_BINARY=$(which lvm) $run_valgrind clvmd -Isinglenode -d 1 -f &
echo $! > LOCAL_CLVMD
- for i in $(seq 1 100) ; do
+ for i in {1..100} ; do
test $i -eq 100 && die "Startup of clvmd is too slow."
- test -e "$CLVMD_PIDFILE" && break
+ test -e "$CLVMD_PIDFILE" -a -e "${CLVMD_PIDFILE%/*}/lvm/clvmd.sock" && break
sleep .2
done
}
prepare_dmeventd() {
- rm -f debug.log
+ rm -f debug.log strace.log
if pgrep dmeventd ; then
echo "Cannot test dmeventd with real dmeventd ($(pgrep dmeventd)) running."
skip
fi
# skip if we don't have our own dmeventd...
- (which dmeventd 2>/dev/null | grep "$abs_builddir") || skip
-
+ if test -z "${installed_testsuite+varset}"; then
+ (which dmeventd 2>/dev/null | grep -q "$abs_builddir") || skip
+ fi
lvmconf "activation/monitoring = 1"
local run_valgrind=
test "${LVM_VALGRIND_DMEVENTD:-0}" -eq 0 || run_valgrind="run_valgrind"
- $run_valgrind dmeventd -f "$@" &
+ LVM_LOG_FILE_EPOCH=DMEVENTD $run_valgrind dmeventd -f "$@" &
echo $! > LOCAL_DMEVENTD
# FIXME wait for pipe in /var/run instead
- for i in $(seq 1 100) ; do
+ for i in {1..100} ; do
test $i -eq 100 && die "Startup of dmeventd is too slow."
test -e "${DMEVENTD_PIDFILE}" && break
sleep .2
@@ -80,18 +83,25 @@ prepare_dmeventd() {
}
prepare_lvmetad() {
- rm -f debug.log
+ rm -f debug.log strace.log
# skip if we don't have our own lvmetad...
- (which lvmetad 2>/dev/null | grep "$abs_builddir") || skip
-
- lvmconf "global/use_lvmetad = 1"
- lvmconf "devices/md_component_detection = 0"
+ if test -z "${installed_testsuite+varset}"; then
+ (which lvmetad 2>/dev/null | grep -q "$abs_builddir") || skip
+ fi
local run_valgrind=
test "${LVM_VALGRIND_LVMETAD:-0}" -eq 0 || run_valgrind="run_valgrind"
+ kill_sleep_kill_ LOCAL_LVMETAD ${LVM_VALGRIND_LVMETAD:-0}
+
+ # Avoid reconfiguring, if already set to use_lvmetad
+ (grep use_lvmetad CONFIG_VALUES 2>/dev/null | tail -n 1 | grep -q 1) || \
+ aux lvmconf "global/use_lvmetad = 1" "devices/md_component_detection = 0"
+ # Default debug is "-l all" and could be override
+ # by setting LVM_TEST_LVMETAD_DEBUG_OPTS before calling inittest.
echo "preparing lvmetad..."
- $run_valgrind lvmetad -f "$@" -s "$TESTDIR/lvmetad.socket" -l wire,debug &
+ $run_valgrind lvmetad -f "$@" -s "$TESTDIR/lvmetad.socket" \
+ ${LVM_TEST_LVMETAD_DEBUG_OPTS--l all} "$@" &
echo $! > LOCAL_LVMETAD
while ! test -e "$TESTDIR/lvmetad.socket"; do echo -n .; sleep .1; done # wait for the socket
echo ok
@@ -120,16 +130,60 @@ lvmetad_dump() {
notify_lvmetad() {
if test -e LOCAL_LVMETAD; then
- pvscan --cache "$@" || true
+ # Ignore results here...
+ LVM_LOG_FILE_EPOCH= pvscan --cache "$@" || true
+ rm -f debug.log
fi
}
+prepare_lvmpolld() {
+ rm -f debug.log
+ # skip if we don't have our own lvmpolld...
+ (which lvmpolld 2>/dev/null | grep "$abs_builddir") || skip
+
+ lvmconf "global/use_lvmpolld = 1"
+
+ local run_valgrind=
+ test "${LVM_VALGRIND_LVMPOLLD:-0}" -eq 0 || run_valgrind="run_valgrind"
+
+ kill_sleep_kill_ LOCAL_LVMPOLLD ${LVM_VALGRIND_LVMPOLLD:-0}
+
+ echo "preparing lvmpolld..."
+ $run_valgrind lvmpolld -f "$@" -s "$TESTDIR/lvmpolld.socket" -B "$TESTDIR/lib/lvm" -l all &
+ echo $! > LOCAL_LVMPOLLD
+ while ! test -e "$TESTDIR/lvmpolld.socket"; do echo -n .; sleep .1; done # wait for the socket
+ echo ok
+}
+
+lvmpolld_talk() {
+ local use=nc
+ if type -p socat >& /dev/null; then
+ use=socat
+ elif echo | not nc -U "$TESTDIR/lvmpolld.socket" ; then
+ echo "WARNING: Neither socat nor nc -U seems to be available." 1>&2
+ echo "# failed to contact lvmpolld"
+ return 1
+ fi
+
+ if test "$use" = nc ; then
+ nc -U "$TESTDIR/lvmpolld.socket"
+ else
+ socat "unix-connect:$TESTDIR/lvmpolld.socket" -
+ fi | tee -a lvmpolld-talk.txt
+}
+
+lvmpolld_dump() {
+ (echo 'request="dump"'; echo '##') | lvmpolld_talk "$@"
+}
+
teardown_devs_prefixed() {
local prefix=$1
local stray=${2:-0}
local IFS=$IFS_NL
local dm
+ rm -rf "$TESTDIR/dev/$prefix"*
+
# Resume suspended devices first
for dm in $(dm_info suspended,name | grep "^Suspended:.*$prefix"); do
echo "dmsetup resume \"${dm#Suspended:}\""
@@ -170,16 +224,16 @@ teardown_devs_prefixed() {
num_remaining_devs=$num_devs
done
fi
+
+ udev_wait
}
teardown_devs() {
# Delete any remaining dm/udev semaphores
teardown_udev_cookies
- test -z "$PREFIX" || {
- rm -rf "$TESTDIR/dev/$PREFIX"*
- teardown_devs_prefixed "$PREFIX"
- }
+ test ! -f MD_DEV || cleanup_md_dev
+ test ! -f DEVICES || teardown_devs_prefixed "$PREFIX"
# NOTE: SCSI_DEBUG_DEV test must come before the LOOP test because
# prepare_scsi_debug_dev() also sets LOOP to short-circuit prepare_loop()
@@ -189,34 +243,79 @@ teardown_devs() {
test ! -f LOOP || losetup -d $(< LOOP) || true
test ! -f LOOPFILE || rm -f $(< LOOPFILE)
fi
- rm -f DEVICES # devs is set in prepare_devs()
- rm -f LOOP
+
+ not diff LOOP BACKING_DEV >/dev/null 2>&1 || rm -f BACKING_DEV
+ rm -f DEVICES LOOP
# Attempt to remove any loop devices that failed to get torn down if earlier tests aborted
test "${LVM_TEST_PARALLEL:-0}" -eq 1 -o -z "$COMMON_PREFIX" || {
- teardown_devs_prefixed "$COMMON_PREFIX" 1
local stray_loops=( $(losetup -a | grep "$COMMON_PREFIX" | cut -d: -f1) )
test ${#stray_loops[@]} -eq 0 || {
+ teardown_devs_prefixed "$COMMON_PREFIX" 1
echo "Removing stray loop devices containing $COMMON_PREFIX: ${stray_loops[@]}"
for i in "${stray_loops[@]}" ; do losetup -d $i ; done
+ # Leave test when udev processed all removed devices
+ udev_wait
}
}
-
- # Leave test when udev processed all removed devices
- udev_wait
}
kill_sleep_kill_() {
- if test -s "$1" ; then
- if kill -TERM "$(< $1)" ; then
- if test "$2" -eq 0 ; then sleep .1 ; else sleep 1 ; fi
- kill -KILL "$(< $1)" 2>/dev/null || true
- fi
+ pidfile=$1
+ slow=$2
+ if test -s $pidfile ; then
+ pid=$(< $pidfile)
+ kill -TERM $pid 2>/dev/null || return 0
+ if test $slow -eq 0 ; then sleep .1 ; else sleep 1 ; fi
+ kill -KILL $pid 2>/dev/null || true
+ wait=0
+ while ps $pid > /dev/null && test $wait -le 10; do
+ sleep .5
+ wait=$(($wait + 1))
+ done
fi
}
+print_procs_by_tag_() {
+ (ps -o pid,args ehax | grep -we"LVM_TEST_TAG=${1:-kill_me_$PREFIX}") || true
+}
+
+count_processes_with_tag() {
+ print_procs_by_tag_ | wc -l
+}
+
+kill_tagged_processes() {
+ local pid
+ local pids
+ local wait
+
+ # read uses all vars within pipe subshell
+ print_procs_by_tag_ "$@" | while read -r pid wait; do
+ if test -n "$pid" ; then
+ echo "Killing tagged process: $pid ${wait:0:120}..."
+ kill -TERM $pid 2>/dev/null || true
+ fi
+ pids="$pids $pid"
+ done
+
+ # wait if process exited and eventually -KILL
+ wait=0
+ for pid in $pids ; do
+ while ps $pid > /dev/null && test $wait -le 10; do
+ sleep .2
+ wait=$(($wait + 1))
+ done
+ test $wait -le 10 || kill -KILL $pid 2>/dev/null || true
+ done
+}
+
teardown() {
echo -n "## teardown..."
+ unset LVM_LOG_FILE_EPOCH
+
+ if test -f TESTNAME ; then
+
+ kill_tagged_processes
kill_sleep_kill_ LOCAL_LVMETAD ${LVM_VALGRIND_LVMETAD:-0}
@@ -224,9 +323,13 @@ teardown() {
# Avoid activation of dmeventd if there is no pid
cfg=$(test -s LOCAL_DMEVENTD || echo "--config activation{monitoring=0}")
vgremove -ff $cfg \
- $vg $vg1 $vg2 $vg3 $vg4 &>/dev/null || rm -f debug.log
+ $vg $vg1 $vg2 $vg3 $vg4 &>/dev/null || rm -f debug.log strace.log
}
+ kill_sleep_kill_ LOCAL_LVMPOLLD ${LVM_VALGRIND_LVMPOLLD:-0}
+
+ echo -n .
+
kill_sleep_kill_ LOCAL_CLVMD ${LVM_VALGRIND_CLVMD:-0}
echo -n .
@@ -239,6 +342,8 @@ teardown() {
echo -n .
+ fi
+
test -n "$TESTDIR" && {
cd "$TESTOLDPWD"
rm -rf "$TESTDIR" || echo BLA
@@ -273,7 +378,8 @@ prepare_loop() {
echo -n .
local LOOPFILE="$PWD/test.img"
- dd if=/dev/zero of="$LOOPFILE" bs=$((1024*1024)) count=0 seek=$(($size-1)) 2> /dev/null
+ rm -f "$LOOPFILE"
+ dd if=/dev/zero of="$LOOPFILE" bs=$((1024*1024)) count=0 seek=$(($size + 1)) 2> /dev/null
if LOOP=$(losetup -s -f "$LOOPFILE" 2>/dev/null); then
:
elif LOOP=$(losetup -f) && losetup "$LOOP" "$LOOPFILE"; then
@@ -295,7 +401,9 @@ prepare_loop() {
done
fi
test -n "$LOOP" # confirm or fail
+ BACKING_DEV="$LOOP"
echo "$LOOP" > LOOP
+ echo "$LOOP" > BACKING_DEV
echo "ok ($LOOP)"
}
@@ -305,14 +413,13 @@ prepare_loop() {
prepare_scsi_debug_dev() {
local DEV_SIZE=$1
local SCSI_DEBUG_PARAMS=${@:2}
+ local DEBUG_DEV
+ rm -f debug.log strace.log
test ! -f "SCSI_DEBUG_DEV" || return 0
test -z "$LOOP"
test -n "$DM_DEV_DIR"
- # Skip test if awk isn't available (required for get_sd_devs_)
- which awk || skip
-
# Skip test if scsi_debug module is unavailable or is already in use
modprobe --dry-run scsi_debug || skip
lsmod | not grep -q scsi_debug || skip
@@ -320,18 +427,22 @@ prepare_scsi_debug_dev() {
# Create the scsi_debug device and determine the new scsi device's name
# NOTE: it will _never_ make sense to pass num_tgts param;
# last param wins.. so num_tgts=1 is imposed
+ touch SCSI_DEBUG_DEV
modprobe scsi_debug dev_size_mb=$DEV_SIZE $SCSI_DEBUG_PARAMS num_tgts=1 || skip
- sleep 2 # allow for async Linux SCSI device registration
-
- local DEBUG_DEV="/dev/$(grep -H scsi_debug /sys/block/*/device/model | cut -f4 -d /)"
+
+ for i in {1..20} ; do
+ DEBUG_DEV="/dev/$(grep -H scsi_debug /sys/block/*/device/model | cut -f4 -d /)"
+ test -b "$DEBUG_DEV" && break
+ sleep .1 # allow for async Linux SCSI device registration
+ done
test -b "$DEBUG_DEV" || return 1 # should not happen
# Create symlink to scsi_debug device in $DM_DEV_DIR
SCSI_DEBUG_DEV="$DM_DEV_DIR/$(basename $DEBUG_DEV)"
echo "$SCSI_DEBUG_DEV" > SCSI_DEBUG_DEV
- echo "$SCSI_DEBUG_DEV" > LOOP
+ echo "$SCSI_DEBUG_DEV" > BACKING_DEV
# Setting $LOOP provides means for prepare_devs() override
- test "$LVM_TEST_DEVDIR" = "/dev" || ln -snf "$DEBUG_DEV" "$SCSI_DEBUG_DEV"
+ test "$DEBUG_DEV" = "$SCSI_DEBUG_DEV" || ln -snf "$DEBUG_DEV" "$SCSI_DEBUG_DEV"
}
cleanup_scsi_debug_dev() {
@@ -339,20 +450,111 @@ cleanup_scsi_debug_dev() {
rm -f SCSI_DEBUG_DEV LOOP
}
+prepare_md_dev() {
+ local level=$1
+ local rchunk=$2
+ local rdevs=$3
+
+ local maj=$(mdadm --version 2>&1) || skip "mdadm tool is missing!"
+ local mddev
+
+ cleanup_md_dev
+
+ rm -f debug.log strace.log MD_DEV MD_DEV_PV MD_DEVICES
+
+ # Have MD use a non-standard name to avoid colliding with an existing MD device
+ # - mdadm >= 3.0 requires that non-standard device names be in /dev/md/
+ # - newer mdadm _completely_ defers to udev to create the associated device node
+ maj=${maj##*- v}
+ maj=${maj%%.*}
+ [ "$maj" -ge 3 ] && \
+ mddev=/dev/md/md_lvm_test0 || \
+ mddev=/dev/md_lvm_test0
+
+ mdadm --create --metadata=1.0 "$mddev" --auto=md --level $level --chunk $rchunk --raid-devices=$rdevs "${@:4}" || {
+ # Some older 'mdadm' version managed to open and close devices internaly
+ # and reporting non-exclusive access on such device
+ # let's just skip the test if this happens.
+ # Note: It's pretty complex to get rid of consequences
+ # the following sequence avoid leaks on f19
+ # TODO: maybe try here to recreate few times....
+ mdadm --stop "$mddev" || true
+ udev_wait
+ mdadm --zero-superblock "${@:4}" || true
+ udev_wait
+ skip "Test skipped, unreliable mdadm detected!"
+ }
+ test -b "$mddev" || skip "mdadm has not created device!"
+
+ # LVM/DM will see this device
+ case "$DM_DEV_DIR" in
+ "/dev") readlink -f "$mddev" ;;
+ *) cp -LR "$mddev" "$DM_DEV_DIR"
+ echo "$DM_DEV_DIR/md_lvm_test0" ;;
+ esac > MD_DEV_PV
+ echo "$mddev" > MD_DEV
+ notify_lvmetad $(< MD_DEV_PV)
+ printf "%s\n" "${@:4}" > MD_DEVICES
+ for mddev in "${@:4}"; do
+ notify_lvmetad "$mddev"
+ done
+}
+
+cleanup_md_dev() {
+ test -f MD_DEV || return 0
+
+ local IFS=$IFS_NL
+ local dev=$(< MD_DEV)
+
+ udev_wait
+ mdadm --stop "$dev" || true
+ test "$DM_DEV_DIR" != "/dev" && rm -f "$DM_DEV_DIR/$(basename $dev)"
+ notify_lvmetad $(< MD_DEV_PV)
+ for dev in $(< MD_DEVICES); do
+ mdadm --zero-superblock "$dev" || true
+ notify_lvmetad "$dev"
+ done
+ udev_wait
+ if [ -b "$mddev" ]; then
+ # mdadm doesn't always cleanup the device node
+ # sleeps offer hack to defeat: 'md: md127 still in use'
+ # see: https://bugzilla.redhat.com/show_bug.cgi?id=509908#c25
+ sleep 2
+ rm -f "$mddev"
+ fi
+ rm -f MD_DEV MD_DEVICES MD_DEV_PV
+}
+
+prepare_backing_dev() {
+ if test -f BACKING_DEV; then
+ BACKING_DEV=$(< BACKING_DEV)
+ elif test -b "$LVM_TEST_BACKING_DEVICE"; then
+ BACKING_DEV=$LVM_TEST_BACKING_DEVICE
+ echo "$BACKING_DEV" > BACKING_DEV
+ else
+ prepare_loop "$@"
+ fi
+}
+
prepare_devs() {
local n=${1:-3}
local devsize=${2:-34}
local pvname=${3:-pv}
- local loopsz
+ local shift=0
- prepare_loop $(($n*$devsize))
- echo -n "## preparing $n devices..."
-
- if ! loopsz=$(blockdev --getsz "$LOOP" 2>/dev/null); then
- loopsz=$(blockdev --getsize "$LOOP" 2>/dev/null)
+ # sanlock requires more space for the internal sanlock lv
+ # This could probably be lower, but what are the units?
+ if test -n "$LVM_TEST_LOCK_TYPE_SANLOCK" ; then
+ devsize=1024
fi
- local size=$(($loopsz/$n))
+ touch DEVICES
+ prepare_backing_dev $(($n*$devsize))
+ # shift start of PV devices on /dev/loopXX by 1M
+ not diff LOOP BACKING_DEV >/dev/null 2>&1 || shift=2048
+ echo -n "## preparing $n devices..."
+
+ local size=$(($devsize*2048)) # sectors
local count=0
init_udev_transaction
for i in $(seq 1 $n); do
@@ -360,11 +562,26 @@ prepare_devs() {
local dev="$DM_DEV_DIR/mapper/$name"
DEVICES[$count]=$dev
count=$(( $count + 1 ))
- echo 0 $size linear "$LOOP" $((($i-1)*$size)) > "$name.table"
- dmsetup create -u "TEST-$name" "$name" "$name.table"
+ echo 0 $size linear "$BACKING_DEV" $((($i-1)*$size + $shift)) > "$name.table"
+ if not dmsetup create -u "TEST-$name" "$name" "$name.table" &&
+ test -n "$LVM_TEST_BACKING_DEVICE";
+ then # maybe the backing device is too small for this test
+ LVM_TEST_BACKING_DEVICE=
+ rm -f BACKING_DEV
+ prepare_devs "$@"
+ return $?
+ fi
done
finish_udev_transaction
+ # non-ephemeral devices need to be cleared between tests
+ test -f LOOP || for d in ${DEVICES[@]}; do
+ blkdiscard "$d" 2>/dev/null || true
+ # ensure disk header is always zeroed
+ dd if=/dev/zero of="$d" bs=32k count=1
+ wipefs -a "$d" 2>/dev/null || true
+ done
+
#for i in `seq 1 $n`; do
# local name="${PREFIX}$pvname$i"
# dmsetup info -c $name
@@ -377,18 +594,34 @@ prepare_devs() {
printf "%s\n" "${DEVICES[@]}" > DEVICES
# ( IFS=$'\n'; echo "${DEVICES[*]}" ) >DEVICES
echo "ok"
+
+ for dev in "${DEVICES[@]}"; do
+ notify_lvmetad "$dev"
+ done
}
-# Replace linear PV device with its 'delayed' version
-# Could be used to more deterministicaly hit some problems.
-# Parameters: {device path} [read delay ms] [write delay ms]
-# Original device is restored when both delay params are 0 (or missing).
-# i.e. delay_dev "$dev1" 0 200
-delay_dev() {
- target_at_least dm-delay 1 2 0 || skip
- local name=$(echo "$1" | sed -e 's,.*/,,')
- local read_ms=${2:-0}
- local write_ms=${3:-0}
+
+common_dev_() {
+ local tgtype=$1
+ local name=${2##*/}
+ local offsets
+ local read_ms
+ local write_ms
+
+ case "$tgtype" in
+ delay)
+ read_ms=${3:-0}
+ write_ms=${4:-0}
+ offsets=${@:5}
+ if test "$read_ms" -eq 0 -a "$write_ms" -eq 0 ; then
+ offsets=
+ else
+ test -z "${offsets[@]}" && offsets="0:"
+ fi ;;
+ error) offsets=${@:3}
+ test -z "${offsets[@]}" && offsets="0:" ;;
+ esac
+
local pos
local size
local type
@@ -397,17 +630,50 @@ delay_dev() {
read pos size type pvdev offset < "$name.table"
+ for fromlen in ${offsets[@]}; do
+ from=${fromlen%%:*}
+ len=${fromlen##*:}
+ test -n "$len" || len=$(($size - $from))
+ diff=$(($from - $pos))
+ if test $diff -gt 0 ; then
+ echo "$pos $diff $type $pvdev $(($pos + $offset))"
+ pos=$(($pos + $diff))
+ elif test $diff -lt 0 ; then
+ die "Position error"
+ fi
+
+ case "$tgtype" in
+ delay)
+ echo "$from $len delay $pvdev $(($pos + $offset)) $read_ms $pvdev $(($pos + $offset)) $write_ms" ;;
+ error)
+ echo "$from $len error" ;;
+ esac
+ pos=$(($pos + $len))
+ done > "$name.devtable"
+ diff=$(($size - $pos))
+ test "$diff" -gt 0 && echo "$pos $diff $type $pvdev $(($pos + $offset))" >>"$name.devtable"
+
init_udev_transaction
- if test $read_ms -ne 0 -o $write_ms -ne 0 ; then
- echo "0 $size delay $pvdev $offset $read_ms $pvdev $offset $write_ms" | \
- dmsetup load "$name"
- else
- dmsetup load "$name" "$name.table"
- fi
+ dmsetup load "$name" "$name.devtable"
+ # TODO: add support for resume without udev rescan
dmsetup resume "$name"
finish_udev_transaction
}
+# Replace linear PV device with its 'delayed' version
+# Could be used to more deterministicaly hit some problems.
+# Parameters: {device path} [read delay ms] [write delay ms] [offset:size]...
+# Original device is restored when both delay params are 0 (or missing).
+# If the size is missing, the remaing portion of device is taken
+# i.e. delay_dev "$dev1" 0 200 256:
+delay_dev() {
+ if test ! -f HAVE_DM_DELAY ; then
+ target_at_least dm-delay 1 1 0 || skip
+ fi
+ touch HAVE_DM_DELAY
+ common_dev_ delay "$@"
+}
+
disable_dev() {
local dev
local silent
@@ -433,8 +699,8 @@ disable_dev() {
echo "Disabling device $dev ($maj:$min)"
notify="$notify $maj:$min"
if test -n "$error"; then
- echo 0 10000000 error | dmsetup load $dev
- dmsetup resume $dev
+ echo 0 10000000 error | dmsetup load "$dev"
+ dmsetup resume "$dev"
else
dmsetup remove -f "$dev" 2>/dev/null || true
fi
@@ -455,7 +721,7 @@ enable_dev() {
shift
fi
- rm -f debug.log
+ rm -f debug.log strace.log
init_udev_transaction
for dev in "$@"; do
local name=$(echo "$dev" | sed -e 's,.*/,,')
@@ -477,46 +743,7 @@ enable_dev() {
# Original device table is replace with multiple lines
# i.e. error_dev "$dev1" 8:32 96:8
error_dev() {
- local dev=$1
- local name=$(echo "$dev" | sed -e 's,.*/,,')
- local fromlen
- local pos
- local size
- local type
- local pvdev
- local offset
- local silent
-
- read pos size type pvdev offset < $name.table
-
- shift
- rm -f $name.errtable
- for fromlen in "$@"; do
- from=${fromlen%%:*}
- len=${fromlen##*:}
- diff=$(($from - $pos))
- if test $diff -gt 0 ; then
- echo "$pos $diff $type $pvdev $(($pos + $offset))" >>$name.errtable
- pos=$(($pos + $diff))
- elif test $diff -lt 0 ; then
- die "Position error"
- fi
- echo "$from $len error" >>$name.errtable
- pos=$(($pos + $len))
- done
- diff=$(($size - $pos))
- test $diff -gt 0 && echo "$pos $diff $type $pvdev $(($pos + $offset))" >>$name.errtable
-
- init_udev_transaction
- if dmsetup table $name ; then
- dmsetup load "$name" "$name.errtable"
- else
- dmsetup create -u "TEST-$name" "$name" "$name.errtable"
- fi
- # using device name (since device path does not exists yet with udev)
- dmsetup resume "$name"
- finish_udev_transaction
- test -n "$silent" || notify_lvmetad "$dev"
+ common_dev_ error "$@"
}
backup_dev() {
@@ -545,7 +772,7 @@ prepare_pvs() {
prepare_vg() {
teardown_devs
- prepare_pvs "$@"
+ prepare_devs "$@"
vgcreate -s 512K $vg "${DEVICES[@]}"
}
@@ -558,7 +785,7 @@ extend_filter() {
}
extend_filter_LVMTEST() {
- extend_filter "a|$DM_DEV_DIR/LVMTEST|"
+ extend_filter "a|$DM_DEV_DIR/$PREFIX|"
}
hide_dev() {
@@ -578,7 +805,7 @@ unhide_dev() {
}
mkdev_md5sum() {
- rm -f debug.log
+ rm -f debug.log strace.log
mkfs.ext2 "$DM_DEV_DIR/$1/$2" || return 1
md5sum "$DM_DEV_DIR/$1/$2" > "md5.$1-$2"
}
@@ -594,6 +821,11 @@ generate_config() {
fi
LVM_TEST_LOCKING=${LVM_TEST_LOCKING:-1}
+ LVM_TEST_LVMETAD=${LVM_TEST_LVMETAD:-0}
+ LVM_TEST_LVMPOLLD=${LVM_TEST_LVMPOLLD:-0}
+ LVM_TEST_LVMLOCKD=${LVM_TEST_LVMLOCKD:-0}
+ LVM_TEST_LOCK_TYPE_SANLOCK=${LVM_TEST_LOCK_TYPE_SANLOCK:-0}
+ LVM_TEST_LOCK_TYPE_DLM=${LVM_TEST_LOCK_TYPE_DLM:-0}
if test "$DM_DEV_DIR" = "/dev"; then
LVM_VERIFY_UDEV=${LVM_VERIFY_UDEV:-0}
else
@@ -601,40 +833,49 @@ generate_config() {
fi
test -f "$config_values" || {
cat > "$config_values" <<-EOF
+activation/checks = 1
+activation/monitoring = 0
+activation/polling_interval = 0
+activation/retry_deactivation = 1
+activation/snapshot_autoextend_percent = 50
+activation/snapshot_autoextend_threshold = 50
+activation/udev_rules = 1
+activation/udev_sync = 1
+activation/verify_udev_operations = $LVM_VERIFY_UDEV
+allocation/wipe_signatures_when_zeroing_new_lvs = 0
+backup/archive = 0
+backup/backup = 0
+devices/cache_dir = "$TESTDIR/etc"
+devices/default_data_alignment = 1
devices/dir = "$DM_DEV_DIR"
-devices/scan = "$DM_DEV_DIR"
devices/filter = "a|.*|"
devices/global_filter = [ "a|$DM_DEV_DIR/mapper/.*pv[0-9_]*$|", "r|.*|" ]
-devices/cache_dir = "$TESTDIR/etc"
-devices/sysfs_scan = 0
-devices/default_data_alignment = 1
devices/md_component_detection = 0
-log/syslog = 0
-log/indent = 1
-log/level = 9
-log/file = "$TESTDIR/debug.log"
-log/overwrite = 1
-log/activation = 1
-log/verbose = 0
-activation/retry_deactivation = 1
-backup/backup = 0
-backup/archive = 0
+devices/scan = "$DM_DEV_DIR"
+devices/sysfs_scan = 1
global/abort_on_internal_errors = 1
+global/cache_check_executable = "$LVM_TEST_CACHE_CHECK_CMD"
+global/cache_dump_executable = "$LVM_TEST_CACHE_DUMP_CMD"
+global/cache_repair_executable = "$LVM_TEST_CACHE_REPAIR_CMD"
global/detect_internal_vg_cache_corruption = 1
+global/fallback_to_local_locking = 0
global/library_dir = "$TESTDIR/lib"
global/locking_dir = "$TESTDIR/var/lock/lvm"
global/locking_type=$LVM_TEST_LOCKING
global/si_unit_consistency = 1
-global/fallback_to_local_locking = 0
-activation/checks = 1
-activation/udev_sync = 1
-activation/udev_rules = 1
-activation/verify_udev_operations = $LVM_VERIFY_UDEV
-activation/polling_interval = 0
-activation/snapshot_autoextend_percent = 50
-activation/snapshot_autoextend_threshold = 50
-activation/monitoring = 0
-allocation/wipe_signatures_when_zeroing_new_lvs = 0
+global/thin_check_executable = "$LVM_TEST_THIN_CHECK_CMD"
+global/thin_dump_executable = "$LVM_TEST_THIN_DUMP_CMD"
+global/thin_repair_executable = "$LVM_TEST_THIN_REPAIR_CMD"
+global/use_lvmetad = $LVM_TEST_LVMETAD
+global/use_lvmpolld = $LVM_TEST_LVMPOLLD
+global/use_lvmlockd = $LVM_TEST_LVMLOCKD
+log/activation = 1
+log/file = "$TESTDIR/debug.log"
+log/indent = 1
+log/level = 9
+log/overwrite = 1
+log/syslog = 0
+log/verbose = 0
EOF
}
@@ -643,16 +884,43 @@ EOF
echo "$v"
done >> "$config_values"
- local s
- for s in $(cut -f1 -d/ "$config_values" | sort | uniq); do
- echo "$s {"
- local k
- for k in $(grep ^"$s"/ "$config_values" | cut -f1 -d= | sed -e 's, *$,,' | sort | uniq); do
- grep "^$k" "$config_values" | tail -n 1 | sed -e "s,^$s/, ,"
- done
- echo "}"
- echo
- done | tee "$config"
+ declare -A CONF 2>/dev/null || {
+ # Associative arrays is not available
+ local s
+ for s in $(cut -f1 -d/ "$config_values" | sort | uniq); do
+ echo "$s {"
+ local k
+ for k in $(grep ^"$s"/ "$config_values" | cut -f1 -d= | sed -e 's, *$,,' | sort | uniq); do
+ grep "^$k" "$config_values" | tail -n 1 | sed -e "s,^$s/, ,"
+ done
+ echo "}"
+ echo
+ done | tee "$config" | sed -e "s,^,## LVMCONF: ,"
+ return 0
+ }
+
+ local sec
+ local last_sec
+
+ # read sequential list and put into associative array
+ while IFS=$IFS_NL read -r v; do
+ # trim white-space-chars via echo when inserting
+ CONF[$(echo ${v%%[={]*})]=${v#*/}
+ done < "$config_values"
+
+ # sort by section and iterate through them
+ printf "%s\n" ${!CONF[@]} | sort | while read -r v ; do
+ sec=${v%%/*} # split on section'/'param_name
+ test "$sec" = "$last_sec" || {
+ test -z "$last_sec" || echo "}"
+ echo "$sec {"
+ last_sec=$sec
+ }
+ echo " ${CONF[$v]}"
+ done > "$config"
+ echo "}" >> "$config"
+
+ sed -e "s,^,## LVMCONF: ," "$config"
}
lvmconf() {
@@ -665,28 +933,21 @@ profileconf() {
profile_name="$1"
shift
generate_config "$@"
- test -d etc/profile || mkdir etc/profile
+ mkdir -p etc/profile
mv -f "PROFILE_$profile_name" "etc/profile/$profile_name.profile"
}
prepare_profiles() {
- test -d etc/profile || mkdir etc/profile
+ mkdir -p etc/profile
for profile_name in $@; do
- test -L "$abs_top_builddir/test/lib/$profile_name.profile" || skip
- cp "$abs_top_builddir/test/lib/$profile_name.profile" "etc/profile/$profile_name.profile"
+ test -L "lib/$profile_name.profile" || skip
+ cp "lib/$profile_name.profile" "etc/profile/$profile_name.profile"
done
}
apitest() {
- local t=$1
- shift
- test -x "$abs_top_builddir/test/api/$t.t" || skip
- "$abs_top_builddir/test/api/$t.t" "$@" && rm -f debug.log
-}
-
-api() {
- test -x "$abs_top_builddir/test/api/wrapper" || skip
- "$abs_top_builddir/test/api/wrapper" "$@" && rm -f debug.log
+ test -x "$TESTOLDPWD/api/$1.t" || skip
+ "$TESTOLDPWD/api/$1.t" "${@:2}" && rm -f debug.log strace.log
}
mirror_recovery_works() {
@@ -773,7 +1034,7 @@ version_at_least() {
local major
local minor
local revision
- IFS=. read -r major minor revision <<< "$1"
+ IFS=".-" read -r major minor revision <<< "$1"
shift
test -z "$1" && return 0
@@ -797,7 +1058,7 @@ version_at_least() {
#
# i.e. dm_target_at_least dm-thin-pool 1 0
target_at_least() {
- rm -f debug.log
+ rm -f debug.log strace.log
case "$1" in
dm-*) modprobe "$1" || true ;;
esac
@@ -810,39 +1071,67 @@ target_at_least() {
local version=$(dmsetup targets 2>/dev/null | grep "${1##dm-} " 2>/dev/null)
version=${version##* v}
- shift
- version_at_least "$version" "$@"
+ version_at_least "$version" "${@:2}" || {
+ echo "Found $1 version $version, but requested ${*:2}." >&2
+ return 1
+ }
}
have_thin() {
- test "$THIN" = shared -o "$THIN" = internal || return 1
- target_at_least dm-thin-pool "$@" || return 1
+ test "$THIN" = shared -o "$THIN" = internal || {
+ echo "Thin is not built-in." >&2
+ return 1;
+ }
+ target_at_least dm-thin-pool "$@"
+ declare -a CONF
# disable thin_check if not present in system
- test -x "$LVM_TEST_THIN_CHECK_CMD" || LVM_TEST_THIN_CHECK_CMD=""
- test -x "$LVM_TEST_THIN_DUMP_CMD" || LVM_TEST_THIN_DUMP_CMD=""
- test -x "$LVM_TEST_THIN_REPAIR_CMD" || LVM_TEST_THIN_REPAIR_CMD=""
- lvmconf "global/thin_check_executable = \"$LVM_TEST_THIN_CHECK_CMD\"" \
- "global/thin_dump_executable = \"$LVM_TEST_THIN_DUMP_CMD\"" \
- "global/thin_repair_executable = \"$LVM_TEST_THIN_REPAIR_CMD\""
+ if test -n "$LVM_TEST_THIN_CHECK_CMD" -a ! -x "$LVM_TEST_THIN_CHECK_CMD" ; then
+ CONF[0]="global/thin_check_executable = \"\""
+ fi
+ if test -n "$LVM_TEST_THIN_DUMP_CMD" -a ! -x "$LVM_TEST_THIN_DUMP_CMD" ; then
+ CONF[1]="global/thin_dump_executable = \"\""
+ fi
+ if test -n "$LVM_TEST_THIN_REPAIR_CMD" -a ! -x "$LVM_TEST_THIN_REPAIR_CMD" ; then
+ CONF[2]="global/thin_repair_executable = \"\""
+ fi
+ if test ${#CONF[@]} -ne 0 ; then
+ echo "TEST WARNING: Reconfiguring ${CONF[@]}"
+ lvmconf "${CONF[@]}"
+ fi
}
have_raid() {
- test "$RAID" = shared -o "$RAID" = internal || return 1
+ test "$RAID" = shared -o "$RAID" = internal || {
+ echo "Raid is not built-in." >&2
+ return 1;
+ }
target_at_least dm-raid "$@"
}
have_cache() {
- test "$CACHE" = shared -o "$CACHE" = internal || return 1
+ test "$CACHE" = shared -o "$CACHE" = internal || {
+ echo "Cache is not built-in." >&2
+ return 1;
+ }
target_at_least dm-cache "$@"
- test -x "$LVM_TEST_CACHE_CHECK_CMD" || LVM_TEST_CACHE_CHECK_CMD=""
- test -x "$LVM_TEST_CACHE_DUMP_CMD" || LVM_TEST_CACHE_DUMP_CMD=""
- test -x "$LVM_TEST_CACHE_REPAIR_CMD" || LVM_TEST_CACHE_REPAIR_CMD=""
- lvmconf "global/cache_check_executable = \"$LVM_TEST_CACHE_CHECK_CMD\"" \
- "global/cache_dump_executable = \"$LVM_TEST_CACHE_DUMP_CMD\"" \
- "global/cache_repair_executable = \"$LVM_TEST_CACHE_REPAIR_CMD\""
+ declare -a CONF
+ # disable cache_check if not present in system
+ if test -n "$LVM_TEST_CACHE_CHECK_CMD" -a ! -x "$LVM_TEST_CACHE_CHECK_CMD" ; then
+ CONF[0]="global/cache_check_executable = \"\""
+ fi
+ if test -n "$LVM_TEST_CACHE_DUMP_CMD" -a ! -x "$LVM_TEST_CACHE_DUMP_CMD" ; then
+ CONF[1]="global/cache_dump_executable = \"\""
+ fi
+ if test -n "$LVM_TEST_CACHE_REPAIR_CMD" -a ! -x "$LVM_TEST_CACHE_REPAIR_CMD" ; then
+ CONF[2]="global/cache_repair_executable = \"\""
+ fi
+ if test ${#CONF[@]} -ne 0 ; then
+ echo "TEST WARNING: Reconfiguring ${CONF[@]}"
+ lvmconf "${CONF[@]}"
+ fi
}
have_tool_at_least() {
@@ -863,6 +1152,72 @@ dmsetup_wrapped() {
dmsetup "$@"
}
# Print (to stdout) an awk program that scans an lvmpolld state dump and
# emits the init_requests_count of the entry whose field named by awk
# variable 'vkey' (e.g. "lvname") equals 'vvalue'.  The dump is a nested
# { } structure; 'x' tracks the brace nesting depth and entries of
# interest live at depth 2.  Both vkey and vvalue are supplied to awk
# with -v by the caller (see check_lvmpolld_init_rq_count).
awk_parse_init_count_in_lvmpolld_dump() {
	printf '%s' \
	\
	$'BEGINFILE { x=0; answ=0; FS="="; key="[[:space:]]*"vkey }' \
	$'{' \
	$'if (/.*{$/) { x++ }' \
	$'else if (/.*}$/) { x-- }' \
	$'else if ( x == 2 && $1 ~ key) { value=substr($2, 2); value=substr(value, 1, length(value) - 1); }' \
	$'if ( x == 2 && value == vvalue && $1 ~ /[[:space:]]*init_requests_count/) { answ=$2 }' \
	$'if (answ > 0) { exit 0 }' \
	$'}' \
	$'END { printf "%d", answ }'
}
+
# Check that lvmpolld recorded exactly $1 init requests for the entry
# matching $2 under key $3 (default "lvname") in ./lvmpolld_dump.txt.
# Prints a diagnostic and returns 1 on mismatch.
check_lvmpolld_init_rq_count() {
	local ret
	# Separate declaration from assignment so the $(...) exit status is
	# not masked by 'local'; quote expansions so an empty awk result or
	# stray whitespace cannot break 'test ... -eq ...'.
	ret=$(awk -v vvalue="$2" -v vkey="${3:-lvname}" "$(awk_parse_init_count_in_lvmpolld_dump)" lvmpolld_dump.txt)
	test "$ret" -eq "$1" || {
		echo "check_lvmpolld_init_rq_count failed. Expected $1, got $ret"
		return 1
	}
}
+
# Wait until the pvmove LV $1 is ready.  With lvmpolld (LOCAL_LVMPOLLD
# present) poll lvmpolld's dump until an init request for the LV's
# combined vg+lv UUID appears; otherwise poll dmsetup until the pvmove
# mapping reports a Live table.  $2 overrides the retry count.
wait_pvmove_lv_ready() {
	# polling interval is .1s, so the default of 300 retries is at least
	# ~30 secs of waiting (more, counting the per-iteration work)
	local retries=${2:-300}

	if [ -e LOCAL_LVMPOLLD ]; then
		local lvid
		while : ; do
			test $retries -le 0 && die "Waiting for lvmpolld timed out"
			# lazily resolve the LV's identifier: vg_uuid+lv_uuid with
			# spaces and dashes stripped (lvmpolld's lvid format)
			test -n "$lvid" || {
				lvid=$(get lv_field ${1//-/\/} vg_uuid,lv_uuid -a 2>/dev/null)
				lvid=${lvid//\ /}
				lvid=${lvid//-/}
			}
			test -z "$lvid" || {
				lvmpolld_dump > lvmpolld_dump.txt
				! check_lvmpolld_init_rq_count 1 $lvid lvid || break;
			}
			sleep .1
			retries=$((retries-1))
		done
	else
		while : ; do
			test $retries -le 0 && die "Waiting for pvmove LV to get activated has timed out"
			dmsetup info -c -o tables_loaded $1 > out 2>/dev/null|| true;
			not grep Live out >/dev/null || break
			sleep .1
			retries=$((retries-1))
		done
	fi
}
+
# Print the machine's total memory (the MemTotal line of /proc/meminfo)
# in kB units, with the " kB" suffix stripped.
total_mem() {
	local key value
	while IFS=":" read -r key value ; do
		if [ "${key#MemTotal}" != "$key" ]; then
			echo ${value%% kB}
			return
		fi
	done < /proc/meminfo
}
+
# Succeed when the running kernel's version (uname -r) is at least the
# version given in "$@" (comparison delegated to version_at_least).
kernel_at_least() {
	version_at_least "$(uname -r)" "$@"
}
+
+test -z "$LVM_TEST_AUX_TRACE" || set -x
+
test -f DEVICES && devs=$(< DEVICES)
if test "$1" = dmsetup; then
diff --git a/test/lib/brick-shelltest.h b/test/lib/brick-shelltest.h
new file mode 100644
index 000000000..b29e626e9
--- /dev/null
+++ b/test/lib/brick-shelltest.h
@@ -0,0 +1,1292 @@
+// -*- mode: C++; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+
+/*
+ * This brick allows you to build a test runner for shell-based functional
+ * tests. It comes with fairly elaborate features (although most are only
+ * available on posix systems), geared toward difficult-to-test software.
+ *
+ * It provides a full-featured "main" function (brick::shelltest::run) that you
+ * can use as a drop-in shell test runner.
+ *
+ * Features include:
+ * - interactive and batch-mode execution
+ * - collects test results and test logs in a simple text-based format
+ * - measures resource use of individual tests
+ * - rugged: suited for running in monitored virtual machines
+ * - supports test flavouring
+ */
+
+/*
+ * (c) 2014 Petr Ročkai <me@mornfall.net>
+ * (c) 2014 Red Hat, Inc.
+ */
+
+/* Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE. */
+
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <errno.h>
+
+#include <vector>
+#include <map>
+#include <deque>
+#include <string>
+#include <iostream>
+#include <iomanip>
+#include <fstream>
+#include <sstream>
+#include <cassert>
+#include <iterator>
+#include <algorithm>
+#include <stdexcept>
+
+#include <dirent.h>
+
+#ifdef __unix
+#include <sys/stat.h>
+#include <sys/resource.h> /* rusage */
+#include <sys/select.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/klog.h>
+#include <time.h>
+#include <unistd.h>
+#endif
+
+#include "configure.h"
+
+#ifndef BRICK_SHELLTEST_H
+#define BRICK_SHELLTEST_H
+
+namespace brick {
+namespace shelltest {
+
+/* TODO: remove this section in favour of brick-filesystem.h */
+
/* Build a runtime_error whose message combines strerror(errno) with a
 * caller-supplied message and optional context, in the form
 * "<errno text> <msg> <ctx>". */
inline std::runtime_error syserr( std::string msg, std::string ctx = "" ) {
    std::string text = strerror( errno );
    text += " ";
    text += msg;
    text += " ";
    text += ctx;
    return std::runtime_error( text );
}
+
+struct dir {
+ DIR *d;
+ dir( std::string p ) {
+ d = opendir( p.c_str() );
+ if ( !d )
+ throw syserr( "error opening directory", p );
+ }
+ ~dir() { closedir( d ); }
+};
+
+typedef std::vector< std::string > Listing;
+
/* Best-effort fsync of the file at path n; silently a no-op when the
 * file cannot be opened for writing. */
inline void fsync_name( std::string n )
{
    int fd = open( n.c_str(), O_WRONLY );
    if ( fd < 0 )
        return;
    fsync( fd );
    close( fd );
}
+
+inline Listing listdir( std::string p, bool recurse = false, std::string prefix = "" )
+{
+ Listing r;
+
+ dir d( p );
+ struct dirent entry, *iter = 0;
+ int readerr;
+
+ while ( (readerr = readdir_r( d.d, &entry, &iter )) == 0 && iter ) {
+ std::string ename( entry.d_name );
+
+ if ( ename == "." || ename == ".." )
+ continue;
+
+ if ( recurse ) {
+ struct stat64 stat;
+ std::string s = p + "/" + ename;
+ if ( ::stat64( s.c_str(), &stat ) == -1 )
+ continue;
+ if ( S_ISDIR(stat.st_mode) ) {
+ Listing sl = listdir( s, true, prefix + ename + "/" );
+ for ( Listing::iterator i = sl.begin(); i != sl.end(); ++i )
+ r.push_back( prefix + *i );
+ } else
+ r.push_back( prefix + ename );
+ } else
+ r.push_back( ename );
+ };
+
+ if ( readerr != 0 )
+ throw syserr( "error reading directory", p );
+
+ return r;
+}
+
+/* END remove this section */
+
/* Persistent record of per-test results.  `status` maps a test id
 * ("flavour:name") to a result code.  After every change the journal is
 * appended to `location` and the complete current state is rewritten to
 * `list` (both fsynced), so an interrupted or rebooted run can be
 * resumed with --continue. */
struct Journal {
    /* Result codes for a single test run. */
    enum R {
        STARTED,
        RETRIED,
        UNKNOWN,
        FAILED,
        INTERRUPTED,
        KNOWNFAIL,
        PASSED,
        SKIPPED,
        TIMEOUT,
        WARNED,
    };

    /* Serialize a result code as the word used in journal files. */
    friend std::ostream &operator<<( std::ostream &o, R r ) {
        switch ( r ) {
        case STARTED: return o << "started";
        case RETRIED: return o << "retried";
        case FAILED: return o << "failed";
        case INTERRUPTED: return o << "interrupted";
        case PASSED: return o << "passed";
        case SKIPPED: return o << "skipped";
        case TIMEOUT: return o << "timeout";
        case WARNED: return o << "warnings";
        default: return o << "unknown";
        }
    }

    /* Parse a result word back into a code; unrecognized input maps to
     * UNKNOWN (note: KNOWNFAIL is never produced here). */
    friend std::istream &operator>>( std::istream &i, R &r ) {
        std::string x;
        i >> x;

        r = UNKNOWN;
        if ( x == "started" ) r = STARTED;
        if ( x == "retried" ) r = RETRIED;
        if ( x == "failed" ) r = FAILED;
        if ( x == "interrupted" ) r = INTERRUPTED;
        if ( x == "passed" ) r = PASSED;
        if ( x == "skipped" ) r = SKIPPED;
        if ( x == "timeout" ) r = TIMEOUT;
        if ( x == "warnings" ) r = WARNED;
        return i;
    }

    /* Read one "name result" journal line as a pair. */
    template< typename S, typename T >
    friend std::istream &operator>>( std::istream &i, std::pair< S, T > &r ) {
        return i >> r.first >> r.second;
    }

    typedef std::map< std::string, R > Status;
    Status status, written; /* `written` mirrors what append() last wrote */

    std::string location, list; /* append-only journal / full-state file */
    int timeouts;               /* consecutive timeouts (for --fatal-timeouts) */

    /* Append to `path` only the entries that changed since the last
     * append (diffed against `written`). */
    void append( std::string path ) {
        std::ofstream of( path.c_str(), std::fstream::app );
        Status::iterator writ;
        for ( Status::iterator i = status.begin(); i != status.end(); ++i ) {
            writ = written.find( i->first );
            if ( writ == written.end() || writ->second != i->second )
                of << i->first << " " << i->second << std::endl;
        }
        written = status;
        of.close();
    }

    /* Rewrite `path` with the complete current state. */
    void write( std::string path ) {
        std::ofstream of( path.c_str() );
        for ( Status::iterator i = status.begin(); i != status.end(); ++i )
            of << i->first << " " << i->second << std::endl;
        of.close();
    }

    /* Flush both files to stable storage. */
    void sync() {
        append( location );
        fsync_name( location );
        write ( list );
        fsync_name( list );
    }

    /* Mark test n as started; a second "started" (e.g. after a crashed
     * run is resumed) becomes RETRIED. */
    void started( std::string n ) {
        if ( status.count( n ) && status[ n ] == STARTED )
            status[ n ] = RETRIED;
        else
            status[ n ] = STARTED;
        sync();
    }

    /* Record the final result of test n; tracks the run of consecutive
     * timeouts. */
    void done( std::string n, R r ) {
        status[ n ] = r;
        if ( r == TIMEOUT )
            ++ timeouts;
        else
            timeouts = 0;
        sync();
    }

    /* Has test n finished (i.e. has any state beyond started/interrupted)? */
    bool done( std::string n ) {
        if ( !status.count( n ) )
            return false;
        return status[ n ] != STARTED && status[ n ] != INTERRUPTED;
    }

    /* Number of tests currently recorded with result r. */
    int count( R r ) {
        int c = 0;
        for ( Status::iterator i = status.begin(); i != status.end(); ++i )
            if ( i->second == r )
                ++ c;
        return c;
    }

    /* One-line summary printed at the end of a run. */
    void banner() {
        std::cout << std::endl << "### " << status.size() << " tests: "
                  << count( PASSED ) << " passed, "
                  << count( SKIPPED ) << " skipped, "
                  << count( TIMEOUT ) + count( WARNED ) << " broken, "
                  << count( FAILED ) << " failed" << std::endl;
    }

    /* List every test that did not pass. */
    void details() {
        for ( Status::iterator i = status.begin(); i != status.end(); ++i )
            if ( i->second != PASSED )
                std::cout << i->second << ": " << i->first << std::endl;
    }

    /* Load a journal file into `status` (later lines win). */
    void read( std::string n ) {
        std::ifstream ifs( n.c_str() );
        typedef std::istream_iterator< std::pair< std::string, R > > It;
        for ( It i( ifs ); i != It(); ++i )
            status[ i->first ] = i->second;
    }

    void read() { read( location ); }

    Journal( std::string dir )
        : location( dir + "/journal" ),
          list( dir + "/list" ),
          timeouts( 0 )
    {}
};
+
/* Splits a byte stream into complete, timestamped lines.  `data` holds
 * finished lines; `incomplete` accumulates the current partial line.
 * Timestamping can be toggled by in-band '#' marker lines (stack traces
 * and teardown output are reported without timing). */
struct TimedBuffer {
    typedef std::pair< time_t, std::string > Line;

    std::deque< Line > data;
    Line incomplete;
    bool stamp; /* when false, new lines get timestamp 0 (no timing) */

    /* Pop the oldest complete line; with force and no complete line,
     * return (and reset) the partial one.  Precondition (enforced by
     * callers via empty()): !force implies data is non-empty. */
    Line shift( bool force = false ) {
        Line result = std::make_pair( 0, "" );
        if ( force && data.empty() )
            std::swap( result, incomplete );
        else {
            result = data.front();
            data.pop_front();
        }
        return result;
    }

    /* Append a chunk of raw output, splitting it on '\n' and stamping
     * each completed line with the time its first byte arrived. */
    void push( std::string buf ) {
        time_t now = stamp ? time( 0 ) : 0;
        std::string::iterator b = buf.begin(), e = buf.begin();

        while ( e != buf.end() )
        {
            e = std::find( b, buf.end(), '\n' );
            incomplete.second += std::string( b, e );

            /* stamp the line when its first fragment arrives */
            if ( !incomplete.first )
                incomplete.first = now;

            if ( e != buf.end() ) {
                incomplete.second += "\n";
                data.push_back( incomplete );
                if (incomplete.second[0] == '#') {
                    /* Disable timing between '## 0 STACKTRACE' & '## teardown' keywords */
                    if (incomplete.second.find("# 0 STACKTRACE", 1) != std::string::npos ||
                        incomplete.second.find("# timing off", 1) != std::string::npos) {
                        stamp = false;
                        now = 0;
                    } else if (incomplete.second.find("# teardown", 1) != std::string::npos ||
                               incomplete.second.find("# timing on", 1) != std::string::npos) {
                        stamp = true;
                        now = time( 0 );
                    }
                }
                incomplete = std::make_pair( now, "" );
            }
            b = (e == buf.end() ? e : e + 1);
        }
    }

    /* No complete lines pending?  With force, a non-empty partial line
     * also counts as pending. */
    bool empty( bool force = false ) {
        if ( force && !incomplete.second.empty() )
            return false;
        return data.empty();
    }

    TimedBuffer() : stamp(true) {}
};
+
/* Abstract consumer of test output.  push() receives raw byte chunks;
 * sync() flushes buffered complete lines, and with its bool argument
 * (force) also flushes an incomplete trailing line.  outline() emits a
 * single buffered line. */
struct Sink {
    virtual void outline( bool ) {}
    virtual void push( std::string x ) = 0;
    virtual void sync( bool ) {}
    virtual ~Sink() {}
};
+
/* Replaces concrete test paths with symbolic tokens in log output.
 * Lines of the form "@TESTDIR=..." / "@PREFIX=..." announce the paths;
 * subsequent ordinary lines have every occurrence of those paths
 * rewritten to "@TESTDIR@" / "@PREFIX@" (testdir first). */
struct Substitute {
    typedef std::map< std::string, std::string > Map;
    std::string testdir; // replace testdir first
    std::string prefix;

    std::string map( std::string line ) {
        if ( line.compare( 0, 9, "@TESTDIR=" ) == 0 )
            testdir = line.substr( 9, line.length() - 10 ); // skip \n
        else if ( line.compare( 0, 8, "@PREFIX=" ) == 0 )
            prefix = line.substr( 8, line.length() - 9 ); // skip \n
        else {
            size_t pos;
            if ( !testdir.empty() )
                for ( pos = line.find( testdir ); pos != std::string::npos;
                      pos = line.find( testdir ) )
                    line.replace( pos, testdir.length(), "@TESTDIR@" );

            if ( !prefix.empty() )
                for ( pos = line.find( prefix ); pos != std::string::npos;
                      pos = line.find( prefix ) )
                    line.replace( pos, prefix.length(), "@PREFIX@" );
        }
        return line;
    }
};
+
/* Renders a TimedBuffer::Line as "[mm:ss] text", the timestamp being
 * relative to this Format's creation time.  Lines stamped before
 * `start` (in particular timestamp 0 = timing disabled) get no prefix.
 * Path substitution (@TESTDIR@/@PREFIX@) is applied to the text. */
struct Format {
    time_t start;
    Substitute subst;

    std::string format( TimedBuffer::Line l ) {
        std::stringstream result;
        if ( l.first >= start ) {
            time_t rel = l.first - start;
            result << "[" << std::setw( 2 ) << std::setfill( ' ' ) << rel / 60
                   << ":" << std::setw( 2 ) << std::setfill( '0' ) << rel % 60 << "] ";
        }
        result << subst.map( l.second );
        return result.str();
    }

    Format() : start( time( 0 ) ) {}
};
+
/* Sink that retains all output in memory; dump() replays every line
 * (formatted, prefixed with "| ").  Used to show the full log of a
 * failed or timed-out test after the fact. */
struct BufSink : Sink {
    TimedBuffer data;
    Format fmt;

    virtual void push( std::string x ) {
        data.push( x );
    }

    void dump( std::ostream &o ) {
        o << std::endl;
        while ( !data.empty( true ) )
            o << "| " << fmt.format( data.shift( true ) );
    }
};
+
+struct FdSink : Sink {
+ int fd;
+
+ TimedBuffer stream;
+ Format fmt;
+ bool killed;
+
+ virtual void outline( bool force )
+ {
+ TimedBuffer::Line line = stream.shift( force );
+ std::string out = fmt.format( line );
+ write( fd, out.c_str(), out.length() );
+ }
+
+ virtual void sync( bool force ) {
+ if ( killed )
+ return;
+ while ( !stream.empty( force ) )
+ outline( force );
+ }
+
+ virtual void push( std::string x ) {
+ if ( !killed )
+ stream.push( x );
+ }
+
+ FdSink( int _fd ) : fd( _fd ), killed( false ) {}
+};
+
+struct FileSink : FdSink {
+ std::string file;
+ FileSink( std::string n ) : FdSink( -1 ), file( n ) {}
+
+ void sync( bool force ) {
+ if ( fd < 0 && !killed ) {
+#ifdef O_CLOEXEC
+ fd = open( file.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0644 );
+#else
+ fd = open( file.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0644 );
+ if ( fcntl( fd, F_SETFD, FD_CLOEXEC ) < 0 )
+ perror("failed to set FD_CLOEXEC on file");
+#endif
+ if ( fd < 0 )
+ killed = true;
+ }
+ FdSink::sync( force );
+ }
+
+ ~FileSink() {
+ if ( fd >= 0 ) {
+ fsync( fd );
+ close( fd );
+ }
+ }
+};
+
/* klogctl(2) command numbers (SYSLOG_ACTION_READ_CLEAR / _CLEAR),
 * defined locally to avoid depending on <sys/klog.h> constants. */
#define BRICK_SYSLOG_ACTION_READ_CLEAR 4
#define BRICK_SYSLOG_ACTION_CLEAR 5
+
/* Byte source backed by a (non-blocking) file descriptor.  sync() pumps
 * whatever data is available into a Sink; fd_set_() registers the fd
 * for select().  The fd is closed on destruction. */
struct Source {
    int fd;

    virtual void sync( Sink *sink ) {
        ssize_t sz;
        char buf[ 128 * 1024 ];
        if ( (sz = read(fd, buf, sizeof(buf) - 1)) > 0 )
            sink->push( std::string( buf, sz ) );

        /*
         * On RHEL5 box this code busy-loops here, while
         * parent process no longer writes anything.
         *
         * Unclear why 'select()' is anouncing available
         * data, while we read 0 bytes with errno == 0.
         *
         * Temporarily resolved with usleep() instead of loop.
         */
        if (!sz && (!errno || errno == EINTR))
            usleep(50000);

        if ( sz < 0 && errno != EAGAIN )
            throw syserr( "reading pipe" );
    }

    virtual void reset() {}

    /* Add our fd to the select() read set; returns the fd, or -1 when
     * there is nothing to watch. */
    virtual int fd_set_( fd_set *set ) {
        if ( fd >= 0 ) {
            FD_SET( fd, set );
            return fd;
        } else
            return -1;
    }

    Source( int _fd = -1 ) : fd( _fd ) {}
    virtual ~Source() {
        if ( fd >= 0 )
            ::close( fd );
    }
};
+
+struct FileSource : Source {
+ std::string file;
+ FileSource( std::string n ) : Source( -1 ), file( n ) {}
+
+ int fd_set_( ::fd_set * ) { return -1; } /* reading a file is always non-blocking */
+ void sync( Sink *s ) {
+ if ( fd < 0 ) {
+#ifdef O_CLOEXEC
+ fd = open( file.c_str(), O_RDONLY | O_CLOEXEC | O_NONBLOCK );
+#else
+ fd = open( file.c_str(), O_RDONLY | O_NONBLOCK );
+ if ( fcntl( fd, F_SETFD, FD_CLOEXEC ) < 0 )
+ perror("failed to set FD_CLOEXEC on file");
+#endif
+ if ( fd >= 0 )
+ lseek( fd, 0, SEEK_END );
+ }
+ if ( fd >= 0 )
+ Source::sync( s );
+ }
+};
+
/* Kernel log source.  Prefers reading /dev/kmsg (kernels >= 3.5,
 * seeked to the end so only new messages appear); otherwise falls back
 * to klogctl(READ_CLEAR), which clobbers the dmesg buffer and is
 * therefore gated on $LVM_TEST_CAN_CLOBBER_DMESG being set non-"0". */
struct KMsg : Source {
    bool can_clear; /* fallback via klogctl allowed and still permitted */

    /* note: `?:` below is the GNU "elvis" extension (a ?: b) */
    KMsg() : can_clear( strcmp(getenv("LVM_TEST_CAN_CLOBBER_DMESG") ? : "0", "0") ) {
#ifdef __unix
        if ( (fd = open("/dev/kmsg", O_RDONLY | O_NONBLOCK)) < 0 ) {
            if (errno != ENOENT) /* Older kernels (<3.5) do not support /dev/kmsg */
                perror("opening /dev/kmsg");
            if ( klogctl( BRICK_SYSLOG_ACTION_CLEAR, 0, 0 ) < 0 )
                can_clear = false;
        } else if (lseek(fd, 0L, SEEK_END) == (off_t) -1)
            perror("lseek /dev/kmsg");
#endif
    }

    bool dev_kmsg() {
        return fd >= 0;
    }

    void sync( Sink *s ) {
#ifdef __unix
        ssize_t sz;
        char buf[ 128 * 1024 ];

        if ( dev_kmsg() ) {
            while ( (sz = ::read(fd, buf, sizeof(buf) - 1)) > 0 )
                s->push( std::string( buf, sz ) );
        } else if ( can_clear ) {
            while ( (sz = klogctl( BRICK_SYSLOG_ACTION_READ_CLEAR, buf,
                                   sizeof(buf) - 1 )) > 0 )
                s->push( std::string( buf, sz ) );
            if ( sz < 0 && errno == EPERM )
                can_clear = false; /* not privileged: stop trying */
        }
#endif
    }
};
+
+struct Observer : Sink {
+ TimedBuffer stream;
+
+ bool warnings;
+ Observer() : warnings( false ) {}
+
+ void push( std::string s ) {
+ stream.push( s );
+ }
+
+ void sync( bool force ) {
+ while ( !stream.empty( force ) ) {
+ TimedBuffer::Line line = stream.shift( force );
+ if ( line.second.find( "TEST WARNING" ) != std::string::npos )
+ warnings = true;
+ }
+ }
+};
+
+struct IO : Sink {
+ typedef std::vector< Sink* > Sinks;
+ typedef std::vector< Source* > Sources;
+
+ mutable Sinks sinks;
+ mutable Sources sources;
+
+ Observer *_observer;
+
+ virtual void push( std::string x ) {
+ for ( Sinks::iterator i = sinks.begin(); i != sinks.end(); ++i )
+ (*i)->push( x );
+ }
+
+ void sync( bool force ) {
+ for ( Sources::iterator i = sources.begin(); i != sources.end(); ++i )
+ (*i)->sync( this );
+
+ for ( Sinks::iterator i = sinks.begin(); i != sinks.end(); ++i )
+ (*i)->sync( force );
+ }
+
+ void close() {
+ for ( Sources::iterator i = sources.begin(); i != sources.end(); ++i )
+ delete *i;
+ sources.clear();
+ }
+
+ int fd_set_( fd_set *set ) {
+ int max = -1;
+
+ for ( Sources::iterator i = sources.begin(); i != sources.end(); ++i )
+ max = std::max( (*i)->fd_set_( set ), max );
+ return max + 1;
+ }
+
+ Observer &observer() { return *_observer; }
+
+ IO() {
+ clear();
+ }
+
+ /* a stealing copy constructor */
+ IO( const IO &io ) : sinks( io.sinks ), sources( io.sources ), _observer( io._observer )
+ {
+ io.sinks.clear();
+ io.sources.clear();
+ }
+
+ IO &operator= ( const IO &io ) {
+ this->~IO();
+ return *new (this) IO( io );
+ }
+
+ void clear( int to_push = 1 ) {
+ for ( Sinks::iterator i = sinks.begin(); i != sinks.end(); ++i )
+ delete *i;
+ sinks.clear();
+ if ( to_push )
+ sinks.push_back( _observer = new Observer );
+ }
+
+ ~IO() { close(); clear(0); }
+
+};
+
namespace {
/* Process-wide state shared with the signal handler. */
pid_t kill_pid = 0;          /* pid (== pgid) of the running test child */
bool fatal_signal = false;   /* a terminating signal was delivered */
bool interrupt = false;      /* ... and it was specifically SIGINT */
}
+
/* Runtime configuration assembled from the command line and environment
 * (see run() below for how each field is populated). */
struct Options {
    bool verbose, batch, interactive, cont, fatal_timeouts, kmsg;
    std::string testdir, outdir, workdir, heartbeat;
    std::vector< std::string > flavours, filter, watch;
    std::string flavour_envvar; /* env var used to export the flavour to tests */
    int timeout;                /* seconds of silence before a test is killed */
    Options() : verbose( false ), batch( false ), interactive( false ),
                cont( false ), fatal_timeouts( false ), kmsg( true ),
                timeout( 180 ) {}
};
+
/* The child half of a test run: exec()s the test script under bash with
 * stdout/stderr redirected into fd (the child's end of the parent's
 * socketpair).  Never returns; exits 202 if exec fails. */
struct TestProcess
{
    std::string filename;
    bool interactive; /* when set, keep the terminal's stdio */
    int fd;

    void exec() __attribute__ ((noreturn)) {
        assert( fd >= 0 );
        if ( !interactive ) {
            int devnull = ::open( "/dev/null", O_RDONLY );
            if ( devnull >= 0 ) { /* gcc really doesn't like to not have stdin */
                dup2( devnull, STDIN_FILENO );
                close( devnull );
            } else
                close( STDIN_FILENO );
            dup2( fd, STDOUT_FILENO );
            dup2( fd, STDERR_FILENO );
            close( fd );
        }

        /* own process group, so the whole test's process tree can be
         * signalled at once (kill(-pid, ...)) */
        setpgid( 0, 0 );

        execlp( "bash", "bash", "-noprofile", "-norc", filename.c_str(), NULL );
        perror( "execlp" );
        _exit( 202 );
    }

    TestProcess( std::string file )
        : filename( file ), interactive( false ), fd( -1 )
    {}
};
+
/* One test script in one flavour: forks the TestProcess child, monitors
 * its output through an IO hub, enforces the silence timeout, measures
 * resource usage and records the result in the Journal. */
struct TestCase {
    TestProcess child;
    std::string name, flavour;
    IO io;
    BufSink *iobuf; /* in-memory log, dumped only on failure/timeout */

    struct rusage usage;
    int status;   /* wait4() status of the child */
    bool timeout; /* child was killed for exceeding the silence timeout */
    pid_t pid;

    time_t start, end, silent_start, last_update, last_heartbeat;
    Options options;

    Journal *journal;

    /* Human-readable label, e.g. "[flavour] name". */
    std::string pretty() {
        if ( options.batch )
            return flavour + ": " + name;
        return "[" + flavour + "] " + name;
    }

    /* Journal key. */
    std::string id() {
        return flavour + ":" + name;
    }

    /* Create the parent<->child output channel (a socketpair). */
    void pipe() {
        int fds[2];

        if (socketpair( PF_UNIX, SOCK_STREAM, 0, fds )) {
            perror("socketpair");
            exit(201);
        }

#if 0
        if (fcntl( fds[0], F_SETFL, O_NONBLOCK ) == -1) {
            perror("fcntl on socket");
            exit(202);
        }
#endif

        io.sources.push_back( new Source( fds[0] ) );
        child.fd = fds[1];
        child.interactive = options.interactive;
    }

    /* One supervision step: update heartbeat, reap the child if it has
     * exited, enforce the silence timeout, and pump available output.
     * Returns false once the child is gone. */
    bool monitor() {
        end = time( 0 );

        /* heartbeat */
        if ( end - last_heartbeat >= 20 && !options.heartbeat.empty() ) {
            std::ofstream hb( options.heartbeat.c_str(), std::fstream::app );
            hb << ".";
            hb.close();
            fsync_name( options.heartbeat );
            last_heartbeat = end;
        }

        if ( wait4(pid, &status, WNOHANG, &usage) != 0 ) {
            io.sync( true );
            return false;
        }

        /* kill off tests after a timeout silence */
        if ( !options.interactive )
            if ( end - silent_start > options.timeout ) {
                kill( pid, SIGINT );
                sleep( 5 ); /* wait a bit for a reaction */
                if ( waitpid( pid, &status, WNOHANG ) == 0 ) {
                    /* dump kernel task states before force-killing */
                    system( "echo t > /proc/sysrq-trigger 2> /dev/null" );
                    kill( -pid, SIGKILL );
                    waitpid( pid, &status, 0 );
                }
                timeout = true;
                io.sync( true );
                return false;
            }

        struct timeval wait;
        fd_set set;

        FD_ZERO( &set );
        int nfds = io.fd_set_( &set );
        wait.tv_sec = 0;
        wait.tv_usec = 500000; /* timeout 0.5s */

        if ( !options.verbose && !options.interactive && !options.batch ) {
            if ( end - last_update >= 1 ) {
                progress( Update ) << tag( "running" ) << pretty() << " "
                                   << end - start << std::flush;
                last_update = end;
            }
        }
        if ( select( nfds, &set, NULL, NULL, &wait ) > 0 ) {
            silent_start = end; /* something happened */
            io.sync( false );
        }

        return true;
    }

    /* Format seconds as m:ss. */
    std::string timefmt( time_t t ) {
        std::stringstream ss;
        ss << t / 60 << ":" << std::setw( 2 ) << std::setfill( '0' ) << t % 60;
        return ss.str();
    }

    /* One-line resource-usage summary (wall/user/sys time, RSS, IOPS). */
    std::string rusage()
    {
        std::stringstream ss;
        time_t wall = end - start, user = usage.ru_utime.tv_sec,
             system = usage.ru_stime.tv_sec;
        size_t rss = usage.ru_maxrss / 1024,
             inb = usage.ru_inblock / 100,
             outb = usage.ru_oublock / 100;

        size_t inb_10 = inb % 10, outb_10 = outb % 10;
        inb /= 10; outb /= 10;

        ss << timefmt( wall ) << " wall " << timefmt( user ) << " user "
           << timefmt( system ) << " sys " << std::setw( 3 ) << rss << "M RSS | "
           << "IOPS: " << std::setw( 5 ) << inb << "." << inb_10 << "K in "
           << std::setw( 5 ) << outb << "." << outb_10 << "K out";
        return ss.str();
    }

    /* Right-aligned "###   label: " prefix for progress lines. */
    std::string tag( std::string n ) {
        if ( options.batch )
            return "## ";
        size_t pad = n.length();
        pad = (pad < 12) ? 12 - pad : 0;
        return "### " + std::string( pad, ' ' ) + n + ": ";
    }

    std::string tag( Journal::R r ) {
        std::stringstream s;
        s << r;
        return tag( s.str() );
    }

    enum P { First, Update, Last };

    /* Choose where a progress message of stage p goes: stdout, a
     * carriage-returned stdout line (tty updates), or a null stream. */
    std::ostream &progress( P p = Last )
    {
        static struct : std::streambuf {} buf;
        static std::ostream null(&buf);

        if ( options.batch && p == First )
            return std::cout;

        if ( isatty( STDOUT_FILENO ) && !options.batch ) {
            if ( p != First )
                return std::cout << "\r";
            return std::cout;
        }

        if ( p == Last )
            return std::cout;

        return null;
    }

    /* Parent side of a test run: supervise the child until it exits,
     * classify the outcome and record it. */
    void parent()
    {
        ::close( child.fd );
        setupIO();

        journal->started( id() );
        silent_start = start = time( 0 );

        progress( First ) << tag( "running" ) << pretty() << std::flush;
        if ( options.verbose || options.interactive )
            progress() << std::endl;

        while ( monitor() )
            /* empty */ ;

        Journal::R r = Journal::UNKNOWN;

        if ( timeout ) {
            r = Journal::TIMEOUT;
        } else if ( WIFEXITED( status ) ) {
            if ( WEXITSTATUS( status ) == 0 )
                r = Journal::PASSED;
            else if ( WEXITSTATUS( status ) == 200 )
                r = Journal::SKIPPED;
            else
                r = Journal::FAILED;
        } else if ( interrupt && WIFSIGNALED( status ) && WTERMSIG( status ) == SIGINT )
            r = Journal::INTERRUPTED;
        else
            r = Journal::FAILED;

        if ( r == Journal::PASSED && io.observer().warnings )
            r = Journal::WARNED;

        io.close();

        if ( iobuf && ( r == Journal::FAILED || r == Journal::TIMEOUT ) )
            iobuf->dump( std::cout );

        journal->done( id(), r );

        if ( options.batch ) {
            int spaces = std::max( 64 - int(pretty().length()), 0 );
            progress( Last ) << " " << std::string( spaces, '.' ) << " "
                             << std::left << std::setw( 9 ) << std::setfill( ' ' ) << r;
            if ( r != Journal::SKIPPED )
                progress( First ) << " " << rusage();
            progress( Last ) << std::endl;
        } else
            progress( Last ) << tag( r ) << pretty() << std::endl;

        io.clear();
    }

    /* Fork and dispatch to the child exec / parent supervision paths. */
    void run() {
        pipe();
        pid = kill_pid = fork();
        if (pid < 0) {
            perror("Fork failed.");
            exit(201);
        } else if (pid == 0) {
            io.close();
            chdir( options.workdir.c_str() );
            if ( !options.flavour_envvar.empty() )
                setenv( options.flavour_envvar.c_str(), flavour.c_str(), 1 );
            child.exec();
        } else {
            parent();
        }
    }

    /* Attach the sinks (console/memory/log file) and extra watched
     * sources (files, kernel log) for this run. */
    void setupIO() {
        iobuf = 0;
        if ( options.verbose || options.interactive )
            io.sinks.push_back( new FdSink( 1 ) );
        else if ( !options.batch )
            io.sinks.push_back( iobuf = new BufSink() );

        std::string n = id();
        std::replace( n.begin(), n.end(), '/', '_' );
        std::string fn = options.outdir + "/" + n + ".txt";
        io.sinks.push_back( new FileSink( fn ) );

        for ( std::vector< std::string >::iterator i = options.watch.begin();
              i != options.watch.end(); ++i )
            io.sources.push_back( new FileSource( *i ) );
        if ( options.kmsg )
            io.sources.push_back( new KMsg );
    }

    TestCase( Journal &j, Options opt, std::string path, std::string _name, std::string _flavour )
        : child( path ), name( _name ), flavour( _flavour ), timeout( false ),
          last_update( 0 ), last_heartbeat( 0 ), options( opt ), journal( &j )
    {
    }
};
+
/* Whole-suite driver: discovers test scripts, crosses them with the
 * requested flavours, runs each TestCase and reports the summary. */
struct Main {
    bool die;     /* stop the run (fatal timeout or 3h budget exceeded) */
    time_t start;

    typedef std::vector< TestCase > Cases;
    typedef std::vector< std::string > Flavours;

    Journal journal;
    Options options;
    Cases cases;

    /* Build the case list: every *.sh under testdir (except lib/),
     * filtered by --only, once per flavour; then load or reset the
     * journal depending on --continue. */
    void setup() {
        Listing l = listdir( options.testdir, true );
        std::sort( l.begin(), l.end() );

        for ( Flavours::iterator flav = options.flavours.begin();
              flav != options.flavours.end(); ++flav ) {

            for ( Listing::iterator i = l.begin(); i != l.end(); ++i ) {
                if ( ( i->length() < 3 ) || ( i->substr( i->length() - 3, i->length() ) != ".sh" ) )
                    continue;
                if ( i->substr( 0, 4 ) == "lib/" )
                    continue;
                /* keep the case when any --only pattern is a substring */
                bool filter = !options.filter.empty();

                for ( std::vector< std::string >::iterator filt = options.filter.begin();
                      filt != options.filter.end(); ++filt ) {
                    if ( i->find( *filt ) != std::string::npos )
                        filter = false;
                }
                if ( filter )
                    continue;
                cases.push_back( TestCase( journal, options, options.testdir + *i, *i, *flav ) );
                cases.back().options = options;
            }
        }

        if ( options.cont )
            journal.read();
        else
            ::unlink( journal.location.c_str() );
    }

    /* Run all (remaining) cases; returns the process exit status. */
    int run() {
        setup();
        start = time( 0 );
        std::cerr << "running " << cases.size() << " tests" << std::endl;

        for ( Cases::iterator i = cases.begin(); i != cases.end(); ++i ) {

            if ( options.cont && journal.done( i->id() ) )
                continue;

            i->run();

            if ( options.fatal_timeouts && journal.timeouts >= 2 ) {
                journal.started( i->id() ); // retry the test on --continue
                std::cerr << "E: Hit 2 timeouts in a row with --fatal-timeouts" << std::endl;
                std::cerr << "Suspending (please restart the VM)." << std::endl;
                sleep( 3600 );
                die = 1;
            }

            /* overall suite budget: 3 hours */
            if ( time(0) - start > 3 * 3600 ) {
                std::cerr << "3 hours passed, giving up..." << std::endl;
                die = 1;
            }

            if ( die || fatal_signal )
                break;
        }

        journal.banner();
        if ( die || fatal_signal )
            return 1;

        return journal.count( Journal::FAILED ) ? 1 : 0;
    }

    Main( Options o ) : die( false ), journal( o.outdir ), options( o ) {}
};
+
namespace {

/* Signal handler: forward the signal to the running test's process
 * group, remember that we were hit, and reinstall the default action so
 * a second delivery kills the runner itself. */
void handler( int sig ) {
    signal( sig, SIG_DFL ); /* die right away next time */
    if ( kill_pid > 0 )
        kill( -kill_pid, sig );
    fatal_signal = true;
    if ( sig == SIGINT )
        interrupt = true;
}

/* Install handler for every catchable signal, excluding the ones needed
 * for normal operation (SIGCHLD/SIGWINCH/SIGURG) and the uncatchable
 * SIGKILL/SIGSTOP. */
void setup_handlers() {
    /* set up signal handlers */
    for ( int i = 0; i <= 32; ++i )
        switch (i) {
        case SIGCHLD: case SIGWINCH: case SIGURG:
        case SIGKILL: case SIGSTOP: break;
        default: signal(i, handler);
        }
}

}
+
+/* TODO remove in favour of brick-commandline.h */
+struct Args {
+ typedef std::vector< std::string > V;
+ V args;
+
+ Args( int argc, const char **argv ) {
+ for ( int i = 1; i < argc; ++ i )
+ args.push_back( argv[ i ] );
+ }
+
+ bool has( std::string fl ) {
+ return std::find( args.begin(), args.end(), fl ) != args.end();
+ }
+
+ // TODO: This does not handle `--option=VALUE`:
+ std::string opt( std::string fl ) {
+ V::iterator i = std::find( args.begin(), args.end(), fl );
+ if ( i == args.end() || i + 1 == args.end() )
+ return "";
+ return *(i + 1);
+ }
+};
+
namespace {

/* True when environment variable `name` is set to a non-empty value
 * other than "0". */
bool hasenv( const char *name ) {
    const char *v = getenv( name );
    if ( !v || !*v )
        return false;
    return strcmp( v, "0" ) != 0;
}

/* Split comma-separated string s, appending each piece to container c. */
template< typename C >
void split( std::string s, C &c ) {
    std::stringstream ss( s );
    std::string piece;
    while ( std::getline( ss, piece, ',' ) )
        c.push_back( piece );
}

}
+
+const char *DEF_FLAVOURS="ndev-vanilla";
+
+std::string resolve_path(std::string a_path, const char *default_path=".")
+{
+ char temp[PATH_MAX];
+ const char *p;
+ p = a_path.empty() ? default_path : a_path.c_str();
+ if ( !realpath( p, temp ) )
+ throw syserr( "Failed to resolve path", p );
+ return temp;
+}
+
/* Entry point of the runner: parse command-line options (falling back
 * to environment variables), build the Options and run the whole suite.
 * Returns the process exit status (0 = nothing failed). */
static int run( int argc, const char **argv, std::string fl_envvar = "TEST_FLAVOUR" )
{
    Args args( argc, argv );
    Options opt;

    if ( args.has( "--help" ) ) {
        std::cout <<
            " lvm2-testsuite - Run a lvm2 testsuite.\n\n"
            "lvm2-testsuite"
            "\n\t"
            " [--flavours FLAVOURS]"
            " [--only TESTS]"
            "\n\t"
            " [--outdir OUTDIR]"
            " [--testdir TESTDIR]"
            " [--workdir WORKDIR]"
            "\n\t"
            " [--batch|--verbose|--interactive]"
            "\n\t"
            " [--fatal-timeouts]"
            " [--continue]"
            " [--heartbeat]"
            " [--watch WATCH]"
            " [--timeout TIMEOUT]"
            " [--nokmsg]\n\n"
            /* TODO: list of flavours:
            "lvm2-testsuite"
            "\n\t"
            " --list-flavours [--testdir TESTDIR]"
            */
            "\n\n"
            "OPTIONS:\n\n"
            // TODO: looks like this could be worth a man page...
            "Filters:\n"
            " --flavours FLAVOURS\n\t\t- comma separated list of flavours to run.\n\t\t For the list of flavours see `$TESTDIR/lib/flavour-*`.\n\t\t Default: \"" << DEF_FLAVOURS << "\".\n"
            " --only TESTS\t- comma separated list of tests to run. Default: All tests.\n"
            "\n"
            "Directories:\n"
            " --testdir TESTDIR\n\t\t- directory where tests reside. Default: \"" TESTSUITE_DATA "\".\n"
            " --workdir WORKDIR\n\t\t- directory to change to when running tests.\n\t\t This is directory containing testing libs. Default: TESTDIR.\n"
            " --outdir OUTDIR\n\t\t- directory where all the output files should go. Default: \".\".\n"
            "\n"
            "Formatting:\n"
            " --batch\t- Brief format for automated runs.\n"
            " --verbose\t- More verbose format for automated runs displaying progress on stdout.\n"
            " --interactive\t- Verbose format for interactive runs.\n"
            "\n"
            "Other:\n"
            " --fatal-timeouts\n\t\t- exit after encountering 2 timeouts in a row.\n"
            " --continue\t- If set append to journal. Otherwise it will be overwritten.\n"
            " --heartbeat HEARTBEAT\n\t\t- Name of file to update periodicaly while running.\n"
            " --watch WATCH\t- Comma separated list of files to watch and print.\n"
            " --timeout TIMEOUT\n\t\t- Period of silence in seconds considered a timeout. Default: 180.\n"
            " --nokmsg\t- Do not try to read kernel messages.\n"
            "\n\n"
            "ENV.VARIABLES:\n\n"
            " T\t\t- see --only\n"
            " INTERACTIVE\t- see --interactive\n"
            " VERBOSE\t- see --verbose\n"
            " BATCH\t\t- see --batch\n"
            " LVM_TEST_CAN_CLOBBER_DMESG\n\t\t- when set and non-empty tests are allowed to flush\n\t\t kmsg in an attempt to read it."
            "\n\n"
            "FORMATS:\n\n"
            "When multiple formats are specified interactive overrides verbose\n"
            "which overrides batch. Command line options override environment\n"
            "variables.\n\n"
            ;
        return 0;
    }

    opt.flavour_envvar = fl_envvar;

    if ( args.has( "--continue" ) )
        opt.cont = true;

    if ( args.has( "--only" ) )
        split( args.opt( "--only" ), opt.filter );
    else if ( hasenv( "T" ) )
        split( getenv( "T" ), opt.filter );

    if ( args.has( "--fatal-timeouts" ) )
        opt.fatal_timeouts = true;

    if ( args.has( "--heartbeat" ) )
        opt.heartbeat = args.opt( "--heartbeat" );

    /* output format: command-line options take precedence as a group;
     * within a group, interactive > verbose > batch */
    if ( args.has( "--batch" ) || args.has( "--verbose" ) || args.has( "--interactive" ) ) {
        if ( args.has( "--batch" ) ) {
            opt.verbose = false;
            opt.batch = true;
        }

        if ( args.has( "--verbose" ) ) {
            opt.batch = false;
            opt.verbose = true;
        }

        if ( args.has( "--interactive" ) ) {
            opt.verbose = false;
            opt.batch = false;
            opt.interactive = true;
        }
    } else {
        if ( hasenv( "BATCH" ) ) {
            opt.verbose = false;
            opt.batch = true;
        }

        if ( hasenv( "VERBOSE" ) ) {
            opt.batch = false;
            opt.verbose = true;
        }

        if ( hasenv( "INTERACTIVE" ) ) {
            opt.verbose = false;
            opt.batch = false;
            opt.interactive = true;
        }
    }

    if ( args.has( "--flavours" ) )
        split( args.opt( "--flavours" ), opt.flavours );
    else
        split( DEF_FLAVOURS, opt.flavours );

    if ( args.has( "--watch" ) )
        split( args.opt( "--watch" ), opt.watch );

    if ( args.has( "--timeout" ) )
        opt.timeout = atoi( args.opt( "--timeout" ).c_str() );

    if ( args.has( "--nokmsg" ) )
        opt.kmsg = false;

    opt.testdir = resolve_path( args.opt( "--testdir" ), TESTSUITE_DATA ) + "/";
    opt.workdir = resolve_path( args.opt( "--workdir" ), opt.testdir.c_str() );
    opt.outdir = resolve_path( args.opt( "--outdir" ), "." );

    setup_handlers();

    Main main( opt );
    return main.run();
}
+
+}
+}
+
+#endif
+
+#ifdef BRICK_DEMO
+
+int main( int argc, const char **argv ) {
+ return brick::shelltest::run( argc, argv );
+}
+
+#endif
+
+// vim: syntax=cpp tabstop=4 shiftwidth=4 expandtab
diff --git a/test/lib/check.sh b/test/lib/check.sh
index add8fb8f4..a6a9b3823 100644
--- a/test/lib/check.sh
+++ b/test/lib/check.sh
@@ -202,6 +202,10 @@ in_sync() {
# 6th argument is the sync ratio for RAID
idx=6
type=${a[3]}
+ if [ ${a[$(($idx + 1))]} != "idle" ]; then
+ echo "$lvm_name ($type$snap) is not in-sync"
+ return 1
+ fi
elif [ ${a[2]} = "mirror" ]; then
# 4th Arg tells us how far to the sync ratio
idx=$((${a[3]} + 4))
@@ -212,7 +216,7 @@ in_sync() {
b=( $(echo ${a[$idx]} | sed s:/:' ':) )
- if [ ${b[0]} != ${b[1]} ]; then
+ if [ ${b[0]} -eq 0 -o ${b[0]} != ${b[1]} ]; then
echo "$lvm_name ($type$snap) is not in-sync"
return 1
fi
@@ -222,7 +226,6 @@ in_sync() {
fi
echo "$lvm_name ($type$snap) is in-sync"
- return 0
}
active() {
@@ -365,6 +368,16 @@ dev_md5sum() {
die "LV $1/$2 has different MD5 check sum!")
}
+sysfs() {
+ # read maj min and also convert hex to decimal
+ local maj=$(($(stat -L --printf=0x%t "$1")))
+ local min=$(($(stat -L --printf=0x%T "$1")))
+ local P="/sys/dev/block/$maj:$min/$2"
+ local val=$(< "$P") || return 0 # no sysfs ?
+ test "$val" -eq "$3" || \
+ die "$1: $P = $val differs from expected value $3!"
+}
+
#set -x
unset LVM_VALGRIND
"$@"
diff --git a/test/lib/flavour-ndev-cluster-lvmpolld.sh b/test/lib/flavour-ndev-cluster-lvmpolld.sh
new file mode 100644
index 000000000..fe31bb4a9
--- /dev/null
+++ b/test/lib/flavour-ndev-cluster-lvmpolld.sh
@@ -0,0 +1,2 @@
+export LVM_TEST_LOCKING=3
+export LVM_TEST_LVMPOLLD=1
diff --git a/test/lib/flavour-ndev-cluster.sh b/test/lib/flavour-ndev-cluster.sh
new file mode 100644
index 000000000..362906952
--- /dev/null
+++ b/test/lib/flavour-ndev-cluster.sh
@@ -0,0 +1 @@
+export LVM_TEST_LOCKING=3
diff --git a/test/lib/flavour-ndev-lvmetad-lvmpolld.sh b/test/lib/flavour-ndev-lvmetad-lvmpolld.sh
new file mode 100644
index 000000000..496b3bcff
--- /dev/null
+++ b/test/lib/flavour-ndev-lvmetad-lvmpolld.sh
@@ -0,0 +1,3 @@
+export LVM_TEST_LOCKING=1
+export LVM_TEST_LVMETAD=1
+export LVM_TEST_LVMPOLLD=1
diff --git a/test/lib/flavour-ndev-lvmetad.sh b/test/lib/flavour-ndev-lvmetad.sh
new file mode 100644
index 000000000..cb3c49e80
--- /dev/null
+++ b/test/lib/flavour-ndev-lvmetad.sh
@@ -0,0 +1,2 @@
+export LVM_TEST_LOCKING=1
+export LVM_TEST_LVMETAD=1
diff --git a/test/lib/flavour-ndev-lvmpolld.sh b/test/lib/flavour-ndev-lvmpolld.sh
new file mode 100644
index 000000000..0a7070308
--- /dev/null
+++ b/test/lib/flavour-ndev-lvmpolld.sh
@@ -0,0 +1,2 @@
+export LVM_TEST_LOCKING=1
+export LVM_TEST_LVMPOLLD=1
diff --git a/test/lib/flavour-ndev-vanilla.sh b/test/lib/flavour-ndev-vanilla.sh
new file mode 100644
index 000000000..1899c948e
--- /dev/null
+++ b/test/lib/flavour-ndev-vanilla.sh
@@ -0,0 +1 @@
+export LVM_TEST_LOCKING=1
diff --git a/test/lib/flavour-udev-cluster-lvmpolld.sh b/test/lib/flavour-udev-cluster-lvmpolld.sh
new file mode 100644
index 000000000..abf76e9af
--- /dev/null
+++ b/test/lib/flavour-udev-cluster-lvmpolld.sh
@@ -0,0 +1,3 @@
+export LVM_TEST_LOCKING=3
+export LVM_TEST_LVMPOLLD=1
+export LVM_TEST_DEVDIR=/dev
diff --git a/test/lib/flavour-udev-cluster.sh b/test/lib/flavour-udev-cluster.sh
new file mode 100644
index 000000000..a9025a618
--- /dev/null
+++ b/test/lib/flavour-udev-cluster.sh
@@ -0,0 +1,2 @@
+export LVM_TEST_LOCKING=3
+export LVM_TEST_DEVDIR=/dev
diff --git a/test/lib/flavour-udev-lvmetad-lvmpolld.sh b/test/lib/flavour-udev-lvmetad-lvmpolld.sh
new file mode 100644
index 000000000..64253d1fa
--- /dev/null
+++ b/test/lib/flavour-udev-lvmetad-lvmpolld.sh
@@ -0,0 +1,4 @@
+export LVM_TEST_LOCKING=1
+export LVM_TEST_LVMETAD=1
+export LVM_TEST_LVMPOLLD=1
+export LVM_TEST_DEVDIR=/dev
diff --git a/test/lib/flavour-udev-lvmetad.sh b/test/lib/flavour-udev-lvmetad.sh
new file mode 100644
index 000000000..13be32ea2
--- /dev/null
+++ b/test/lib/flavour-udev-lvmetad.sh
@@ -0,0 +1,3 @@
+export LVM_TEST_LOCKING=1
+export LVM_TEST_LVMETAD=1
+export LVM_TEST_DEVDIR=/dev
diff --git a/test/lib/flavour-udev-lvmlockd-dlm.sh b/test/lib/flavour-udev-lvmlockd-dlm.sh
new file mode 100644
index 000000000..5bd274911
--- /dev/null
+++ b/test/lib/flavour-udev-lvmlockd-dlm.sh
@@ -0,0 +1,6 @@
+export LVM_TEST_LOCKING=1
+export LVM_TEST_LVMETAD=1
+export LVM_TEST_LVMPOLLD=1
+export LVM_TEST_LVMLOCKD=1
+export LVM_TEST_LOCK_TYPE_DLM=1
+export LVM_TEST_DEVDIR=/dev
diff --git a/test/lib/flavour-udev-lvmlockd-sanlock.sh b/test/lib/flavour-udev-lvmlockd-sanlock.sh
new file mode 100644
index 000000000..859ee2e66
--- /dev/null
+++ b/test/lib/flavour-udev-lvmlockd-sanlock.sh
@@ -0,0 +1,6 @@
+export LVM_TEST_LOCKING=1
+export LVM_TEST_LVMETAD=1
+export LVM_TEST_LVMPOLLD=1
+export LVM_TEST_LVMLOCKD=1
+export LVM_TEST_LOCK_TYPE_SANLOCK=1
+export LVM_TEST_DEVDIR=/dev
diff --git a/test/lib/flavour-udev-lvmpolld.sh b/test/lib/flavour-udev-lvmpolld.sh
new file mode 100644
index 000000000..c7f40b5a8
--- /dev/null
+++ b/test/lib/flavour-udev-lvmpolld.sh
@@ -0,0 +1,3 @@
+export LVM_TEST_LOCKING=1
+export LVM_TEST_LVMPOLLD=1
+export LVM_TEST_DEVDIR=/dev
diff --git a/test/lib/flavour-udev-vanilla.sh b/test/lib/flavour-udev-vanilla.sh
new file mode 100644
index 000000000..ca778a6d8
--- /dev/null
+++ b/test/lib/flavour-udev-vanilla.sh
@@ -0,0 +1,2 @@
+export LVM_TEST_LOCKING=1
+export LVM_TEST_DEVDIR=/dev
diff --git a/test/lib/get.sh b/test/lib/get.sh
index b6fbf822e..ef1ddc4be 100644
--- a/test/lib/get.sh
+++ b/test/lib/get.sh
@@ -87,6 +87,10 @@ lv_tree_devices() {
lv_tree_devices_ "$@" | sort | uniq
}
+first_extent_sector() {
+ pv_field "$@" pe_start --units s --nosuffix
+}
+
#set -x
unset LVM_VALGRIND
"$@"
diff --git a/test/lib/inittest.sh b/test/lib/inittest.sh
index 731cd7afd..a5eeb57ab 100644
--- a/test/lib/inittest.sh
+++ b/test/lib/inittest.sh
@@ -29,33 +29,41 @@ TESTOLDPWD=$(pwd)
COMMON_PREFIX="LVMTEST"
PREFIX="${COMMON_PREFIX}$$"
-TESTDIR=$(mkdtemp "${LVM_TEST_DIR:-$TESTOLDPWD}" "$PREFIX.XXXXXXXXXX") || \
+if test -z "$LVM_TEST_DIR"; then LVM_TEST_DIR=$TMPDIR; fi
+TESTDIR=$(mkdtemp "${LVM_TEST_DIR:-/tmp}" "$PREFIX.XXXXXXXXXX") || \
	die "failed to create temporary directory in ${LVM_TEST_DIR:-/tmp}"
-RUNNING_DMEVENTD=$(pgrep dmeventd) || true
+RUNNING_DMEVENTD=$(pgrep dmeventd || true)
export TESTOLDPWD TESTDIR COMMON_PREFIX PREFIX RUNNING_DMEVENTD
+export LVM_LOG_FILE_EPOCH=DEBUG
+export LVM_EXPECTED_EXIT_STATUS=1
test -n "$BASH" && trap 'set +vx; STACKTRACE; set -vx' ERR
trap 'aux teardown' EXIT # don't forget to clean up
cd "$TESTDIR"
+mkdir lib
+
+# Set up symlinks in $TESTDIR/lib to dmeventd plugins and the test library files
+test -n "$abs_top_builddir" && \
+ find "$abs_top_builddir/daemons/dmeventd/plugins/" -name '*.so' \
+ -exec ln -s -t lib "{}" +
+find "$TESTOLDPWD/lib" ! \( -name '*.sh' -o -name '*.[cdo]' \
+ -o -name '*~' \) -exec ln -s -t lib "{}" +
if test -n "$LVM_TEST_FLAVOUR"; then
- touch flavour_overrides
- env | grep ^${LVM_TEST_FLAVOUR} | while read var; do
- (echo -n "export "; echo $var | sed -e s,^${LVM_TEST_FLAVOUR}_,,) >> flavour_overrides
- done
- . flavour_overrides
+ . lib/flavour-$LVM_TEST_FLAVOUR
fi
DM_DEFAULT_NAME_MANGLING_MODE=none
DM_DEV_DIR="$TESTDIR/dev"
LVM_SYSTEM_DIR="$TESTDIR/etc"
-mkdir "$LVM_SYSTEM_DIR" "$TESTDIR/lib" "$DM_DEV_DIR"
+mkdir "$LVM_SYSTEM_DIR" "$DM_DEV_DIR"
if test -n "$LVM_TEST_DEVDIR" ; then
+ test -d "$LVM_TEST_DEVDIR" || die "Test device directory LVM_TEST_DEVDIR=\"$LVM_TEST_DEVDIR\" is not valid."
DM_DEV_DIR=$LVM_TEST_DEVDIR
else
- mknod "$DM_DEV_DIR/testnull" c 1 3 || die "mknod failed";
+ mknod "$DM_DEV_DIR/testnull" c 1 3 || die "mknod failed"
echo >"$DM_DEV_DIR/testnull" || \
	die "Filesystem does not support devices in $DM_DEV_DIR (mounted with nodev?)"
mkdir "$DM_DEV_DIR/mapper"
@@ -70,33 +78,41 @@ echo "$TESTNAME" >TESTNAME
echo "Kernel is $(uname -a)"
# Report SELinux mode
-if which getenforce &>/dev/null ; then
- echo "Selinux mode is \"$(getenforce 2>/dev/null)\"."
-else
- echo "Selinux mode is not installed."
-fi
-
-# Setting up symlink from $i to $TESTDIR/lib
-find "$abs_top_builddir/daemons/dmeventd/plugins/" -name '*.so' \
- -exec ln -s -t lib "{}" +
-find "$abs_top_builddir/test/lib" ! \( -name '*.sh' -o -name '*.[cdo]' \
- -o -name '*~' \) -exec ln -s -t lib "{}" +
+echo "Selinux mode is $(getenforce 2>/dev/null || echo not installed)."
+free -m || true
# Set vars from utils now that we have TESTDIR/PREFIX/...
prepare_test_vars
test -n "$BASH" && set -eE -o pipefail
-aux lvmconf
-aux prepare_clvmd
-test -n "$LVM_TEST_LVMETAD" && {
+# Vars for harness
+echo "@TESTDIR=$TESTDIR"
+echo "@PREFIX=$PREFIX"
+
+if test -n "$LVM_TEST_LVMETAD" ; then
export LVM_LVMETAD_SOCKET="$TESTDIR/lvmetad.socket"
export LVM_LVMETAD_PIDFILE="$TESTDIR/lvmetad.pid"
aux prepare_lvmetad
+else
+	# without lvmetad, set up lvmconf/clvmd here (prepare_lvmetad creates its own lvmconf)
+ aux lvmconf
+ aux prepare_clvmd
+fi
+
+test -n "$LVM_TEST_LVMPOLLD" && {
+ export LVM_LVMPOLLD_SOCKET="$TESTDIR/lvmpolld.socket"
+ export LVM_LVMPOLLD_PIDFILE="$TESTDIR/lvmpolld.pid"
+ aux prepare_lvmpolld
}
-# Vars for harness
-echo "@TESTDIR=$TESTDIR"
-echo "@PREFIX=$PREFIX"
+if test -n "$LVM_TEST_LVMLOCKD" ; then
+ if test -n "$LVM_TEST_LOCK_TYPE_SANLOCK" ; then
+ aux lvmconf 'local/host_id = 1'
+ fi
+ export SHARED="--shared"
+fi
+
+echo "<======== Processing test: \"$TESTNAME\" ========>"
set -vx
diff --git a/test/lib/lvm-wrapper.sh b/test/lib/lvm-wrapper.sh
index 082c6405b..178c45b58 100644
--- a/test/lib/lvm-wrapper.sh
+++ b/test/lib/lvm-wrapper.sh
@@ -24,16 +24,30 @@ test -z "$LVM_GDB" || exec gdb --readnow --args "$abs_top_builddir/tools/lvm" $C
# Multiple level of LVM_VALGRIND support
# the higher level the more commands are traced
if test -n "$LVM_VALGRIND"; then
- RUN_VALGRIND="aux run_valgrind";
- case "$CMD" in
- lvs|pvs|vgs|vgck|vgscan)
- test "$LVM_VALGRIND" -gt 2 || unset RUN_VALGRIND ;;
- pvcreate|pvremove|lvremove|vgcreate|vgremove)
- test "$LVM_VALGRIND" -gt 1 || unset RUN_VALGRIND ;;
- *)
- test "$LVM_VALGRIND" -gt 0 || unset RUN_VALGRIND ;;
- esac
+ RUN_DBG="${VALGRIND:-valgrind}";
fi
+if test -n "$LVM_STRACE"; then
+ RUN_DBG="strace $LVM_STRACE -o strace.log"
+fi
+
+case "$CMD" in
+ lvs|pvs|vgs|vgck|vgscan)
+ test ${LVM_DEBUG_LEVEL:-0} -lt 2 && RUN_DBG= ;;
+ pvcreate|pvremove|lvremove|vgcreate|vgremove)
+ test ${LVM_DEBUG_LEVEL:-0} -lt 1 && RUN_DBG= ;;
+esac
+
+# Capture parallel users of debug.log file
+#test -z "$(fuser debug.log 2>/dev/null)" || {
+# echo "TEST WARNING: \"debug.log\" is still in use while running $CMD $@" >&2
+# fuser -v debug.log >&2
+#}
+
# the exec is important, because otherwise fatal signals inside "not" go unnoticed
-exec $RUN_VALGRIND "$abs_top_builddir/tools/lvm" $CMD "$@"
+if test -n "$abs_top_builddir"; then
+ exec $RUN_DBG "$abs_top_builddir/tools/lvm" $CMD "$@"
+else # we are testing the lvm on $PATH
+ PATH=`echo $PATH | sed -e s,[^:]*lvm2-testsuite[^:]*:,,g`
+ exec $RUN_DBG lvm $CMD "$@"
+fi
diff --git a/test/lib/not.c b/test/lib/not.c
index 1cb12f915..93c115a27 100644
--- a/test/lib/not.c
+++ b/test/lib/not.c
@@ -14,17 +14,35 @@
#include <unistd.h>
#include <stdio.h>
+#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>
-static int finished(const char *cmd, int status) {
+static int _finished(const char *cmd, int status, int pid) {
+ int ret;
if (!strcmp(cmd, "not"))
return !status;
if (!strcmp(cmd, "should")) {
- if (status)
+ if (status) {
fprintf(stderr, "TEST WARNING: Ignoring command failure.\n");
+ /* TODO: avoid using shell here */
+ /* Show log for failing command which should be passing */
+ ret = system("ls debug.log*${LVM_LOG_FILE_EPOCH}* 2>/dev/null");
+ if (WIFEXITED(ret) && WEXITSTATUS(ret) == 0) {
+ printf("## timing off\n<======== Debug log ========>\n"); /* timing off */
+ fflush(stdout);
+ if (system("sed -e 's,^,## DEBUG: ,' debug.log*${LVM_LOG_FILE_EPOCH}* 2>/dev/null")) {
+ /* Ignore result code */;
+ }
+ printf("## timing on\n"); /* timing on */
+ if (system("rm -f debug.log*${LVM_LOG_FILE_EPOCH}*")) {
+ /* Ignore result code */;
+ }
+ fflush(stdout);
+ }
+ }
return 0;
} else if (!strcmp(cmd, "invalid")) {
if (status == 3)
@@ -39,6 +57,7 @@ static int finished(const char *cmd, int status) {
}
int main(int args, char **argv) {
+ const char *val = NULL;
pid_t pid;
int status;
int FAILURE = 6;
@@ -53,6 +72,16 @@ int main(int args, char **argv) {
fprintf(stderr, "Could not fork\n");
return FAILURE;
} else if (pid == 0) { /* child */
+ if (!strcmp(argv[0], "not"))
+ val = ">1";
+ else if (!strcmp(argv[0], "invalid"))
+ val = "3";
+ else if (!strcmp(argv[0], "fail"))
+ val = "5";
+
+ if (val)
+ setenv("LVM_EXPECTED_EXIT_STATUS", val, 1);
+
execvp(argv[1], &argv[1]);
/* should not be accessible */
return FAILURE;
@@ -67,7 +96,7 @@ int main(int args, char **argv) {
return FAILURE;
}
- return finished(argv[0], WEXITSTATUS(status));
+ return _finished(argv[0], WEXITSTATUS(status), pid);
}
/* not accessible */
return FAILURE;
diff --git a/test/lib/runner.cpp b/test/lib/runner.cpp
new file mode 100644
index 000000000..d00aa457c
--- /dev/null
+++ b/test/lib/runner.cpp
@@ -0,0 +1,40 @@
+/* -*- C++ -*- copyright (c) 2014 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "brick-shelltest.h"
+
+int main(int argc, const char **argv)
+{
+ try {
+ return brick::shelltest::run( argc, argv, "LVM_TEST_FLAVOUR" );
+ } catch (std::exception const& e) {
+ std::cout << "Exception: " << e.what() << "\n";
+ }
+
+ return 1;
+}
+
diff --git a/test/lib/test-corosync-conf b/test/lib/test-corosync-conf
new file mode 100644
index 000000000..ccc958f1d
--- /dev/null
+++ b/test/lib/test-corosync-conf
@@ -0,0 +1,19 @@
+# created by lvm test suite
+totem {
+ version: 2
+ secauth: off
+ cluster_name: test
+}
+nodelist {
+ node {
+ ring0_addr: @LOCAL_NODE@
+ nodeid: 1
+ }
+}
+quorum {
+ provider: corosync_votequorum
+}
+logging {
+ to_syslog: yes
+}
+
diff --git a/test/lib/test-dlm-conf b/test/lib/test-dlm-conf
new file mode 100644
index 000000000..a93c93fca
--- /dev/null
+++ b/test/lib/test-dlm-conf
@@ -0,0 +1,4 @@
+# created by lvm test suite
+log_debug=1
+enable_fencing=0
+
diff --git a/test/lib/test-sanlock-conf b/test/lib/test-sanlock-conf
new file mode 100644
index 000000000..d1df598b0
--- /dev/null
+++ b/test/lib/test-sanlock-conf
@@ -0,0 +1,2 @@
+# created by lvm test suite
+SANLOCKOPTS="-U sanlock -G sanlock -w 0"
diff --git a/test/lib/utils.sh b/test/lib/utils.sh
index b4cfdebda..24c9076d4 100644
--- a/test/lib/utils.sh
+++ b/test/lib/utils.sh
@@ -57,6 +57,8 @@ mkdtemp() {
destdir=$1
template=$2
+ test -d "$destdir" || die "DIR ('$destdir') does not exist."
+
case "$template" in
*XXXX) ;;
*) die "Invalid template: $template (must have a suffix of at least 4 X's)";;
@@ -100,6 +102,11 @@ mkdtemp() {
die "$err"
}
+# Like grep, just always print 1st. line
+grep1_() {
+ awk -v pattern="${1}" 'NR==1 || $0~pattern' "${@:2}"
+}
+
STACKTRACE() {
trap - ERR
local i=0
@@ -112,7 +119,7 @@ STACKTRACE() {
test "${LVM_TEST_PARALLEL:-0}" -eq 1 -o -n "$RUNNING_DMEVENTD" -o -f LOCAL_DMEVENTD || {
pgrep dmeventd &>/dev/null && \
- die "** During test dmeventd has been started!"
+ die "ERROR: The test started dmeventd unexpectedly."
}
# Get backtraces from coredumps
@@ -127,11 +134,48 @@ STACKTRACE() {
done
fi
- test -z "$LVM_TEST_NODEBUG" -a -f debug.log && {
- sed -e "s,^,## DEBUG: ,;s,$top_srcdir/\?,," < debug.log
- }
-
test -f SKIP_THIS_TEST && exit 200
+
+ test -z "$LVM_TEST_NODEBUG" -a -f TESTNAME && {
+ local name
+ local idx
+ for i in debug.log* ; do
+ name=${i##debug.log_}
+ name=${name%%_*}
+ test "$name" = "DEBUG" && { name="$name$idx" ; idx=$(($idx + 1)) ; }
+ echo "<======== Debug log $i ========>"
+ sed -e "s,^,## $name: ," $i
+ done
+ if test -e strace.log ; then
+ echo "<======== Strace debug log ========>"
+ sed -e "s,^,## STRACE: ," strace.log
+ fi
+ dmsetup info -c | grep1_ "$PREFIX" > out
+ if test $(wc -l < out) -gt 1 ; then
+ echo "<======== Info ========>"
+ sed -e "s,^,## DMINFO: ," out
+ echo "<======== Active table ========>"
+ dmsetup table | grep "$PREFIX" | sed -e "s,^,## DMTABLE: ,"
+ echo "<======== Inactive table ========>"
+ dmsetup table --inactive | grep "$PREFIX" | sed -e "s,^,## DMITABLE: ,"
+ echo "<======== Status ========>"
+ dmsetup status | grep "$PREFIX" | sed -e "s,^,## DMSTATUS: ,"
+ echo "<======== Tree ========>"
+ dmsetup ls --tree | sed -e "s,^,## DMTREE: ,"
+ echo "<======== Recursive list of $DM_DEV_DIR ========>"
+ ls -Rl --hide=shm --hide=bus --hide=snd --hide=input --hide=dri \
+ --hide=net --hide=hugepages --hide=mqueue --hide=pts \
+ "$DM_DEV_DIR" | sed -e "s,^,## LSLR: ,"
+ echo "<======== Udev DB content ========>"
+ for i in /sys/block/dm-* /sys/block/loop* ; do
+ udevadm info --query=all --path "$i" 2>/dev/null || true
+ done | sed -e "s,^,## UDEV: ,"
+ fi
+ echo "<======== Script file \"$(< TESTNAME)\" ========>"
+ local script=$0
+ test -f "$script" || script="$TESTOLDPWD/$0"
+ awk '{print "## Line:", NR, "\t", $0}' "$script"
+ }
}
init_udev_transaction() {
@@ -167,25 +211,11 @@ dm_table() {
}
skip() {
+ test "$#" -eq 0 || echo "TEST SKIPPED: $@"
touch SKIP_THIS_TEST
exit 200
}
-kernel_at_least() {
- local major=$(uname -r | cut -d. -f1)
- local minor=$(uname -r | cut -d. -f2 | cut -d- -f1)
-
- test "$major" -gt "$1" && return 0
- test "$major" -eq "$1" || return 1
- test "$minor" -gt "$2" && return 0
- test "$minor" -eq "$2" || return 1
- test -z "$3" && return 0
-
- local minor2=$(uname -r | cut -d. -f3 | cut -d- -f1)
- test -z "$minor2" -a "$3" -ne 0 && return 1
- test "$minor2" -ge "$3" 2>/dev/null || return 1
-}
-
get_devs() {
local IFS=$IFS_NL
DEVICES=( $(<DEVICES) )
@@ -207,18 +237,21 @@ prepare_test_vars() {
done
}
-# check if $abs_top_builddir was already set via 'lib/paths'
-test -n "${abs_top_builddir+varset}" || . lib/paths || die "you must run make first"
-
-case "$PATH" in
-*"$abs_top_builddir/test/lib"*) ;;
-*)
- PATH="$abs_top_builddir/test/lib":"$abs_top_builddir/test/api":$PATH
- for i in `find $abs_top_builddir -name \*.so`; do
- p=`dirname $i`
- LD_LIBRARY_PATH="$p":$LD_LIBRARY_PATH
- done
- export PATH LD_LIBRARY_PATH ;;
-esac
+if test -z "${abs_top_builddir+varset}" && test -z "${installed_testsuite+varset}"; then
+ . lib/paths || die "something went wrong -- lib/paths is missing?"
+fi
+
+if test -z "${installed_testsuite+varset}"; then
+ case "$PATH" in
+ *"$abs_top_builddir/test/lib"*) ;;
+ *)
+ PATH="$abs_top_builddir/test/lib":"$abs_top_builddir/test/api":$PATH
+ for i in `find $abs_top_builddir -name \*.so`; do
+ p=`dirname $i`
+ LD_LIBRARY_PATH="$p":$LD_LIBRARY_PATH
+ done
+ export PATH LD_LIBRARY_PATH ;;
+ esac
+fi
test -z "$PREFIX" || prepare_test_vars
diff --git a/test/shell/000-basic.sh b/test/shell/000-basic.sh
index 7ad4d9f05..573bbd195 100644
--- a/test/shell/000-basic.sh
+++ b/test/shell/000-basic.sh
@@ -11,8 +11,12 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
lvm version
+test -n "$abs_top_builddir" || skip
+
v=$abs_top_builddir/lib/misc/lvm-version.h
sed -n "/#define LVM_VERSION ./s///p" "$v" | sed "s/ .*//" > expected
diff --git a/test/shell/activate-minor.sh b/test/shell/activate-minor.sh
index 1c5386c15..d7ae5f90d 100644
--- a/test/shell/activate-minor.sh
+++ b/test/shell/activate-minor.sh
@@ -14,6 +14,7 @@
# Just skip this test if minor is already in use...
dmsetup info | tee info
egrep "^Major, minor: *[0-9]+, 123" info && skip
+test -e LOCAL_LVMPOLLD && skip
aux prepare_vg 2
lvcreate -a n --zero n -l 1 -n foo $vg
diff --git a/test/shell/activate-missing-segment.sh b/test/shell/activate-missing-segment.sh
index 5f43c7269..988c9cbb8 100644
--- a/test/shell/activate-missing-segment.sh
+++ b/test/shell/activate-missing-segment.sh
@@ -18,6 +18,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_vg 2
lvcreate -l100%FREE -n span $vg
diff --git a/test/shell/activate-missing.sh b/test/shell/activate-missing.sh
index 8e2ff1e7a..e33a6ef6c 100644
--- a/test/shell/activate-missing.sh
+++ b/test/shell/activate-missing.sh
@@ -18,6 +18,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_vg 4
lvcreate -l1 -n linear1 $vg "$dev1"
diff --git a/test/shell/activate-partial.sh b/test/shell/activate-partial.sh
index 0b5d1b649..3720e1d26 100644
--- a/test/shell/activate-partial.sh
+++ b/test/shell/activate-partial.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_vg 3
lvcreate -aey --type mirror -m 1 -l 1 --nosync -n mirror $vg
diff --git a/test/shell/activation-skip.sh b/test/shell/activation-skip.sh
index ca562a7cd..5259d9dfd 100644
--- a/test/shell/activation-skip.sh
+++ b/test/shell/activation-skip.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
# Test skip activation flag -k|--setactivationskip
aux prepare_vg
diff --git a/test/shell/clvmd-restart.sh b/test/shell/clvmd-restart.sh
index 4a9f12e6b..afe48346c 100644
--- a/test/shell/clvmd-restart.sh
+++ b/test/shell/clvmd-restart.sh
@@ -1,5 +1,5 @@
#!/bin/sh
-# Copyright (C) 2011 Red Hat, Inc. All rights reserved.
+# Copyright (C) 2011-2015 Red Hat, Inc. All rights reserved.
#
# This copyrighted material is made available to anyone wishing to use,
# modify, copy, or redistribute it subject to the terms and conditions
@@ -16,12 +16,27 @@ export LVM_BINARY=lvm
. lib/inittest
# only clvmd based test, skip otherwise
+test -e LOCAL_LVMPOLLD && skip
test -e LOCAL_CLVMD || skip
read LOCAL_CLVMD < LOCAL_CLVMD
-aux prepare_pvs 1
+# TODO read from build, for now hardcoded
+CLVMD_SOCKET="/var/run/lvm/clvmd.sock"
-vgcreate --clustered y $vg $(cat DEVICES)
+restart_clvmd_() {
+ "$LVM_CLVMD_BINARY" -S
+ ls -la $CLVMD_SOCKET || true
+
+ for i in $(seq 1 20) ; do
+ test -S "$CLVMD_SOCKET" && break
+ sleep .1
+ done
+ # restarted clvmd has the same PID (no fork, only execvp)
+ NEW_LOCAL_CLVMD=$(pgrep clvmd)
+ test "$LOCAL_CLVMD" -eq "$NEW_LOCAL_CLVMD"
+}
+
+aux prepare_vg
lvcreate -an --zero n -n $lv1 -l1 $vg
lvcreate -an --zero n -n $lv2 -l1 $vg
@@ -30,19 +45,10 @@ lvcreate -l1 $vg
lvchange -aey $vg/$lv1
lvchange -aey $vg/$lv2
-"$LVM_CLVMD_BINARY" -S
-sleep .2
-# restarted clvmd has the same PID (no fork, only execvp)
-NEW_LOCAL_CLVMD=$(pgrep clvmd)
-test "$LOCAL_CLVMD" -eq "$NEW_LOCAL_CLVMD"
+restart_clvmd_
# try restart once more
-
-"$LVM_CLVMD_BINARY" -S
-sleep .2
-# restarted clvmd has the same PID (no fork, only execvp)
-NEW_LOCAL_CLVMD=$(pgrep clvmd)
-test "$LOCAL_CLVMD" -eq "$NEW_LOCAL_CLVMD"
+restart_clvmd_
# FIXME: Hmm - how could we test exclusivity is preserved in singlenode ?
lvchange -an $vg/$lv1
@@ -55,7 +61,7 @@ vgchange -an $vg
# Test what happens after 'reboot'
kill "$LOCAL_CLVMD"
-while test -e "/var/run/clvmd.pid"; do echo -n .; sleep .1; done # wait for the pid removal
+while test -e "$CLVMD_PIDFILE"; do echo -n .; sleep .1; done # wait for the pid removal
aux prepare_clvmd
vgchange -ay $vg
diff --git a/test/shell/covercmd.sh b/test/shell/covercmd.sh
index c84c967de..5e47bd92c 100644
--- a/test/shell/covercmd.sh
+++ b/test/shell/covercmd.sh
@@ -16,6 +16,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_pvs 5
get_devs
diff --git a/test/shell/discards-thin.sh b/test/shell/discards-thin.sh
index a2cac5219..fbd0d8702 100644
--- a/test/shell/discards-thin.sh
+++ b/test/shell/discards-thin.sh
@@ -13,8 +13,12 @@
# test support of thin discards
#
+export LVM_TEST_THIN_REPAIR_CMD=${LVM_TEST_THIN_REPAIR_CMD-/bin/false}
+
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
#
# Main
#
diff --git a/test/shell/dlm-hello-world.sh b/test/shell/dlm-hello-world.sh
new file mode 100644
index 000000000..3f5fc5701
--- /dev/null
+++ b/test/shell/dlm-hello-world.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+# Copyright (C) 2008-2012 Red Hat, Inc. All rights reserved.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU General Public License v.2.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+test_description='Hello world for vgcreate with lvmlockd and dlm'
+
+. lib/inittest
+
+[ -z "$LVM_TEST_LOCK_TYPE_DLM" ] && skip;
+
+aux prepare_devs 1
+
+vgcreate $SHARED $vg "$dev1"
+
+vgs -o+locktype,lockargs $vg
+
+check vg_field $vg vg_locktype dlm
+
+vgremove $vg
+
diff --git a/test/shell/dlm-prepare.sh b/test/shell/dlm-prepare.sh
new file mode 100644
index 000000000..c4f02a480
--- /dev/null
+++ b/test/shell/dlm-prepare.sh
@@ -0,0 +1,90 @@
+#!/bin/sh
+# Copyright (C) 2008-2012 Red Hat, Inc. All rights reserved.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU General Public License v.2.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+test_description='Set up things to run tests with dlm'
+
+. lib/utils
+. lib/inittest
+
+[ -z "$LVM_TEST_LOCK_TYPE_DLM" ] && skip;
+
+COROSYNC_CONF="/etc/corosync/corosync.conf"
+COROSYNC_NODE="$(hostname)"
+create_corosync_conf() {
+ if test -a $COROSYNC_CONF; then
+ if ! grep "created by lvm test suite" $COROSYNC_CONF; then
+ rm $COROSYNC_CONF
+ else
+ mv $COROSYNC_CONF $COROSYNC_CONF.prelvmtest
+ fi
+ fi
+
+ sed -e "s/@LOCAL_NODE@/$COROSYNC_NODE/" lib/test-corosync-conf > $COROSYNC_CONF
+ echo "created new $COROSYNC_CONF"
+}
+
+DLM_CONF="/etc/dlm/dlm.conf"
+create_dlm_conf() {
+ if test -a $DLM_CONF; then
+ if ! grep "created by lvm test suite" $DLM_CONF; then
+ rm $DLM_CONF
+ else
+ mv $DLM_CONF $DLM_CONF.prelvmtest
+ fi
+ fi
+
+ cp lib/test-dlm-conf $DLM_CONF
+ echo "created new $DLM_CONF"
+}
+
+prepare_lvmlockd_dlm() {
+ if pgrep lvmlockd ; then
+ echo "Cannot run while existing lvmlockd process exists"
+ exit 1
+ fi
+
+ if pgrep dlm_controld ; then
+ echo "Cannot run while existing dlm_controld process exists"
+ exit 1
+ fi
+
+ if pgrep corosync; then
+ echo "Cannot run while existing corosync process exists"
+ exit 1
+ fi
+
+ create_corosync_conf
+ create_dlm_conf
+
+ systemctl start corosync
+ sleep 1
+ if ! pgrep corosync; then
+ echo "Failed to start corosync"
+ exit 1
+ fi
+
+ systemctl start dlm
+ sleep 1
+ if ! pgrep dlm_controld; then
+ echo "Failed to start dlm"
+ exit 1
+ fi
+
+ lvmlockd
+ sleep 1
+ if ! pgrep lvmlockd ; then
+ echo "Failed to start lvmlockd"
+ exit 1
+ fi
+}
+
+prepare_lvmlockd_dlm
+
diff --git a/test/shell/dlm-remove.sh b/test/shell/dlm-remove.sh
new file mode 100644
index 000000000..d7af46f67
--- /dev/null
+++ b/test/shell/dlm-remove.sh
@@ -0,0 +1,20 @@
+#!/bin/sh
+# Copyright (C) 2008-2012 Red Hat, Inc. All rights reserved.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU General Public License v.2.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+test_description='Remove the dlm test setup'
+
+. lib/inittest
+
+[ -z "$LVM_TEST_LOCK_TYPE_DLM" ] && skip;
+
+systemctl stop dlm
+systemctl stop corosync
+killall lvmlockd
diff --git a/test/shell/dmeventd-restart.sh b/test/shell/dmeventd-restart.sh
index a94ede180..a64509ebe 100644
--- a/test/shell/dmeventd-restart.sh
+++ b/test/shell/dmeventd-restart.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_dmeventd
aux prepare_vg 5
@@ -43,9 +45,12 @@ not pgrep dmeventd
rm LOCAL_DMEVENTD
# set dmeventd path
-aux lvmconf "dmeventd/executable=\"$abs_top_builddir/test/lib/dmeventd\""
+if test -n "$abs_top_builddir"; then
+ aux lvmconf "dmeventd/executable=\"$abs_top_builddir/test/lib/dmeventd\""
+fi
+
lvchange --monitor y --verbose $vg/3way 2>&1 | tee lvchange.out
-pgrep dmeventd >LOCAL_DMEVENTD
+pgrep -o dmeventd >LOCAL_DMEVENTD
not grep 'already monitored' lvchange.out
vgremove -ff $vg
diff --git a/test/shell/dumpconfig.sh b/test/shell/dumpconfig.sh
index f52157fc7..0d33761c5 100644
--- a/test/shell/dumpconfig.sh
+++ b/test/shell/dumpconfig.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
flatten() {
cat > flatten.config
for s in `egrep '^[a-z]+ {$' flatten.config | sed -e s,{$,,`; do
diff --git a/test/shell/error-usage.sh b/test/shell/error-usage.sh
index 61e282b66..f617b61f2 100644
--- a/test/shell/error-usage.sh
+++ b/test/shell/error-usage.sh
@@ -13,6 +13,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
which md5sum || skip
aux prepare_pvs 1
diff --git a/test/shell/fsadm.sh b/test/shell/fsadm.sh
index 368e7471c..12f159312 100644
--- a/test/shell/fsadm.sh
+++ b/test/shell/fsadm.sh
@@ -13,6 +13,8 @@ test_description='Exercise fsadm filesystem resize'
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_vg 1 100
# set to "skip" to avoid testing given fs and test warning result
diff --git a/test/shell/inconsistent-metadata.sh b/test/shell/inconsistent-metadata.sh
index 69d1f3328..a60633092 100644
--- a/test/shell/inconsistent-metadata.sh
+++ b/test/shell/inconsistent-metadata.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_vg 3 12
lvcreate -aye --type mirror -m 1 -l 1 -n mirror $vg
diff --git a/test/shell/listings.sh b/test/shell/listings.sh
index fcf6308e1..d01851075 100644
--- a/test/shell/listings.sh
+++ b/test/shell/listings.sh
@@ -15,6 +15,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_devs 5
get_devs
diff --git a/test/shell/lock-blocking.sh b/test/shell/lock-blocking.sh
index e05fa5068..5c004c9d3 100644
--- a/test/shell/lock-blocking.sh
+++ b/test/shell/lock-blocking.sh
@@ -13,14 +13,16 @@ test_description='test some blocking / non-blocking multi-vg operations'
. lib/inittest
-aux prepare_devs 3
+test -e LOCAL_LVMPOLLD && skip
test -e LOCAL_CLVMD && skip
+
+aux prepare_devs 3
pvcreate "$dev1" "$dev2"
vgcreate $vg "$dev1" "$dev2"
# if wait_for_locks set, vgremove should wait for orphan lock
# flock process should have exited by the time first vgremove completes
-flock -w 5 $TESTDIR/var/lock/lvm/P_orphans -c "sleep 10" &
+flock -w 5 $TESTDIR/var/lock/lvm/P_orphans sleep 10 &
while ! test -f $TESTDIR/var/lock/lvm/P_orphans ; do sleep .1 ; done
vgremove --config 'global { wait_for_locks = 1 }' $vg
@@ -31,7 +33,7 @@ test ! -f $TESTDIR/var/lock/lvm/P_orphans
# if wait_for_locks not set, vgremove should fail on non-blocking lock
# we must wait for flock process at the end - vgremove won't wait
vgcreate $vg "$dev1" "$dev2"
-flock -w 5 $TESTDIR/var/lock/lvm/P_orphans -c "sleep 10" &
+flock -w 5 $TESTDIR/var/lock/lvm/P_orphans sleep 10 &
while ! test -f $TESTDIR/var/lock/lvm/P_orphans ; do sleep .1 ; done
flock_pid=`jobs -p`
diff --git a/test/shell/lock-parallel.sh b/test/shell/lock-parallel.sh
index 993175fc9..ebe6336d2 100644
--- a/test/shell/lock-parallel.sh
+++ b/test/shell/lock-parallel.sh
@@ -1,5 +1,5 @@
#!/bin/sh
-# Copyright (C) 2014 Red Hat, Inc. All rights reserved.
+# Copyright (C) 2014-2015 Red Hat, Inc. All rights reserved.
#
# This copyrighted material is made available to anyone wishing to use,
# modify, copy, or redistribute it subject to the terms and conditions
@@ -14,7 +14,10 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
which mkfs.ext3 || skip
+which fsck || skip
aux prepare_vg
@@ -23,12 +26,15 @@ lvcreate -l1 -n $lv2 $vg
mkfs.ext3 "$DM_DEV_DIR/$vg/$lv1"
# Slowdown PV for resized LV
-aux delay_dev "$dev1" 20 20
+aux delay_dev "$dev1" 50 50 $(get first_extent_sector "$dev1"):
lvresize -L-5 -r $vg/$lv1 &
# Let's wait till resize starts
-sleep 2
+for i in $(seq 1 300); do
+ pgrep fsck && break
+ sleep .1
+done
lvremove -f $vg/$lv2
diff --git a/test/shell/lvchange-cache.sh b/test/shell/lvchange-cache.sh
index 87a217ae1..3efd76289 100644
--- a/test/shell/lvchange-cache.sh
+++ b/test/shell/lvchange-cache.sh
@@ -11,17 +11,30 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux have_cache 1 3 0 || skip
aux prepare_vg 3
+aux lvmconf 'global/cache_disabled_features = [ "policy_smq" ]'
+
lvcreate --type cache-pool -an -v -L 2 -n cpool $vg
lvcreate -H -L 4 -n corigin --cachepool $vg/cpool
lvcreate -n noncache -l 1 $vg
+# cannot change major minor for pools
+not lvchange --yes -M y --minor 235 --major 253 $vg/cpool
+not lvchange -M n $vg/cpool
+
not lvchange --cachepolicy mq $vg/noncache
not lvchange --cachesettings foo=bar $vg/noncache
+lvchange --cachepolicy cleaner $vg/corigin
+dmsetup status | grep $vg-corigin | grep 'cleaner'
+
lvchange --cachepolicy mq --cachesettings migration_threshold=333 $vg/corigin
+dmsetup status | grep $vg-corigin | not grep 'cleaner'
+dmsetup status | grep $vg-corigin | grep 'mq'
dmsetup status | grep $vg-corigin | grep 'migration_threshold 333'
lvchange --refresh $vg/corigin
dmsetup status | grep $vg-corigin | grep 'migration_threshold 333'
diff --git a/test/shell/lvchange-mirror.sh b/test/shell/lvchange-mirror.sh
index f28e486f6..bd4c868c6 100644
--- a/test/shell/lvchange-mirror.sh
+++ b/test/shell/lvchange-mirror.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
# FIXME RESYNC doesn't work in cluster with exclusive activation
# seriously broken!
test -e LOCAL_CLVMD && skip
diff --git a/test/shell/lvchange-partial-raid10.sh b/test/shell/lvchange-partial-raid10.sh
index 8014abd40..d83d19366 100644
--- a/test/shell/lvchange-partial-raid10.sh
+++ b/test/shell/lvchange-partial-raid10.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux have_raid 1 3 2 || skip
aux prepare_vg 4
diff --git a/test/shell/lvchange-partial.sh b/test/shell/lvchange-partial.sh
index c65c07b87..7b1f68d3d 100644
--- a/test/shell/lvchange-partial.sh
+++ b/test/shell/lvchange-partial.sh
@@ -11,6 +11,7 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
aux prepare_vg 4
diff --git a/test/shell/lvchange-raid.sh b/test/shell/lvchange-raid.sh
index 00b526568..4ce776fe8 100644
--- a/test/shell/lvchange-raid.sh
+++ b/test/shell/lvchange-raid.sh
@@ -209,7 +209,7 @@ run_refresh_check() {
# Disable dev2 and do some I/O to make the kernel notice
aux disable_dev "$dev2"
- dd if=/dev/urandom of=$DM_DEV_DIR/$sizelv bs=1k count=$size
+ dd if=/dev/urandom of="$DM_DEV_DIR/$sizelv" bs=1k count=$size
sync
# Check for 'p'artial flag
diff --git a/test/shell/lvchange-raid10.sh b/test/shell/lvchange-raid10.sh
index 0d17928b9..574ac7d84 100644
--- a/test/shell/lvchange-raid10.sh
+++ b/test/shell/lvchange-raid10.sh
@@ -13,6 +13,8 @@ TEST_RAID=raid10
. shell/lvchange-raid.sh
+test -e LOCAL_LVMPOLLD && skip
+
aux have_raid 1 5 2 || skip
run_types raid10 -m 1 -i 2 "$dev1" "$dev2" "$dev3" "$dev4"
diff --git a/test/shell/lvchange-raid456.sh b/test/shell/lvchange-raid456.sh
index b1dd1dbbd..bdce2e685 100644
--- a/test/shell/lvchange-raid456.sh
+++ b/test/shell/lvchange-raid456.sh
@@ -13,6 +13,8 @@ TEST_RAID=raid456
. shell/lvchange-raid.sh
+test -e LOCAL_LVMPOLLD && skip
+
aux raid456_replace_works || skip
aux have_raid 1 5 2 || skip
diff --git a/test/shell/lvchange-syncaction-raid.sh b/test/shell/lvchange-syncaction-raid.sh
index 7ef4e01a0..558157b35 100644
--- a/test/shell/lvchange-syncaction-raid.sh
+++ b/test/shell/lvchange-syncaction-raid.sh
@@ -1,5 +1,5 @@
#!/bin/sh
-# Copyright (C) 2014 Red Hat, Inc. All rights reserved.
+# Copyright (C) 2014-2015 Red Hat, Inc. All rights reserved.
#
# This copyrighted material is made available to anyone wishing to use,
# modify, copy, or redistribute it subject to the terms and conditions
@@ -13,6 +13,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
# Proper mismatch count 1.5.2+ upstream, 1.3.5 < x < 1.4.0 in RHEL6
aux have_raid 1 3 5 &&
! aux have_raid 1 4 0 ||
@@ -27,17 +29,28 @@ START=$(get pv_field "$dev2" pe_start --units 1k)
METASIZE=$(get lv_field $vg/${lv1}_rmeta_1 size -a --units 1k)
SEEK=$((${START%\.00k} + ${METASIZE%\.00k}))
# Overwrite some portion of _rimage_1
+
+#aux delay_dev "$dev2" 10 10
dd if=/dev/urandom of="$dev2" bs=1K count=1 seek=$SEEK oflag=direct
+# FIXME
+# Some delay - there is currently race in upstream kernel
+# test may occasionally fail with:
+# device-mapper: message ioctl on failed: Device or resource busy
+#
+# Heinz's kernel seems to fix this particular issue but
+# has some other problem for now
+aux udev_wait
-aux wait_for_sync $vg $lv1
lvchange --syncaction check $vg/$lv1
+
+# Wait till scrubbing is finished
+aux wait_for_sync $vg $lv1
+
check lv_field $vg/$lv1 raid_mismatch_count "128"
# Let's deactivate
lvchange -an $vg/$lv1
-# Slow down write by 100ms
-aux delay_dev "$dev2" 0 50
lvchange -ay $vg/$lv1
# noone has it open and target is read & running
dmsetup info -c | grep $vg
@@ -55,9 +68,17 @@ dmsetup info -c | grep $vg
# As solution for now - user needs to run --synaction on synchronous raid array
#
aux wait_for_sync $vg $lv1
-should lvchange --syncaction check $vg/$lv1
-aux enable_dev "$dev2"
+# Check raid array doesn't know about error yet
check lv_field $vg/$lv1 raid_mismatch_count "0"
+# Start scrubbing
+lvchange --syncaction check $vg/$lv1
+
+# Wait till scrubbing is finished
+aux wait_for_sync $vg $lv1
+
+# Retest mismatch exists
+check lv_field $vg/$lv1 raid_mismatch_count "128"
+
vgremove -ff $vg
diff --git a/test/shell/lvchange-thin.sh b/test/shell/lvchange-thin.sh
index 4d3356330..b66c5ee5c 100644
--- a/test/shell/lvchange-thin.sh
+++ b/test/shell/lvchange-thin.sh
@@ -9,8 +9,12 @@
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+export LVM_TEST_THIN_REPAIR_CMD=${LVM_TEST_THIN_REPAIR_CMD-/bin/false}
+
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux have_thin 1 0 0 || skip
aux prepare_pvs 3
@@ -54,8 +58,9 @@ lvchange -r auto $vg/$lv1
lvchange --yes -M y --minor 234 --major 253 $vg/$lv1
lvchange -M n $vg/$lv1
-lvchange --yes -M y --minor 235 --major 253 $vg/pool
-lvchange -M n $vg/pool
+# cannot change major minor for pools
+not lvchange --yes -M y --minor 235 --major 253 $vg/pool
+not lvchange -M n $vg/pool
# addtag_ARG
lvchange --addtag foo $vg/$lv1
diff --git a/test/shell/lvconvert-cache-raid.sh b/test/shell/lvconvert-cache-raid.sh
index faab25169..5f51fee4a 100644
--- a/test/shell/lvconvert-cache-raid.sh
+++ b/test/shell/lvconvert-cache-raid.sh
@@ -13,9 +13,13 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux have_cache 1 3 0 || skip
aux have_raid 1 0 0 || skip
+aux lvmconf 'global/cache_disabled_features = [ "policy_smq" ]'
+
aux prepare_vg 5 80
# Bug 1095843
diff --git a/test/shell/lvconvert-cache-smq.sh b/test/shell/lvconvert-cache-smq.sh
new file mode 100644
index 000000000..cdc783873
--- /dev/null
+++ b/test/shell/lvconvert-cache-smq.sh
@@ -0,0 +1,32 @@
+#!/bin/sh
+# Copyright (C) 2014 Red Hat, Inc. All rights reserved.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU General Public License v.2.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+# Exercise conversion of cache and cache pool
+
+. lib/inittest
+
+test -e LOCAL_LVMPOLLD && skip
+
+aux have_cache 1 8 0 || skip
+
+aux prepare_vg 5 80
+
+lvcreate --type cache-pool -an -v -L 2 -n cpool $vg
+
+lvcreate -H --cachepolicy smq -L 4 -n corigin --cachepool $vg/cpool
+
+check lv_field $vg/corigin cache_policy "smq"
+
+lvconvert --splitcache $vg/corigin
+
+lvs -o+cache_policy -a $vg
+
+vgremove -f $vg
diff --git a/test/shell/lvconvert-cache-thin.sh b/test/shell/lvconvert-cache-thin.sh
index 87256a4d2..12b8dfe3a 100644
--- a/test/shell/lvconvert-cache-thin.sh
+++ b/test/shell/lvconvert-cache-thin.sh
@@ -13,6 +13,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux have_cache 1 3 0 || skip
aux have_thin 1 0 0 || skip
diff --git a/test/shell/lvconvert-cache.sh b/test/shell/lvconvert-cache.sh
index ed414eddb..6ac62c41b 100644
--- a/test/shell/lvconvert-cache.sh
+++ b/test/shell/lvconvert-cache.sh
@@ -13,6 +13,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux have_cache 1 3 0 || skip
aux prepare_vg 5 80
diff --git a/test/shell/lvconvert-mirror-basic.sh b/test/shell/lvconvert-mirror-basic.sh
index 07bafb950..4a3e62f44 100644
--- a/test/shell/lvconvert-mirror-basic.sh
+++ b/test/shell/lvconvert-mirror-basic.sh
@@ -9,6 +9,9 @@
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+# disable lvmetad logging as it bogs down test systems
+export LVM_TEST_LVMETAD_DEBUG_OPTS=${LVM_TEST_LVMETAD_DEBUG_OPTS-}
+
. lib/inittest
log_name_to_count() {
@@ -110,7 +113,6 @@ test_lvconvert() {
fi
}
-
aux prepare_pvs 5 5
vgcreate -s 32k $vg $(cat DEVICES)
diff --git a/test/shell/lvconvert-mirror-updown.sh b/test/shell/lvconvert-mirror-updown.sh
index 18618fd28..4d401fbf8 100644
--- a/test/shell/lvconvert-mirror-updown.sh
+++ b/test/shell/lvconvert-mirror-updown.sh
@@ -28,6 +28,7 @@ lvconvert -m+1 -b $vg/$lv1 "$dev3"
#lvconvert $vg/$lv1
lvs -a $vg
+
#
# It fails so use 'should' and -vvvv for now
#
diff --git a/test/shell/lvconvert-mirror.sh b/test/shell/lvconvert-mirror.sh
index d8f057a9b..844f8e945 100644
--- a/test/shell/lvconvert-mirror.sh
+++ b/test/shell/lvconvert-mirror.sh
@@ -9,6 +9,9 @@
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+# disable lvmetad logging as it bogs down test systems
+export LVM_TEST_LVMETAD_DEBUG_OPTS=${LVM_TEST_LVMETAD_DEBUG_OPTS-}
+
. lib/inittest
aux prepare_pvs 5 10
@@ -96,7 +99,7 @@ lvremove -ff $vg
lvcreate -aey -l5 --type mirror -m1 -n $lv1 $vg "$dev1" "$dev2" "$dev3:0"
check mirror $vg $lv1
check mirror_legs $vg $lv1 2
-lvconvert -m+1 -b $vg/$lv1 "$dev4"
+LVM_TEST_TAG="kill_me_$PREFIX" lvconvert -m+1 -b $vg/$lv1 "$dev4"
# Next convert should fail b/c we can't have 2 at once
should not lvconvert -m+1 $vg/$lv1 "$dev5"
@@ -153,7 +156,7 @@ lvremove -ff $vg
# "remove newly added mirror"
lvcreate -aey -l2 --type mirror -m1 -n $lv1 $vg "$dev1" "$dev2" "$dev3:$DEVRANGE"
-lvconvert -m+1 -b $vg/$lv1 "$dev4"
+LVM_TEST_TAG="kill_me_$PREFIX" lvconvert -m+1 -b $vg/$lv1 "$dev4"
lvconvert -m-1 $vg/$lv1 "$dev4"
lvconvert $vg/$lv1 # wait
@@ -164,7 +167,7 @@ lvremove -ff $vg
# "remove one of newly added mirrors"
lvcreate -aey -l2 --type mirror -m1 -n $lv1 $vg "$dev1" "$dev2" "$dev3:$DEVRANGE"
-lvconvert -m+2 -b $vg/$lv1 "$dev4" "$dev5"
+LVM_TEST_TAG="kill_me_$PREFIX" lvconvert -m+2 -b $vg/$lv1 "$dev4" "$dev5"
lvconvert -m-1 $vg/$lv1 "$dev4"
lvconvert $vg/$lv1 # wait
@@ -175,7 +178,7 @@ lvremove -ff $vg
# "remove from original mirror (the original is still mirror)"
lvcreate -aey -l2 --type mirror -m2 -n $lv1 $vg "$dev1" "$dev2" "$dev5" "$dev3:$DEVRANGE"
-lvconvert -m+1 -b $vg/$lv1 "$dev4"
+LVM_TEST_TAG="kill_me_$PREFIX" lvconvert -m+1 -b $vg/$lv1 "$dev4"
# FIXME: Extra wait here for mirror upconvert synchronization
# otherwise we may fail her on parallel upconvert and downconvert
# lvconvert-mirror-updown.sh tests this errornous case separately
@@ -190,7 +193,7 @@ lvremove -ff $vg
# "remove from original mirror (the original becomes linear)"
lvcreate -aey -l2 --type mirror -m1 -n $lv1 $vg "$dev1" "$dev2" "$dev3:$DEVRANGE"
-lvconvert -m+1 -b $vg/$lv1 "$dev4"
+LVM_TEST_TAG="kill_me_$PREFIX" lvconvert -m+1 -b $vg/$lv1 "$dev4"
# FIXME: Extra wait here for mirror upconvert synchronization
# otherwise we may fail her on parallel upconvert and downconvert
# lvconvert-mirror-updown.sh tests this errornous case separately
diff --git a/test/shell/lvconvert-raid-allocation.sh b/test/shell/lvconvert-raid-allocation.sh
index a1b1c3931..f118d9c48 100644
--- a/test/shell/lvconvert-raid-allocation.sh
+++ b/test/shell/lvconvert-raid-allocation.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux have_raid 1 3 0 || skip
aux prepare_pvs 5
diff --git a/test/shell/lvconvert-raid.sh b/test/shell/lvconvert-raid.sh
index 8621311a3..6529ee220 100644
--- a/test/shell/lvconvert-raid.sh
+++ b/test/shell/lvconvert-raid.sh
@@ -9,8 +9,13 @@
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+# disable lvmetad logging as it bogs down test systems
+export LVM_TEST_LVMETAD_DEBUG_OPTS=${LVM_TEST_LVMETAD_DEBUG_OPTS-}
+
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
get_image_pvs() {
local d
local images
@@ -121,6 +126,12 @@ check active $vg $lv2
# FIXME: ensure no residual devices
lvremove -ff $vg
+# 4-way
+lvcreate --type raid1 -m 4 -l 2 -n $lv1 $vg
+aux wait_for_sync $vg $lv1
+lvconvert --yes --splitmirrors 1 --name $lv2 $vg/$lv1 "$dev2"
+lvremove -ff $vg
+
###########################################
# RAID1 split + trackchanges / merge
###########################################
diff --git a/test/shell/lvconvert-raid10.sh b/test/shell/lvconvert-raid10.sh
index c4b92f18d..244a79e32 100644
--- a/test/shell/lvconvert-raid10.sh
+++ b/test/shell/lvconvert-raid10.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
get_image_pvs() {
local d
local images
diff --git a/test/shell/lvconvert-raid456.sh b/test/shell/lvconvert-raid456.sh
index 833b10e0c..0fb5a5b39 100644
--- a/test/shell/lvconvert-raid456.sh
+++ b/test/shell/lvconvert-raid456.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
get_image_pvs() {
local d
local images
diff --git a/test/shell/lvconvert-repair-dmeventd.sh b/test/shell/lvconvert-repair-dmeventd.sh
index eed08197c..2acf461ce 100644
--- a/test/shell/lvconvert-repair-dmeventd.sh
+++ b/test/shell/lvconvert-repair-dmeventd.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
which mkfs.ext2 || skip
aux mirror_recovery_works || skip
diff --git a/test/shell/lvconvert-repair-policy.sh b/test/shell/lvconvert-repair-policy.sh
index ace52fca0..96a69d00d 100644
--- a/test/shell/lvconvert-repair-policy.sh
+++ b/test/shell/lvconvert-repair-policy.sh
@@ -12,8 +12,8 @@
. lib/inittest
aux prepare_vg 4
-aux lvmconf 'allocation/maximise_cling = 0'
-aux lvmconf 'allocation/mirror_logs_require_separate_pvs = 1'
+aux lvmconf 'allocation/maximise_cling = 0' \
+ 'allocation/mirror_logs_require_separate_pvs = 1'
# Clean-up and create a 2-way mirror, where the the
# leg devices are always on $dev[12] and the log
diff --git a/test/shell/lvconvert-repair-raid-dmeventd.sh b/test/shell/lvconvert-repair-raid-dmeventd.sh
index eeb424ef0..df5544911 100644
--- a/test/shell/lvconvert-repair-raid-dmeventd.sh
+++ b/test/shell/lvconvert-repair-raid-dmeventd.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
which mkfs.ext3 || skip
aux have_raid 1 3 0 || skip
diff --git a/test/shell/lvconvert-repair-raid.sh b/test/shell/lvconvert-repair-raid.sh
index 4f0175a09..580516e6c 100644
--- a/test/shell/lvconvert-repair-raid.sh
+++ b/test/shell/lvconvert-repair-raid.sh
@@ -11,11 +11,13 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux have_raid 1 3 0 || skip
aux raid456_replace_works || skip
-aux lvmconf 'allocation/maximise_cling = 0'
-aux lvmconf 'allocation/mirror_logs_require_separate_pvs = 1'
+aux lvmconf 'allocation/maximise_cling = 0' \
+ 'allocation/mirror_logs_require_separate_pvs = 1'
aux prepare_vg 8
diff --git a/test/shell/lvconvert-repair-replace.sh b/test/shell/lvconvert-repair-replace.sh
index 3c0dc53e6..bf031be48 100644
--- a/test/shell/lvconvert-repair-replace.sh
+++ b/test/shell/lvconvert-repair-replace.sh
@@ -12,8 +12,8 @@
. lib/inittest
aux prepare_vg 6
-aux lvmconf 'allocation/maximise_cling = 0'
-aux lvmconf 'allocation/mirror_logs_require_separate_pvs = 1'
+aux lvmconf 'allocation/maximise_cling = 0' \
+ 'allocation/mirror_logs_require_separate_pvs = 1'
# 3-way, disk log
# multiple failures, full replace
@@ -67,7 +67,7 @@ check mirror $vg mirror2
vgs $vg
vgremove -ff $vg
-if kernel_at_least 3 0 0; then
+if aux kernel_at_least 3 0 0; then
# 2-way, mirrored log
# Double log failure, full replace
vgcreate $vg "$dev1" "$dev2" "$dev3" "$dev4" "$dev5" "$dev6"
diff --git a/test/shell/lvconvert-repair-snapshot.sh b/test/shell/lvconvert-repair-snapshot.sh
index 473d56160..73f2f880e 100644
--- a/test/shell/lvconvert-repair-snapshot.sh
+++ b/test/shell/lvconvert-repair-snapshot.sh
@@ -11,9 +11,11 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_vg 5
-aux lvmconf 'allocation/maximise_cling = 0'
-aux lvmconf 'allocation/mirror_logs_require_separate_pvs = 1'
+aux lvmconf 'allocation/maximise_cling = 0' \
+ 'allocation/mirror_logs_require_separate_pvs = 1'
lvcreate -aey --type mirror -m 3 --ignoremonitoring -L 2M -n 4way $vg "$dev1" "$dev2" "$dev3" "$dev4" "$dev5":0
lvcreate -s $vg/4way -L 2M -n snap
@@ -26,4 +28,5 @@ aux enable_dev "$dev2" "$dev4"
lvs -a -o +devices $vg
check mirror $vg 4way "$dev5"
+vgchange -an $vg
vgremove -ff $vg
diff --git a/test/shell/lvconvert-repair-thin.sh b/test/shell/lvconvert-repair-thin.sh
index 0e9534b1b..c699e1255 100644
--- a/test/shell/lvconvert-repair-thin.sh
+++ b/test/shell/lvconvert-repair-thin.sh
@@ -14,6 +14,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
which mkfs.ext2 || skip
#
@@ -24,7 +26,9 @@ aux have_thin 1 0 0 || skip
aux prepare_vg 4
# Create LV
-lvcreate -T -L20 -V10 -n $lv1 $vg/pool "$dev1" "$dev2"
+# TODO: investigate problem with --zero n and my repairable damage trick
+#lvcreate -T -L20 -V10 -n $lv1 $vg/pool --discards ignore --zero n --chunksize 128 "$dev1" "$dev2"
+lvcreate -T -L20 -V10 -n $lv1 $vg/pool --chunksize 128 --discards ignore "$dev1" "$dev2"
lvcreate -T -V10 -n $lv2 $vg/pool
mkfs.ext2 "$DM_DEV_DIR/$vg/$lv1"
@@ -71,6 +75,11 @@ lvchange -an $vg
# Swap repaired metadata back
lvconvert -y -f --poolmetadata $vg/fixed --thinpool $vg/pool
+# Check pool still preserves its original settings
+check lv_field $vg/pool chunksize "128.00k"
+check lv_field $vg/pool discards "ignore"
+check lv_field $vg/pool zero "zero"
+
# Activate pool - this should now work
vgchange -ay $vg
diff --git a/test/shell/lvconvert-repair-transient-dmeventd.sh b/test/shell/lvconvert-repair-transient-dmeventd.sh
index 5347d8803..30fd47c85 100644
--- a/test/shell/lvconvert-repair-transient-dmeventd.sh
+++ b/test/shell/lvconvert-repair-transient-dmeventd.sh
@@ -11,9 +11,11 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
+aux prepare_dmeventd
aux mirror_recovery_works || skip
aux prepare_vg 5
-aux prepare_dmeventd
lvcreate -aey --type mirror -m 3 --ignoremonitoring -L 1 -n 4way $vg
lvchange --monitor y $vg/4way
diff --git a/test/shell/lvconvert-repair-transient.sh b/test/shell/lvconvert-repair-transient.sh
index 99018ef4d..52e1825c9 100644
--- a/test/shell/lvconvert-repair-transient.sh
+++ b/test/shell/lvconvert-repair-transient.sh
@@ -11,12 +11,15 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux mirror_recovery_works || skip
aux prepare_vg 5
# ordinary mirrors
lvcreate -aey --type mirror -m 3 --ignoremonitoring -L 1 -n 4way $vg
+aux wait_for_sync $vg 4way
aux disable_dev --error --silent "$dev2" "$dev4"
mkfs.ext3 "$DM_DEV_DIR/$vg/4way" &
sleep 1
@@ -24,7 +27,7 @@ dmsetup status
echo n | lvconvert --repair $vg/4way 2>&1 | tee 4way.out
aux enable_dev --silent "$dev2" "$dev4"
-lvs -a -o +devices | tee out
+lvs -a -o +devices $vg | tee out
not grep unknown out
vgreduce --removemissing $vg
check mirror $vg 4way
diff --git a/test/shell/lvconvert-snapshot.sh b/test/shell/lvconvert-snapshot.sh
index 55fcb1a1c..ad500e2ca 100644
--- a/test/shell/lvconvert-snapshot.sh
+++ b/test/shell/lvconvert-snapshot.sh
@@ -14,6 +14,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_pvs 1
vgcreate -s 1k $vg $(cat DEVICES)
diff --git a/test/shell/lvconvert-striped-raid0.sh b/test/shell/lvconvert-striped-raid0.sh
new file mode 100644
index 000000000..90ef9ceca
--- /dev/null
+++ b/test/shell/lvconvert-striped-raid0.sh
@@ -0,0 +1,75 @@
+#!/bin/sh
+# Copyright (C) 2015 Red Hat, Inc. All rights reserved.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU General Public License v.2.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+. lib/inittest
+
+########################################################
+# MAIN
+########################################################
+aux have_raid 1 7 0 || skip
+
+aux prepare_pvs 6 20 # 6 devices for striped test
+vgcreate -s 128k $vg $(cat DEVICES)
+
+############################################
+# Create striped LV, convert to raid0* tests
+############################################
+# Create striped 6-way and cycle conversions
+lvcreate -y -i 6 -l 50%FREE -n $lv1 $vg
+lvconvert -y --type raid0 $vg/$lv1
+lvconvert -y --type raid0_meta $vg/$lv1
+lvconvert -y --type striped $vg/$lv1
+lvremove -ff $vg
+
+# Create raid0 5-way and cycle conversions
+lvcreate -y --type raid0 -i 5 -l 50%FREE -n $lv1 $vg
+lvconvert -y --type raid0_meta $vg/$lv1
+lvconvert -y --type striped $vg/$lv1
+lvconvert -y --type raid0 $vg/$lv1
+lvremove -ff $vg
+
+# Create raid0_meta 4-way and cycle conversions
+lvcreate -y --type raid0_meta -i 4 -l 50%FREE -n $lv1 $vg
+lvconvert -y --type raid0 $vg/$lv1
+lvconvert -y --type striped $vg/$lv1
+lvconvert -y --type raid0_meta $vg/$lv1
+lvremove -ff $vg
+
+# Create striped 3-way consuming all vg space
+lvcreate -y -i 3 -l 100%FREE -n $lv1 $vg
+lvconvert -y --type raid0 $vg/$lv1
+not lvconvert -y --type raid0_meta $vg/$lv1
+lvconvert -y --type striped $vg/$lv1
+lvremove -ff $vg
+
+# Not enough drives
+not lvcreate -y -i3 -l1 $vg "$dev1" "$dev2"
+not lvcreate -y --type raid0 -i3 -l1 $vg "$dev1" "$dev2"
+not lvcreate -y --type raid0_meta -i4 -l1 $vg "$dev1" "$dev2" "$dev3"
+
+# Create 2..6-way raid0 LV and cycle conversions
+# (seq takes space-separated bounds; "seq 2..6" would error and skip the loop)
+for s in $(seq 2 6)
+do
+	lvcreate -y --type raid0 -l 95%FREE -i $s -n $lv1 $vg
+	lvconvert -y --type raid0_meta $vg/$lv1
+	lvconvert -y --type raid0 $vg/$lv1
+	lvconvert -y --type striped $vg/$lv1
+	lvconvert -y --type raid0 $vg/$lv1
+	lvconvert -y --type raid0_meta $vg/$lv1
+	lvremove -ff $vg
+done
+
+# Not enough drives for 7-way
+not lvcreate -y --type raid0 -l 7 -i 7 -n $lv1 $vg
+
+vgremove -ff $vg
+
+exit 0
diff --git a/test/shell/lvconvert-thin-external.sh b/test/shell/lvconvert-thin-external.sh
index 1e9887a9c..479d23cb0 100644
--- a/test/shell/lvconvert-thin-external.sh
+++ b/test/shell/lvconvert-thin-external.sh
@@ -12,8 +12,12 @@
# Test conversion to thin external origin
+export LVM_TEST_THIN_REPAIR_CMD=${LVM_TEST_THIN_REPAIR_CMD-/bin/false}
+
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
which mkfs.ext2 || skip
which fsck || skip
@@ -53,6 +57,8 @@ lvcreate -l10 -T $vg/pool1 -c 192k
not lvconvert -T --thinpool $vg/pool1 $vg/pool --originname origin
# Create pool1 chunk_size unaligned LV and check failing conversion
lvcreate -l2 -n $lv1 $vg
+# Newer thin-pool target (>= 1.13) supports unaligned external origin
+aux lvmconf 'global/thin_disabled_features = [ "external_origin_extend" ]'
not lvconvert -T --thinpool $vg/pool1 $vg/$lv1
lvremove -f $vg/pool1 $vg/$lv1
diff --git a/test/shell/lvconvert-thin-raid.sh b/test/shell/lvconvert-thin-raid.sh
index d7a353b09..bd554b202 100644
--- a/test/shell/lvconvert-thin-raid.sh
+++ b/test/shell/lvconvert-thin-raid.sh
@@ -1,6 +1,6 @@
#!/bin/sh
-# Copyright (C) 2014 Red Hat, Inc. All rights reserved.
+# Copyright (C) 2014-2015 Red Hat, Inc. All rights reserved.
#
# This copyrighted material is made available to anyone wishing to use,
# modify, copy, or redistribute it subject to the terms and conditions
@@ -10,16 +10,22 @@
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+export LVM_TEST_THIN_REPAIR_CMD=${LVM_TEST_THIN_REPAIR_CMD-/bin/false}
+
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux have_thin 1 0 0 || skip
aux have_raid 1 4 0 || skip
aux prepare_vg 4
# create RAID LVs for data and metadata volumes
-lvcreate -aey --nosync -L10M --type raid1 -m1 -n $lv1 $vg
-lvcreate -aey --nosync -L8M --type raid1 -m1 -n $lv2 $vg
+lvcreate -aey -L10M --type raid1 -m3 -n $lv1 $vg
+lvcreate -aey -L8M --type raid1 -m3 -n $lv2 $vg
+aux wait_for_sync $vg $lv1
+aux wait_for_sync $vg $lv2
lvchange -an $vg/$lv1
# conversion fails for internal volumes
@@ -28,4 +34,22 @@ invalid lvconvert --yes --thinpool $vg/$lv1 --poolmetadata $vg/${lv2}_rimage_0
lvconvert --yes --thinpool $vg/$lv1 --poolmetadata $vg/$lv2
+lvchange -ay $vg
+
+lvconvert --splitmirrors 1 --name data2 $vg/${lv1}_tdata "$dev2"
+lvconvert --splitmirrors 1 --name data3 $vg/${lv1}_tdata "$dev3"
+lvconvert --splitmirrors 1 --trackchanges $vg/${lv1}_tdata "$dev4"
+
+lvconvert --splitmirrors 1 --name meta1 $vg/${lv1}_tmeta "$dev1"
+lvconvert --splitmirrors 1 --name meta2 $vg/${lv1}_tmeta "$dev2"
+lvconvert --splitmirrors 1 --trackchanges $vg/${lv1}_tmeta "$dev4"
+
+lvremove -ff $vg/data2 $vg/data3 $vg/meta1 $vg/meta2
+
+lvconvert --merge $vg/${lv1}_tdata_rimage_1
+lvconvert --merge $vg/${lv1}_tmeta_rimage_1
+
+lvconvert -m+1 $vg/${lv1}_tdata "$dev2"
+lvconvert -m+1 $vg/${lv1}_tmeta "$dev1"
+
vgremove -ff $vg
diff --git a/test/shell/lvconvert-thin.sh b/test/shell/lvconvert-thin.sh
index 951f6943e..b4b3a72b3 100644
--- a/test/shell/lvconvert-thin.sh
+++ b/test/shell/lvconvert-thin.sh
@@ -10,8 +10,12 @@
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+export LVM_TEST_THIN_REPAIR_CMD=${LVM_TEST_THIN_REPAIR_CMD-/bin/false}
+
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
prepare_lvs() {
lvremove -f $vg
lvcreate -L10M -n $lv1 $vg
@@ -32,7 +36,7 @@ vgcreate $vg1 $(head -n 3 DEVICES)
# maybe uname -m [ x86_64 | i686 ]
TSIZE=64T
aux can_use_16T || TSIZE=15T
-lvcreate -s -l 100%FREE -n $lv $vg1 --virtualsize $TSIZE
+lvcreate --type snapshot -l 100%FREE -n $lv $vg1 --virtualsize $TSIZE
aux extend_filter_LVMTEST
pvcreate "$DM_DEV_DIR/$vg1/$lv"
@@ -70,7 +74,7 @@ lvremove -f $vg
# Swaping of metadata volume
lvcreate -L1T -n $lv1 $vg
lvcreate -L32 -n $lv2 $vg
-lvconvert --yes -c 8M --type thin-pool $vg/$lv1 |& tee err
+lvconvert --yes -c 8M --type thin-pool $vg/$lv1 2>&1 | tee err
# Check tther is warning for large chunk size and zeroing enabled
grep "Pool zeroing and large" err
UUID=$(get lv_field $vg/$lv2 uuid)
@@ -107,7 +111,7 @@ invalid lvconvert -c 88 --thinpool $vg/$lv1 --poolmetadata $vg/$lv2
invalid lvconvert --yes --thinpool $vg/$lv3 -T $vg/$lv3
# Warning about smaller then suggested
-lvconvert --yes -c 256 --thinpool $vg/$lv1 --poolmetadata $vg/$lv2 |& tee err
+lvconvert --yes -c 256 --thinpool $vg/$lv1 --poolmetadata $vg/$lv2 2>&1 | tee err
grep "WARNING: Chunk size is smaller" err
lvremove -f $vg
@@ -115,7 +119,7 @@ lvremove -f $vg
lvcreate -L1T -n $lv1 $vg
lvcreate -L32G -n $lv2 $vg
# Warning about bigger then needed
-lvconvert --yes --thinpool $vg/$lv1 --poolmetadata $vg/$lv2 |& tee err
+lvconvert --yes --thinpool $vg/$lv1 --poolmetadata $vg/$lv2 2>&1 | tee err
grep "WARNING: Maximum" err
lvremove -f $vg
@@ -123,7 +127,7 @@ lvremove -f $vg
if test "$TSIZE" = 64T; then
lvcreate -L24T -n $lv1 $vg
# Warning about bigger then needed (24T data and 16G -> 128K chunk)
-lvconvert --yes -c 64 --thinpool $vg/$lv1 |& tee err
+lvconvert --yes -c 64 --thinpool $vg/$lv1 2>&1 | tee err
grep "WARNING: Chunk size is too small" err
lvremove -f $vg
fi
diff --git a/test/shell/lvcreate-cache.sh b/test/shell/lvcreate-cache.sh
index 89efc4dc6..b92190911 100644
--- a/test/shell/lvcreate-cache.sh
+++ b/test/shell/lvcreate-cache.sh
@@ -16,11 +16,14 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux have_cache 1 3 0 || skip
# FIXME: parallel cache metadata allocator is crashing when used value 8000!
aux prepare_vg 5 80000
+aux lvmconf 'global/cache_disabled_features = [ "policy_smq" ]'
#######################
# Cache_Pool creation #
@@ -49,7 +52,7 @@ fail lvcreate -l 1 -H --name $vg/$lv4 --cachepool pool7
fail lvcreate -l 1 --cachepool pool8 $vg
# no size specified
-invalid lvcreate --cachepool pool $vg |& tee err
+invalid lvcreate --cachepool pool $vg 2>&1 | tee err
grep "specify either size or extents" err
# Check nothing has been created yet
diff --git a/test/shell/lvcreate-large-raid.sh b/test/shell/lvcreate-large-raid.sh
index 182b2676e..c59ededaf 100644
--- a/test/shell/lvcreate-large-raid.sh
+++ b/test/shell/lvcreate-large-raid.sh
@@ -13,6 +13,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
# FIXME update test to make something useful on <16T
aux can_use_16T || skip
diff --git a/test/shell/lvcreate-large-raid10.sh b/test/shell/lvcreate-large-raid10.sh
index de1cc0274..47dda2b6a 100644
--- a/test/shell/lvcreate-large-raid10.sh
+++ b/test/shell/lvcreate-large-raid10.sh
@@ -13,6 +13,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
# FIXME update test to make something useful on <16T
aux can_use_16T || skip
diff --git a/test/shell/lvcreate-large.sh b/test/shell/lvcreate-large.sh
index 29bdd3344..e547fcc85 100644
--- a/test/shell/lvcreate-large.sh
+++ b/test/shell/lvcreate-large.sh
@@ -13,6 +13,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
# FIXME update test to make something useful on <16T
aux can_use_16T || skip
diff --git a/test/shell/lvcreate-mirror.sh b/test/shell/lvcreate-mirror.sh
index a15112695..275c036da 100644
--- a/test/shell/lvcreate-mirror.sh
+++ b/test/shell/lvcreate-mirror.sh
@@ -10,9 +10,12 @@
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
. lib/inittest
+
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_vg 5 80
-aux lvmconf 'allocation/maximise_cling = 0'
-aux lvmconf 'allocation/mirror_logs_require_separate_pvs = 1'
+aux lvmconf 'allocation/maximise_cling = 0' \
+ 'allocation/mirror_logs_require_separate_pvs = 1'
# 2-way mirror with corelog, 2 PVs
lvcreate -aey -l2 --type mirror -m1 --mirrorlog core -n $lv1 $vg "$dev1" "$dev2"
diff --git a/test/shell/lvcreate-missing.sh b/test/shell/lvcreate-missing.sh
index 4553e98e8..012962fc9 100644
--- a/test/shell/lvcreate-missing.sh
+++ b/test/shell/lvcreate-missing.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_vg 2
aux disable_dev "$dev1"
diff --git a/test/shell/lvcreate-operation.sh b/test/shell/lvcreate-operation.sh
index fac721924..3947bf850 100644
--- a/test/shell/lvcreate-operation.sh
+++ b/test/shell/lvcreate-operation.sh
@@ -13,6 +13,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
cleanup_lvs() {
lvremove -ff $vg
(dm_table | not grep $vg) || \
diff --git a/test/shell/lvcreate-pvtags.sh b/test/shell/lvcreate-pvtags.sh
index b7c814f75..99a7f0732 100644
--- a/test/shell/lvcreate-pvtags.sh
+++ b/test/shell/lvcreate-pvtags.sh
@@ -11,9 +11,11 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_pvs 3
-aux lvmconf 'allocation/maximise_cling = 0'
-aux lvmconf 'allocation/mirror_logs_require_separate_pvs = 1'
+aux lvmconf 'allocation/maximise_cling = 0' \
+ 'allocation/mirror_logs_require_separate_pvs = 1'
# not required, just testing
aux pvcreate --metadatacopies 0 "$dev1"
diff --git a/test/shell/lvcreate-raid.sh b/test/shell/lvcreate-raid.sh
index 6bbc3fe48..f5e25c66a 100644
--- a/test/shell/lvcreate-raid.sh
+++ b/test/shell/lvcreate-raid.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
lv_devices() {
test $3 -eq $(get lv_devices $1/$2 | wc -w)
}
@@ -76,32 +78,31 @@ done
# Create RAID using 100%FREE
############################
-# 6 PVs with 18.5m in each PV.
+# 6 PVs with 19m in each PV.
# 1 metadata LV = 1 extent = .5m
-# 1 image = 36+37+37 extents = 55.00m = lv_size
+# 1 image = 37+38+38 extents = 56.50m = lv_size
lvcreate --type raid1 -m 1 -l 100%FREE -an -Zn -n raid1 $vg
-check lv_field $vg/raid1 size "55.00m"
+check lv_field $vg/raid1 size "56.50m"
lvremove -ff $vg
# 1 metadata LV = 1 extent
-# 1 image = 36 extents
-# 5 images = 180 extents = 90.00m = lv_size
+# 1 image = 37 extents = 18.5m
+# 5 images = 185 extents = 92.5m = lv_size
+lvs -a $vg
lvcreate --type raid5 -i 5 -l 100%FREE -an -Zn -n raid5 $vg
-should check lv_field $vg/raid5 size "90.00m"
-#FIXME: Currently allocates incorrectly at 87.50m
+check lv_field $vg/raid5 size "92.50m"
lvremove -ff $vg
-# 1 image = 36+37 extents
-# 2 images = 146 extents = 73.00m = lv_size
+# 1 image = 37+38 extents
+# 2 images = 150 extents = 75.00m = lv_size
lvcreate --type raid5 -i 2 -l 100%FREE -an -Zn -n raid5 $vg
-check lv_field $vg/raid5 size "73.00m"
+check lv_field $vg/raid5 size "75.00m"
lvremove -ff $vg
-# 1 image = 36 extents
-# 4 images = 144 extents = 72.00m = lv_size
+# 1 image = 37 extents
+# 4 images = 148 extents = 74.00m = lv_size
lvcreate --type raid6 -i 4 -l 100%FREE -an -Zn -n raid6 $vg
-should check lv_field $vg/raid6 size "72.00m"
-#FIXME: Currnently allocates incorrectly at 70.00m
+check lv_field $vg/raid6 size "74.00m"
lvremove -ff $vg
###
@@ -111,9 +112,9 @@ EAT_SIZE=$(get lv_field $vg/eat_space size)
# Using 100% free should take the rest of dev1 and equal from dev2
# 1 meta takes 1 extent
-# 1 image = 18 extents = 9.00m = lv_size
+# 1 image = 19 extents = 9.50m = lv_size
lvcreate --type raid1 -m 1 -l 100%FREE -an -Zn -n raid1 $vg "$dev1" "$dev2"
-check lv_field $vg/raid1 size "9.00m"
+check lv_field $vg/raid1 size "9.50m"
# Ensure image size is the same as the RAID1 size
check lv_field $vg/raid1 size $(get lv_field $vg/raid1_rimage_0 size -a)
# Amount remaining in dev2 should equal the amount taken by 'lv' in dev1
@@ -122,10 +123,10 @@ lvremove -ff $vg/raid1
# Using 100% free should take the rest of dev1 and equal amount from the rest
# 1 meta takes 1 extent
-# 1 image = 18 extents = 9.00m
-# 5 images = 90 extents = 45.00m = lv_size
+# 1 image = 19 extents = 9.50m
+# 5 images = 95 extents = 47.50m = lv_size
lvcreate --type raid5 -i 5 -l 100%FREE -an -Zn -n raid5 $vg
-check lv_field $vg/raid5 size "45.00m"
+check lv_field $vg/raid5 size "47.50m"
# Amount remaining in dev6 should equal the amount taken by 'lv' in dev1
check pv_field "$dev6" pv_free "$EAT_SIZE"
lvremove -ff $vg/raid5
@@ -133,26 +134,26 @@ lvremove -ff $vg/raid5
# Using 100% free should take the rest of dev1, an equal amount
# from 2 more devs, and all extents from 3 additional devs
# 1 meta takes 1 extent
-# 1 image = 18+37 extents
-# 2 images = 110 extents = 55.00m = lv_size
+# 1 image = 19+39 extents
+# 2 images = 114 extents = 57.00m = lv_size
lvcreate --type raid5 -i 2 -l 100%FREE -an -Zn -n raid5 $vg
-check lv_field $vg/raid5 size "55.00m"
+check lv_field $vg/raid5 size "57.00m"
lvremove -ff $vg/raid5
# Let's do some stripe tests too
# Using 100% free should take the rest of dev1 and an equal amount from rest
-# 1 image = 19 extents
-# 6 images = 114 extents = 57.00m = lv_size
+# 1 image = 20 extents
+# 6 images = 120 extents = 60.00m = lv_size
lvcreate -i 6 -l 100%FREE -an -Zn -n stripe $vg
-check lv_field $vg/stripe size "57.00m"
+check lv_field $vg/stripe size "60.00m"
lvremove -ff $vg/stripe
# Using 100% free should take the rest of dev1, an equal amount from
# one more dev, and all of the remaining 4
-# 1 image = 19+37+37 extents
-# 2 images = 186 extents = 93.00m = lv_size
+# 1 image = 20+38+38 extents
+# 2 images = 192 extents = 96.00m = lv_size
lvcreate -i 2 -l 100%FREE -an -Zn -n stripe $vg
-check lv_field $vg/stripe size "93.00m"
+check lv_field $vg/stripe size "96.00m"
lvremove -ff $vg
# end of use of '$vg/eat_space'
diff --git a/test/shell/lvcreate-raid10.sh b/test/shell/lvcreate-raid10.sh
index e334bd14a..884653b46 100644
--- a/test/shell/lvcreate-raid10.sh
+++ b/test/shell/lvcreate-raid10.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
lv_devices() {
test $3 -eq $(get lv_devices $1/$2 | wc -w)
}
@@ -50,11 +52,11 @@ aux wait_for_sync $vg $lv2
lvremove -ff $vg
# Test 100%FREE option
-# 37 extents / device
-# 1 image = 36 extents (1 for meta)
-# 3 images = 108 extents = 54.00m
+# 38 extents / device
+# 1 image = 37 extents (1 for meta)
+# 3 images = 111 extents = 55.50m
lvcreate --type raid10 -i 3 -l 100%FREE -an -Zn -n raid10 $vg
-check lv_field $vg/raid10 size "54.00m"
+check lv_field $vg/raid10 size "55.50m"
lvremove -ff $vg
# Create RAID (implicit stripe count based on PV count)
diff --git a/test/shell/lvcreate-repair.sh b/test/shell/lvcreate-repair.sh
index 169056547..469bf3946 100644
--- a/test/shell/lvcreate-repair.sh
+++ b/test/shell/lvcreate-repair.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_vg 3
# fail multiple devices
diff --git a/test/shell/lvcreate-signature-wiping.sh b/test/shell/lvcreate-signature-wiping.sh
index 32ef36381..d2d23c595 100644
--- a/test/shell/lvcreate-signature-wiping.sh
+++ b/test/shell/lvcreate-signature-wiping.sh
@@ -13,6 +13,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
init_lv_() {
mkswap "$DM_DEV_DIR/$vg/$lv1"
}
diff --git a/test/shell/lvcreate-small-snap.sh b/test/shell/lvcreate-small-snap.sh
index b29c5f8f8..b5610c782 100644
--- a/test/shell/lvcreate-small-snap.sh
+++ b/test/shell/lvcreate-small-snap.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_pvs
vgcreate -s 1k $vg $(cat DEVICES)
diff --git a/test/shell/lvcreate-striped-mirror.sh b/test/shell/lvcreate-striped-mirror.sh
index eaa7fb859..735b396bd 100644
--- a/test/shell/lvcreate-striped-mirror.sh
+++ b/test/shell/lvcreate-striped-mirror.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_vg 9
lvcreate -aey --nosync -i2 -l2 --type mirror -m1 --mirrorlog core -n $lv1 $vg 2>&1 | tee log
diff --git a/test/shell/lvcreate-thin-big.sh b/test/shell/lvcreate-thin-big.sh
new file mode 100644
index 000000000..dbd098684
--- /dev/null
+++ b/test/shell/lvcreate-thin-big.sh
@@ -0,0 +1,67 @@
+#!/bin/sh
+
+# Copyright (C) 2015 Red Hat, Inc. All rights reserved.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU General Public License v.2.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+# test currently needs to drop
+# 'return NULL' in _lv_create_an_lv after log_error("Can't create %s without using "
+
+export LVM_TEST_THIN_REPAIR_CMD=${LVM_TEST_THIN_REPAIR_CMD-/bin/false}
+
+. lib/inittest
+
+test -e LOCAL_LVMPOLLD && skip
+
+aux have_thin 1 0 0 || skip
+
+# Test --poolmetadatasize range
+# allocating large devices for testing
+aux prepare_pvs 10 16500
+vgcreate $vg -s 64K $(cat DEVICES)
+
+# Size 0 is not valid
+invalid lvcreate -L4M --chunksize 128 --poolmetadatasize 0 -T $vg/pool1 2>out
+lvcreate -L4M --chunksize 128 --poolmetadatasize 16k -T $vg/pool1 2>out
+grep "WARNING: Minimum" out
+# FIXME: metadata allocation fails, if PV doesn't have at least 16GB
+# i.e. pool metadata device cannot be multisegment
+lvcreate -L4M --chunksize 64k --poolmetadatasize 17G -T $vg/pool2 2>out
+grep "WARNING: Maximum" out
+check lv_field $vg/pool1_tmeta size "2.00m"
+check lv_field $vg/pool2_tmeta size "16.00g"
+lvremove -ff $vg
+
+# Test automatic calculation of pool metadata size
+lvcreate -L160G -T $vg/pool
+check lv_field $vg/pool lv_metadata_size "80.00m"
+check lv_field $vg/pool chunksize "128.00k"
+lvremove -ff $vg/pool
+
+lvcreate -L10G --chunksize 256 -T $vg/pool1
+lvcreate -L60G --chunksize 1024 -T $vg/pool2
+check lv_field $vg/pool1_tmeta size "2.50m"
+check lv_field $vg/pool2_tmeta size "3.75m"
+lvremove -ff $vg
+
+# Block size of multiple 64KB needs >= 1.4
+if aux have_thin 1 4 0 ; then
+# Test chunk size is rounded to 64KB boundary
+lvcreate -L10G --poolmetadatasize 4M -T $vg/pool
+check lv_field $vg/pool chunk_size "192.00k"
+fi
+# Old thinpool target required rounding to power of 2
+aux lvmconf "global/thin_disabled_features = [ \"block_size\" ]"
+lvcreate -L10G --poolmetadatasize 4M -T $vg/pool_old
+check lv_field $vg/pool_old chunk_size "256.00k"
+lvremove -ff $vg
+# reset
+#aux lvmconf "global/thin_disabled_features = []"
+
+vgremove -ff $vg
diff --git a/test/shell/lvcreate-thin-external-size.sh b/test/shell/lvcreate-thin-external-size.sh
new file mode 100644
index 000000000..d8e98f9dd
--- /dev/null
+++ b/test/shell/lvcreate-thin-external-size.sh
@@ -0,0 +1,92 @@
+#!/bin/sh
+
+# Copyright (C) 2015 Red Hat, Inc. All rights reserved.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU General Public License v.2.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+# Test unaligned size of external origin and thin pool chunk size
+
+export LVM_TEST_THIN_REPAIR_CMD=${LVM_TEST_THIN_REPAIR_CMD-/bin/false}
+
+. lib/inittest
+
+test -e LOCAL_LVMPOLLD && skip
+
+which cmp || skip
+
+#
+# Main
+#
+
+# Test needs thin-pool target with unaligned ext-orig size support
+aux have_thin 1 13 0 || skip
+
+aux prepare_pvs 2 640
+
+# Use 8K extent size
+vgcreate $vg -s 8K $(cat DEVICES)
+
+# Prepare some numeric pattern with ~64K size
+seq -s ' ' -w 0 10922 > 64K
+
+d1="$DM_DEV_DIR/$vg/$lv1"
+d2="$DM_DEV_DIR/$vg/$lv2"
+
+# Prepare external origin LV with size not being a multiple of thin pool chunk size
+lvcreate -l47 -n $lv1 $vg
+
+# Fill end with pattern
+dd if=64K of="$d1" bs=8192 seek=45 count=2
+
+# Switch to read-only volume
+lvchange -an $vg/$lv1
+lvchange -pr $vg/$lv1
+
+lvcreate -L2M -T $vg/pool -c 192K
+lvcreate -s $vg/$lv1 --name $lv2 --thinpool $vg/pool
+
+# Check the tail of $lv2 matches $lv1
+dd if="$d2" of=16K bs=8192 skip=45 count=2
+cmp -n 16384 -l 64K 16K
+
+# Now extend and rewrite
+lvextend -l+2 $vg/$lv2
+
+dd if=64K of="$d2" bs=8192 seek=46 count=3 oflag=direct
+dd if="$d2" of=24K bs=8192 skip=46 count=3 iflag=direct
+cmp -n 24576 -l 64K 24K
+
+# Consumes 2 192K chunks -> 66.67%
+check lv_field $vg/$lv2 data_percent "66.67"
+
+lvreduce -f -l-24 $vg/$lv2
+
+dd if=64K of="$d2" bs=8192 seek=24 count=1 oflag=direct
+dd if="$d2" of=8K bs=8192 skip=24 count=1 iflag=direct
+cmp -n 8192 -l 64K 8K
+
+# Check extension still works
+lvextend -l+2 $vg/$lv2
+
+lvremove -f $vg/pool
+
+lvcreate -L256M -T $vg/pool -c 64M
+lvcreate -s $vg/$lv1 --name $lv2 --thinpool $vg/pool
+lvextend -l+2 $vg/$lv2
+
+dd if=64K of="$d2" bs=8192 seek=45 count=4 oflag=direct
+dd if="$d2" of=32K bs=8192 skip=45 count=4 iflag=direct
+cmp -n 32768 -l 64K 32K
+
+lvextend -L+64M $vg/$lv2
+
+# Consumes 64M chunk -> 50%
+check lv_field $vg/$lv2 data_percent "50.00"
+
+vgremove -ff $vg
diff --git a/test/shell/lvcreate-thin-external.sh b/test/shell/lvcreate-thin-external.sh
index 65401fbdb..85078366e 100644
--- a/test/shell/lvcreate-thin-external.sh
+++ b/test/shell/lvcreate-thin-external.sh
@@ -12,8 +12,12 @@
# Test creation of thin snapshots using external origin
+export LVM_TEST_THIN_REPAIR_CMD=${LVM_TEST_THIN_REPAIR_CMD-/bin/false}
+
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
which mkfs.ext2 || skip
which fsck || skip
@@ -26,6 +30,10 @@ aux prepare_pvs 2 64
vgcreate $vg -s 64K $(cat DEVICES)
+# Newer thin-pool target (>= 1.13) supports unaligned external origin,
+# but this test is written to exercise and expect the older behavior
+aux lvmconf 'global/thin_disabled_features = [ "external_origin_extend" ]'
+
# Test validation for external origin being multiple of thin pool chunk size
lvcreate -L10M -T $vg/pool192 -c 192k
lvcreate -an -pr -Zn -l1 -n $lv1 $vg
diff --git a/test/shell/lvcreate-thin-power2.sh b/test/shell/lvcreate-thin-power2.sh
index ed62db3f8..4d3d175b2 100644
--- a/test/shell/lvcreate-thin-power2.sh
+++ b/test/shell/lvcreate-thin-power2.sh
@@ -13,8 +13,12 @@
# test support for non-power-of-2 thin chunk size
#
+export LVM_TEST_THIN_REPAIR_CMD=${LVM_TEST_THIN_REPAIR_CMD-/bin/false}
+
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
#
# Main
#
diff --git a/test/shell/lvcreate-thin-snap.sh b/test/shell/lvcreate-thin-snap.sh
index 784eb5d29..76929ccec 100644
--- a/test/shell/lvcreate-thin-snap.sh
+++ b/test/shell/lvcreate-thin-snap.sh
@@ -10,8 +10,12 @@
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+export LVM_TEST_THIN_REPAIR_CMD=${LVM_TEST_THIN_REPAIR_CMD-/bin/false}
+
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
check_lv_field_modules_()
{
mod=$1
diff --git a/test/shell/lvcreate-thin.sh b/test/shell/lvcreate-thin.sh
index 2884d5331..56d7e354f 100644
--- a/test/shell/lvcreate-thin.sh
+++ b/test/shell/lvcreate-thin.sh
@@ -13,8 +13,12 @@
# test currently needs to drop
# 'return NULL' in _lv_create_an_lv after log_error("Can't create %s without using "
+export LVM_TEST_THIN_REPAIR_CMD=${LVM_TEST_THIN_REPAIR_CMD-/bin/false}
+
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
check_lv_field_modules_()
{
mod=$1
@@ -232,49 +236,3 @@ not lvcreate -T mirpool -L4M --alloc anywhere -m1 $vg
not lvcreate --thinpool mirpool -L4M --alloc anywhere -m1 $vg
vgremove -ff $vg
-
-# Test --poolmetadatasize range
-# allocating large devices for testing
-aux teardown_devs
-aux prepare_pvs 10 16500
-vgcreate $vg -s 64K $(cat DEVICES)
-
-# Size 0 is not valid
-invalid lvcreate -L4M --chunksize 128 --poolmetadatasize 0 -T $vg/pool1 2>out
-lvcreate -L4M --chunksize 128 --poolmetadatasize 16k -T $vg/pool1 2>out
-grep "WARNING: Minimum" out
-# FIXME: metadata allocation fails, if PV doesn't have at least 16GB
-# i.e. pool metadata device cannot be multisegment
-lvcreate -L4M --chunksize 64k --poolmetadatasize 17G -T $vg/pool2 2>out
-grep "WARNING: Maximum" out
-check lv_field $vg/pool1_tmeta size "2.00m"
-check lv_field $vg/pool2_tmeta size "16.00g"
-lvremove -ff $vg
-
-# Test automatic calculation of pool metadata size
-lvcreate -L160G -T $vg/pool
-check lv_field $vg/pool lv_metadata_size "80.00m"
-check lv_field $vg/pool chunksize "128.00k"
-lvremove -ff $vg/pool
-
-lvcreate -L10G --chunksize 256 -T $vg/pool1
-lvcreate -L60G --chunksize 1024 -T $vg/pool2
-check lv_field $vg/pool1_tmeta size "2.50m"
-check lv_field $vg/pool2_tmeta size "3.75m"
-lvremove -ff $vg
-
-# Block size of multiple 64KB needs >= 1.4
-if aux have_thin 1 4 0 ; then
-# Test chunk size is rounded to 64KB boundary
-lvcreate -L10G --poolmetadatasize 4M -T $vg/pool
-check lv_field $vg/pool chunk_size "192.00k"
-fi
-# Old thinpool target required rounding to power of 2
-aux lvmconf "global/thin_disabled_features = [ \"block_size\" ]"
-lvcreate -L10G --poolmetadatasize 4M -T $vg/pool_old
-check lv_field $vg/pool_old chunk_size "256.00k"
-lvremove -ff $vg
-# reset
-#aux lvmconf "global/thin_disabled_features = []"
-
-vgremove -ff $vg
diff --git a/test/shell/lvcreate-usage.sh b/test/shell/lvcreate-usage.sh
index fe8200565..d9355032f 100644
--- a/test/shell/lvcreate-usage.sh
+++ b/test/shell/lvcreate-usage.sh
@@ -125,7 +125,7 @@ lvcreate -aey -L 32m -n $lv --regionsize 4m --type mirror -m 1 $vg
check lv_field $vg/$lv regionsize "4.00m"
# -m0 is creating non-mirrored segment and give info about redundant option
-lvcreate -m 0 -l1 -n $lv1 $vg |& tee err
+lvcreate -m 0 -l1 -n $lv1 $vg 2>&1 | tee err
grep "Redundant" err
check lv_field $vg/$lv1 segtype "linear"
lvremove -ff $vg
@@ -175,9 +175,9 @@ fail lvcreate -Mn --major 234 -l1 $vg
fail lvcreate --persistent n --minor 234 -l1 $vg
# out-of-range minor value
fail lvcreate --minor 9999999 -l1 $vg
-if kernel_at_least 2 4 0; then
+if aux kernel_at_least 2 4 0; then
# On >2.4 we ignore --major
-lvcreate --major 234 -l1 $vg |& tee err;
+lvcreate --major 234 -l1 $vg 2>&1 | tee err;
grep "Ignoring" err
# Try some bigger possibly unused minor
if test ! -d /sys/block/dm-2345; then
diff --git a/test/shell/lvextend-percent-extents.sh b/test/shell/lvextend-percent-extents.sh
index 9b5dc5d09..ec98e5913 100644
--- a/test/shell/lvextend-percent-extents.sh
+++ b/test/shell/lvextend-percent-extents.sh
@@ -13,6 +13,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_pvs 2 128
vgcreate $vg $(cat DEVICES)
diff --git a/test/shell/lvextend-snapshot-dmeventd.sh b/test/shell/lvextend-snapshot-dmeventd.sh
index 3d9ce1f26..aeb18f104 100644
--- a/test/shell/lvextend-snapshot-dmeventd.sh
+++ b/test/shell/lvextend-snapshot-dmeventd.sh
@@ -11,13 +11,14 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
extend() {
lvextend --use-policies --config "activation { snapshot_autoextend_threshold = $1 }" $vg/snap
}
write_() {
- dd if=/dev/zero of="$DM_DEV_DIR/$vg/snap" bs=1k count=$2 seek=$1
- sync
+ dd if=/dev/zero of="$DM_DEV_DIR/$vg/snap" bs=1k count=$2 seek=$1 oflag=direct
}
percent_() {
@@ -26,7 +27,7 @@ percent_() {
wait_for_change_() {
# dmeventd only checks every 10 seconds :(
- for i in $(seq 1 15) ; do
+ for i in $(seq 1 25) ; do
test "$(percent_)" != "$1" && return
sleep 1
done
@@ -47,6 +48,10 @@ lvchange --monitor y $vg/snap
write_ 1000 1700
pre=$(percent_)
+# Normally the usage should be ~66% here; however, on slower systems
+# dmeventd could actually be 'fast' enough to have already resized the COW,
+# so mark the test as skipped if we are below 50% by now
+test $pre -gt 50 || skip
wait_for_change_ $pre
test $pre -gt $(percent_)
@@ -56,6 +61,8 @@ test $pre -gt $(percent_)
write_ 2700 2000
pre=$(percent_)
+# Mark test as skipped if already resized...
+test $pre -gt 70 || skip
wait_for_change_ $pre
test $pre -gt $(percent_)
diff --git a/test/shell/lvextend-snapshot-policy.sh b/test/shell/lvextend-snapshot-policy.sh
index b0a611eff..c76e36edc 100644
--- a/test/shell/lvextend-snapshot-policy.sh
+++ b/test/shell/lvextend-snapshot-policy.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
which mkfs.ext2 || skip
extend() {
diff --git a/test/shell/lvextend-thin-metadata-dmeventd.sh b/test/shell/lvextend-thin-metadata-dmeventd.sh
index d7be3a2ac..4e80f7516 100644
--- a/test/shell/lvextend-thin-metadata-dmeventd.sh
+++ b/test/shell/lvextend-thin-metadata-dmeventd.sh
@@ -10,8 +10,13 @@
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
# Test autoextension of thin metadata volume
+
+export LVM_TEST_THIN_REPAIR_CMD=${LVM_TEST_THIN_REPAIR_CMD-/bin/false}
+
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
meta_percent_() {
get lv_field $vg/pool metadata_percent | cut -d. -f1
}
@@ -56,9 +61,11 @@ aux prepare_pvs 3 256
vgcreate -s 1M $vg $(cat DEVICES)
# Testing dmeventd autoresize
-lvcreate -L200M -V1G -n thin -T $vg/pool
+lvcreate -L200M -V500M -n thin -T $vg/pool 2>&1 | tee out
+not grep "WARNING: Sum" out
+lvcreate -V2M -n thin2 $vg/pool
lvcreate -L2M -n $lv1 $vg
-lvchange -an $vg/thin $vg/pool
+lvchange -an $vg/thin $vg/thin2 $vg/pool
# Prepare some fake metadata with unmatching id
# Transaction_id is lower by 1 and there are no message -> ERROR
@@ -71,7 +78,7 @@ grep expected out
check inactive $vg pool_tmeta
# Transaction_id is higher by 1
-fake_metadata_ 10 2 >data
+fake_metadata_ 10 3 >data
"$LVM_TEST_THIN_RESTORE_CMD" -i data -o "$DM_DEV_DIR/mapper/$vg-$lv1"
lvconvert -y --thinpool $vg/pool --poolmetadata $vg/$lv1
not vgchange -ay $vg 2>&1 | tee out
@@ -80,7 +87,7 @@ grep expected out
check inactive $vg pool_tmeta
# Prepare some fake metadata prefilled to ~81% (>70%)
-fake_metadata_ 400 1 >data
+fake_metadata_ 400 2 >data
"$LVM_TEST_THIN_RESTORE_CMD" -i data -o "$DM_DEV_DIR/mapper/$vg-$lv1"
# Swap volume with restored fake metadata
diff --git a/test/shell/lvextend-thin.sh b/test/shell/lvextend-thin.sh
index f9e24cea6..1e64e42b4 100644
--- a/test/shell/lvextend-thin.sh
+++ b/test/shell/lvextend-thin.sh
@@ -9,8 +9,12 @@
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+export LVM_TEST_THIN_REPAIR_CMD=${LVM_TEST_THIN_REPAIR_CMD-/bin/false}
+
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux have_thin 1 0 0 || skip
aux prepare_vg 3
diff --git a/test/shell/lvm-init.sh b/test/shell/lvm-init.sh
index c3f7ecfd4..cfdd4cc58 100644
--- a/test/shell/lvm-init.sh
+++ b/test/shell/lvm-init.sh
@@ -15,6 +15,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_devs 5
# invalid units
diff --git a/test/shell/lvmcache-exercise.sh b/test/shell/lvmcache-exercise.sh
index 2aaf65095..f0d3ee949 100644
--- a/test/shell/lvmcache-exercise.sh
+++ b/test/shell/lvmcache-exercise.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_pvs 5
vgcreate $vg1 "$dev1"
diff --git a/test/shell/lvmetad-ambiguous.sh b/test/shell/lvmetad-ambiguous.sh
index 2e7c6c515..777a63e83 100644
--- a/test/shell/lvmetad-ambiguous.sh
+++ b/test/shell/lvmetad-ambiguous.sh
@@ -12,6 +12,7 @@
. lib/inittest
test -e LOCAL_LVMETAD || skip
+test -e LOCAL_LVMPOLLD && skip
aux prepare_pvs 2
diff --git a/test/shell/lvmetad-client-filter.sh b/test/shell/lvmetad-client-filter.sh
index 3002a47f3..5e6d967cf 100644
--- a/test/shell/lvmetad-client-filter.sh
+++ b/test/shell/lvmetad-client-filter.sh
@@ -12,6 +12,8 @@
. lib/inittest
test -e LOCAL_LVMETAD || skip
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_pvs 2
pvs --config 'devices { filter = [ "r%.*%" ] }' 2>&1 | grep rejected
diff --git a/test/shell/lvmetad-disabled.sh b/test/shell/lvmetad-disabled.sh
index efdca1681..9c71717a3 100644
--- a/test/shell/lvmetad-disabled.sh
+++ b/test/shell/lvmetad-disabled.sh
@@ -12,6 +12,8 @@
. lib/inittest
test -e LOCAL_LVMETAD || skip
+test -e LOCAL_LVMPOLLD && skip
+
kill $(< LOCAL_LVMETAD)
while test -e "$TESTDIR/lvmetad.socket"; do echo -n .; sleep .1; done # wait for the socket close
test ! -e "$LVM_LVMETAD_PIDFILE"
diff --git a/test/shell/lvmetad-dump.sh b/test/shell/lvmetad-dump.sh
index 14d57532c..631636ea6 100644
--- a/test/shell/lvmetad-dump.sh
+++ b/test/shell/lvmetad-dump.sh
@@ -10,7 +10,9 @@
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
. lib/inittest
+
test -e LOCAL_LVMETAD || skip
+test -e LOCAL_LVMPOLLD && skip
aux prepare_pvs 2
vgcreate $vg1 "$dev1" "$dev2"
diff --git a/test/shell/lvmetad-lvm1.sh b/test/shell/lvmetad-lvm1.sh
index f071b1420..f234b154c 100644
--- a/test/shell/lvmetad-lvm1.sh
+++ b/test/shell/lvmetad-lvm1.sh
@@ -12,6 +12,8 @@
. lib/inittest
test -e LOCAL_LVMETAD || skip
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_devs 2
pvcreate --metadatatype 1 "$dev1"
should vgscan --cache
diff --git a/test/shell/lvmetad-lvscan-cache.sh b/test/shell/lvmetad-lvscan-cache.sh
index e63fd20a1..220e464c7 100644
--- a/test/shell/lvmetad-lvscan-cache.sh
+++ b/test/shell/lvmetad-lvscan-cache.sh
@@ -12,11 +12,12 @@
. lib/inittest
test -e LOCAL_LVMETAD || skip
+test -e LOCAL_LVMPOLLD && skip
aux prepare_pvs 2
vgcreate $vg1 "$dev1" "$dev2"
-lvcreate -n testlv -m 1 -l 1 $vg1
+lvcreate -n testlv --type mirror -m 1 -l 1 $vg1
vgs | grep $vg1
lvscan --cache $vg1/testlv
diff --git a/test/shell/lvmetad-no-cluster.sh b/test/shell/lvmetad-no-cluster.sh
index 85c1ea920..83fee47fd 100644
--- a/test/shell/lvmetad-no-cluster.sh
+++ b/test/shell/lvmetad-no-cluster.sh
@@ -12,6 +12,8 @@
. lib/inittest
test -e LOCAL_CLVMD || skip
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_vg 2
aux prepare_lvmetad
vgs -vv 2> errs
diff --git a/test/shell/lvmetad-override.sh b/test/shell/lvmetad-override.sh
index ea76b3536..15ada7b01 100644
--- a/test/shell/lvmetad-override.sh
+++ b/test/shell/lvmetad-override.sh
@@ -12,6 +12,8 @@
. lib/inittest
test -e LOCAL_LVMETAD || skip
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_pvs 2
vgcreate $vg1 "$dev1" "$dev2"
diff --git a/test/shell/lvmetad-pvs.sh b/test/shell/lvmetad-pvs.sh
index 7f254905f..81174f4cd 100644
--- a/test/shell/lvmetad-pvs.sh
+++ b/test/shell/lvmetad-pvs.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_pvs 1 20000
pvs $(cat DEVICES) | grep "$dev1"
diff --git a/test/shell/lvmetad-pvscan-cache.sh b/test/shell/lvmetad-pvscan-cache.sh
index 5c488173b..33b4d942c 100644
--- a/test/shell/lvmetad-pvscan-cache.sh
+++ b/test/shell/lvmetad-pvscan-cache.sh
@@ -12,6 +12,7 @@
. lib/inittest
test -e LOCAL_LVMETAD || skip
+test -e LOCAL_LVMPOLLD && skip
aux prepare_pvs 2
@@ -22,4 +23,17 @@ pvscan --cache
vgs | grep $vg1
+# When MDA is ignored on PV, do not read any VG
+# metadata from such PV as it may contain old
+# metadata which hasn't been updated for some
+# time and also since the MDA is marked as ignored,
+# it should really be *ignored*!
+pvchange --metadataignore y "$dev1"
+aux disable_dev "$dev2"
+pvscan --cache
+check pv_field "$dev1" vg_name ""
+aux enable_dev "$dev2"
+pvscan --cache
+check pv_field "$dev1" vg_name "$vg1"
+
vgremove -ff $vg1
diff --git a/test/shell/lvmetad-pvscan-filter.sh b/test/shell/lvmetad-pvscan-filter.sh
index a3af1b20b..b7bdf339b 100644
--- a/test/shell/lvmetad-pvscan-filter.sh
+++ b/test/shell/lvmetad-pvscan-filter.sh
@@ -12,14 +12,17 @@
. lib/inittest
test -e LOCAL_LVMETAD || skip
+test -e LOCAL_LVMPOLLD && skip
aux prepare_pvs 2
maj=$(($(stat -L --printf=0x%t "$dev2")))
min=$(($(stat -L --printf=0x%T "$dev2")))
+# Filter out device, pvscan should trigger
+# clearing of the device from lvmetad cache.
aux hide_dev "$dev2"
-not pvscan --cache "$dev2" 2>&1 | grep "not found"
+pvscan --cache "$dev2" 2>&1 | grep "not found"
# pvscan with --major/--minor does not fail: lvmetad needs to
# be notified about device removal on REMOVE uevent, hence
# this should not fail so udev does not grab a "failed" state
diff --git a/test/shell/lvmetad-pvscan-md.sh b/test/shell/lvmetad-pvscan-md.sh
index 449369d10..9c5e3fa58 100644
--- a/test/shell/lvmetad-pvscan-md.sh
+++ b/test/shell/lvmetad-pvscan-md.sh
@@ -1,5 +1,5 @@
#!/bin/sh
-# Copyright (C) 2014 Red Hat, Inc. All rights reserved.
+# Copyright (C) 2014-2015 Red Hat, Inc. All rights reserved.
#
# This copyrighted material is made available to anyone wishing to use,
# modify, copy, or redistribute it subject to the terms and conditions
@@ -12,66 +12,26 @@
. lib/inittest
test -e LOCAL_LVMETAD || skip
-which mdadm || skip
+test -e LOCAL_LVMPOLLD && skip
test -f /proc/mdstat && grep -q raid0 /proc/mdstat || \
modprobe raid0 || skip
-aux lvmconf 'devices/md_component_detection = 1'
-aux extend_filter_LVMTEST
-aux extend_filter "a|/dev/md.*|"
-
aux prepare_devs 2
-# TODO factor out the following MD-creation code into lib/
-
-# Have MD use a non-standard name to avoid colliding with an existing MD device
-# - mdadm >= 3.0 requires that non-standard device names be in /dev/md/
-# - newer mdadm _completely_ defers to udev to create the associated device node
-mdadm_maj=$(mdadm --version 2>&1 | perl -pi -e 's|.* v(\d+).*|\1|')
-[ $mdadm_maj -ge 3 ] && \
- mddev=/dev/md/md_lvm_test0 || \
- mddev=/dev/md_lvm_test0
-
-cleanup_md() {
- # sleeps offer hack to defeat: 'md: md127 still in use'
- # see: https://bugzilla.redhat.com/show_bug.cgi?id=509908#c25
- aux udev_wait
- mdadm --stop "$mddev" || true
- aux udev_wait
- if [ -b "$mddev" ]; then
- # mdadm doesn't always cleanup the device node
- sleep 2
- rm -f "$mddev"
- fi
-}
-
-cleanup_md_and_teardown() {
- cleanup_md
- aux teardown
-}
-
# create 2 disk MD raid0 array (stripe_width=128K)
-test -b "$mddev" && skip
-mdadm --create --metadata=1.0 "$mddev" --auto=md --level 0 --raid-devices=2 --chunk 64 "$dev1" "$dev2"
-trap 'cleanup_md_and_teardown' EXIT # cleanup this MD device at the end of the test
-test -b "$mddev" || skip
-cp -LR "$mddev" "$DM_DEV_DIR" # so that LVM/DM can see the device
-lvmdev="$DM_DEV_DIR/md_lvm_test0"
-
-# TODO end MD-creation code
+aux prepare_md_dev 0 64 2 "$dev1" "$dev2"
-# maj=$(($(stat -L --printf=0x%t "$dev2")))
-# min=$(($(stat -L --printf=0x%T "$dev2")))
+aux lvmconf 'devices/md_component_detection = 1'
+aux extend_filter_LVMTEST
+aux extend_filter "a|/dev/md.*|"
-pvcreate $lvmdev
+pvdev=$(< MD_DEV_PV)
-pvscan --cache "$lvmdev"
+pvcreate "$pvdev"
# ensure that lvmetad can only see the toplevel MD device
-not pvscan --cache "$dev1" 2>&1 | grep "not found"
-not pvscan --cache "$dev2" 2>&1 | grep "not found"
-
-pvs | grep $lvmdev
-pvs | not grep "$dev1"
-pvs | not grep "$dev2"
+pvs | tee out
+grep "$pvdev" out
+not grep "$dev1" out
+not grep "$dev2" out
diff --git a/test/shell/lvmetad-pvscan-nomda-bg.sh b/test/shell/lvmetad-pvscan-nomda-bg.sh
new file mode 100644
index 000000000..e17dfd27b
--- /dev/null
+++ b/test/shell/lvmetad-pvscan-nomda-bg.sh
@@ -0,0 +1,57 @@
+#!/bin/sh
+# Copyright (C) 2015 Red Hat, Inc. All rights reserved.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU General Public License v.2.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+. lib/inittest
+
+test -e LOCAL_LVMETAD || skip
+test -e LOCAL_LVMPOLLD && skip
+
+kill $(< LOCAL_LVMETAD)
+rm LOCAL_LVMETAD
+
+aux prepare_devs 2
+
+pvcreate --metadatacopies 0 "$dev1"
+pvcreate --metadatacopies 1 "$dev2"
+vgcreate $vg1 "$dev1" "$dev2"
+lvcreate -n foo -l 1 -an --zero n $vg1
+
+# start lvmetad but make sure it doesn't know about $dev1 or $dev2
+aux disable_dev "$dev1" "$dev2"
+aux prepare_lvmetad
+lvs
+mv LOCAL_LVMETAD XXX
+aux enable_dev "$dev2" "$dev1"
+mv XXX LOCAL_LVMETAD
+
+aux lvmconf 'global/use_lvmetad = 0'
+check inactive $vg1 foo
+aux lvmconf 'global/use_lvmetad = 1'
+
+pvscan --cache --background "$dev2" -aay
+
+aux lvmconf 'global/use_lvmetad = 0'
+# FIXME: inconclusive. may be a timing issue
+check inactive $vg1 foo
+aux lvmconf 'global/use_lvmetad = 1'
+
+pvscan --cache --background "$dev1" -aay
+
+aux lvmconf 'global/use_lvmetad = 0'
+i=100
+while ! check active $vg1 foo; do
+ test $i -lt 0 && fail "Failed to autoactivate"
+ sleep .1
+ i=$((i-1))
+done
+aux lvmconf 'global/use_lvmetad = 1'
+
+vgremove -ff $vg1
diff --git a/test/shell/lvmetad-pvscan-nomda.sh b/test/shell/lvmetad-pvscan-nomda.sh
index 39604d056..f7e13a72b 100644
--- a/test/shell/lvmetad-pvscan-nomda.sh
+++ b/test/shell/lvmetad-pvscan-nomda.sh
@@ -12,6 +12,8 @@
. lib/inittest
test -e LOCAL_LVMETAD || skip
+test -e LOCAL_LVMPOLLD && skip
+
kill $(< LOCAL_LVMETAD)
rm LOCAL_LVMETAD
@@ -23,13 +25,11 @@ vgcreate $vg1 "$dev1" "$dev2"
lvcreate -n foo -l 1 -an --zero n $vg1
# start lvmetad but make sure it doesn't know about $dev1 or $dev2
-aux disable_dev "$dev1"
-aux disable_dev "$dev2"
+aux disable_dev "$dev1" "$dev2"
aux prepare_lvmetad
lvs
mv LOCAL_LVMETAD XXX
-aux enable_dev "$dev2"
-aux enable_dev "$dev1"
+aux enable_dev "$dev2" "$dev1"
mv XXX LOCAL_LVMETAD
aux lvmconf 'global/use_lvmetad = 0'
diff --git a/test/shell/lvmetad-restart.sh b/test/shell/lvmetad-restart.sh
index e4136df4d..8805bc8b8 100644
--- a/test/shell/lvmetad-restart.sh
+++ b/test/shell/lvmetad-restart.sh
@@ -12,6 +12,8 @@
. lib/inittest
test -e LOCAL_LVMETAD || skip
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_pvs 2
vgcreate $vg1 "$dev1" "$dev2"
diff --git a/test/shell/lvmetad-test.sh b/test/shell/lvmetad-test.sh
index 6ebd9adb4..146a7f19e 100644
--- a/test/shell/lvmetad-test.sh
+++ b/test/shell/lvmetad-test.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_pvs 2
vgcreate $vg1 "$dev1" "$dev2" --test
diff --git a/test/shell/lvmetad-warning.sh b/test/shell/lvmetad-warning.sh
index 52bd885bc..44edb125a 100644
--- a/test/shell/lvmetad-warning.sh
+++ b/test/shell/lvmetad-warning.sh
@@ -12,6 +12,8 @@
. lib/inittest
test -e LOCAL_LVMETAD || skip
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_pvs 2
vgcreate $vg1 "$dev1" "$dev2"
diff --git a/test/shell/lvresize-mirror.sh b/test/shell/lvresize-mirror.sh
index 0b9f526f7..423c9ed47 100644
--- a/test/shell/lvresize-mirror.sh
+++ b/test/shell/lvresize-mirror.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_vg 5
for deactivate in true false; do
diff --git a/test/shell/lvresize-raid.sh b/test/shell/lvresize-raid.sh
index 8c7d909f6..1a1a75f02 100644
--- a/test/shell/lvresize-raid.sh
+++ b/test/shell/lvresize-raid.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux have_raid 1 3 0 || skip
aux prepare_pvs 6 80
diff --git a/test/shell/lvresize-raid10.sh b/test/shell/lvresize-raid10.sh
index aead38534..4fe49f693 100644
--- a/test/shell/lvresize-raid10.sh
+++ b/test/shell/lvresize-raid10.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux have_raid 1 3 0 || skip
aux prepare_vg 5
diff --git a/test/shell/lvresize-rounding.sh b/test/shell/lvresize-rounding.sh
index d06dcf190..398740e0b 100644
--- a/test/shell/lvresize-rounding.sh
+++ b/test/shell/lvresize-rounding.sh
@@ -11,9 +11,11 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_pvs 3 22
-vgcreate -s 32K $vg "$dev1" "$dev2" "$dev3"
+vgcreate -s 32K $vg $(cat DEVICES)
lvcreate -an -Zn -l4 -i3 -I64 $vg
@@ -31,7 +33,9 @@ lvresize -l+64 -i3 -I128 $vg/$lv1
vgremove -f $vg
# 15 extents
+LVM_TEST_AUX_TRACE=yes
aux prepare_vg 3 22
+unset LVM_TEST_AUX_TRACE
# Block some extents
lvcreate -an -Zn -l4 -i3 $vg
diff --git a/test/shell/lvresize-thin-external-origin.sh b/test/shell/lvresize-thin-external-origin.sh
index e4463e2bd..090cb064f 100644
--- a/test/shell/lvresize-thin-external-origin.sh
+++ b/test/shell/lvresize-thin-external-origin.sh
@@ -10,12 +10,17 @@
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
# Test resize of thin volume with external origin
+
+export LVM_TEST_THIN_REPAIR_CMD=${LVM_TEST_THIN_REPAIR_CMD-/bin/false}
+
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux have_thin 1 2 0 || skip
# Pretend we miss the external_origin_extend feature
-aux lvmconf "global/thin_disabled_features = [ \"external_origin_extend\" ]"
+aux lvmconf 'global/thin_disabled_features = [ "external_origin_extend" ]'
aux prepare_pvs 2
@@ -36,7 +41,11 @@ not lvresize -L+10 $vg/$lv1
# But reduction works
lvresize -L-5 -f $vg/$lv1
+check lv_field $vg/$lv1 lv_size "5.00" --units m --nosuffix
+
not lvresize -L+15 -y $vg/$lv1
+check lv_field $vg/$lv1 lv_size "5.00" --units m --nosuffix
+
# Try to resize again back up to the size of external origin
-# But for now we do not support zeroing for rexetended areas.
-not lvresize -L+5 -f $vg/$lv1
+lvresize -L+5 -f $vg/$lv1
+check lv_field $vg/$lv1 lv_size "10.00" --units m --nosuffix
diff --git a/test/shell/lvresize-thin-metadata.sh b/test/shell/lvresize-thin-metadata.sh
index 9e2ae4d0f..82ac898be 100644
--- a/test/shell/lvresize-thin-metadata.sh
+++ b/test/shell/lvresize-thin-metadata.sh
@@ -9,8 +9,12 @@
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+export LVM_TEST_THIN_REPAIR_CMD=${LVM_TEST_THIN_REPAIR_CMD-/bin/false}
+
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux have_thin 1 10 0 || skip
aux prepare_pvs 3 1256
diff --git a/test/shell/lvresize-usage.sh b/test/shell/lvresize-usage.sh
index a3ba2cf00..00bae1426 100644
--- a/test/shell/lvresize-usage.sh
+++ b/test/shell/lvresize-usage.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_vg 2 80
lvcreate -L 10M -n lv -i2 $vg
diff --git a/test/shell/lvs-cache.sh b/test/shell/lvs-cache.sh
index 08e83bdad..73706b760 100644
--- a/test/shell/lvs-cache.sh
+++ b/test/shell/lvs-cache.sh
@@ -16,6 +16,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux have_cache 1 3 0 || skip
aux prepare_vg 5 8000
diff --git a/test/shell/mda-rollback.sh b/test/shell/mda-rollback.sh
index fa8e376d9..5f348fb9e 100644
--- a/test/shell/mda-rollback.sh
+++ b/test/shell/mda-rollback.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_devs 3
vgcreate --metadatasize 128k $vg1 "$dev1" "$dev2" "$dev3"
diff --git a/test/shell/mdata-strings.sh b/test/shell/mdata-strings.sh
index a1598ad32..529e59990 100644
--- a/test/shell/mdata-strings.sh
+++ b/test/shell/mdata-strings.sh
@@ -13,6 +13,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
# For udev impossible to create
test "$LVM_TEST_DEVDIR" = "/dev" && skip
diff --git a/test/shell/metadata-balance.sh b/test/shell/metadata-balance.sh
index 5ee0eba6a..869297e28 100644
--- a/test/shell/metadata-balance.sh
+++ b/test/shell/metadata-balance.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_devs 6
echo Make sure we can ignore / un-ignore mdas on a per-PV basis
diff --git a/test/shell/metadata-dirs.sh b/test/shell/metadata-dirs.sh
index f4a41bc3c..eed651224 100644
--- a/test/shell/metadata-dirs.sh
+++ b/test/shell/metadata-dirs.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_devs 3
pvcreate --metadatacopies 0 $(cat DEVICES)
diff --git a/test/shell/metadata.sh b/test/shell/metadata.sh
index 3adb68432..8e3503f74 100644
--- a/test/shell/metadata.sh
+++ b/test/shell/metadata.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_devs 5
pvcreate "$dev1"
@@ -57,6 +59,9 @@ check pv_field "$dev3" pe_start $pv_align
pvs --units k -o name,pe_start,vg_mda_size,vg_name $(cat DEVICES)
+# vgconvert -M does not work with lvmetad
+test -e LOCAL_LVMETAD && exit 0
+
# upgrade from v1 to v2 metadata
vgconvert -M2 $vg
diff --git a/test/shell/mirror-names.sh b/test/shell/mirror-names.sh
index 9022ff99c..22372e163 100644
--- a/test/shell/mirror-names.sh
+++ b/test/shell/mirror-names.sh
@@ -42,6 +42,22 @@ lv_convert_lv_() {
get lv_field $1 convert_lv
}
+enable_devs() {
+ aux enable_dev "$dev1"
+ aux enable_dev "$dev2"
+ aux enable_dev "$dev3"
+ aux enable_dev "$dev4"
+ aux enable_dev "$dev5"
+}
+
+delay_devs() {
+ aux delay_dev "$dev1" 0 1000 $(get first_extent_sector "$dev1"):
+ aux delay_dev "$dev2" 0 1000 $(get first_extent_sector "$dev2"):
+ aux delay_dev "$dev3" 0 1000 $(get first_extent_sector "$dev3"):
+ aux delay_dev "$dev4" 0 1000 $(get first_extent_sector "$dev4"):
+ aux delay_dev "$dev5" 0 1000 $(get first_extent_sector "$dev5"):
+}
+
# ---------------------------------------------------------------------
# Common environment setup/cleanup for each sub testcases
@@ -101,12 +117,14 @@ check_and_cleanup_lvs_
#COMM "converting mirror names is ${lv1}_mimagetmp_2"
lvcreate -aey -l2 --type mirror -m1 -n $lv1 $vg
-lvconvert -m+1 -i+40 -b $vg/$lv1
+delay_devs
+LVM_TEST_TAG="kill_me_$PREFIX" lvconvert -m+1 -i+40 -b $vg/$lv1
convlv=$(lv_convert_lv_ $vg/$lv1)
test $convlv = ${lv1}_mimagetmp_2
lv_devices_ $vg/$lv1 $convlv ${lv1}_mimage_2
lv_devices_ $vg/$convlv ${lv1}_mimage_0 ${lv1}_mimage_1
lv_mirror_log_ $vg/$convlv ${lv1}_mlog
+enable_devs
#COMM "mirror log name after re-adding is ${lv1}_mlog" \
lvconvert -f --mirrorlog core $vg/$lv1
diff --git a/test/shell/mirror-vgreduce-removemissing.sh b/test/shell/mirror-vgreduce-removemissing.sh
index fd7169659..40a65039d 100644
--- a/test/shell/mirror-vgreduce-removemissing.sh
+++ b/test/shell/mirror-vgreduce-removemissing.sh
@@ -12,8 +12,13 @@
test_description="ensure that 'vgreduce --removemissing' works on mirrored LV"
+# disable lvmetad logging as it bogs down test systems
+export LVM_TEST_LVMETAD_DEBUG_OPTS=${LVM_TEST_LVMETAD_DEBUG_OPTS-}
+
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
lv_is_on_ ()
{
local lv=$vg/$1
diff --git a/test/shell/name-mangling.sh b/test/shell/name-mangling.sh
index ee3499456..6bdf4a1d9 100644
--- a/test/shell/name-mangling.sh
+++ b/test/shell/name-mangling.sh
@@ -15,6 +15,7 @@
# so skip duplicate CLMVD and lvmetad test
test -e LOCAL_CLVMD && skip
test -e LOCAL_LVMETAD && skip
+test -e LOCAL_LVMPOLLD && skip
CHARACTER_WHITELIST="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789#+-.:=@_"
FAIL_MIXED_STR="contains mixed mangled and unmangled characters"
diff --git a/test/shell/nomda-missing.sh b/test/shell/nomda-missing.sh
index 7de458b2f..39c2ac9d0 100644
--- a/test/shell/nomda-missing.sh
+++ b/test/shell/nomda-missing.sh
@@ -12,6 +12,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_devs 4
pvcreate "$dev1" "$dev2"
pvcreate --metadatacopies 0 "$dev3" "$dev4"
diff --git a/test/shell/nomda-restoremissing.sh b/test/shell/nomda-restoremissing.sh
index 01a77d068..9380c8dba 100644
--- a/test/shell/nomda-restoremissing.sh
+++ b/test/shell/nomda-restoremissing.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_vg 3
pvchange --metadataignore y "$dev1"
diff --git a/test/shell/orphan-ondisk.sh b/test/shell/orphan-ondisk.sh
index 839507307..f4e18f440 100644
--- a/test/shell/orphan-ondisk.sh
+++ b/test/shell/orphan-ondisk.sh
@@ -11,5 +11,7 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_vg 2
vgreduce $vg "$dev1" 2>&1 | not grep -i 'parse error'
diff --git a/test/shell/pool-labels.sh b/test/shell/pool-labels.sh
index a2163e87f..70d5a64bd 100644
--- a/test/shell/pool-labels.sh
+++ b/test/shell/pool-labels.sh
@@ -13,6 +13,7 @@
# lvmetad does not handle pool labels so skip test.
test ! -e LOCAL_LVMETAD || skip
+test -e LOCAL_LVMPOLLD && skip
env printf "" || skip # skip if printf is not available
diff --git a/test/shell/process-each-duplicate-pvs.sh b/test/shell/process-each-duplicate-pvs.sh
new file mode 100644
index 000000000..353b29618
--- /dev/null
+++ b/test/shell/process-each-duplicate-pvs.sh
@@ -0,0 +1,135 @@
+#!/bin/sh
+# Copyright (C) 2008-2013 Red Hat, Inc. All rights reserved.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU General Public License v.2.
+
+test_description='Test duplicate PVs'
+
+. lib/inittest
+
+aux prepare_devs 2
+
+pvcreate "$dev1"
+vgcreate $vg1 "$dev1"
+
+# Clone the PV
+dd if="$dev1" of="$dev2" bs=256K count=1 iflag=direct oflag=direct
+aux notify_lvmetad "$dev2"
+
+# When there are cloned devices (same pvid), one will be referenced in
+# lvmcache as pv->dev, and the other will not be referenced from lvmcache,
+# it'll only be in device cache. The one referenced by lvmcache is
+# referred to as the "preferred" one, and is the one that is printed by a
+# standard 'pvs' command.
+#
+# We don't know if dev1 or dev2 will be preferred, so we first check that
+# and save it as PV1, the other as PV2.
+#
+# The rules that 'pvs' follows to choose which PVs to display are
+# somewhat strange and seem arbitrary from a user perspective;
+# the choice is driven largely by what's most practical in the code,
+# but also by what vgimportclone needs.
+#
+# Some of the rules that process_each_pv is using:
+# - When no pv arg is specified, print the one preferred dev.
+# - When pv args are specified, print one line per specified arg,
+# i.e. don't print all duplicate pvs when one is specified.
+# - Always print the preferred duplicate, even if it was not the
+# one specified, e.g. If there are two duplicates on A and B,
+# and A is the preferred device, then 'pvs A' will show A and
+# 'pvs B' will show A.
+# - If multiple duplicates are specified, then print each, e.g.
+# 'pvs A B' will show both A and B.
+# - If three duplicates exist on A, B, C, and the preferred is A,
+# and the command 'pvs B C' is run, then the A will be printed
+# first since we always print the preferred device, and then
+# either B or C will be printed. 'pvs A B C' will print all.
+# - 'pvs -a' should print all the duplicates and should show
+# the same VG for each.
+# - 'pvs -o+size ...' should show the correct size of the
+# devices being printed if they differ among the duplicates.
+# - By using 'pvs --config' with a filter, you can filter out
+# the duplicate devs you don't want so that pvs will
+# print the devs you do want to see.
+#
+# The tests below check these behaviors on up to two duplicates,
+# so if the process_each_pv logic changes regarding which
+# duplicates are chosen, then this test will need adjusting.
+
+# Verify that there is only one PV printed, i.e. the preferred
+pvs --noheading | tee out
+test $(wc -l < out) -eq 1
+
+# Set PV1 to the preferred/cached PV, and PV2 to the other.
+# Cannot use pvs -o pv_name because that command goes to
+# disk and does not represent what lvmetad thinks.
+PV1=$(pvs --noheading | awk '{ print $1 }')
+echo PV1 is $PV1
+if [ $PV1 == $dev1 ]; then
+ PV2=$dev2
+else
+ PV2=$dev1
+fi
+echo PV2 is $PV2
+
+# check listed pvs
+pvs --noheading | tee out
+grep $PV1 out
+not grep $PV2 out
+
+# check error messages
+pvs --noheading 2>&1 | tee out
+grep "Found duplicate" out >err
+grep "using $PV1 not $PV2" err
+
+# check listed pvs
+pvs --noheading "$dev1" | tee out
+grep $PV1 out
+not grep $PV2 out
+
+# check error messages
+pvs --noheading "$dev1" 2>&1 | tee out
+grep "Found duplicate" out >err
+grep "using $PV1 not $PV2" err
+
+# check listed pvs
+pvs --noheading "$dev2" | tee out
+grep $PV1 out
+not grep $PV2 out
+
+# check error messages
+pvs --noheading "$dev2" 2>&1 | tee out
+grep "Found duplicate" out >err
+grep "using $PV1 not $PV2" err
+
+# check listed pvs
+pvs --noheading "$dev1" "$dev2" | tee out
+grep $PV1 out
+grep $PV2 out
+
+# check error messages
+pvs --noheading "$dev1" "$dev2" 2>&1 | tee out
+grep "Found duplicate" out >err
+grep "using $PV1 not $PV2" err
+
+# check listed pvs
+pvs --noheading -a | tee out
+grep $PV1 out
+grep $PV2 out
+grep $PV1 out | grep $vg1
+grep $PV2 out | grep $vg1
+
+# check error messages
+pvs --noheading -a 2>&1 | tee out
+grep "Found duplicate" out >err
+grep "using $PV1 not $PV2" err
+
+
+# TODO: I'd like to test that a subsystem device is preferred
+# over a non-subsystem device, but all the devices used here
+# are DM devices, i.e. they are already subsystem devices,
+# so I can't just wrap a standard block device with a DM
+# identity mapping.
+
diff --git a/test/shell/process-each-duplicate-vgnames.sh b/test/shell/process-each-duplicate-vgnames.sh
index 5867fd166..f2b47650c 100644
--- a/test/shell/process-each-duplicate-vgnames.sh
+++ b/test/shell/process-each-duplicate-vgnames.sh
@@ -9,13 +9,14 @@ test_description='Test vgs with duplicate vg names'
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_devs 2
pvcreate "$dev1"
pvcreate "$dev2"
-aux disable_dev "$dev1"
-aux disable_dev "$dev2"
+aux disable_dev "$dev1" "$dev2"
aux enable_dev "$dev1"
vgscan
@@ -35,8 +36,7 @@ vgscan
pvs "$dev1"
pvs "$dev2"
-vgs -o+vg_uuid >err
-cat err
+vgs -o+vg_uuid | tee err
grep $UUID1 err
grep $UUID2 err
@@ -45,18 +45,15 @@ grep $UUID2 err
# grep $UUID1 err
aux disable_dev "$dev2"
-vgs -o+vg_uuid >err
-cat err
+vgs -o+vg_uuid | tee err
grep $UUID1 err
not grep $UUID2 err
aux enable_dev "$dev2"
vgscan
aux disable_dev "$dev1"
-vgs -o+vg_uuid >err
-cat err
+vgs -o+vg_uuid | tee err
grep $UUID2 err
not grep $UUID1 err
aux enable_dev "$dev1"
vgscan
-
diff --git a/test/shell/process-each-lv.sh b/test/shell/process-each-lv.sh
index 0260951ca..52c97d4da 100644
--- a/test/shell/process-each-lv.sh
+++ b/test/shell/process-each-lv.sh
@@ -11,6 +11,9 @@
test_description='Exercise toollib process_each_lv'
+# disable lvmetad logging as it bogs down test systems
+export LVM_TEST_LVMETAD_DEBUG_OPTS=${LVM_TEST_LVMETAD_DEBUG_OPTS-}
+
. lib/inittest
aux prepare_devs 10
@@ -38,11 +41,11 @@ aux prepare_devs 10
prepare_vgs_() {
# set up vgs/lvs that we will remove
- vgcreate $vg1 "$dev1" "$dev2"
- vgcreate $vg2 "$dev3" "$dev4"
- vgcreate $vg3 "$dev5" "$dev6"
- vgcreate $vg4 "$dev7" "$dev8"
- vgcreate $vg5 "$dev9" "$dev10"
+ vgcreate $SHARED $vg1 "$dev1" "$dev2"
+ vgcreate $SHARED $vg2 "$dev3" "$dev4"
+ vgcreate $SHARED $vg3 "$dev5" "$dev6"
+ vgcreate $SHARED $vg4 "$dev7" "$dev8"
+ vgcreate $SHARED $vg5 "$dev9" "$dev10"
lvcreate -Zn -an -l 2 -n $lv1 $vg1
lvcreate -Zn -an -l 2 -n $lv1 $vg2
lvcreate -Zn -an -l 2 -n $lv2 $vg2
@@ -651,3 +654,5 @@ not grep $vg5-$lv2 err
not grep $vg5-$lv3 err
not grep $vg5-$lv4 err
not grep $vg5-$lv5 err
+
+vgremove -f $vg1 $vg2 $vg3 $vg4 $vg5
diff --git a/test/shell/process-each-pv-nomda-all.sh b/test/shell/process-each-pv-nomda-all.sh
index 93eb0bd1a..1a2af58c0 100644
--- a/test/shell/process-each-pv-nomda-all.sh
+++ b/test/shell/process-each-pv-nomda-all.sh
@@ -13,6 +13,8 @@ test_description='Test process_each_pv with zero mda'
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_devs 14
# for vg1
@@ -57,4 +59,3 @@ grep "$dev11" err
grep "$dev12" err
grep "$dev13" err
grep "$dev14" err
-
diff --git a/test/shell/process-each-pv-nomda.sh b/test/shell/process-each-pv-nomda.sh
index 82e6bc4ee..642f2e410 100644
--- a/test/shell/process-each-pv-nomda.sh
+++ b/test/shell/process-each-pv-nomda.sh
@@ -13,6 +13,8 @@ test_description='Test process_each_pv with zero mda'
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_devs 2
pvcreate "$dev1" --metadatacopies 0
diff --git a/test/shell/process-each-pv.sh b/test/shell/process-each-pv.sh
index d3a1863e2..426c3f1e9 100644
--- a/test/shell/process-each-pv.sh
+++ b/test/shell/process-each-pv.sh
@@ -20,10 +20,9 @@ aux prepare_devs 14
# pvdisplay
# pvresize
# pvs
-# vgreduce
#
-# process-each-pvresize.sh covers pvresize,
-# the others are covered here.
+# process-each-pvresize.sh covers pvresize.
+# process-each-vgreduce.sh covers vgreduce.
#
@@ -34,9 +33,9 @@ aux prepare_devs 14
# dev1 matchines dev10,dev11,etc
#
-vgcreate $vg1 "$dev10"
-vgcreate $vg2 "$dev2" "$dev3" "$dev4" "$dev5"
-vgcreate $vg3 "$dev6" "$dev7" "$dev8" "$dev9"
+vgcreate $SHARED $vg1 "$dev10"
+vgcreate $SHARED $vg2 "$dev2" "$dev3" "$dev4" "$dev5"
+vgcreate $SHARED $vg3 "$dev6" "$dev7" "$dev8" "$dev9"
pvchange --addtag V2D3 "$dev3"
pvchange --addtag V2D4 "$dev4"
@@ -712,173 +711,6 @@ not grep "$dev14" err
#
-# test vgreduce
-#
-
-# fail without dev
-not vgreduce $vg2
-
-
-# fail with dev and -a
-not vgreduce $vg2 "$dev2" -a
-check pv_field "$dev2" vg_name $vg2
-check pv_field "$dev3" vg_name $vg2
-check pv_field "$dev4" vg_name $vg2
-check pv_field "$dev5" vg_name $vg2
-check pv_field "$dev6" vg_name $vg3
-check pv_field "$dev7" vg_name $vg3
-check pv_field "$dev8" vg_name $vg3
-check pv_field "$dev9" vg_name $vg3
-
-
-# remove one pv
-vgreduce $vg2 "$dev2"
-not check pv_field "$dev2" vg_name $vg2
-check pv_field "$dev3" vg_name $vg2
-check pv_field "$dev4" vg_name $vg2
-check pv_field "$dev5" vg_name $vg2
-check pv_field "$dev6" vg_name $vg3
-check pv_field "$dev7" vg_name $vg3
-check pv_field "$dev8" vg_name $vg3
-check pv_field "$dev9" vg_name $vg3
-# reset
-vgextend $vg2 "$dev2"
-
-
-# remove two pvs
-vgreduce $vg2 "$dev2" "$dev3"
-not check pv_field "$dev2" vg_name $vg2
-not check pv_field "$dev3" vg_name $vg2
-check pv_field "$dev4" vg_name $vg2
-check pv_field "$dev5" vg_name $vg2
-check pv_field "$dev6" vg_name $vg3
-check pv_field "$dev7" vg_name $vg3
-check pv_field "$dev8" vg_name $vg3
-check pv_field "$dev9" vg_name $vg3
-# reset
-vgextend $vg2 "$dev2" "$dev3"
-pvchange --addtag V2D3 "$dev3"
-
-
-# remove one pv with tag
-vgreduce $vg2 @V2D3
-check pv_field "$dev2" vg_name $vg2
-not check pv_field "$dev3" vg_name $vg2
-check pv_field "$dev4" vg_name $vg2
-check pv_field "$dev5" vg_name $vg2
-check pv_field "$dev6" vg_name $vg3
-check pv_field "$dev7" vg_name $vg3
-check pv_field "$dev8" vg_name $vg3
-check pv_field "$dev9" vg_name $vg3
-# reset
-vgextend $vg2 "$dev3"
-pvchange --addtag V2D3 "$dev3"
-
-
-# remove two pvs, each with different tag
-vgreduce $vg2 @V2D3 @V2D4
-check pv_field "$dev2" vg_name $vg2
-not check pv_field "$dev3" vg_name $vg2
-not check pv_field "$dev4" vg_name $vg2
-check pv_field "$dev5" vg_name $vg2
-check pv_field "$dev6" vg_name $vg3
-check pv_field "$dev7" vg_name $vg3
-check pv_field "$dev8" vg_name $vg3
-check pv_field "$dev9" vg_name $vg3
-# reset
-vgextend $vg2 "$dev3" "$dev4"
-pvchange --addtag V2D3 "$dev3"
-pvchange --addtag V2D4 "$dev4"
-pvchange --addtag V2D45 "$dev4"
-
-
-# remove two pvs, both with same tag
-vgreduce $vg2 @V2D45
-check pv_field "$dev2" vg_name $vg2
-check pv_field "$dev3" vg_name $vg2
-not check pv_field "$dev4" vg_name $vg2
-not check pv_field "$dev5" vg_name $vg2
-check pv_field "$dev6" vg_name $vg3
-check pv_field "$dev7" vg_name $vg3
-check pv_field "$dev8" vg_name $vg3
-check pv_field "$dev9" vg_name $vg3
-# reset
-vgextend $vg2 "$dev4" "$dev5"
-pvchange --addtag V2D4 "$dev4"
-pvchange --addtag V2D45 "$dev4"
-pvchange --addtag V2D5 "$dev5"
-pvchange --addtag V2D45 "$dev5"
-
-
-# remove two pvs, one by name, one by tag
-vgreduce $vg2 "$dev2" @V2D3
-not check pv_field "$dev2" vg_name $vg2
-not check pv_field "$dev3" vg_name $vg2
-check pv_field "$dev4" vg_name $vg2
-check pv_field "$dev5" vg_name $vg2
-check pv_field "$dev6" vg_name $vg3
-check pv_field "$dev7" vg_name $vg3
-check pv_field "$dev8" vg_name $vg3
-check pv_field "$dev9" vg_name $vg3
-# reset
-vgextend $vg2 "$dev2" "$dev3"
-pvchange --addtag V2D3 "$dev3"
-
-
-# remove one pv by tag, where another vg has a pv with same tag
-pvchange --addtag V2D5V3D9 "$dev5"
-pvchange --addtag V2D5V3D9 "$dev9"
-vgreduce $vg2 @V2D5V3D9
-check pv_field "$dev2" vg_name $vg2
-check pv_field "$dev3" vg_name $vg2
-check pv_field "$dev4" vg_name $vg2
-not check pv_field "$dev5" vg_name $vg2
-check pv_field "$dev6" vg_name $vg3
-check pv_field "$dev7" vg_name $vg3
-check pv_field "$dev8" vg_name $vg3
-check pv_field "$dev9" vg_name $vg3
-# reset
-vgextend $vg2 "$dev5"
-pvchange --addtag V2D5 "$dev5"
-pvchange --addtag V2D45 "$dev5"
-
-
-# fail to remove last pv (don't know which will be last)
-not vgreduce -a $vg2
-# reset
-vgremove $vg2
-vgcreate $vg2 "$dev2" "$dev3" "$dev4" "$dev5"
-pvchange --addtag V2D3 "$dev3"
-pvchange --addtag V2D4 "$dev4"
-pvchange --addtag V2D45 "$dev4"
-pvchange --addtag V2D5 "$dev5"
-pvchange --addtag V2D45 "$dev5"
-
-
-# lvcreate on one pv to make it used
-# remove all unused pvs
-lvcreate -n $lv1 -l 2 $vg2 "$dev2"
-not vgreduce -a $vg2
-check pv_field "$dev2" vg_name $vg2
-not check pv_field "$dev3" vg_name $vg2
-not check pv_field "$dev4" vg_name $vg2
-not check pv_field "$dev5" vg_name $vg2
-check pv_field "$dev6" vg_name $vg3
-check pv_field "$dev7" vg_name $vg3
-check pv_field "$dev8" vg_name $vg3
-check pv_field "$dev9" vg_name $vg3
-# reset
-vgextend $vg2 "$dev3" "$dev4" "$dev5"
-pvchange --addtag V2D3 "$dev3"
-pvchange --addtag V2D4 "$dev4"
-pvchange --addtag V2D45 "$dev4"
-pvchange --addtag V2D5 "$dev5"
-pvchange --addtag V2D45 "$dev5"
-lvchange -an $vg2/$lv1
-lvremove $vg2/$lv1
-
-
-#
# tests including pvs without mdas
#
@@ -915,9 +747,9 @@ pvcreate "$dev14" --metadatacopies 0
# dev12
# dev13
-vgcreate $vg1 "$dev10"
-vgcreate $vg2 "$dev2" "$dev3" "$dev4" "$dev5"
-vgcreate $vg3 "$dev6" "$dev7" "$dev8" "$dev9"
+vgcreate $SHARED $vg1 "$dev10"
+vgcreate $SHARED $vg2 "$dev2" "$dev3" "$dev4" "$dev5"
+vgcreate $SHARED $vg3 "$dev6" "$dev7" "$dev8" "$dev9"
pvchange --addtag V2D3 "$dev3"
pvchange --addtag V2D4 "$dev4"
@@ -1226,58 +1058,4 @@ grep "$dev12" err
grep "$dev13" err
grep "$dev14" err
-
-#
-# vgreduce including pvs without mdas
-#
-
-# remove pv without mda
-vgreduce $vg2 "$dev2"
-not check pv_field "$dev2" vg_name $vg2
-check pv_field "$dev3" vg_name $vg2
-check pv_field "$dev4" vg_name $vg2
-check pv_field "$dev5" vg_name $vg2
-check pv_field "$dev6" vg_name $vg3
-check pv_field "$dev7" vg_name $vg3
-check pv_field "$dev8" vg_name $vg3
-check pv_field "$dev9" vg_name $vg3
-# reset
-vgextend $vg2 "$dev2"
-
-# remove pv with mda and pv without mda
-vgreduce $vg2 "$dev2" "$dev3"
-not check pv_field "$dev2" vg_name $vg2
-not check pv_field "$dev3" vg_name $vg2
-check pv_field "$dev4" vg_name $vg2
-check pv_field "$dev5" vg_name $vg2
-check pv_field "$dev6" vg_name $vg3
-check pv_field "$dev7" vg_name $vg3
-check pv_field "$dev8" vg_name $vg3
-check pv_field "$dev9" vg_name $vg3
-# reset
-vgextend $vg2 "$dev2"
-vgextend $vg2 "$dev3"
-
-# fail to remove only pv with mda
-not vgreduce $vg3 "$dev9"
-check pv_field "$dev6" vg_name $vg3
-check pv_field "$dev7" vg_name $vg3
-check pv_field "$dev8" vg_name $vg3
-check pv_field "$dev9" vg_name $vg3
-check pv_field "$dev2" vg_name $vg2
-check pv_field "$dev3" vg_name $vg2
-check pv_field "$dev4" vg_name $vg2
-check pv_field "$dev5" vg_name $vg2
-
-# remove by tag a pv without mda
-vgreduce $vg3 @V3D8
-check pv_field "$dev6" vg_name $vg3
-check pv_field "$dev7" vg_name $vg3
-not check pv_field "$dev8" vg_name $vg3
-check pv_field "$dev9" vg_name $vg3
-check pv_field "$dev2" vg_name $vg2
-check pv_field "$dev3" vg_name $vg2
-check pv_field "$dev4" vg_name $vg2
-check pv_field "$dev5" vg_name $vg2
-# reset
-vgextend $vg3 "$dev8"
+vgremove $vg1 $vg2 $vg3
diff --git a/test/shell/process-each-pvresize.sh b/test/shell/process-each-pvresize.sh
index 02fe081c8..493c24bcf 100644
--- a/test/shell/process-each-pvresize.sh
+++ b/test/shell/process-each-pvresize.sh
@@ -13,6 +13,8 @@ test_description='Exercise toollib process_each_pv'
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_devs 14
#
diff --git a/test/shell/process-each-vg.sh b/test/shell/process-each-vg.sh
index e8690f51e..49bc4f91b 100644
--- a/test/shell/process-each-vg.sh
+++ b/test/shell/process-each-vg.sh
@@ -26,16 +26,16 @@ aux prepare_devs 6
#
# set up four vgs that we will remove
#
-vgcreate $vg1 "$dev1"
-vgcreate $vg2 "$dev2"
-vgcreate $vg3 "$dev3"
-vgcreate $vg4 "$dev4"
+vgcreate $SHARED $vg1 "$dev1"
+vgcreate $SHARED $vg2 "$dev2"
+vgcreate $SHARED $vg3 "$dev3"
+vgcreate $SHARED $vg4 "$dev4"
# these two vgs will not be removed
-vgcreate $vg5 "$dev5"
+vgcreate $SHARED $vg5 "$dev5"
vgchange --addtag tagvg5 $vg5
lvcreate -l 4 -n $lv1 $vg5
-vgcreate $vg6 "$dev6"
+vgcreate $SHARED $vg6 "$dev6"
lvcreate -l 4 -n $lv2 $vg6
# should fail without any arg
@@ -65,10 +65,10 @@ not vgs $vg4
#
# set up four vgs that we will remove
#
-vgcreate --addtag tagfoo $vg1 "$dev1"
-vgcreate --addtag tagfoo $vg2 "$dev2"
-vgcreate --addtag tagfoo2 $vg3 "$dev3"
-vgcreate --addtag tagbar $vg4 "$dev4"
+vgcreate $SHARED --addtag tagfoo $vg1 "$dev1"
+vgcreate $SHARED --addtag tagfoo $vg2 "$dev2"
+vgcreate $SHARED --addtag tagfoo2 $vg3 "$dev3"
+vgcreate $SHARED --addtag tagbar $vg4 "$dev4"
vgchange --addtag foo $vg4
# should do nothing and fail
@@ -93,10 +93,10 @@ not vgs $vg4
#
# set up four vgs that we will remove
#
-vgcreate --addtag tagfoo $vg1 "$dev1"
-vgcreate --addtag tagfoo $vg2 "$dev2"
-vgcreate --addtag tagfoo2 $vg3 "$dev3"
-vgcreate --addtag tagbar $vg4 "$dev4"
+vgcreate $SHARED --addtag tagfoo $vg1 "$dev1"
+vgcreate $SHARED --addtag tagfoo $vg2 "$dev2"
+vgcreate $SHARED --addtag tagfoo2 $vg3 "$dev3"
+vgcreate $SHARED --addtag tagbar $vg4 "$dev4"
vgchange --addtag foo $vg4
vgremove @tagfoo
@@ -111,10 +111,10 @@ not vgs $vg4
#
# set up four vgs that we will remove
#
-vgcreate --addtag tagfoo $vg1 "$dev1"
-vgcreate --addtag tagfoo $vg2 "$dev2"
-vgcreate --addtag tagfoo2 $vg3 "$dev3"
-vgcreate --addtag tagbar $vg4 "$dev4"
+vgcreate $SHARED --addtag tagfoo $vg1 "$dev1"
+vgcreate $SHARED --addtag tagfoo $vg2 "$dev2"
+vgcreate $SHARED --addtag tagfoo2 $vg3 "$dev3"
+vgcreate $SHARED --addtag tagbar $vg4 "$dev4"
vgchange --addtag foo $vg4
vgremove $vg1 @tagfoo2
@@ -129,10 +129,10 @@ not vgs $vg4
#
# set up four vgs that we will remove
#
-vgcreate --addtag tagfoo $vg1 "$dev1"
-vgcreate --addtag tagfoo $vg2 "$dev2"
-vgcreate --addtag tagfoo2 $vg3 "$dev3"
-vgcreate --addtag tagbar $vg4 "$dev4"
+vgcreate $SHARED --addtag tagfoo $vg1 "$dev1"
+vgcreate $SHARED --addtag tagfoo $vg2 "$dev2"
+vgcreate $SHARED --addtag tagfoo2 $vg3 "$dev3"
+vgcreate $SHARED --addtag tagbar $vg4 "$dev4"
vgchange --addtag foo $vg4
vgremove @foo @tagfoo2 $vg1 $vg2
@@ -145,10 +145,10 @@ not vgs $vg4
#
# set up four vgs that we will remove
#
-vgcreate --addtag tagfoo $vg1 "$dev1"
-vgcreate --addtag tagfoo $vg2 "$dev2"
-vgcreate --addtag tagfoo2 $vg3 "$dev3"
-vgcreate --addtag tagbar $vg4 "$dev4"
+vgcreate $SHARED --addtag tagfoo $vg1 "$dev1"
+vgcreate $SHARED --addtag tagfoo $vg2 "$dev2"
+vgcreate $SHARED --addtag tagfoo2 $vg3 "$dev3"
+vgcreate $SHARED --addtag tagbar $vg4 "$dev4"
vgchange --addtag foo $vg4
vgremove @tagfoo $vg1 @tagfoo @tagfoo2 $vg3 @tagbar
@@ -161,10 +161,10 @@ not vgs $vg4
#
# set up four vgs that we will remove
#
-vgcreate --addtag tagfoo $vg1 "$dev1"
-vgcreate --addtag tagfoo $vg2 "$dev2"
-vgcreate --addtag tagfoo2 $vg3 "$dev3"
-vgcreate --addtag tagbar $vg4 "$dev4"
+vgcreate $SHARED --addtag tagfoo $vg1 "$dev1"
+vgcreate $SHARED --addtag tagfoo $vg2 "$dev2"
+vgcreate $SHARED --addtag tagfoo2 $vg3 "$dev3"
+vgcreate $SHARED --addtag tagbar $vg4 "$dev4"
vgchange --addtag foo $vg4
not vgremove garbage $vg1
@@ -196,10 +196,10 @@ not vgs $vg6
#
# set up four vgs that we will report
#
-vgcreate --addtag tagfoo $vg1 "$dev1"
-vgcreate --addtag tagfoo $vg2 "$dev2"
-vgcreate --addtag tagfoo2 $vg3 "$dev3"
-vgcreate --addtag tagbar $vg4 "$dev4"
+vgcreate $SHARED --addtag tagfoo $vg1 "$dev1"
+vgcreate $SHARED --addtag tagfoo $vg2 "$dev2"
+vgcreate $SHARED --addtag tagfoo2 $vg3 "$dev3"
+vgcreate $SHARED --addtag tagbar $vg4 "$dev4"
vgchange --addtag foo $vg4
vgs >err
@@ -262,3 +262,5 @@ not grep $vg1 err
not grep $vg2 err
not grep $vg3 err
+vgremove -f $vg1 $vg2 $vg3 $vg4
+
diff --git a/test/shell/process-each-vgreduce.sh b/test/shell/process-each-vgreduce.sh
new file mode 100644
index 000000000..9c8c6370a
--- /dev/null
+++ b/test/shell/process-each-vgreduce.sh
@@ -0,0 +1,327 @@
+#!/bin/sh
+# Copyright (C) 2014 Red Hat, Inc. All rights reserved.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU General Public License v.2.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+test_description='Exercise toollib process_each_pv with vgreduce'
+
+. lib/inittest
+
+aux prepare_devs 14
+
+#
+# set up
+#
+# FIXME: some of the setup may not be used by the tests
+# since this was split out from process-each-pv, where
+# some of the setup was used by other tests that only
+# remain in process-each-pv.
+#
+# use use dev10 instead of dev1 because simple grep for
+# dev1 matchines dev10,dev11,etc
+#
+
+vgcreate $vg1 "$dev10"
+vgcreate $vg2 "$dev2" "$dev3" "$dev4" "$dev5"
+vgcreate $vg3 "$dev6" "$dev7" "$dev8" "$dev9"
+
+pvchange --addtag V2D3 "$dev3"
+pvchange --addtag V2D4 "$dev4"
+pvchange --addtag V2D45 "$dev4"
+pvchange --addtag V2D5 "$dev5"
+pvchange --addtag V2D45 "$dev5"
+
+pvchange --addtag V3 "$dev6" "$dev7" "$dev8" "$dev9"
+pvchange --addtag V3D9 "$dev9"
+
+# orphan
+pvcreate "$dev11"
+
+# dev (a non-pv device)
+pvcreate "$dev12"
+pvremove "$dev12"
+
+# dev13 is intentionally untouched so we can
+# test that it is handled appropriately as a non-pv
+
+# orphan
+pvcreate "$dev14"
+
+
+# fail without dev
+not vgreduce $vg2
+
+
+# fail with dev and -a
+not vgreduce $vg2 "$dev2" -a
+check pv_field "$dev2" vg_name $vg2
+check pv_field "$dev3" vg_name $vg2
+check pv_field "$dev4" vg_name $vg2
+check pv_field "$dev5" vg_name $vg2
+check pv_field "$dev6" vg_name $vg3
+check pv_field "$dev7" vg_name $vg3
+check pv_field "$dev8" vg_name $vg3
+check pv_field "$dev9" vg_name $vg3
+
+
+# remove one pv
+vgreduce $vg2 "$dev2"
+not check pv_field "$dev2" vg_name $vg2
+check pv_field "$dev3" vg_name $vg2
+check pv_field "$dev4" vg_name $vg2
+check pv_field "$dev5" vg_name $vg2
+check pv_field "$dev6" vg_name $vg3
+check pv_field "$dev7" vg_name $vg3
+check pv_field "$dev8" vg_name $vg3
+check pv_field "$dev9" vg_name $vg3
+# reset
+vgextend $vg2 "$dev2"
+
+
+# remove two pvs
+vgreduce $vg2 "$dev2" "$dev3"
+not check pv_field "$dev2" vg_name $vg2
+not check pv_field "$dev3" vg_name $vg2
+check pv_field "$dev4" vg_name $vg2
+check pv_field "$dev5" vg_name $vg2
+check pv_field "$dev6" vg_name $vg3
+check pv_field "$dev7" vg_name $vg3
+check pv_field "$dev8" vg_name $vg3
+check pv_field "$dev9" vg_name $vg3
+# reset
+vgextend $vg2 "$dev2" "$dev3"
+pvchange --addtag V2D3 "$dev3"
+
+
+# remove one pv with tag
+vgreduce $vg2 @V2D3
+check pv_field "$dev2" vg_name $vg2
+not check pv_field "$dev3" vg_name $vg2
+check pv_field "$dev4" vg_name $vg2
+check pv_field "$dev5" vg_name $vg2
+check pv_field "$dev6" vg_name $vg3
+check pv_field "$dev7" vg_name $vg3
+check pv_field "$dev8" vg_name $vg3
+check pv_field "$dev9" vg_name $vg3
+# reset
+vgextend $vg2 "$dev3"
+pvchange --addtag V2D3 "$dev3"
+
+
+# remove two pvs, each with different tag
+vgreduce $vg2 @V2D3 @V2D4
+check pv_field "$dev2" vg_name $vg2
+not check pv_field "$dev3" vg_name $vg2
+not check pv_field "$dev4" vg_name $vg2
+check pv_field "$dev5" vg_name $vg2
+check pv_field "$dev6" vg_name $vg3
+check pv_field "$dev7" vg_name $vg3
+check pv_field "$dev8" vg_name $vg3
+check pv_field "$dev9" vg_name $vg3
+# reset
+vgextend $vg2 "$dev3" "$dev4"
+pvchange --addtag V2D3 "$dev3"
+pvchange --addtag V2D4 "$dev4"
+pvchange --addtag V2D45 "$dev4"
+
+
+# remove two pvs, both with same tag
+vgreduce $vg2 @V2D45
+check pv_field "$dev2" vg_name $vg2
+check pv_field "$dev3" vg_name $vg2
+not check pv_field "$dev4" vg_name $vg2
+not check pv_field "$dev5" vg_name $vg2
+check pv_field "$dev6" vg_name $vg3
+check pv_field "$dev7" vg_name $vg3
+check pv_field "$dev8" vg_name $vg3
+check pv_field "$dev9" vg_name $vg3
+# reset
+vgextend $vg2 "$dev4" "$dev5"
+pvchange --addtag V2D4 "$dev4"
+pvchange --addtag V2D45 "$dev4"
+pvchange --addtag V2D5 "$dev5"
+pvchange --addtag V2D45 "$dev5"
+
+
+# remove two pvs, one by name, one by tag
+vgreduce $vg2 "$dev2" @V2D3
+not check pv_field "$dev2" vg_name $vg2
+not check pv_field "$dev3" vg_name $vg2
+check pv_field "$dev4" vg_name $vg2
+check pv_field "$dev5" vg_name $vg2
+check pv_field "$dev6" vg_name $vg3
+check pv_field "$dev7" vg_name $vg3
+check pv_field "$dev8" vg_name $vg3
+check pv_field "$dev9" vg_name $vg3
+# reset
+vgextend $vg2 "$dev2" "$dev3"
+pvchange --addtag V2D3 "$dev3"
+
+
+# remove one pv by tag, where another vg has a pv with same tag
+pvchange --addtag V2D5V3D9 "$dev5"
+pvchange --addtag V2D5V3D9 "$dev9"
+vgreduce $vg2 @V2D5V3D9
+check pv_field "$dev2" vg_name $vg2
+check pv_field "$dev3" vg_name $vg2
+check pv_field "$dev4" vg_name $vg2
+not check pv_field "$dev5" vg_name $vg2
+check pv_field "$dev6" vg_name $vg3
+check pv_field "$dev7" vg_name $vg3
+check pv_field "$dev8" vg_name $vg3
+check pv_field "$dev9" vg_name $vg3
+# reset
+vgextend $vg2 "$dev5"
+pvchange --addtag V2D5 "$dev5"
+pvchange --addtag V2D45 "$dev5"
+
+
+# fail to remove last pv (don't know which will be last)
+not vgreduce -a $vg2
+# reset
+vgremove $vg2
+vgcreate $vg2 "$dev2" "$dev3" "$dev4" "$dev5"
+pvchange --addtag V2D3 "$dev3"
+pvchange --addtag V2D4 "$dev4"
+pvchange --addtag V2D45 "$dev4"
+pvchange --addtag V2D5 "$dev5"
+pvchange --addtag V2D45 "$dev5"
+
+
+# lvcreate on one pv to make it used
+# remove all unused pvs
+lvcreate -n $lv1 -l 2 $vg2 "$dev2"
+not vgreduce -a $vg2
+check pv_field "$dev2" vg_name $vg2
+not check pv_field "$dev3" vg_name $vg2
+not check pv_field "$dev4" vg_name $vg2
+not check pv_field "$dev5" vg_name $vg2
+check pv_field "$dev6" vg_name $vg3
+check pv_field "$dev7" vg_name $vg3
+check pv_field "$dev8" vg_name $vg3
+check pv_field "$dev9" vg_name $vg3
+# reset
+vgextend $vg2 "$dev3" "$dev4" "$dev5"
+pvchange --addtag V2D3 "$dev3"
+pvchange --addtag V2D4 "$dev4"
+pvchange --addtag V2D45 "$dev4"
+pvchange --addtag V2D5 "$dev5"
+pvchange --addtag V2D45 "$dev5"
+lvchange -an $vg2/$lv1
+lvremove $vg2/$lv1
+
+
+#
+# tests including pvs without mdas
+#
+
+# remove old config
+vgremove $vg1
+vgremove $vg2
+vgremove $vg3
+pvremove "$dev11"
+pvremove "$dev14"
+
+# new config with some pvs that have zero mdas
+
+# for vg1
+pvcreate "$dev10"
+
+# for vg2
+pvcreate "$dev2" --metadatacopies 0
+pvcreate "$dev3"
+pvcreate "$dev4"
+pvcreate "$dev5"
+
+# for vg3
+pvcreate "$dev6" --metadatacopies 0
+pvcreate "$dev7" --metadatacopies 0
+pvcreate "$dev8" --metadatacopies 0
+pvcreate "$dev9"
+
+# orphan with mda
+pvcreate "$dev11"
+# orphan without mda
+pvcreate "$dev14" --metadatacopies 0
+
+# non-pv devs
+# dev12
+# dev13
+
+vgcreate $vg1 "$dev10"
+vgcreate $vg2 "$dev2" "$dev3" "$dev4" "$dev5"
+vgcreate $vg3 "$dev6" "$dev7" "$dev8" "$dev9"
+
+pvchange --addtag V2D3 "$dev3"
+pvchange --addtag V2D4 "$dev4"
+pvchange --addtag V2D45 "$dev4"
+pvchange --addtag V2D5 "$dev5"
+pvchange --addtag V2D45 "$dev5"
+
+pvchange --addtag V3 "$dev6" "$dev7" "$dev8" "$dev9"
+pvchange --addtag V3D8 "$dev8"
+pvchange --addtag V3D9 "$dev9"
+
+
+#
+# vgreduce including pvs without mdas
+#
+
+# remove pv without mda
+vgreduce $vg2 "$dev2"
+not check pv_field "$dev2" vg_name $vg2
+check pv_field "$dev3" vg_name $vg2
+check pv_field "$dev4" vg_name $vg2
+check pv_field "$dev5" vg_name $vg2
+check pv_field "$dev6" vg_name $vg3
+check pv_field "$dev7" vg_name $vg3
+check pv_field "$dev8" vg_name $vg3
+check pv_field "$dev9" vg_name $vg3
+# reset
+vgextend $vg2 "$dev2"
+
+# remove pv with mda and pv without mda
+vgreduce $vg2 "$dev2" "$dev3"
+not check pv_field "$dev2" vg_name $vg2
+not check pv_field "$dev3" vg_name $vg2
+check pv_field "$dev4" vg_name $vg2
+check pv_field "$dev5" vg_name $vg2
+check pv_field "$dev6" vg_name $vg3
+check pv_field "$dev7" vg_name $vg3
+check pv_field "$dev8" vg_name $vg3
+check pv_field "$dev9" vg_name $vg3
+# reset
+vgextend $vg2 "$dev2"
+vgextend $vg2 "$dev3"
+
+# fail to remove only pv with mda
+not vgreduce $vg3 "$dev9"
+check pv_field "$dev6" vg_name $vg3
+check pv_field "$dev7" vg_name $vg3
+check pv_field "$dev8" vg_name $vg3
+check pv_field "$dev9" vg_name $vg3
+check pv_field "$dev2" vg_name $vg2
+check pv_field "$dev3" vg_name $vg2
+check pv_field "$dev4" vg_name $vg2
+check pv_field "$dev5" vg_name $vg2
+
+# remove by tag a pv without mda
+vgreduce $vg3 @V3D8
+check pv_field "$dev6" vg_name $vg3
+check pv_field "$dev7" vg_name $vg3
+not check pv_field "$dev8" vg_name $vg3
+check pv_field "$dev9" vg_name $vg3
+check pv_field "$dev2" vg_name $vg2
+check pv_field "$dev3" vg_name $vg2
+check pv_field "$dev4" vg_name $vg2
+check pv_field "$dev5" vg_name $vg2
+# reset
+vgextend $vg3 "$dev8"
+
+vgremove $vg1 $vg2 $vg3
diff --git a/test/shell/profiles-thin.sh b/test/shell/profiles-thin.sh
index 3519985f0..5940cc6d2 100644
--- a/test/shell/profiles-thin.sh
+++ b/test/shell/profiles-thin.sh
@@ -13,8 +13,12 @@
# test thin profile functionality
#
+export LVM_TEST_THIN_REPAIR_CMD=${LVM_TEST_THIN_REPAIR_CMD-/bin/false}
+
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
DEV_SIZE=32
# check we have thinp support compiled in
@@ -24,7 +28,14 @@ aux prepare_profiles "thin-performance"
# Create scsi debug dev with sector size of 4096B and 1MiB optimal_io_size
aux prepare_scsi_debug_dev $DEV_SIZE sector_size=4096 opt_blks=256 || skip
+EXPECT=1048576
+check sysfs "$(< SCSI_DEBUG_DEV)" queue/optimal_io_size "$EXPECT"
aux prepare_pvs 1 $DEV_SIZE
+
+# Check we are not running on buggy kernel (broken lcm())
+# If so, turn chunk_size test into 'should'
+check sysfs "$dev1" queue/optimal_io_size "$EXPECT" || SHOULD=should
+
vgcreate $vg "$dev1"
# By default, "generic" policy is used to
@@ -43,11 +54,16 @@ check lv_field $vg/pool_generic zero "zero"
# under "thin-perforance" profile.
lvcreate --profile thin-performance -L8m -T $vg/pool_performance
check lv_field $vg/pool_performance profile "thin-performance"
-check lv_field $vg/pool_performance chunk_size 1.00m
+$SHOULD check lv_field $vg/pool_performance chunk_size 1.00m
check lv_field $vg/pool_performance zero ""
vgremove -ff $vg
+if test -d "$DM_DEV_DIR/$vg" ; then
+ should not echo "Udev has left \"$DM_DEV_DIR/$vg\"!"
+ rm -rf "$DM_DEV_DIR/$vg"
+fi
+
# The profile must be also applied if using the profile
# for the whole VG - any LVs inherit this profile then.
vgcreate --profile thin-performance $vg "$dev1"
@@ -55,5 +71,5 @@ lvcreate -L8m -T $vg/pool_performance_inherited
# ...the LV does not have the profile attached, but VG does!
check vg_field $vg profile "thin-performance"
check lv_field $vg/pool_performance_inherited profile ""
-check lv_field $vg/pool_performance_inherited chunk_size 1.00m
+$SHOULD check lv_field $vg/pool_performance_inherited chunk_size 1.00m
check lv_field $vg/pool_performance_inherited zero ""
diff --git a/test/shell/profiles.sh b/test/shell/profiles.sh
index e054c8887..81871296d 100644
--- a/test/shell/profiles.sh
+++ b/test/shell/profiles.sh
@@ -15,6 +15,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
MSG_FAILED_TO_APPLY_CMD_PROFILE="Failed to apply command profile"
MSG_IGNORING_INVALID_CMD_PROFILE="Ignoring invalid command profile"
MSG_FAILED_TO_APPLY_MDA_PROFILE="Failed to apply metadata profile"
diff --git a/test/shell/pv-duplicate-uuid.sh b/test/shell/pv-duplicate-uuid.sh
index 1763142a9..7b8b42ecf 100644
--- a/test/shell/pv-duplicate-uuid.sh
+++ b/test/shell/pv-duplicate-uuid.sh
@@ -12,6 +12,8 @@
# Test 'Found duplicate' is shown
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_devs 3
pvcreate "$dev1"
@@ -19,7 +21,7 @@ UUID1=$(get pv_field "$dev1" uuid)
pvcreate --config "devices{filter=[\"a|$dev2|\",\"r|.*|\"]}" -u "$UUID1" --norestorefile "$dev2"
pvcreate --config "devices{filter=[\"a|$dev3|\",\"r|.*|\"]}" -u "$UUID1" --norestorefile "$dev3"
-pvs -o+uuid |& tee out
+pvs -o+uuid 2>&1 | tee out
COUNT=$(should grep --count "Found duplicate" out)
# FIXME lvmetad is not able to serve properly this case
diff --git a/test/shell/pv-duplicate.sh b/test/shell/pv-duplicate.sh
index ede960022..ecd61256f 100644
--- a/test/shell/pv-duplicate.sh
+++ b/test/shell/pv-duplicate.sh
@@ -1,5 +1,5 @@
#!/bin/sh
-# Copyright (C) 2011 Red Hat, Inc. All rights reserved.
+# Copyright (C) 2011-2015 Red Hat, Inc. All rights reserved.
#
# This copyrighted material is made available to anyone wishing to use,
# modify, copy, or redistribute it subject to the terms and conditions
@@ -13,8 +13,14 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_devs 3
+pvcreate "$dev1"
+
+dd if="$dev1" of=backup_dev1 bs=256K count=1
+
vgcreate --metadatasize 128k $vg1 "$dev1"
# copy mda
@@ -22,4 +28,12 @@ dd if="$dev1" of="$dev2" bs=256K count=1
dd if="$dev1" of="$dev3" bs=256K count=1
pvs "$dev3" -o pv_uuid
+
vgs $vg1
+
+dd if=backup_dev1 of="$dev3" bs=256K count=1
+pvs
+#-vvvv
+# TODO: Surely needs more inspecition about correct
+# behavior for such case
+# vgs $vg1
diff --git a/test/shell/pv-min-size.sh b/test/shell/pv-min-size.sh
index d29e94c9a..eb62c73a6 100644
--- a/test/shell/pv-min-size.sh
+++ b/test/shell/pv-min-size.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
# use small default size - 512KB
aux lvmconf 'devices/pv_min_size = 512'
diff --git a/test/shell/pv-range-overflow.sh b/test/shell/pv-range-overflow.sh
index 4caee02a5..f6b96223d 100644
--- a/test/shell/pv-range-overflow.sh
+++ b/test/shell/pv-range-overflow.sh
@@ -15,7 +15,7 @@
aux prepare_vg 2
-lvcreate -L4 -n"$lv" $vg
+lvcreate -L4 -n $lv $vg
# Test for the bogus diagnostic reported in BZ 284771
# http://bugzilla.redhat.com/284771.
diff --git a/test/shell/pvchange-usage.sh b/test/shell/pvchange-usage.sh
index e5695b6a0..8cffb0689 100644
--- a/test/shell/pvchange-usage.sh
+++ b/test/shell/pvchange-usage.sh
@@ -13,6 +13,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
check_changed_uuid_() {
test "$1" != "$(get pv_field "$2" uuid)" || die "UUID has not changed!"
}
diff --git a/test/shell/pvcreate-bootloaderarea.sh b/test/shell/pvcreate-bootloaderarea.sh
index 53a983973..68c08822f 100644
--- a/test/shell/pvcreate-bootloaderarea.sh
+++ b/test/shell/pvcreate-bootloaderarea.sh
@@ -13,9 +13,10 @@ test_description='Test pvcreate bootloader area support'
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_devs 1
-aux lvmconf 'global/suffix=0'
-aux lvmconf 'global/units="b"'
+aux lvmconf 'global/suffix=0' 'global/units="b"'
#COMM 'pvcreate sets/aligns bootloader area correctly'
pvcreate --dataalignment 262144b --bootloaderareasize 614400b "$dev1"
diff --git a/test/shell/pvcreate-ff.sh b/test/shell/pvcreate-ff.sh
index 1cbf0ff1b..4ac5a611e 100644
--- a/test/shell/pvcreate-ff.sh
+++ b/test/shell/pvcreate-ff.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_devs 2
pvcreate "$dev1"
vgcreate foo "$dev1"
diff --git a/test/shell/pvcreate-metadata0.sh b/test/shell/pvcreate-metadata0.sh
index 263bd88f7..48ebc83a2 100644
--- a/test/shell/pvcreate-metadata0.sh
+++ b/test/shell/pvcreate-metadata0.sh
@@ -17,6 +17,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_devs 2 128
#lv_snap=$lv2
diff --git a/test/shell/pvcreate-operation-md.sh b/test/shell/pvcreate-operation-md.sh
index f959c0bf0..db532168b 100644
--- a/test/shell/pvcreate-operation-md.sh
+++ b/test/shell/pvcreate-operation-md.sh
@@ -1,5 +1,5 @@
#!/bin/sh
-# Copyright (C) 2009 Red Hat, Inc. All rights reserved.
+# Copyright (C) 2009-2015 Red Hat, Inc. All rights reserved.
#
# This copyrighted material is made available to anyone wishing to use,
# modify, copy, or redistribute it subject to the terms and conditions
@@ -11,12 +11,10 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
# skip this test if mdadm or sfdisk (or others) aren't available
-which mdadm || skip
which sfdisk || skip
-which perl || skip
-which awk || skip
-which cut || skip
test -f /proc/mdstat && grep -q raid0 /proc/mdstat || \
modprobe raid0 || skip
@@ -27,132 +25,97 @@ aux extend_filter "a|/dev/md.*|"
aux prepare_devs 2
-# Have MD use a non-standard name to avoid colliding with an existing MD device
-# - mdadm >= 3.0 requires that non-standard device names be in /dev/md/
-# - newer mdadm _completely_ defers to udev to create the associated device node
-mdadm_maj=$(mdadm --version 2>&1 | perl -pi -e 's|.* v(\d+).*|\1|')
-[ $mdadm_maj -ge 3 ] && \
- mddev=/dev/md/md_lvm_test0 || \
- mddev=/dev/md_lvm_test0
-
-cleanup_md() {
- # sleeps offer hack to defeat: 'md: md127 still in use'
- # see: https://bugzilla.redhat.com/show_bug.cgi?id=509908#c25
- aux udev_wait
- mdadm --stop "$mddev" || true
- aux udev_wait
- if [ -b "$mddev" ]; then
- # mdadm doesn't always cleanup the device node
- sleep 2
- rm -f "$mddev"
- fi
-}
-
-cleanup_md_and_teardown() {
- cleanup_md
- aux teardown
-}
-
# create 2 disk MD raid0 array (stripe_width=128K)
-test -b "$mddev" && skip
-mdadm --create --metadata=1.0 "$mddev" --auto=md --level 0 --raid-devices=2 --chunk 64 "$dev1" "$dev2"
-trap 'cleanup_md_and_teardown' EXIT # cleanup this MD device at the end of the test
-test -b "$mddev" || skip
-cp -LR "$mddev" "$DM_DEV_DIR" # so that LVM/DM can see the device
-lvmdev="$DM_DEV_DIR/md_lvm_test0"
+aux prepare_md_dev 0 64 2 "$dev1" "$dev2"
+
+mddev=$(< MD_DEV)
+pvdev=$(< MD_DEV_PV)
# Test alignment of PV on MD without any MD-aware or topology-aware detection
# - should treat $mddev just like any other block device
-pv_align="1.00m"
pvcreate --metadatasize 128k \
--config 'devices {md_chunk_alignment=0 data_alignment_detection=0 data_alignment_offset_detection=0}' \
- "$lvmdev"
+ "$pvdev"
-check pv_field "$lvmdev" pe_start $pv_align
+check pv_field "$pvdev" pe_start "1.00m"
# Test md_chunk_alignment independent of topology-aware detection
-pv_align="1.00m"
pvcreate --metadatasize 128k \
--config 'devices {data_alignment_detection=0 data_alignment_offset_detection=0}' \
- "$lvmdev"
-check pv_field "$lvmdev" pe_start $pv_align
-
+ "$pvdev"
+check pv_field "$pvdev" pe_start "1.00m"
# Test newer topology-aware alignment detection
# - first added to 2.6.31 but not "reliable" until 2.6.33
-if kernel_at_least 2 6 33 ; then
- pv_align="1.00m"
+if aux kernel_at_least 2 6 33 ; then
# optimal_io_size=131072, minimum_io_size=65536
pvcreate --metadatasize 128k \
- --config 'devices { md_chunk_alignment=0 }' "$lvmdev"
- check pv_field "$lvmdev" pe_start $pv_align
+ --config 'devices { md_chunk_alignment=0 }' "$pvdev"
+ check pv_field "$pvdev" pe_start "1.00m"
+ pvremove "$pvdev"
fi
# partition MD array directly, depends on blkext in Linux >= 2.6.28
-if kernel_at_least 2 6 28 ; then
+if aux kernel_at_least 2 6 28 ; then
# create one partition
sfdisk "$mddev" <<EOF
,,83
EOF
+ # Wait till all partition links in udev are created
+ aux udev_wait
+
+ # Skip test if udev rule has not created proper links for partitions
+ test -b "${mddev}p1" || { ls -laR /dev ; skip "Missing partition link" ; }
+
pvscan
# make sure partition on MD is _not_ removed
# - tests partition -> parent lookup via sysfs paths
- not pvcreate --metadatasize 128k "$lvmdev"
+ not pvcreate --metadatasize 128k "$pvdev"
# verify alignment_offset is accounted for in pe_start
# - topology infrastructure is available in Linux >= 2.6.31
# - also tests partition -> parent lookup via sysfs paths
- # Oh joy: need to lookup /sys/block/md127 rather than /sys/block/md_lvm_test0
- mddev_maj_min=$(ls -lL "$mddev" | awk '{ print $5 $6 }' | perl -pi -e 's|,|:|')
- mddev_p_sysfs_name=$(echo /sys/dev/block/${mddev_maj_min}/*p1)
- base_mddev_p=`basename $mddev_p_sysfs_name`
- mddev_p=/dev/${base_mddev_p}
-
- # in case the system is running without devtmpfs /dev
- # wait here for created device node on tmpfs
- aux udev_wait "$mddev_p"
- test -b "$mddev_p" || skip
- cp -LR "$mddev_p" "$DM_DEV_DIR"
- lvmdev_p="$DM_DEV_DIR/$base_mddev_p"
-
# Checking for 'alignment_offset' in sysfs implies Linux >= 2.6.31
# but reliable alignment_offset support requires kernel.org Linux >= 2.6.33
- sysfs_alignment_offset=/sys/dev/block/${mddev_maj_min}/${base_mddev_p}/alignment_offset
- [ -f $sysfs_alignment_offset ] && kernel_at_least 2 6 33 && \
- alignment_offset=`cat $sysfs_alignment_offset` || \
- alignment_offset=0
-
- if [ $alignment_offset -gt 0 ]; then
- # default alignment is 1M, add alignment_offset
- pv_align=$((1048576+$alignment_offset))B
- pvcreate --metadatasize 128k "$lvmdev_p"
- check pv_field "$lvmdev_p" pe_start $pv_align --units b
- pvremove "$lvmdev_p"
+ if aux kernel_at_least 2 6 33 ; then
+ # in case the system is running without devtmpfs /dev
+ # wait here for created device node on tmpfs
+ test "$DM_DEV_DIR" != "/dev" && cp -LR "${mddev}p1" "$DM_DEV_DIR"
+
+ pvcreate --metadatasize 128k "${pvdev}p1"
+
+ maj=$(($(stat -L --printf=0x%t "${mddev}p1")))
+ min=$(($(stat -L --printf=0x%T "${mddev}p1")))
+
+ sysfs_alignment_offset="/sys/dev/block/$maj:$min/alignment_offset"
+ [ -f "$sysfs_alignment_offset" ] && \
+ alignment_offset=$(< "$sysfs_alignment_offset") || \
+ alignment_offset=0
+
+ # default alignment is 1M, add alignment_offset
+ pv_align=$((1048576+$alignment_offset))
+ check pv_field "${pvdev}p1" pe_start $pv_align --units b --nosuffix
+
+ pvremove "${pvdev}p1"
+ test "$DM_DEV_DIR" != "/dev" && rm -f "$DM_DEV_DIR/${mddev}p1"
fi
fi
# Test newer topology-aware alignment detection w/ --dataalignment override
-if kernel_at_least 2 6 33 ; then
- cleanup_md
- pvcreate -f "$dev1"
- pvcreate -f "$dev2"
-
- # create 2 disk MD raid0 array (stripe_width=2M)
- test -b "$mddev" && skip
- mdadm --create --metadata=1.0 "$mddev" --auto=md --level 0 --raid-devices=2 --chunk 1024 "$dev1" "$dev2"
- test -b "$mddev" || skip
+if aux kernel_at_least 2 6 33 ; then
+ # make sure we're clean for another test
+ dd if=/dev/zero of="$mddev" bs=512 count=1
+ aux prepare_md_dev 0 1024 2 "$dev1" "$dev2"
+ pvdev=$(< MD_DEV_PV)
# optimal_io_size=2097152, minimum_io_size=1048576
- pv_align="2.00m"
pvcreate --metadatasize 128k \
- --config 'devices { md_chunk_alignment=0 }' "$lvmdev"
- pvscan # Something is seriously broken.
- check pv_field "$lvmdev" pe_start $pv_align
+ --config 'devices { md_chunk_alignment=0 }' "$pvdev"
+ check pv_field "$pvdev" pe_start "2.00m"
# now verify pe_start alignment override using --dataalignment
- pv_align="192.00k"
pvcreate --dataalignment 64k --metadatasize 128k \
- --config 'devices { md_chunk_alignment=0 }' "$lvmdev"
- check pv_field "$lvmdev" pe_start $pv_align
+ --config 'devices { md_chunk_alignment=0 }' "$pvdev"
+ check pv_field "$pvdev" pe_start "192.00k"
fi
diff --git a/test/shell/pvcreate-operation.sh b/test/shell/pvcreate-operation.sh
index e72f2b923..6593877a9 100644
--- a/test/shell/pvcreate-operation.sh
+++ b/test/shell/pvcreate-operation.sh
@@ -10,6 +10,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux lvmconf 'devices/md_component_detection = 1'
aux prepare_devs 4
diff --git a/test/shell/pvcreate-restore.sh b/test/shell/pvcreate-restore.sh
new file mode 100644
index 000000000..733139611
--- /dev/null
+++ b/test/shell/pvcreate-restore.sh
@@ -0,0 +1,37 @@
+#!/bin/sh
+# Copyright (C) 2015 Red Hat, Inc. All rights reserved.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU General Public License v.2.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+. lib/inittest
+
+aux prepare_vg 4
+
+lvcreate --type snapshot -s -L10 -n $lv1 $vg --virtualsize 2T
+lvcreate --type snapshot -s -L10 -n $lv2 $vg --virtualsize 4T
+lvcreate --type snapshot -s -L10 -n $lv3 $vg --virtualsize 4194300M
+
+aux extend_filter_LVMTEST
+
+vgcreate $vg1 "$DM_DEV_DIR/$vg/$lv2"
+
+vgcfgbackup -f vgback $vg1
+
+UUID=$(get pv_field "$DM_DEV_DIR/$vg/$lv2" uuid)
+pvremove -ff -y "$DM_DEV_DIR/$vg/$lv2"
+
+# too small to fit
+fail pvcreate --restorefile vgback --uuid $UUID "$DM_DEV_DIR/$vg/$lv1"
+
+# still does not fit
+fail pvcreate --restorefile vgback --uuid $UUID "$DM_DEV_DIR/$vg/$lv3"
+
+pvcreate --restorefile vgback --uuid $UUID "$DM_DEV_DIR/$vg/$lv2"
+
+vgremove -ff $vg
diff --git a/test/shell/pvcreate-usage.sh b/test/shell/pvcreate-usage.sh
index 48d2cebe9..ed3b4f461 100644
--- a/test/shell/pvcreate-usage.sh
+++ b/test/shell/pvcreate-usage.sh
@@ -14,6 +14,8 @@ PAGESIZE=$(getconf PAGESIZE)
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_devs 4
#COMM 'pvcreate rejects negative setphysicalvolumesize'
@@ -131,18 +133,30 @@ check pv_field "$dev1" pv_mda_count 2
#COMM 'pv with LVM1 compatible data alignment can be convereted'
#compatible == LVM1_PE_ALIGN == 64k
+if test ! -e LOCAL_LVMETAD; then
pvcreate --dataalignment 256k "$dev1"
vgcreate -s 1m $vg "$dev1"
vgconvert -M1 $vg
vgconvert -M2 $vg
check pv_field "$dev1" pe_start 256.00k
vgremove $vg
+fi
#COMM 'pv with LVM1 incompatible data alignment cannot be convereted'
+if test ! -e LOCAL_LVMETAD; then
pvcreate --dataalignment 10k "$dev1"
vgcreate -s 1m $vg "$dev1"
not vgconvert -M1 $vg
vgremove $vg
+fi
+
+#COMM 'vgconvert -M is disallowed with lvmetad'
+if test -e LOCAL_LVMETAD; then
+pvcreate "$dev1"
+vgcreate $vg "$dev1"
+not vgconvert -M1 $vg
+vgremove $vg
+fi
#COMM 'vgcfgrestore allows pe_start=0'
#basically it produces nonsense, but it tests vgcfgrestore,
diff --git a/test/shell/pvmove-abort-all.sh b/test/shell/pvmove-abort-all.sh
new file mode 100644
index 000000000..2867163ea
--- /dev/null
+++ b/test/shell/pvmove-abort-all.sh
@@ -0,0 +1,81 @@
+#!/bin/sh
+# Copyright (C) 2015 Red Hat, Inc. All rights reserved.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU General Public License v.2.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+# Check pvmove --abort behaviour for all VGs and PVs
+
+# Ignore known failure when clvmd is processing sequences of commands for two VGs in parallel - 2015/07/17 agk
+# CLVMD: ioctl/libdm-iface.c:1940 Internal error: Performing unsafe table load while 3 device(s) are known to be suspended: (253:19)
+export DM_ABORT_ON_INTERNAL_ERRORS=0
+
+. lib/inittest
+
+aux prepare_pvs 6 60
+
+vgcreate -s 128k $vg "$dev1" "$dev2"
+pvcreate --metadatacopies 0 "$dev3"
+vgextend $vg "$dev3"
+vgcreate -s 128k $vg1 "$dev4" "$dev5"
+pvcreate --metadatacopies 0 "$dev6"
+vgextend $vg1 "$dev6"
+
+# Slowdown writes
+aux delay_dev "$dev3" 0 800 $(get first_extent_sector "$dev3"):
+aux delay_dev "$dev6" 0 800 $(get first_extent_sector "$dev6"):
+
+for mode in "--atomic" "" ;
+do
+for backgroundarg in "-b" "" ;
+do
+
+# Create multisegment LV
+lvcreate -an -Zn -l30 -n $lv1 $vg "$dev1"
+lvcreate -an -Zn -l30 -n $lv2 $vg "$dev2"
+lvcreate -an -Zn -l30 -n $lv1 $vg1 "$dev4"
+lvextend -l+30 -n $vg1/$lv1 "$dev5"
+
+cmd1=(pvmove -i1 $backgroundarg $mode "$dev1" "$dev3")
+cmd2=(pvmove -i1 $backgroundarg $mode "$dev2" "$dev3")
+cmd3=(pvmove -i1 $backgroundarg $mode -n $vg1/$lv1 "$dev4" "$dev6")
+
+if test -z "$backgroundarg" ; then
+ "${cmd1[@]}" &
+ aux wait_pvmove_lv_ready "$vg-pvmove0"
+ "${cmd2[@]}" &
+ aux wait_pvmove_lv_ready "$vg-pvmove1"
+ "${cmd3[@]}" &
+ aux wait_pvmove_lv_ready "$vg1-pvmove0"
+ lvs -a $vg $vg1
+else
+ LVM_TEST_TAG="kill_me_$PREFIX" "${cmd1[@]}"
+ LVM_TEST_TAG="kill_me_$PREFIX" "${cmd2[@]}"
+ LVM_TEST_TAG="kill_me_$PREFIX" "${cmd3[@]}"
+fi
+
+# test removal of all pvmove LVs
+pvmove --abort
+
+# check if proper pvmove was canceled
+get lv_field $vg name -a | tee out
+not grep "^\[pvmove" out
+get lv_field $vg1 name -a | tee out
+not grep "^\[pvmove" out
+
+lvremove -ff $vg $vg1
+
+wait
+aux kill_tagged_processes
+done
+done
+
+# Restore delayed device back
+aux enable_dev "$dev3" "$dev6"
+
+vgremove -ff $vg $vg1
diff --git a/test/shell/pvmove-abort.sh b/test/shell/pvmove-abort.sh
new file mode 100644
index 000000000..6b807acd4
--- /dev/null
+++ b/test/shell/pvmove-abort.sh
@@ -0,0 +1,68 @@
+#!/bin/sh
+# Copyright (C) 2015 Red Hat, Inc. All rights reserved.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU General Public License v.2.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+# Check pvmove --abort behaviour when specific device is requested
+
+. lib/inittest
+
+aux prepare_pvs 3 60
+
+vgcreate -s 128k $vg "$dev1" "$dev2"
+pvcreate --metadatacopies 0 "$dev3"
+vgextend $vg "$dev3"
+
+# Slowdown read/writes
+aux delay_dev "$dev3" 0 800 $(get first_extent_sector "$dev3"):
+
+for mode in "--atomic" "" ;
+do
+for backgroundarg in "-b" "" ;
+do
+
+# Create multisegment LV
+lvcreate -an -Zn -l30 -n $lv1 $vg "$dev1"
+lvcreate -an -Zn -l30 -n $lv2 $vg "$dev2"
+
+cmd1=(pvmove -i1 $backgroundarg $mode "$dev1" "$dev3")
+cmd2=(pvmove -i1 $backgroundarg $mode "$dev2" "$dev3")
+
+if test -z "$backgroundarg" ; then
+ "${cmd1[@]}" &
+ aux wait_pvmove_lv_ready "$vg-pvmove0"
+ "${cmd2[@]}" &
+ aux wait_pvmove_lv_ready "$vg-pvmove1"
+else
+ LVM_TEST_TAG="kill_me_$PREFIX" "${cmd1[@]}"
+ LVM_TEST_TAG="kill_me_$PREFIX" "${cmd2[@]}"
+fi
+
+# remove specific device
+pvmove --abort "$dev1"
+
+# check if proper pvmove was canceled
+get lv_field $vg name -a | tee out
+not grep "^\[pvmove0\]" out
+grep "^\[pvmove1\]" out
+
+# remove any remaining pvmoves in progress
+pvmove --abort
+
+lvremove -ff $vg
+
+wait
+aux kill_tagged_processes
+done
+done
+
+# Restore delayed device back
+aux enable_dev "$dev3"
+
+vgremove -ff $vg
diff --git a/test/shell/pvmove-all-segtypes.sh b/test/shell/pvmove-all-segtypes.sh
index 457054bc0..ce95059c1 100644
--- a/test/shell/pvmove-all-segtypes.sh
+++ b/test/shell/pvmove-all-segtypes.sh
@@ -58,7 +58,7 @@ lvremove -ff $vg
if test -e LOCAL_CLVMD ; then
#FIXME these tests currently fail end require cmirrord
-echo "TEST WARNING, FIXME!!! pvmove in clustered VG not fully supported!"
+echo "$(should false)FIXME!!! pvmove in clustered VG not fully supported!"
else
# Testing pvmove of mirror LV
diff --git a/test/shell/pvmove-background.sh b/test/shell/pvmove-background.sh
index 590c77b60..7cb21d5b0 100644
--- a/test/shell/pvmove-background.sh
+++ b/test/shell/pvmove-background.sh
@@ -20,10 +20,10 @@ do
lvcreate -aey -l1 -n $lv1 $vg "$dev1"
lvs -o +devices | grep "$dev1"
-pvmove $mode -i 1 -b "$dev1" "$dev2"
+LVM_TEST_TAG="kill_me_$PREFIX" pvmove $mode -i 1 -b "$dev1" "$dev2"
sleep 5 # arbitrary...
lvs -o +devices | not grep "pvmove"
lvs -o +devices | grep "$dev2"
lvremove -ff $vg
-done \ No newline at end of file
+done
diff --git a/test/shell/pvmove-basic.sh b/test/shell/pvmove-basic.sh
index c326dc3d1..42b004d9f 100644
--- a/test/shell/pvmove-basic.sh
+++ b/test/shell/pvmove-basic.sh
@@ -12,6 +12,9 @@
test_description="ensure that pvmove works with basic options"
+# disable lvmetad logging as it bogs down test systems
+export LVM_TEST_LVMETAD_DEBUG_OPTS=${LVM_TEST_LVMETAD_DEBUG_OPTS-}
+
. lib/inittest
which md5sum || skip
@@ -66,8 +69,8 @@ check_and_cleanup_lvs_() {
check dev_md5sum $vg $lv1
check dev_md5sum $vg $lv2
check dev_md5sum $vg $lv3
- get lv_field $vg name >out
- not grep ^pvmove out
+ get lv_field $vg name -a >out
+ not grep "^\[pvmove" out
vgchange -an $vg
lvremove -ff $vg
(dm_table | not grep $vg) || \
@@ -331,7 +334,7 @@ check_and_cleanup_lvs_
#COMM "pvmove abort"
restore_lvs_
-pvmove $mode -i100 -b "$dev1" "$dev3"
+LVM_TEST_TAG="kill_me_$PREFIX" pvmove $mode -i100 -b "$dev1" "$dev3"
pvmove --abort
check_and_cleanup_lvs_
@@ -355,4 +358,4 @@ else
fi
lvremove -ff $vg
-done \ No newline at end of file
+done
diff --git a/test/shell/pvmove-restart.sh b/test/shell/pvmove-restart.sh
index effa43daf..4f9398338 100644
--- a/test/shell/pvmove-restart.sh
+++ b/test/shell/pvmove-restart.sh
@@ -1,5 +1,5 @@
#!/bin/sh
-# Copyright (C) 2013 Red Hat, Inc. All rights reserved.
+# Copyright (C) 2013-2015 Red Hat, Inc. All rights reserved.
#
# This copyrighted material is made available to anyone wishing to use,
# modify, copy, or redistribute it subject to the terms and conditions
@@ -15,7 +15,13 @@
aux prepare_pvs 3 60
-vgcreate -s 128k $vg "$dev1" "$dev2" "$dev3"
+vgcreate -s 128k $vg "$dev1" "$dev2"
+pvcreate --metadatacopies 0 "$dev3"
+vgextend $vg "$dev3"
+
+# Slowdown writes
+# (FIXME: generates interesting race when not used)
+aux delay_dev "$dev3" 0 800 $(get first_extent_sector "$dev3"):
for mode in "--atomic" ""
do
@@ -26,22 +32,32 @@ lvextend -l+10 $vg/$lv1 "$dev2"
lvextend -l+5 $vg/$lv1 "$dev1"
lvextend -l+10 $vg/$lv1 "$dev2"
-# Slowdown writes
-aux delay_dev "$dev3" 0 100
-
-lvs -o+devices $vg
-
-pvmove -i0 -n $vg/$lv1 "$dev1" "$dev3" &
+pvmove -i10 -n $vg/$lv1 "$dev1" "$dev3" $mode &
PVMOVE=$!
# Let's wait a bit till pvmove starts and kill it
-sleep 1
+aux wait_pvmove_lv_ready "$vg-pvmove0"
kill -9 $PVMOVE
+
+if test -e LOCAL_LVMPOLLD; then
+ aux prepare_lvmpolld
+fi
+
wait
# Simulate reboot - forcibly remove related devices
-dmsetup table
-dmsetup remove $vg-$lv1
-dmsetup remove "$DM_DEV_DIR/mapper/$vg-pvmove0"
+
+# First take down $lv1, then its pvmove0
+j=0
+for i in $lv1 pvmove0 pvmove0_mimage_0 pvmove0_mimage_1 ; do
+ while dmsetup status "$vg-$i"; do
+ dmsetup remove "$vg-$i" || {
+ j=$(($j + 1))
+ test $j -le 100 || die "Cannot take down devices."
+ sleep .1;
+ }
+ done
+done
+dmsetup table | grep $PREFIX
# Check we really have pvmove volume
check lv_attr_bit type $vg/pvmove0 "p"
@@ -57,35 +73,32 @@ if test -e LOCAL_CLVMD ; then
kill $(< LOCAL_CLVMD)
for i in $(seq 1 100) ; do
test $i -eq 100 && die "Shutdown of clvmd is too slow."
- test -e "$CLVMD_PIDFILE" || break
+ pgrep clvmd || break
sleep .1
done # wait for the pid removal
aux prepare_clvmd
fi
-if test -e LOCAL_LVMETAD ; then
- # Restart lvmetad
- kill $(< LOCAL_LVMETAD)
- aux prepare_lvmetad
-fi
+aux notify_lvmetad "$dev1" "$dev2" "$dev3"
# Only PVs should be left in table...
dmsetup table
# Restart pvmove
# use exclusive activation to have usable pvmove without cmirrord
-vgchange -aey $vg
-#sleep 2
-#pvmove
+LVM_TEST_TAG="kill_me_$PREFIX" vgchange --config 'activation{polling_interval=10}' -aey $vg
+aux wait_pvmove_lv_ready "$vg-pvmove0"
+dmsetup table
pvmove --abort
-# Restore delayed device back
-aux delay_dev "$dev3"
-
lvs -a -o+devices $vg
lvremove -ff $vg
+aux kill_tagged_processes
done
+# Restore delayed device back
+aux delay_dev "$dev3"
+
vgremove -ff $vg
diff --git a/test/shell/pvmove-resume-1.sh b/test/shell/pvmove-resume-1.sh
new file mode 100644
index 000000000..0070826f7
--- /dev/null
+++ b/test/shell/pvmove-resume-1.sh
@@ -0,0 +1,259 @@
+#!/bin/sh
+# Copyright (C) 2015 Red Hat, Inc. All rights reserved.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU General Public License v.2.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+# Check whether all available pvmove resume methods work as expected.
+# lvchange is able to resume pvmoves in progress.
+
+# 2 pvmove LVs in 2 VGs (1 per VG)
+
+. lib/inittest
+
+aux prepare_pvs 4 30
+
+vgcreate -s 128k $vg "$dev1"
+vgcreate -s 128k $vg1 "$dev2"
+pvcreate --metadatacopies 0 "$dev3"
+pvcreate --metadatacopies 0 "$dev4"
+vgextend $vg "$dev3"
+vgextend $vg1 "$dev4"
+
+# $1 resume fn
+test_pvmove_resume() {
+ lvcreate -an -Zn -l30 -n $lv1 $vg
+ lvcreate -an -Zn -l30 -n $lv1 $vg1
+
+ aux delay_dev "$dev3" 0 1000 $(get first_extent_sector "$dev3"):
+ aux delay_dev "$dev4" 0 1000 $(get first_extent_sector "$dev4"):
+
+ pvmove -i5 "$dev1" &
+ PVMOVE=$!
+ aux wait_pvmove_lv_ready "$vg-pvmove0" 300
+ kill -9 $PVMOVE
+
+ pvmove -i5 "$dev2" &
+ PVMOVE=$!
+ aux wait_pvmove_lv_ready "$vg1-pvmove0" 300
+ kill -9 $PVMOVE
+
+ if test -e LOCAL_LVMPOLLD ; then
+ aux prepare_lvmpolld
+ fi
+
+ wait
+
+ while dmsetup status "$vg-$lv1"; do dmsetup remove "$vg-$lv1" || true; done
+ while dmsetup status "$vg1-$lv1"; do dmsetup remove "$vg1-$lv1" || true; done
+ while dmsetup status "$vg-pvmove0"; do dmsetup remove "$vg-pvmove0" || true; done
+ while dmsetup status "$vg1-pvmove0"; do dmsetup remove "$vg1-pvmove0" || true; done
+
+ check lv_attr_bit type $vg/pvmove0 "p"
+ check lv_attr_bit type $vg1/pvmove0 "p"
+
+ if test -e LOCAL_CLVMD ; then
+		# give up all clvmd locks (faster than restarting clvmd)
+		# no deactivation happens, nodes are already removed
+ #vgchange -an $vg
+ # FIXME: However above solution has one big problem
+ # as clvmd starts to abort on internal errors on various
+ # errors, based on the fact pvmove is killed -9
+ # Restart clvmd
+ kill $(< LOCAL_CLVMD)
+ for i in $(seq 1 100) ; do
+ test $i -eq 100 && die "Shutdown of clvmd is too slow."
+ test -e "$CLVMD_PIDFILE" || break
+ sleep .1
+ done # wait for the pid removal
+ aux prepare_clvmd
+ fi
+
+ aux notify_lvmetad "$dev1" "$dev2" "$dev3" "$dev4"
+
+ # call resume function (see below)
+ # with expected number of spawned
+ # bg polling as parameter
+ $1 2
+
+ aux enable_dev "$dev3"
+ aux enable_dev "$dev4"
+
+ i=0
+ while get lv_field $vg name -a | grep "^\[pvmove"; do
+ # wait for 30 secs at max
+ test $i -ge 300 && die "Pvmove is too slow or does not progress."
+ sleep .1
+ i=$((i + 1))
+ done
+ while get lv_field $vg1 name -a | grep "^\[pvmove"; do
+ # wait for 30 secs at max
+ test $i -ge 300 && die "Pvmove is too slow or does not progress."
+ sleep .1
+ i=$((i + 1))
+ done
+
+ aux kill_tagged_processes
+
+ lvremove -ff $vg $vg1
+}
+
+lvchange_single() {
+ LVM_TEST_TAG="kill_me_$PREFIX" lvchange -aey $vg/$lv1
+ LVM_TEST_TAG="kill_me_$PREFIX" lvchange -aey $vg1/$lv1
+
+ if test -e LOCAL_LVMPOLLD; then
+ aux lvmpolld_dump | tee lvmpolld_dump.txt
+ aux check_lvmpolld_init_rq_count 1 "$vg/pvmove0"
+ aux check_lvmpolld_init_rq_count 1 "$vg1/pvmove0"
+ else
+ test $(aux count_processes_with_tag) -eq $1
+ fi
+}
+
+lvchange_all() {
+ LVM_TEST_TAG="kill_me_$PREFIX" lvchange -aey $vg/$lv1 $vg1/$lv1
+
+ if test -e LOCAL_LVMPOLLD; then
+ aux lvmpolld_dump | tee lvmpolld_dump.txt
+ aux check_lvmpolld_init_rq_count 1 "$vg/pvmove0"
+ aux check_lvmpolld_init_rq_count 1 "$vg1/pvmove0"
+ else
+ test $(aux count_processes_with_tag) -eq $1
+ fi
+}
+
+vgchange_single() {
+ LVM_TEST_TAG="kill_me_$PREFIX" vgchange -aey $vg
+ LVM_TEST_TAG="kill_me_$PREFIX" vgchange -aey $vg1
+
+ if test -e LOCAL_LVMPOLLD; then
+ aux lvmpolld_dump | tee lvmpolld_dump.txt
+ aux check_lvmpolld_init_rq_count 1 "$vg/pvmove0"
+ aux check_lvmpolld_init_rq_count 1 "$vg1/pvmove0"
+ else
+ test $(aux count_processes_with_tag) -eq $1
+ fi
+}
+
+vgchange_all() {
+ LVM_TEST_TAG="kill_me_$PREFIX" vgchange -aey $vg $vg1
+
+ if test -e LOCAL_LVMPOLLD; then
+ aux lvmpolld_dump | tee lvmpolld_dump.txt
+ aux check_lvmpolld_init_rq_count 1 "$vg/pvmove0"
+ aux check_lvmpolld_init_rq_count 1 "$vg1/pvmove0"
+ else
+ test $(aux count_processes_with_tag) -eq $1
+ fi
+}
+
+pvmove_fg() {
+ # pvmove resume requires LVs active...
+ LVM_TEST_TAG="kill_me_$PREFIX" vgchange --config 'activation{polling_interval=10}' -aey --poll n $vg $vg1
+
+ # ...also vgchange --poll n must not spawn any bg processes...
+ if test -e LOCAL_LVMPOLLD; then
+ aux lvmpolld_dump | tee lvmpolld_dump.txt
+ aux check_lvmpolld_init_rq_count 0 "$vg/pvmove0"
+ aux check_lvmpolld_init_rq_count 0 "$vg1/pvmove0"
+ else
+ test $(aux count_processes_with_tag) -eq 0
+ fi
+
+ # ...thus finish polling
+ get lv_field $vg name -a | grep "^\[pvmove0\]"
+ get lv_field $vg1 name -a | grep "^\[pvmove0\]"
+
+ # disable delay device
+ # fg pvmove would take ages to complete otherwise
+ aux enable_dev "$dev3"
+ aux enable_dev "$dev4"
+
+ pvmove
+}
+
+pvmove_bg() {
+ # pvmove resume requires LVs active...
+ LVM_TEST_TAG="kill_me_$PREFIX" vgchange --config 'activation{polling_interval=10}' -aey --poll n $vg $vg1
+
+ # ...also vgchange --poll n must not spawn any bg processes...
+ if test -e LOCAL_LVMPOLLD; then
+ aux lvmpolld_dump | tee lvmpolld_dump.txt
+ aux check_lvmpolld_init_rq_count 0 "$vg/pvmove0"
+ aux check_lvmpolld_init_rq_count 0 "$vg1/pvmove0"
+ else
+ test $(aux count_processes_with_tag) -eq 0
+ fi
+
+ # ...thus finish polling
+ get lv_field $vg name -a | grep "^\[pvmove0\]"
+ get lv_field $vg1 name -a | grep "^\[pvmove0\]"
+
+ LVM_TEST_TAG="kill_me_$PREFIX" pvmove -b -i0
+}
+
+pvmove_fg_single() {
+ # pvmove resume requires LVs active...
+ LVM_TEST_TAG="kill_me_$PREFIX" vgchange --config 'activation{polling_interval=10}' -aey --poll n $vg
+
+ # ...also vgchange --poll n must not spawn any bg processes...
+ if test -e LOCAL_LVMPOLLD; then
+ aux lvmpolld_dump | tee lvmpolld_dump.txt
+ aux check_lvmpolld_init_rq_count 0 "$vg/pvmove0"
+ aux check_lvmpolld_init_rq_count 0 "$vg1/pvmove0"
+ else
+ test $(aux count_processes_with_tag) -eq 0
+ fi
+
+ # ...thus finish polling
+ get lv_field $vg name -a | grep "^\[pvmove0\]"
+ get lv_field $vg1 name -a | grep "^\[pvmove0\]"
+
+ # disable delay device
+ # fg pvmove would take ages to complete otherwise
+ aux enable_dev "$dev3"
+ aux enable_dev "$dev4"
+
+ pvmove "$dev1"
+ pvmove "$dev2"
+}
+
+pvmove_bg_single() {
+ # pvmove resume requires LVs active...
+ LVM_TEST_TAG="kill_me_$PREFIX" vgchange --config 'activation{polling_interval=10}' -aey --poll n $vg
+
+ # ...also vgchange --poll n must not spawn any bg processes...
+ if test -e LOCAL_LVMPOLLD; then
+ aux lvmpolld_dump | tee lvmpolld_dump.txt
+ aux check_lvmpolld_init_rq_count 0 "$vg/pvmove0"
+ aux check_lvmpolld_init_rq_count 0 "$vg1/pvmove0"
+ else
+ test $(aux count_processes_with_tag) -eq 0
+ fi
+
+ # ...thus finish polling
+ get lv_field $vg name -a | grep "^\[pvmove0\]"
+ get lv_field $vg1 name -a | grep "^\[pvmove0\]"
+
+ LVM_TEST_TAG="kill_me_$PREFIX" pvmove -b "$dev1"
+ LVM_TEST_TAG="kill_me_$PREFIX" pvmove -b "$dev2"
+}
+
+test -e LOCAL_CLVMD && skip
+
+test_pvmove_resume lvchange_single
+test_pvmove_resume lvchange_all
+test_pvmove_resume vgchange_single
+test_pvmove_resume vgchange_all
+test_pvmove_resume pvmove_fg
+test_pvmove_resume pvmove_fg_single
+test_pvmove_resume pvmove_bg
+test_pvmove_resume pvmove_bg_single
+
+vgremove -ff $vg $vg1
diff --git a/test/shell/pvmove-resume-2.sh b/test/shell/pvmove-resume-2.sh
new file mode 100644
index 000000000..7ae3721a2
--- /dev/null
+++ b/test/shell/pvmove-resume-2.sh
@@ -0,0 +1,202 @@
+#!/bin/sh
+# Copyright (C) 2015 Red Hat, Inc. All rights reserved.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU General Public License v.2.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+# Check whether all available pvmove resume methods work as expected.
+# lvchange is able to resume pvmoves in progress.
+
+# Moving 2 LVs in VG variant
+
+. lib/inittest
+
+aux prepare_pvs 2 30
+
+vgcreate -s 128k $vg "$dev1"
+pvcreate --metadatacopies 0 "$dev2"
+vgextend $vg "$dev2"
+
+test_pvmove_resume() {
+ # 2 LVs on same device
+ lvcreate -an -Zn -l15 -n $lv1 $vg "$dev1"
+ lvcreate -an -Zn -l15 -n $lv2 $vg "$dev1"
+
+ aux delay_dev "$dev2" 0 1000 $(get first_extent_sector "$dev2"):
+
+ pvmove -i5 "$dev1" &
+ PVMOVE=$!
+ aux wait_pvmove_lv_ready "$vg-pvmove0" 300
+ kill -9 $PVMOVE
+
+ if test -e LOCAL_LVMPOLLD ; then
+ aux prepare_lvmpolld
+ fi
+
+ wait
+
+ while dmsetup status "$vg-$lv1"; do dmsetup remove "$vg-$lv1" || true; done
+ while dmsetup status "$vg-$lv2"; do dmsetup remove "$vg-$lv2" || true; done
+ while dmsetup status "$vg-pvmove0"; do dmsetup remove "$vg-pvmove0" || true; done
+
+ check lv_attr_bit type $vg/pvmove0 "p"
+
+ if test -e LOCAL_CLVMD ; then
+		# give up all clvmd locks (faster than restarting clvmd)
+		# no deactivation happens, nodes are already removed
+ #vgchange -an $vg
+ # FIXME: However above solution has one big problem
+ # as clvmd starts to abort on internal errors on various
+ # errors, based on the fact pvmove is killed -9
+ # Restart clvmd
+ kill $(< LOCAL_CLVMD)
+ for i in $(seq 1 100) ; do
+ test $i -eq 100 && die "Shutdown of clvmd is too slow."
+ test -e "$CLVMD_PIDFILE" || break
+ sleep .1
+ done # wait for the pid removal
+ aux prepare_clvmd
+ fi
+
+ aux notify_lvmetad "$dev1" "$dev2"
+
+ # call resume function (see below)
+ # with expected number of spawned
+ # bg polling as parameter
+ $1 1
+
+ aux enable_dev "$dev2"
+
+ i=0
+ while get lv_field $vg name -a | grep "^\[pvmove"; do
+ # wait for 30 secs at max
+ test $i -ge 300 && die "Pvmove is too slow or does not progress."
+ sleep .1
+ i=$((i + 1))
+ done
+
+ aux kill_tagged_processes
+
+ lvremove -ff $vg
+}
+
+lvchange_single() {
+ LVM_TEST_TAG="kill_me_$PREFIX" lvchange -aey $vg/$lv1
+ LVM_TEST_TAG="kill_me_$PREFIX" lvchange -aey $vg/$lv2
+}
+
+lvchange_all() {
+ LVM_TEST_TAG="kill_me_$PREFIX" lvchange -aey $vg/$lv1 $vg/$lv2
+
+ # we don't want to spawn more than $1 background pollings
+ if test -e LOCAL_LVMPOLLD; then
+ aux lvmpolld_dump | tee lvmpolld_dump.txt
+ aux check_lvmpolld_init_rq_count 1 "$vg/pvmove0" || should false
+ else
+ test $(aux count_processes_with_tag) -eq $1 || should false
+ fi
+}
+
+vgchange_single() {
+ LVM_TEST_TAG="kill_me_$PREFIX" vgchange -aey $vg
+
+ if test -e LOCAL_LVMPOLLD; then
+ aux lvmpolld_dump | tee lvmpolld_dump.txt
+ aux check_lvmpolld_init_rq_count 1 "$vg/pvmove0"
+ else
+ test $(aux count_processes_with_tag) -eq $1
+ fi
+}
+
+pvmove_fg() {
+ # pvmove resume requires LVs active...
+ LVM_TEST_TAG="kill_me_$PREFIX" vgchange --config 'activation{polling_interval=10}' -aey --poll n $vg
+
+ # ...also vgchange --poll n must not spawn any bg processes
+ if test -e LOCAL_LVMPOLLD; then
+ aux lvmpolld_dump | tee lvmpolld_dump.txt
+ aux check_lvmpolld_init_rq_count 0 "$vg/pvmove0"
+ else
+ test $(aux count_processes_with_tag) -eq 0
+ fi
+
+ # ...thus finish polling
+ get lv_field $vg name -a | grep "^\[pvmove0\]"
+
+ aux enable_dev "$dev2"
+
+ pvmove
+}
+
+pvmove_bg() {
+ # pvmove resume requires LVs active...
+ LVM_TEST_TAG="kill_me_$PREFIX" vgchange --config 'activation{polling_interval=10}' -aey --poll n $vg
+
+ # ...also vgchange --poll n must not spawn any bg processes
+ if test -e LOCAL_LVMPOLLD; then
+ aux lvmpolld_dump | tee lvmpolld_dump.txt
+ aux check_lvmpolld_init_rq_count 0 "$vg/pvmove0"
+ else
+ test $(aux count_processes_with_tag) -eq 0
+ fi
+
+ # ...thus finish polling
+ get lv_field $vg name -a | grep "^\[pvmove0\]"
+
+ LVM_TEST_TAG="kill_me_$PREFIX" pvmove -b
+}
+
+pvmove_fg_single() {
+ # pvmove resume requires LVs active...
+ LVM_TEST_TAG="kill_me_$PREFIX" vgchange --config 'activation{polling_interval=10}' -aey --poll n $vg
+
+ # ...also vgchange --poll n must not spawn any bg processes
+ if test -e LOCAL_LVMPOLLD; then
+ aux lvmpolld_dump | tee lvmpolld_dump.txt
+ aux check_lvmpolld_init_rq_count 0 "$vg/pvmove0"
+ else
+ test $(aux count_processes_with_tag) -eq 0
+ fi
+
+ # ...thus finish polling
+ get lv_field $vg name -a | grep "^\[pvmove0\]"
+
+ aux enable_dev "$dev2"
+
+ pvmove "$dev1"
+}
+
+pvmove_bg_single() {
+ # pvmove resume requires LVs active...
+ LVM_TEST_TAG="kill_me_$PREFIX" vgchange --config 'activation{polling_interval=10}' -aey --poll n $vg
+
+ # ...also vgchange --poll n must not spawn any bg processes...
+ if test -e LOCAL_LVMPOLLD; then
+ aux lvmpolld_dump | tee lvmpolld_dump.txt
+ aux check_lvmpolld_init_rq_count 0 "$vg/pvmove0"
+ else
+ test $(aux count_processes_with_tag) -eq 0
+ fi
+
+ # ...thus finish polling
+ get lv_field $vg name -a | grep "^\[pvmove0\]"
+
+ LVM_TEST_TAG="kill_me_$PREFIX" pvmove -b "$dev1"
+}
+
+test -e LOCAL_CLVMD && skip
+
+test_pvmove_resume lvchange_single
+test_pvmove_resume lvchange_all
+test_pvmove_resume vgchange_single
+test_pvmove_resume pvmove_fg
+test_pvmove_resume pvmove_fg_single
+test_pvmove_resume pvmove_bg
+test_pvmove_resume pvmove_bg_single
+
+vgremove -ff $vg
diff --git a/test/shell/pvmove-resume-multiseg.sh b/test/shell/pvmove-resume-multiseg.sh
new file mode 100644
index 000000000..713534f76
--- /dev/null
+++ b/test/shell/pvmove-resume-multiseg.sh
@@ -0,0 +1,232 @@
+#!/bin/sh
+# Copyright (C) 2015 Red Hat, Inc. All rights reserved.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU General Public License v.2.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+# Check whether all available pvmove resume methods work as expected.
+# lvchange is able to resume pvmoves in progress.
+
+# Multisegment variant w/ 2 pvmoves LVs per VG
+
+. lib/inittest
+
+aux prepare_pvs 5 30
+
+vgcreate -s 128k $vg "$dev1" "$dev2" "$dev3"
+pvcreate --metadatacopies 0 "$dev4" "$dev5"
+vgextend $vg "$dev4" "$dev5"
+
+# $1 resume fn
+test_pvmove_resume() {
+ # Create multisegment LV
+ lvcreate -an -Zn -l30 -n $lv1 $vg "$dev1"
+ lvextend -l+30 $vg/$lv1 "$dev2"
+	# next LV on same VG and different PV (we want to test 2 pvmoves per VG)
+ lvcreate -an -Zn -l30 -n $lv2 $vg "$dev3"
+
+ aux delay_dev "$dev4" 0 250
+ aux delay_dev "$dev5" 0 250
+
+ pvmove -i5 "$dev1" "$dev4" &
+ PVMOVE=$!
+ aux wait_pvmove_lv_ready "$vg-pvmove0" 300
+ kill -9 $PVMOVE
+
+ pvmove -i5 -n $vg/$lv2 "$dev3" "$dev5" &
+ PVMOVE=$!
+ aux wait_pvmove_lv_ready "$vg-pvmove1" 300
+ kill -9 $PVMOVE
+
+ if test -e LOCAL_LVMPOLLD ; then
+ aux prepare_lvmpolld
+ fi
+
+ wait
+
+ while dmsetup status "$vg-$lv1"; do dmsetup remove "$vg-$lv1" || true; done
+ while dmsetup status "$vg-$lv2"; do dmsetup remove "$vg-$lv2" || true; done
+ while dmsetup status "$vg-pvmove0"; do dmsetup remove "$vg-pvmove0" || true; done
+ while dmsetup status "$vg-pvmove1"; do dmsetup remove "$vg-pvmove1" || true; done
+
+ check lv_attr_bit type $vg/pvmove0 "p"
+ check lv_attr_bit type $vg/pvmove1 "p"
+
+ if test -e LOCAL_CLVMD ; then
+		# give up all clvmd locks (faster than restarting clvmd)
+		# no deactivation happens, nodes are already removed
+ #vgchange -an $vg
+ # FIXME: However above solution has one big problem
+ # as clvmd starts to abort on internal errors on various
+ # errors, based on the fact pvmove is killed -9
+ # Restart clvmd
+ kill $(< LOCAL_CLVMD)
+ for i in $(seq 1 100) ; do
+ test $i -eq 100 && die "Shutdown of clvmd is too slow."
+ test -e "$CLVMD_PIDFILE" || break
+ sleep .1
+ done # wait for the pid removal
+ aux prepare_clvmd
+ fi
+
+ aux notify_lvmetad "$dev1" "$dev2" "$dev3" "$dev4" "$dev5"
+
+ # call resume function (see below)
+ # with expected number of spawned
+ # bg polling as parameter
+ $1 2
+
+ aux enable_dev "$dev4"
+ aux enable_dev "$dev5"
+
+ i=0
+ while get lv_field $vg name -a | grep "^\[pvmove"; do
+ # wait for 30 secs at max
+ test $i -ge 300 && die "Pvmove is too slow or does not progress."
+ sleep .1
+ i=$((i + 1))
+ done
+
+ aux kill_tagged_processes
+
+ lvremove -ff $vg
+}
+
+lvchange_single() {
+ LVM_TEST_TAG="kill_me_$PREFIX" lvchange -aey $vg/$lv1
+ LVM_TEST_TAG="kill_me_$PREFIX" lvchange -aey $vg/$lv2
+}
+
+lvchange_all() {
+ LVM_TEST_TAG="kill_me_$PREFIX" lvchange -aey $vg/$lv1 $vg/$lv2
+
+ # we don't want to spawn more than $1 background pollings
+ if test -e LOCAL_LVMPOLLD; then
+ aux lvmpolld_dump | tee lvmpolld_dump.txt
+ aux check_lvmpolld_init_rq_count 1 "$vg/pvmove0"
+ aux check_lvmpolld_init_rq_count 1 "$vg/pvmove1"
+ else
+ test $(aux count_processes_with_tag) -eq $1
+ fi
+}
+
+vgchange_single() {
+ LVM_TEST_TAG="kill_me_$PREFIX" vgchange -aey $vg
+
+ if test -e LOCAL_LVMPOLLD; then
+ aux lvmpolld_dump | tee lvmpolld_dump.txt
+ aux check_lvmpolld_init_rq_count 1 "$vg/pvmove0"
+ aux check_lvmpolld_init_rq_count 1 "$vg/pvmove1"
+ else
+ test $(aux count_processes_with_tag) -eq $1
+ fi
+}
+
+pvmove_fg() {
+ # pvmove resume requires LVs active...
+ LVM_TEST_TAG="kill_me_$PREFIX" vgchange --config 'activation{polling_interval=10}' -aey --poll n $vg
+
+ # ...also vgchange --poll n must not spawn any bg processes...
+ if test -e LOCAL_LVMPOLLD; then
+ aux lvmpolld_dump | tee lvmpolld_dump.txt
+ aux check_lvmpolld_init_rq_count 0 "$vg/pvmove0"
+ aux check_lvmpolld_init_rq_count 0 "$vg/pvmove1"
+ else
+ test $(aux count_processes_with_tag) -eq 0
+ fi
+
+ # ...thus finish polling
+ get lv_field $vg name -a | grep "^\[pvmove0\]"
+ get lv_field $vg name -a | grep "^\[pvmove1\]"
+
+ # disable delay device
+ # fg pvmove would take ages to complete otherwise
+ aux enable_dev "$dev4"
+ aux enable_dev "$dev5"
+
+ pvmove
+}
+
+pvmove_bg() {
+ # pvmove resume requires LVs active...
+ LVM_TEST_TAG="kill_me_$PREFIX" vgchange --config 'activation{polling_interval=10}' -aey --poll n $vg
+
+ # ...also vgchange --poll n must not spawn any bg processes...
+ if test -e LOCAL_LVMPOLLD; then
+ aux lvmpolld_dump | tee lvmpolld_dump.txt
+ aux check_lvmpolld_init_rq_count 0 "$vg/pvmove0"
+ aux check_lvmpolld_init_rq_count 0 "$vg/pvmove1"
+ else
+ test $(aux count_processes_with_tag) -eq 0
+ fi
+
+ # ...thus finish polling
+ get lv_field $vg name -a | grep "^\[pvmove0\]"
+ get lv_field $vg name -a | grep "^\[pvmove1\]"
+
+ LVM_TEST_TAG="kill_me_$PREFIX" pvmove -b
+}
+
+pvmove_fg_single() {
+ # pvmove resume requires LVs active...
+ LVM_TEST_TAG="kill_me_$PREFIX" vgchange --config 'activation{polling_interval=10}' -aey --poll n $vg
+
+ # ...also vgchange --poll n must not spawn any bg processes...
+ if test -e LOCAL_LVMPOLLD; then
+ aux lvmpolld_dump | tee lvmpolld_dump.txt
+ aux check_lvmpolld_init_rq_count 0 "$vg/pvmove0"
+ aux check_lvmpolld_init_rq_count 0 "$vg/pvmove1"
+ else
+ test $(aux count_processes_with_tag) -eq 0
+ fi
+
+ # ...thus finish polling
+ get lv_field $vg name -a | grep "^\[pvmove0\]"
+ get lv_field $vg name -a | grep "^\[pvmove1\]"
+
+ # disable delay device
+ # fg pvmove would take ages to complete otherwise
+ aux enable_dev "$dev4"
+ aux enable_dev "$dev5"
+
+ pvmove "$dev1"
+ pvmove "$dev3"
+}
+
+pvmove_bg_single() {
+ # pvmove resume requires LVs active...
+ LVM_TEST_TAG="kill_me_$PREFIX" vgchange --config 'activation{polling_interval=10}' -aey --poll n $vg
+
+ # ...also vgchange --poll n must not spawn any bg processes...
+ if test -e LOCAL_LVMPOLLD; then
+ aux lvmpolld_dump | tee lvmpolld_dump.txt
+ aux check_lvmpolld_init_rq_count 0 "$vg/pvmove0"
+ aux check_lvmpolld_init_rq_count 0 "$vg/pvmove1"
+ else
+ test $(aux count_processes_with_tag) -eq 0
+ fi
+
+ # ...thus finish polling
+ get lv_field $vg name -a | grep "^\[pvmove0\]"
+ get lv_field $vg name -a | grep "^\[pvmove1\]"
+
+ LVM_TEST_TAG="kill_me_$PREFIX" pvmove -b "$dev1"
+ LVM_TEST_TAG="kill_me_$PREFIX" pvmove -b "$dev3"
+}
+
+test -e LOCAL_CLVMD && skip
+
+test_pvmove_resume lvchange_single
+test_pvmove_resume lvchange_all
+test_pvmove_resume vgchange_single
+test_pvmove_resume pvmove_fg
+test_pvmove_resume pvmove_fg_single
+test_pvmove_resume pvmove_bg
+test_pvmove_resume pvmove_bg_single
+
+vgremove -ff $vg
diff --git a/test/shell/pvremove-thin.sh b/test/shell/pvremove-thin.sh
index 5bfb880a5..2400ac5ed 100644
--- a/test/shell/pvremove-thin.sh
+++ b/test/shell/pvremove-thin.sh
@@ -14,6 +14,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_vg
aux have_thin 1 8 0 || skip
diff --git a/test/shell/pvremove-usage.sh b/test/shell/pvremove-usage.sh
index 52d26a648..5c55839d8 100644
--- a/test/shell/pvremove-usage.sh
+++ b/test/shell/pvremove-usage.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_devs 3
pvcreate "$dev1"
pvcreate --metadatacopies 0 "$dev2"
diff --git a/test/shell/pvremove-warnings.sh b/test/shell/pvremove-warnings.sh
index f9d273729..f7ac95b5f 100644
--- a/test/shell/pvremove-warnings.sh
+++ b/test/shell/pvremove-warnings.sh
@@ -10,7 +10,14 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_devs 2
pvcreate "$dev1" "$dev2"
pvremove "$dev1" "$dev2" 2>&1 | tee pvremove.txt
not grep "No physical" pvremove.txt
+
+pvcreate "$dev1" "$dev2"
+vgcreate bla "$dev1" "$dev2"
+pvremove -ff -y "$dev1" "$dev2" 2>&1 | tee pvremove.txt
+not grep "device missing" pvremove.txt
diff --git a/test/shell/read-ahead.sh b/test/shell/read-ahead.sh
index 764bef132..322a527db 100644
--- a/test/shell/read-ahead.sh
+++ b/test/shell/read-ahead.sh
@@ -17,6 +17,8 @@ test_description='Test read-ahead functionality'
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_vg 5
#COMM "test various read ahead settings (bz450922)"
diff --git a/test/shell/sanlock-hello-world.sh b/test/shell/sanlock-hello-world.sh
new file mode 100644
index 000000000..23cdc3434
--- /dev/null
+++ b/test/shell/sanlock-hello-world.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+# Copyright (C) 2008-2012 Red Hat, Inc. All rights reserved.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU General Public License v.2.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+test_description='Hello world for vgcreate with lvmlockd and sanlock'
+
+. lib/inittest
+
+[ -z "$LVM_TEST_LOCK_TYPE_SANLOCK" ] && skip;
+
+aux prepare_pvs 1
+
+vgcreate $SHARED $vg "$dev1"
+
+vgs -o+locktype,lockargs $vg
+
+check vg_field $vg vg_locktype sanlock
+
+vgremove $vg
+
diff --git a/test/shell/sanlock-prepare.sh b/test/shell/sanlock-prepare.sh
new file mode 100644
index 000000000..401d6d7de
--- /dev/null
+++ b/test/shell/sanlock-prepare.sh
@@ -0,0 +1,86 @@
+#!/bin/sh
+# Copyright (C) 2008-2012 Red Hat, Inc. All rights reserved.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU General Public License v.2.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+test_description='Set up things to run tests with sanlock'
+
+. lib/utils
+. lib/inittest
+
+[ -z "$LVM_TEST_LOCK_TYPE_SANLOCK" ] && skip;
+
+SANLOCK_CONF="/etc/sysconfig/sanlock"
+create_sanlock_conf() {
+ if test -a $SANLOCK_CONF; then
+ if ! grep "created by lvm test suite" $SANLOCK_CONF; then
+ rm $SANLOCK_CONF
+ else
+ mv $SANLOCK_CONF $SANLOCK_CONF.prelvmtest
+ fi
+ fi
+
+ cp lib/test-sanlock-conf $SANLOCK_CONF
+ echo "created new $SANLOCK_CONF"
+}
+
+prepare_lvmlockd_sanlock() {
+ if pgrep lvmlockd ; then
+ echo "Cannot run while existing lvmlockd process exists"
+ exit 1
+ fi
+
+ if pgrep sanlock ; then
+ echo "Cannot run while existing sanlock process exists"
+ exit 1
+ fi
+
+ create_sanlock_conf
+
+ # FIXME: use 'systemctl start sanlock' once we can pass options
+ sanlock daemon -U sanlock -G sanlock -w 0 -e testhostname
+ sleep 1
+ if ! pgrep sanlock; then
+ echo "Failed to start sanlock"
+ exit 1
+ fi
+
+ # FIXME: use 'systemctl start lvm2-lvmlockd' once we can pass -o 2
+ lvmlockd -o 2
+ sleep 1
+ if ! pgrep lvmlockd; then
+ echo "Failed to start lvmlockd"
+ exit 1
+ fi
+}
+
+# Create a device and a VG that are both outside the scope of
+# the standard lvm test suite so that they will not be removed
+# and will remain in place while all the tests are run.
+#
+# Use this VG to hold the sanlock global lock which will be used
+# by lvmlockd during other tests.
+#
+# This script will be run before any standard tests are run.
+# After all the tests are run, another script will be run
+# to remove this VG and device.
+
+GL_DEV="/dev/mapper/GL_DEV"
+GL_FILE="$PWD/gl_file.img"
+rm -f "$GL_FILE"
+dd if=/dev/zero of="$GL_FILE" bs=$((1024*1024)) count=1024 2> /dev/null
+GL_LOOP=$(losetup -f "$GL_FILE" --show)
+echo "0 `blockdev --getsize $GL_LOOP` linear $GL_LOOP 0" | dmsetup create GL_DEV
+
+prepare_lvmlockd_sanlock
+
+vgcreate --config 'devices { global_filter=["a|GL_DEV|", "r|.*|"] filter=["a|GL_DEV|", "r|.*|"]}' --lock-type sanlock glvg $GL_DEV
+
+vgs --config 'devices { global_filter=["a|GL_DEV|", "r|.*|"] filter=["a|GL_DEV|", "r|.*|"]}' -o+locktype,lockargs glvg
+
diff --git a/test/shell/sanlock-remove.sh b/test/shell/sanlock-remove.sh
new file mode 100644
index 000000000..47487d68e
--- /dev/null
+++ b/test/shell/sanlock-remove.sh
@@ -0,0 +1,28 @@
+#!/bin/sh
+# Copyright (C) 2008-2012 Red Hat, Inc. All rights reserved.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU General Public License v.2.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+test_description='Remove the sanlock test setup'
+
+. lib/inittest
+
+[ -z "$LVM_TEST_LOCK_TYPE_SANLOCK" ] && skip;
+
+# Removes the VG with the global lock that was created by
+# the corresponding create script.
+
+vgremove --config 'devices { global_filter=["a|GL_DEV|", "r|.*|"] filter=["a|GL_DEV|", "r|.*|"]}' glvg
+
+
+killall lvmlockd
+killall sanlock
+
+dmsetup remove GL_DEV
+# dmsetup remove glvg-lvmlock
diff --git a/test/shell/report-select.sh b/test/shell/select-report.sh
index 70e536643..cd3f45238 100644
--- a/test/shell/report-select.sh
+++ b/test/shell/select-report.sh
@@ -1,5 +1,5 @@
#!/bin/sh
-# Copyright (C) 2014 Red Hat, Inc. All rights reserved.
+# Copyright (C) 2014-2015 Red Hat, Inc. All rights reserved.
#
# This copyrighted material is made available to anyone wishing to use,
# modify, copy, or redistribute it subject to the terms and conditions
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_pvs 6 16
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
@@ -38,22 +40,24 @@ lvcreate -L4m -s "$vg3/orig" -n "snap"
OUT_LOG_FILE="out"
ERR_LOG_FILE="err"
-function result()
-{
+sel() {
local items_found
- test -f $OUT_LOG_FILE || {
+ ${1}s --noheadings -o ${1}_name --select "$2" 2>"$ERR_LOG_FILE" | tee "$OUT_LOG_FILE"
+ shift 2
+
+ test -f "$OUT_LOG_FILE" || {
echo " >>> Missing log file to check!"
return 1
}
# there shouldn't be any selection syntax error
- grep "Selection syntax error at" $ERR_LOG_FILE > /dev/null && {
+ grep "Selection syntax error at" "$ERR_LOG_FILE" >/dev/null && {
echo " >>> Selection syntax error hit!"
return 1
}
- items_found=$(wc -l $OUT_LOG_FILE | cut -f 1 -d " ")
+ items_found=$(wc -l "$OUT_LOG_FILE" | cut -f 1 -d ' ')
# the number of lines on output must match
test $items_found -eq $# || {
@@ -65,87 +69,71 @@ function result()
# the names selected must be correct
# each pv, vg and lv name is unique so just check
# the presence of the names given as arg
- for name in $1; do
- grep $name $OUT_LOG_FILE > /dev/null || {
+ for name in "$@" ; do
+ grep "$name" "$OUT_LOG_FILE" >/dev/null || {
echo " >>> $name not found in the output log"
return 1
}
done
- rm -f $OUT_LOG_FILE
- rm -f $ERR_LOG_FILE
-}
-
-function pvs_sel()
-{
- pvs 1>$OUT_LOG_FILE 2>$ERR_LOG_FILE --noheadings -o pv_name --select "$1" && result $2
-}
-
-function vgs_sel()
-{
- vgs &>$OUT_LOG_FILE 2>$ERR_LOG_FILE --noheadings -o vg_name --select "$1" && result $2
-}
-
-function lvs_sel()
-{
- lvs &>$OUT_LOG_FILE 2>$ERR_LOG_FILE --noheadings -o lv_name --select "$1" && result $2
+ rm -f "$OUT_LOG_FILE" "$ERR_LOG_FILE"
}
##########################
# STRING FIELD SELECTION #
##########################
#$LVS 'lv_name="vol1"' && result vol1
-lvs_sel 'lv_name="vol1"' "vol1"
+sel lv 'lv_name="vol1"' vol1
#$LVS 'lv_name!="vol1"' && result vol2 abc xyz
-lvs_sel 'lv_name!="vol1"' "vol2 abc xyz orig snap"
+sel lv 'lv_name!="vol1"' vol2 abc xyz orig snap
# check string values are accepted without quotes too
-lvs_sel 'lv_name=vol1' "vol1"
+sel lv 'lv_name=vol1' vol1
# check single quotes are also accepted instead of double quotes
-lvs_sel "lv_name='vol1'" "vol1"
+sel lv "lv_name='vol1'" vol1
###############################
# STRING LIST FIELD SELECTION #
###############################
-pvs_sel 'tags=["pv_tag1"]' ""
+sel pv 'tags=["pv_tag1"]'
# for one item, no need to use []
-pvs_sel 'tags="pv_tag1"' "$dev1 $dev2"
+sel pv 'tags="pv_tag1"' "$dev1" "$dev6"
# no match
-pvs_sel 'tags=["pv_tag1" && "pv_tag2"]'
-pvs_sel 'tags=["pv_tag1" && "pv_tag2" && "pv_tag3"]' "$dev1"
+sel pv 'tags=["pv_tag1" && "pv_tag2"]'
+sel pv 'tags=["pv_tag1" && "pv_tag2" && "pv_tag3"]' "$dev1"
# check the order has no effect on selection result
-pvs_sel 'tags=["pv_tag3" && "pv_tag2" && "pv_tag1"]' "$dev1"
-pvs_sel 'tags=["pv_tag4" || "pv_tag3"]' "$dev1 $dev6"
-pvs_sel 'tags!=["pv_tag1"]' "$dev1 $dev2 $dev3 $dev4 $dev5 $dev6"
+sel pv 'tags=["pv_tag3" && "pv_tag2" && "pv_tag1"]' "$dev1"
+sel pv 'tags=["pv_tag4" || "pv_tag3"]' "$dev1" "$dev6"
+sel pv 'tags!=["pv_tag1"]' "$dev1" "$dev2" "$dev3" "$dev4" "$dev5" "$dev6"
# check mixture of && and || - this is not allowed
-not pvs_sel 'tags=["pv_tag1" && "pv_tag2" || "pv_tag3"]'
+not sel pv 'tags=["pv_tag1" && "pv_tag2" || "pv_tag3"]'
##########################
# NUMBER FIELD SELECTION #
##########################
-vgs_sel 'pv_count=3' "$vg1"
-vgs_sel 'pv_count!=3' "$vg3 $vg2"
-vgs_sel 'pv_count<2' "$vg3"
-vgs_sel 'pv_count<=2' "$vg3 $vg2"
-vgs_sel 'pv_count>2' "$vg1"
-vgs_sel 'pv_count>=2' "$vg1 $vg2"
+sel vg 'pv_count=3' $vg1
+sel vg 'pv_count!=3' $vg3 $vg2
+sel vg 'pv_count<2' $vg3
+sel vg 'pv_count<=2' $vg3 $vg2
+sel vg 'pv_count>2' $vg1
+sel vg 'pv_count>=2' $vg1 $vg2
########################
# SIZE FIELD SELECTION #
########################
# check size units are accepted as well as floating point numbers for sizes
-lvs_sel 'size=8388608b' "vol1"
-lvs_sel 'size=8192k' "vol1"
-lvs_sel 'size=8m' "vol1"
-lvs_sel 'size=8.00m' "vol1"
-lvs_sel 'size=0.0078125g' "vol1"
-lvs_sel 'size=0.00000762939453125t' "vol1"
-lvs_sel 'size=0.000000007450580596923828125p' "vol1"
-lvs_sel 'size=0.0000000000072759576141834259033203125e' "vol1"
-
-lvs_sel 'size>8m' "abc"
-lvs_sel 'size>=8m' "abc vol1"
-lvs_sel 'size<8m' "vol2 xyz orig snap"
-lvs_sel 'size<=8m' "vol2 xyz vol1 orig snap"
+sel lv 'size=8388608b' vol1
+sel lv 'size=8192k' vol1
+sel lv 'size=8m' vol1
+sel lv 'size=8.00m' vol1
+sel lv 'size=0.0078125g' vol1
+sel lv 'size=0.00000762939453125t' vol1
+sel lv 'size=0.000000007450580596923828125p' vol1
+sel lv 'size=0.0000000000072759576141834259033203125e' vol1
+
+sel lv 'size>8m' abc
+sel lv 'size>=8m' abc vol1
+sel lv 'size<8m' vol2 xyz orig snap
+sel lv 'size<=8m' vol2 xyz vol1 orig snap
###########################
# PERCENT FIELD SELECTION #
@@ -153,69 +141,71 @@ lvs_sel 'size<=8m' "vol2 xyz vol1 orig snap"
if aux target_at_least dm-snapshot 1 10 0; then
# Test zero percent only if snapshot can be zero.
# Before 1.10.0, the snap percent included metadata size.
- lvs_sel 'snap_percent=0' "snap"
+ sel lv 'snap_percent=0' snap
fi
-dd if=/dev/zero of=$DM_DEV_DIR/$vg3/snap bs=1M count=1
-lvs_sel 'snap_percent<50' "snap"
-lvs_sel 'snap_percent>50'
-dd if=/dev/zero of=$DM_DEV_DIR/$vg3/snap bs=1M count=4
-lvs_sel 'snap_percent=100' "snap"
+dd if=/dev/zero of="$DM_DEV_DIR/$vg3/snap" bs=1M count=1
+sel lv 'snap_percent<50' snap
+sel lv 'snap_percent>50'
+dd if=/dev/zero of="$DM_DEV_DIR/$vg3/snap" bs=1M count=4
+sel lv 'snap_percent=100' snap
# % char is accepted as suffix for percent values
-lvs_sel 'snap_percent=100%' "snap"
+sel lv 'snap_percent=100%' snap
# percent values over 100% are not accepted
-not lvs_sel 'snap_percent=101%'
+not sel lv 'snap_percent=101%'
#########################
# REGEX FIELD SELECTION #
#########################
-lvs_sel 'lv_name=~"^vol[12]"' "vol1 vol2"
-lvs_sel 'lv_name!~"^vol[12]"' "abc xyz orig snap"
+sel lv 'lv_name=~"^vol[12]"' vol1 vol2
+sel lv 'lv_name!~"^vol[12]"' abc xyz orig snap
# check regex is accepted without quotes too
-lvs_sel 'lv_name=~^vol[12]' "vol1 vol2"
+sel lv 'lv_name=~^vol[12]' vol1 vol2
###########
# GENERIC #
###########
# check prefix works for selection too
-lvs_sel 'lv_name="vol1"' "vol1"
-lvs_sel 'name="vol1"' "vol1"
+sel lv 'lv_name="vol1"' vol1
+sel lv 'name="vol1"' vol1
# check reserved values are accepted for certain fields as well as usual values
-vgs_sel 'vg_mda_copies=unmanaged' "$vg2 $vg3"
-vgs_sel 'vg_mda_copies=2' "$vg1"
+sel vg 'vg_mda_copies=unmanaged' $vg2 $vg3
+sel vg 'vg_mda_copies=2' $vg1
# also, we must match only vg1, not including vg2 and vg3
# when comparing ranges - unamanged is mapped onto 2^64 - 1 internally,
# so we need to skip this internal value if it matches with selection criteria!
-vgs_sel 'vg_mda_copies>=2' "$vg1"
-not vgs_sel 'vg_mda_copies=18446744073709551615'
+sel vg 'vg_mda_copies>=2' $vg1
+not sel vg 'vg_mda_copies=18446744073709551615'
-lvs_sel 'lv_read_ahead=auto' "vol1 vol2 orig snap"
-lvs_sel 'lv_read_ahead=256k' "abc xyz"
+sel lv 'lv_read_ahead=auto' vol1 vol2 orig snap
+sel lv 'lv_read_ahead=256k' abc xyz
-lvs_sel 'lv_minor=-1' "vol1 vol2 abc orig snap"
-lvs_sel 'lv_minor=undefined' "vol1 vol2 abc orig snap"
-lvs_sel 'lv_minor=undef' "vol1 vol2 abc orig snap"
-lvs_sel 'lv_minor=unknown' "vol1 vol2 abc orig snap"
-lvs_sel 'lv_minor=254' "xyz"
+sel lv 'lv_minor=-1' vol1 vol2 abc orig snap
+sel lv 'lv_minor=undefined' vol1 vol2 abc orig snap
+sel lv 'lv_minor=undef' vol1 vol2 abc orig snap
+sel lv 'lv_minor=unknown' vol1 vol2 abc orig snap
+sel lv 'lv_minor=254' xyz
+# also test synonym for string field type
+sel lv 'seg_monitor=undefined' vol1 vol2 abc abc orig snap xyz
# if size unit not spefied, the 'm' (MiB) unit is used by default
-lvs_sel 'lv_size=8' "vol1"
+sel lv 'lv_size=8' vol1
# no need to use quotes for the whole selection string if it does not clash with shell
-lvs_sel name=vol1 vol1
+sel lv name=vol1 vol1
##########################################
# FORMING MORE COMPLEX SELECTION CLAUSES #
##########################################
# AND clause
-lvs_sel 'lv_tags=lv_tag1 && lv_size=4m' "vol2"
+sel lv 'lv_tags=lv_tag1 && lv_size=4m' vol2
# OR clause
-lvs_sel 'lv_name=vol1 || lv_name=vol2' "vol1 vol2"
+sel lv 'lv_name=vol1 || lv_name=vol2' vol1 vol2
# grouping by using ( )
-lvs_sel '(lv_name=vol1 || lv_name=vol2) || vg_tags=vg_tag1' "vol1 vol2 abc orig snap"
-lvs_sel '(lv_name=vol1 && lv_size=100m) || vg_tags=vg_tag1' "xyz orig snap"
-lvs_sel '(lv_name=vol1 || lv_name=vol2) && vg_tags=vg_tag1'
-lvs_sel '(lv_name=vol1 || lv_name=vol2) && lv_size < 8m' "vol2"
-lvs_sel '(lv_name=vol1 && lv_size=8m) && vg_tags=vg_tag2' "vol1"
+sel lv '(lv_name=vol1 || lv_name=vol2) || vg_tags=vg_tag1' vol1 vol2 orig snap xyz
+sel lv '(lv_name=vol1 && lv_size=100m) || vg_tags=vg_tag1' xyz orig snap
+sel lv '(lv_name=vol1 || lv_name=vol2) && vg_tags=vg_tag1'
+sel lv '(lv_name=vol1 || lv_name=vol2) && lv_size < 8m' vol2
+sel lv '(lv_name=vol1 && lv_size=8m) && vg_tags=vg_tag2' vol1
# negation of clause grouped by ( )
-lvs_sel '!(lv_name=vol1 || lv_name=vol2)' "abc xyz orig snap"
+sel lv '!(lv_name=vol1 || lv_name=vol2)' abc xyz orig snap
diff --git a/test/shell/select-tools-thin.sh b/test/shell/select-tools-thin.sh
new file mode 100644
index 000000000..4cabde1ed
--- /dev/null
+++ b/test/shell/select-tools-thin.sh
@@ -0,0 +1,41 @@
+#!/bin/sh
+# Copyright (C) 2015 Red Hat, Inc. All rights reserved.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU General Public License v.2.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+export LVM_TEST_THIN_REPAIR_CMD=${LVM_TEST_THIN_REPAIR_CMD-/bin/false}
+
+. lib/inittest
+
+test -e LOCAL_LVMPOLLD && skip
+
+aux have_thin 1 0 0 || skip
+
+aux prepare_pvs 1 16
+
+#########################
+# special cases to test #
+#########################
+
+# if calling lvremove and an LV is removed that is related to other LV
+# and we're doing selection based on this relation, check if we're
+# selecting on initial state (here, thin origin LV thin_orig is removed
+# first, but thin snap should be still selectable based on origin=thin_orig
+# condition even though thin_orig has just been removed)
+vgcreate -s 4m $vg1 "$dev1"
+lvcreate -l100%FREE -T $vg1/pool
+lvcreate -V4m -T $vg1/pool -n thin_orig
+lvcreate -s $vg1/thin_orig -n thin_snap
+lvremove -ff -S 'lv_name=thin_orig || origin=thin_orig' > out
+grep "Logical volume \"thin_orig\" successfully removed" out
+grep "Logical volume \"thin_snap\" successfully removed" out
+not lvs $vg1/thin_orig
+not lvs $vg1/thin_snap
+
+vgremove -ff $vg1
diff --git a/test/shell/select-tools.sh b/test/shell/select-tools.sh
new file mode 100644
index 000000000..631c6379c
--- /dev/null
+++ b/test/shell/select-tools.sh
@@ -0,0 +1,278 @@
+#!/bin/sh
+# Copyright (C) 2015 Red Hat, Inc. All rights reserved.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU General Public License v.2.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+. lib/inittest
+
+test -e LOCAL_LVMPOLLD && skip
+
+aux prepare_pvs 4 12
+
+vgcreate -s 4m $vg1 "$dev1" "$dev2"
+vgcreate -s 4m $vg2 "$dev3" "$dev4"
+
+# vg1/lv1 mapped onto dev1
+lvcreate -l1 -n "lv1" $vg1 "$dev1"
+
+# vg1/lv2 mapped onto dev1 and dev2 (2 segments)
+lvcreate -l3 -n "lv2" $vg1 "$dev1" "$dev2"
+
+# vg2/lv3 mapped onto dev3
+lvcreate -l1 -n "lv3" $vg2 "$dev3"
+
+# vg2/lv4 mapped onto dev3
+lvcreate -l1 -n "lv4" $vg2 "$dev3" "$dev4"
+
+# vg2/lv1 mapped onto "$dev4" (same LV name as vg1/lv1)
+lvcreate -l1 -n "lv1" $vg2 "$dev4"
+
+###########################################
+# exercise process_each_vg with selection #
+###########################################
+
+# select contains VGS field
+# direct vg name match
+vgchange --addtag 101 -S "vg_name=$vg1"
+check vg_field $vg1 vg_tags 101
+not check vg_field $vg2 vg_tags 101
+vgchange --deltag 101
+
+# select contains LVS fields
+vgchange --addtag 102 -S "lv_name=lv2"
+check vg_field $vg1 vg_tags 102
+not check vg_field $vg2 vg_tags 102
+vgchange --deltag 102
+vgchange --addtag 103 -S "lv_name=lv1"
+check vg_field $vg1 vg_tags 103
+check vg_field $vg2 vg_tags 103
+vgchange --deltag 103
+
+# select contains SEGS field
+vgchange --addtag 104 -S 'seg_start=8m'
+check vg_field $vg1 vg_tags 104
+not check vg_field $vg2 vg_tags 104
+vgchange --deltag 104
+vgchange --addtag 105 -S "seg_start=0m"
+check vg_field $vg1 vg_tags 105
+check vg_field $vg2 vg_tags 105
+vgchange --deltag 105
+
+# select contains PVS field
+vgchange --addtag 106 -S "pv_name="$dev1""
+check vg_field $vg1 vg_tags 106
+not check vg_field $vg2 vg_tags 106
+vgchange --deltag 106
+vgchange --addtag 107 -S "pv_size>0m"
+check vg_field $vg1 vg_tags 107
+check vg_field $vg2 vg_tags 107
+vgchange --deltag 107
+
+# select contains PVSEGS field
+vgchange --addtag 108 -S "pvseg_size=2"
+check vg_field $vg1 vg_tags 108
+not check vg_field $vg2 vg_tags 108
+vgchange --deltag 108
+vgchange --addtag 109 -S "pvseg_size=1"
+check vg_field $vg1 vg_tags 109
+check vg_field $vg2 vg_tags 109
+vgchange --deltag 109
+
+# if VG name or tag is supplied together with the
+# selection, the result is an intersection of both
+vgchange --addtag 110 -S "vg_name=$vg1" $vg2
+not check vg_field $vg1 vg_tags 110
+not check vg_field $vg2 vg_tags 110
+vgchange --deltag 110
+vgchange --addtag 111 -S "vg_name=$vg1" $vg1
+check vg_field $vg1 vg_tags 111
+not check vg_field $vg2 vg_tags 111
+vgchange --deltag 111
+vgchange --addtag "tag" $vg1
+vgchange --addtag 112 -S "vg_name=$vg2" @tag
+not check vg_field $vg1 vg_tags "tag,112"
+not check vg_field $vg2 vg_tags "tag,112"
+vgchange --deltag 112
+vgchange --addtag 113 -S "vg_name=$vg1" @tag
+check vg_field $vg1 vg_tags "113,tag"
+not check vg_field $vg2 vg_tags "113,tag"
+vgchange --deltag 113 --deltag tag
+
+###########################################
+# exercise process_each_lv with selection #
+###########################################
+
+# select contains VGS field
+lvchange --addtag 201 -S "vg_name=$vg1"
+check lv_field $vg1/lv1 lv_tags 201
+check lv_field $vg1/lv2 lv_tags 201
+not check lv_field $vg2/lv3 lv_tags 201
+not check lv_field $vg2/lv4 lv_tags 201
+not check lv_field $vg2/lv1 lv_tags 201
+lvchange --deltag 201 $vg1 $vg2
+
+# select contains LVS fields
+lvchange --addtag 202 -S "lv_name=lv2"
+not check lv_field $vg1/lv1 lv_tags 202
+check lv_field $vg1/lv2 lv_tags 202
+not check lv_field $vg2/lv3 lv_tags 202
+not check lv_field $vg2/lv4 lv_tags 202
+not check lv_field $vg2/lv1 lv_tags 202
+lvchange --deltag 202 $vg1 $vg2
+lvchange --addtag 203 -S "lv_name=lv1"
+check lv_field $vg1/lv1 lv_tags 203
+not check lv_field $vg1/lv2 lv_tags 203
+not check lv_field $vg2/lv3 lv_tags 203
+not check lv_field $vg2/lv4 lv_tags 203
+check lv_field $vg2/lv1 lv_tags 203
+lvchange --deltag 203 $vg1 $vg2
+
+# select contains SEGS field
+lvchange --addtag 204 -S "seg_start=8m"
+not check lv_field $vg1/lv1 lv_tags 204
+check lv_field $vg1/lv2 lv_tags 204
+not check lv_field $vg2/lv3 lv_tags 204
+not check lv_field $vg2/lv4 lv_tags 204
+not check lv_field $vg2/lv1 lv_tags 204
+lvchange --deltag 204 $vg1 $vg2
+
+# select contains PVS field - COMBINATION NOT ALLOWED!
+lvchange --addtag 205 -S pv_name="$dev1" 2>err
+grep "Can't report LV and PV fields at the same time" err
+grep "Selection failed for LV" err
+not check lv_field $vg1/lv1 lv_tags 205
+not check lv_field $vg1/lv2 lv_tags 205
+not check lv_field $vg2/lv3 lv_tags 205
+not check lv_field $vg2/lv4 lv_tags 205
+not check lv_field $vg2/lv1 lv_tags 205
+
+# select contains PVSEGS field - COMBINATION NOT ALLOWED!
+lvchange --addtag 206 -S "pvseg_start>=0" 2>err
+grep "Can't report LV and PV fields at the same time" err
+grep "Selection failed for LV" err
+not check lv_field $vg1/lv1 lv_tags 206
+not check lv_field $vg1/lv2 lv_tags 206
+not check lv_field $vg2/lv3 lv_tags 206
+not check lv_field $vg2/lv4 lv_tags 206
+not check lv_field $vg2/lv1 lv_tags 206
+
+# if LV name or tag is supplied together with the
+# selection, the result is an intersection of both
+lvchange --addtag 207 -S "lv_name=lv2" $vg1/lv1
+not check lv_field $vg1/lv1 lv_tags 207
+not check lv_field $vg1/lv2 lv_tags 207
+not check lv_field $vg2/lv3 lv_tags 207
+not check lv_field $vg2/lv4 lv_tags 207
+not check lv_field $vg2/lv1 lv_tags 207
+lvchange --deltag 207 $vg1 $vg2
+lvchange --addtag 208 -S "lv_name=lv2" $vg1/lv2
+not check lv_field $vg1/lv1 lv_tags 208
+check lv_field $vg1/lv2 lv_tags 208
+not check lv_field $vg2/lv3 lv_tags 208
+not check lv_field $vg2/lv4 lv_tags 208
+not check lv_field $vg2/lv1 lv_tags 208
+lvchange --deltag 208 $vg1 $vg2
+lvchange --addtag "tag" $vg1/lv2
+lvchange --addtag 209 -S "lv_name=lv3" @tag
+not check lv_field $vg1/lv1 lv_tags "209,tag"
+not check lv_field $vg1/lv2 lv_tags "209,tag"
+not check lv_field $vg2/lv3 lv_tags "209,tag"
+not check lv_field $vg2/lv4 lv_tags "209,tag"
+not check lv_field $vg2/lv1 lv_tags "209,tag"
+lvchange --deltag 209 $vg1 $vg2
+lvchange --addtag 210 -S "lv_name=lv2" @tag
+not check lv_field $vg1/lv1 lv_tags "210,tag"
+check lv_field $vg1/lv2 lv_tags "210,tag"
+not check lv_field $vg2/lv3 lv_tags "210,tag"
+not check lv_field $vg2/lv4 lv_tags "210,tag"
+not check lv_field $vg2/lv1 lv_tags "210,tag"
+lvchange --deltag 210 --deltag tag $vg1 $vg2
+
+###########################################
+# exercise process_each_pv with selection #
+###########################################
+
+# select contains VGS field
+pvchange --addtag 301 -S "vg_name=$vg1"
+check pv_field "$dev1" pv_tags 301
+check pv_field "$dev2" pv_tags 301
+not check pv_field "$dev3" pv_tags 301
+not check pv_field "$dev4" pv_tags 301
+pvchange -a --deltag 301
+
+# select contains LVS field
+pvchange --addtag 302 -S "lv_name=lv2"
+check pv_field "$dev1" pv_tags 302
+check pv_field "$dev2" pv_tags 302
+not check pv_field "$dev3" pv_tags 302
+not check pv_field "$dev4" pv_tags 302
+pvchange -a --deltag 302
+
+# select contains SEGS field
+pvchange --addtag 303 -S "seg_start=8m"
+check pv_field "$dev1" pv_tags 303
+not check pv_field "$dev2" pv_tags 303
+not check pv_field "$dev3" pv_tags 303
+not check pv_field "$dev4" pv_tags 303
+pvchange -a --deltag 303
+
+# select contains PVS field
+pvchange --addtag 304 -S pv_name="$dev1"
+check pv_field "$dev1" pv_tags 304
+not check pv_field "$dev2" pv_tags 304
+not check pv_field "$dev3" pv_tags 304
+not check pv_field "$dev4" pv_tags 304
+pvchange -a --deltag 304
+
+# select contains PVSEGS field
+pvchange --addtag 305 -S "pvseg_size=2"
+not check pv_field "$dev1" pv_tags 305
+check pv_field "$dev2" pv_tags 305
+not check pv_field "$dev3" pv_tags 305
+not check pv_field "$dev4" pv_tags 305
+pvchange -a --deltag 305
+
+# if PV name or tag is supplied together with the
+# selection, the result is an intersection of both
+pvchange --addtag 306 -S pv_name="$dev1" "$dev2"
+not check pv_field "$dev1" pv_tags 306
+not check pv_field "$dev2" pv_tags 306
+not check pv_field "$dev3" pv_tags 306
+not check pv_field "$dev4" pv_tags 306
+pvchange -a --deltag 306
+pvchange --addtag 307 -S pv_name="$dev1" "$dev1"
+check pv_field "$dev1" pv_tags 307
+not check pv_field "$dev2" pv_tags 307
+not check pv_field "$dev3" pv_tags 307
+not check pv_field "$dev4" pv_tags 307
+pvchange -a --deltag 307
+pvchange --addtag "tag" "$dev1"
+pvchange --addtag 308 -S pv_name="$dev2" @tag
+not check pv_field "$dev1" pv_tags "308,tag"
+not check pv_field "$dev2" pv_tags "308,tag"
+not check pv_field "$dev3" pv_tags "308,tag"
+not check pv_field "$dev4" pv_tags "308,tag"
+pvchange --deltag 308 "$dev1"
+pvchange --addtag 309 -S pv_name="$dev1" @tag
+check pv_field "$dev1" pv_tags "309,tag"
+not check pv_field "$dev2" pv_tags "309,tag"
+not check pv_field "$dev3" pv_tags "309,tag"
+not check pv_field "$dev4" pv_tags "309,tag"
+pvchange -a --deltag 309 --deltag tag
+
+#########################
+# special cases to test #
+#########################
+
+# if calling vgremove, make sure we're doing selection per-VG, not per-LV
+# (vgremove calls process_each_vg with vgremove_single which itself
+# iterates over LVs with process_each_lv_in_vg - so internally it actually
+# operates per-LV, but we still need the selection to be done per-VG)
+vgremove --yes -S 'lv_name=lv2' # should remove whole vg1, not just the lv2
+vgremove --yes $vg2
diff --git a/test/shell/snapshot-autoumount-dmeventd.sh b/test/shell/snapshot-autoumount-dmeventd.sh
index 9d5b8c555..d369dac06 100644
--- a/test/shell/snapshot-autoumount-dmeventd.sh
+++ b/test/shell/snapshot-autoumount-dmeventd.sh
@@ -13,6 +13,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
which mkfs.ext2 || skip
aux lvmconf "activation/snapshot_autoextend_percent = 0" \
@@ -30,14 +32,17 @@ lvchange --monitor y $vg/snap
mkdir "$mntdir"
mount "$DM_DEV_DIR/mapper/$vg-snap" "$mntdir"
-mount
+
cat /proc/mounts | grep "$mntdir"
-dd if=/dev/zero of="$mntdir/file$1" bs=1M count=5
-sync
-#dmeventd only checks every 10 seconds :(
-for i in {1..10}; do
- cat /proc/mounts | grep "$mntdir" || break
- sleep 1
+not dd if=/dev/zero of="$mntdir/file$1" bs=1M count=5 oflag=direct
+
+# Should be nearly instant check of dmeventd for invalid snapshot.
+# Wait here for umount and open_count drops to 0 as it may
+# take a while to finalize umount operation (it might be already
+# removed from /proc/mounts, but still opened).
+for i in {1..100}; do
+ test $(dmsetup info -c --noheadings -o open $vg-snap) -eq 0 && break
+ sleep .1
done
cat /proc/mounts | not grep "$mntdir"
diff --git a/test/shell/snapshot-cluster.sh b/test/shell/snapshot-cluster.sh
index 051c16516..47c8d2e25 100644
--- a/test/shell/snapshot-cluster.sh
+++ b/test/shell/snapshot-cluster.sh
@@ -14,6 +14,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_vg 1
lvcreate -aey -L1 -n $lv1 $vg
diff --git a/test/shell/snapshot-lvm1.sh b/test/shell/snapshot-lvm1.sh
index f6afe148c..0abba0c85 100644
--- a/test/shell/snapshot-lvm1.sh
+++ b/test/shell/snapshot-lvm1.sh
@@ -14,6 +14,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_devs 2
vgcreate --metadatatype 1 $vg $(cat DEVICES)
diff --git a/test/shell/snapshot-maxsize.sh b/test/shell/snapshot-maxsize.sh
index f5be5a555..e24f96552 100644
--- a/test/shell/snapshot-maxsize.sh
+++ b/test/shell/snapshot-maxsize.sh
@@ -14,6 +14,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_pvs 1
vgcreate -s 1K $vg $(cat DEVICES)
@@ -22,7 +24,7 @@ lvcreate -aey -L1 -n $lv1 $vg
# Snapshot should be large enough to handle any writes
lvcreate -L2 -s $vg/$lv1 -n $lv2
-dd if=/dev/zero of="$DM_DEV_DIR/$vg/$lv2" bs=1M count=1
+dd if=/dev/zero of="$DM_DEV_DIR/$vg/$lv2" bs=1M count=1 oflag=direct
# Snapshot must not be 'I'nvalid here
check lv_attr_bit state $vg/$lv2 "a"
diff --git a/test/shell/snapshot-reactivate.sh b/test/shell/snapshot-reactivate.sh
index 0fbdc6232..978f7c017 100644
--- a/test/shell/snapshot-reactivate.sh
+++ b/test/shell/snapshot-reactivate.sh
@@ -17,6 +17,8 @@
#
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
# Snapshot should remain unmodified
check_s_() {
check dev_md5sum $vg s
diff --git a/test/shell/snapshot-rename.sh b/test/shell/snapshot-rename.sh
index 8d1eefba7..47bf21383 100644
--- a/test/shell/snapshot-rename.sh
+++ b/test/shell/snapshot-rename.sh
@@ -14,6 +14,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_vg 1
lvcreate -aey -L1 -n $lv1 $vg
diff --git a/test/shell/snapshot-usage-exa.sh b/test/shell/snapshot-usage-exa.sh
new file mode 100644
index 000000000..133014b2b
--- /dev/null
+++ b/test/shell/snapshot-usage-exa.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+# Copyright (C) 2015 Red Hat, Inc. All rights reserved.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU General Public License v.2.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+# Check very large device size (up to 15 exabytes)
+# this needs 64bit arch
+
+. lib/inittest
+
+test -e LOCAL_LVMPOLLD && skip
+
+aux can_use_16T || skip
+
+aux prepare_pvs 1
+
+# Prepare large enough backend device
+vgcreate -s 4M $vg $(cat DEVICES)
+lvcreate --type snapshot -s -l 100%FREE -n $lv $vg --virtualsize 15P
+aux extend_filter_LVMTEST
+
+# Check usability with largest extent size
+pvcreate "$DM_DEV_DIR/$vg/$lv"
+vgcreate -s 4G $vg1 "$DM_DEV_DIR/$vg/$lv"
+
+lvcreate -an -Zn -l50%FREE -n $lv1 $vg1
+lvcreate -s -l100%FREE -n $lv2 $vg1/$lv1
+check lv_field $vg1/$lv2 size "7.50p"
+lvremove -ff $vg1
+
+lvcreate --type snapshot -V15E -l1 -n $lv1 -s $vg1
+check lv_field $vg1/$lv1 origin_size "15.00e"
+
+vgremove -ff $vg1
+vgremove -ff $vg
diff --git a/test/shell/snapshot-usage.sh b/test/shell/snapshot-usage.sh
index 74f54decf..ed9d8cb07 100644
--- a/test/shell/snapshot-usage.sh
+++ b/test/shell/snapshot-usage.sh
@@ -13,6 +13,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
MKFS=mkfs.ext2
which $MKFS || skip
@@ -68,7 +70,7 @@ vgcreate -s 4M $vg $(cat DEVICES)
# Play with 1 extent
lvcreate -aey -l1 -n $lv $vg
# 100%LV is not supported for snapshot
-fail lvcreate -s -l 100%LV -n snap $vg/$lv |& tee out
+fail lvcreate -s -l 100%LV -n snap $vg/$lv 2>&1 | tee out
grep 'Please express size as %FREE, %ORIGIN, %PVS or %VG' out
# 100%ORIGIN needs to have enough space for all data and needs to round-up
lvcreate -s -l 100%ORIGIN -n $lv1 $vg/$lv
@@ -208,27 +210,3 @@ fsck -n "$DM_DEV_DIR/$vg1/snap"
check lv_field $vg1/snap data_percent "$EXPECT4"
vgremove -ff $vg1
-
-
-# Can't test >= 16T devices on 32bit
-test "$TSIZE" = 15P || exit 0
-
-# synchronize with udev activity
-# FIXME - otherwise sequence of vgremove followed by vgcreate may fail...
-# as there could be still remaing links in /dev
-# Unusure if 'vgcreate' should do this type of detection in udev mode.
-aux udev_wait
-
-# Check usability with largest extent size
-pvcreate "$DM_DEV_DIR/$vg/$lv"
-vgcreate -s 4G $vg1 "$DM_DEV_DIR/$vg/$lv"
-
-lvcreate -an -Zn -l50%FREE -n $lv1 $vg1
-lvcreate -s -l100%FREE -n $lv2 $vg1/$lv1
-check lv_field $vg1/$lv2 size "7.50p"
-lvremove -ff $vg1
-
-lvcreate --type snapshot -V15E -l1 -n $lv1 -s $vg1
-check lv_field $vg1/$lv1 origin_size "15.00e"
-
-vgremove -ff $vg1
diff --git a/test/shell/stray-device-node.sh b/test/shell/stray-device-node.sh
index 0ff4796cd..09d9e75ed 100644
--- a/test/shell/stray-device-node.sh
+++ b/test/shell/stray-device-node.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_devs 3
cp -r "$dev1" "$DM_DEV_DIR/stray"
diff --git a/test/shell/system_id.sh b/test/shell/system_id.sh
new file mode 100644
index 000000000..5bb5c3a9e
--- /dev/null
+++ b/test/shell/system_id.sh
@@ -0,0 +1,886 @@
+#!/bin/sh
+# Copyright (C) 2015 Red Hat, Inc. All rights reserved.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU General Public License v.2.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+test_description='Test system_id'
+
+. lib/inittest
+
+test -e LOCAL_LVMPOLLD && skip
+
+aux prepare_devs 1
+
+# with clvm enabled, vgcreate with no -c option creates a clustered vg,
+# which should have no system id
+
+if [ -e LOCAL_CLVMD ]; then
+SID1=sidfoolocal
+SID2=""
+LVMLOCAL=etc/lvmlocal.conf
+rm -f $LVMLOCAL
+echo "local {" > $LVMLOCAL
+echo " system_id = $SID1" >> $LVMLOCAL
+echo "}" >> $LVMLOCAL
+aux lvmconf "global/system_id_source = lvmlocal"
+vgcreate $vg1 "$dev1"
+vgs -o+systemid $vg1
+check vg_field $vg1 systemid $SID2
+vgremove $vg1
+rm -f $LVMLOCAL
+exit 0
+fi
+
+# create vg with system_id using each source
+
+## none
+
+SID=""
+aux lvmconf "global/system_id_source = none"
+vgcreate $vg1 "$dev1"
+check vg_field $vg1 systemid $SID
+vgremove $vg1
+
+# FIXME - print 'life' config data
+eval $(lvmconfig global/etc 2>/dev/null || lvmconfig --type default global/etc)
+
+## machineid
+if [ -e $etc/machine-id ]; then
+SID=$(cat $etc/machine-id)
+aux lvmconf "global/system_id_source = machineid"
+vgcreate $vg1 "$dev1"
+vgs -o+systemid $vg1
+check vg_field $vg1 systemid $SID
+vgremove $vg1
+fi
+
+## uname
+
+SID1=$(uname -n)
+if [ -n $SID1 ]; then
+aux lvmconf "global/system_id_source = uname"
+SID2=$(lvm systemid | awk '{ print $3 }')
+vgcreate $vg1 "$dev1"
+vgs -o+systemid $vg1
+check vg_field $vg1 systemid $SID2
+vgremove $vg1
+fi
+
+## lvmlocal
+
+SID=sidfoolocal
+LVMLOCAL=etc/lvmlocal.conf
+rm -f $LVMLOCAL
+echo "local {" > $LVMLOCAL
+echo " system_id = $SID" >> $LVMLOCAL
+echo "}" >> $LVMLOCAL
+aux lvmconf "global/system_id_source = lvmlocal"
+vgcreate $vg1 "$dev1"
+vgs -o+systemid $vg1
+check vg_field $vg1 systemid $SID
+vgremove $vg1
+rm -f $LVMLOCAL
+
+## file
+
+SID=sidfoofile
+SIDFILE=etc/lvm_test.conf
+rm -f $SIDFILE
+echo "$SID" > $SIDFILE
+aux lvmconf "global/system_id_source = file" \
+ "global/system_id_file = \"$SIDFILE\""
+vgcreate $vg1 "$dev1"
+vgs -o+systemid $vg1
+check vg_field $vg1 systemid $SID
+vgremove $vg1
+rm -f $SIDFILE
+
+# override system_id to create a foreign vg, then fail to use the vg
+
+SID1=sidfoofile1
+SID2=sidfoofile2
+SIDFILE=etc/lvm_test.conf
+rm -f $SIDFILE
+echo "$SID1" > $SIDFILE
+aux lvmconf "global/system_id_source = file" \
+ "global/system_id_file = \"$SIDFILE\""
+# create a vg, overriding the local system_id so the vg looks foreign
+vgcreate --systemid $SID2 $vg1 "$dev1"
+# normal vgs is not an error and does not see the vg
+vgs >err
+not grep $vg1 err
+# vgs on the foreign vg is an error and not displayed
+not vgs $vg1 >err
+not grep $vg1 err
+# fail to remove foreign vg
+not vgremove $vg1
+# using --foreign we can see foreign vg
+vgs --foreign >err
+grep $vg1 err
+vgs --foreign $vg1 >err
+grep $vg1 err
+# change the local system_id to the second value, making the vg not foreign
+echo "$SID2" > $SIDFILE
+# we can now see and remove the vg
+vgs $vg1 >err
+grep $vg1 err
+vgremove $vg1
+rm -f $SIDFILE
+
+# create a vg, then change the local system_id, making the vg foreign
+
+SID1=sidfoofile1
+SID2=sidfoofile2
+SIDFILE=etc/lvm_test.conf
+rm -f $SIDFILE
+echo "$SID1" > $SIDFILE
+aux lvmconf "global/system_id_source = file" \
+ "global/system_id_file = \"$SIDFILE\""
+# create a vg
+vgcreate $vg1 "$dev1"
+# normal vgs sees the vg
+vgs >err
+grep $vg1 err
+# change the local system_id, making the vg foreign
+echo "$SID2" > $SIDFILE
+# normal vgs doesn't see the vg
+vgs >err
+not grep $vg1 err
+# using --foreign we can see the vg
+vgs --foreign >err
+grep $vg1 err
+# change the local system_id back to the first value, making the vg not foreign
+echo "$SID1" > $SIDFILE
+vgs >err
+grep $vg1 err
+vgremove $vg1
+rm -f $SIDFILE
+
+# create a vg, then change the vg's system_id, making it foreign
+
+SID1=sidfoofile1
+SID2=sidfoofile2
+SIDFILE=etc/lvm_test.conf
+rm -f $SIDFILE
+echo "$SID1" > $SIDFILE
+aux lvmconf "global/system_id_source = file" \
+ "global/system_id_file = \"$SIDFILE\""
+# create a vg
+vgcreate $vg1 "$dev1"
+# normal vgs sees the vg
+vgs >err
+grep $vg1 err
+# change the vg's system_id, making the vg foreign
+echo "y" | vgchange --systemid $SID2 $vg1
+# normal vgs doesn't see the vg
+vgs >err
+not grep $vg1 err
+# using --foreign we can see the vg
+vgs --foreign >err
+grep $vg1 err
+# change the local system_id to the second system_id so we can remove the vg
+echo "$SID2" > $SIDFILE
+vgs >err
+grep $vg1 err
+vgremove $vg1
+rm -f $SIDFILE
+
+# create a vg, create active lvs in it, change our system_id, making
+# the VG foreign, verify that we can still see the foreign VG,
+# and can deactivate the LVs
+
+SID1=sidfoofile1
+SID2=sidfoofile2
+SIDFILE=etc/lvm_test.conf
+rm -f $SIDFILE
+echo "$SID1" > $SIDFILE
+aux lvmconf "global/system_id_source = file" \
+ "global/system_id_file = \"$SIDFILE\""
+# create a vg
+vgcreate $vg1 "$dev1"
+lvcreate -n $lv1 -l 2 $vg1
+# normal vgs sees the vg and lv
+vgs >err
+grep $vg1 err
+check lv_exists $vg1 $lv1
+# change our system_id, making the vg foreign, but accessible
+echo "$SID2" > $SIDFILE
+vgs >err
+grep $vg1 err
+check lv_exists $vg1 $lv1
+# can deactivate the lv
+lvchange -an $vg1/$lv1
+# now that the foreign vg has no active lvs, we can't access it
+not lvremove $vg1/$lv1
+not vgremove $vg1
+# change our system_id back to match the vg so it's not foreign
+echo "$SID1" > $SIDFILE
+vgs >err
+grep $vg1 err
+lvremove $vg1/$lv1
+vgremove $vg1
+rm -f $SIDFILE
+
+# local system has no system_id, so it can't access a vg with a system_id
+
+SID1=sidfoofile1
+SIDFILE=etc/lvm_test.conf
+rm -f $SIDFILE
+echo "$SID1" > $SIDFILE
+aux lvmconf "global/system_id_source = file" \
+ "global/system_id_file = \"$SIDFILE\""
+# create a vg
+vgcreate $vg1 "$dev1"
+aux lvmconf "global/system_id_source = none"
+vgs >err
+not grep $vg1 err
+not vgs $vg1 >err
+not grep $vg1 err
+aux lvmconf "global/system_id_source = file"
+vgs >err
+grep $vg1 err
+vgremove $vg1
+rm -f $SIDFILE
+
+# local system has a system_id, and can use a vg without a system_id
+
+SID1=sidfoofile1
+SIDFILE=etc/lvm_test.conf
+rm -f $SIDFILE
+# create a vg with no system_id
+aux lvmconf "global/system_id_source = none"
+vgcreate $vg1 "$dev1"
+check vg_field $vg1 systemid ""
+# set a local system_id
+echo "$SID1" > $SIDFILE
+aux lvmconf "global/system_id_source = file" \
+ "global/system_id_file = \"$SIDFILE\""
+# check we can see and use the vg with no system_id
+vgs >err
+grep $vg1 err
+vgs $vg1 >err
+grep $vg1 err
+vgremove $vg1
+rm -f $SIDFILE
+
+# vgexport clears system_id, vgimport sets system_id
+
+SID1=sidfoofile1
+SIDFILE=etc/lvm_test.conf
+rm -f $SIDFILE
+echo "$SID1" > $SIDFILE
+aux lvmconf "global/system_id_source = file" \
+ "global/system_id_file = \"$SIDFILE\""
+# create a vg
+vgcreate $vg1 "$dev1"
+# normal vgs sees the vg
+vgs -o+systemid >err
+grep $vg1 err
+grep $SID1 err
+# after vgexport there is no systemid
+vgexport $vg1
+vgs -o+systemid >err
+grep $vg1 err
+not grep $SID1 err
+# after vgimport there is a systemid
+vgimport $vg1
+vgs -o+systemid >err
+grep $vg1 err
+grep $SID1 err
+vgremove $vg1
+rm -f $SIDFILE
+
+# vgchange -cy clears system_id, vgchange -cn sets system_id
+
+SID1=sidfoofile1
+SIDFILE=etc/lvm_test.conf
+rm -f $SIDFILE
+echo "$SID1" > $SIDFILE
+aux lvmconf "global/system_id_source = file" \
+ "global/system_id_file = \"$SIDFILE\""
+# create a vg
+vgcreate $vg1 "$dev1"
+# normal vgs sees the vg
+vgs -o+systemid >err
+grep $vg1 err
+grep $SID1 err
+# after vgchange -cy there is no systemid
+echo "y" | vgchange -cy $vg1
+vgs --config 'global { locking_type=0 }' -o+systemid $vg1 >err
+grep $vg1 err
+not grep $SID1 err
+# after vgchange -cn there is a systemid
+vgchange --config 'global { locking_type=0 }' -cn $vg1
+vgs -o+systemid >err
+grep $vg1 err
+grep $SID1 err
+vgremove $vg1
+rm -f $SIDFILE
+
+# Test max system_id length (128) and invalid system_id characters.
+# The 128 length limit is imposed before invalid characters are omitted.
+
+SIDFILE=etc/lvm_test.conf
+
+# 120 numbers followed by 8 letters (max len)
+SID1=012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789abcdefgh
+# 120 numbers followed by 9 letters (too long by 1 character, the last is omitted)
+SID2=012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789abcdefghi
+
+# max len system_id should appear normally
+rm -f $SIDFILE
+echo "$SID1" > $SIDFILE
+aux lvmconf "global/system_id_source = file" \
+ "global/system_id_file = \"$SIDFILE\""
+# create a vg
+vgcreate $vg1 "$dev1"
+# normal vgs sees the vg
+vgs -o+systemid $vg1 >err
+grep $vg1 err
+grep $SID1 err
+vgremove $vg1
+rm -f $SIDFILE
+
+# max+1 len system_id should be missing the last character
+rm -f $SIDFILE
+echo "$SID2" > $SIDFILE
+aux lvmconf "global/system_id_source = file" \
+ "global/system_id_file = \"$SIDFILE\""
+# create a vg
+vgcreate $vg1 "$dev1"
+# normal vgs sees the vg
+vgs -o+systemid $vg1 >err
+grep $vg1 err
+grep $SID1 err
+not grep $SID2 err
+vgremove $vg1
+rm -f $SIDFILE
+
+# max len system_id containing an invalid character should appear without
+# the invalid character
+# 120 numbers followed by invalid '%' character followed by 8 letters (too long by 1 character)
+SID1=012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789%abcdefgh
+# SID2 is the expected resulting system_id: SID1 is first truncated to the
+# max length (128), and then the invalid '%' character is omitted
+SID2=012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789abcdefg
+rm -f $SIDFILE
+echo "$SID1" > $SIDFILE
+aux lvmconf "global/system_id_source = file" \
+ "global/system_id_file = \"$SIDFILE\""
+# create a vg
+vgcreate $vg1 "$dev1"
+# normal vgs sees the vg
+vgs -o+systemid $vg1 >err
+grep $vg1 err
+not grep $SID1 err
+grep $SID2 err
+vgremove $vg1
+rm -f $SIDFILE
+
+# contains a bunch of invalid characters
+SID1="?%$&A.@1]"
+# SID1 without the invalid characters
+SID2=A.1
+
+rm -f $SIDFILE
+echo "$SID1" > $SIDFILE
+aux lvmconf "global/system_id_source = file" \
+ "global/system_id_file = \"$SIDFILE\""
+# create a vg
+vgcreate $vg1 "$dev1"
+# normal vgs sees the vg
+vgs -o+systemid $vg1 >err
+grep $vg1 err
+not grep $SID1 err
+grep $SID2 err
+vgremove $vg1
+rm -f $SIDFILE
+
+
+# pvs: pv in a foreign vg not reported
+# pvs --foreign: pv in a foreign vg is reported
+
+SID1=sidfoofile1
+SID2=sidfoofile2
+SIDFILE=etc/lvm_test.conf
+rm -f $SIDFILE
+echo "$SID1" > $SIDFILE
+aux lvmconf "global/system_id_source = file" \
+ "global/system_id_file = \"$SIDFILE\""
+# create a vg
+vgcreate $vg1 "$dev1"
+# normal pvs sees the vg and pv
+pvs >err
+grep $vg1 err
+grep "$dev1" err
+# change the local system_id, making the vg foreign
+echo "$SID2" > $SIDFILE
+# normal pvs does not see the vg or pv
+pvs >err
+not grep $vg1 err
+not grep "$dev1" err
+# pvs --foreign does see the vg and pv
+pvs --foreign >err
+grep $vg1 err
+grep "$dev1" err
+# change the local system_id back so the vg can be removed
+echo "$SID1" > $SIDFILE
+vgremove $vg1
+rm -f $SIDFILE
+
+# lvs: lvs in a foreign vg not reported
+# lvs --foreign: lvs in a foreign vg are reported
+
+SID1=sidfoofile1
+SID2=sidfoofile2
+SIDFILE=etc/lvm_test.conf
+rm -f $SIDFILE
+echo "$SID1" > $SIDFILE
+aux lvmconf "global/system_id_source = file" \
+ "global/system_id_file = \"$SIDFILE\""
+# create a vg
+vgcreate $vg1 "$dev1"
+lvcreate -n $lv1 -l 2 $vg1
+lvchange -an $vg1/$lv1
+# normal lvs sees the vg and lv
+lvs >err
+grep $vg1 err
+grep $lv1 err
+# change the local system_id, making the vg foreign
+echo "$SID2" > $SIDFILE
+# normal lvs does not see the vg or lv
+lvs >err
+not grep $vg1 err
+not grep $lv1 err
+# lvs --foreign does see the vg and lv
+lvs --foreign >err
+grep $vg1 err
+grep $lv1 err
+# change the local system_id back so the vg can be removed
+echo "$SID1" > $SIDFILE
+lvremove $vg1/$lv1
+vgremove $vg1
+rm -f $SIDFILE
+
+# use extra_system_ids to read a foreign VG
+
+SID1=sidfoofile1
+SID2=sidfoofile2
+SIDFILE=etc/lvm_test.conf
+LVMLOCAL=etc/lvmlocal.conf
+rm -f $LVMLOCAL
+rm -f $SIDFILE
+echo "$SID1" > $SIDFILE
+aux lvmconf "global/system_id_source = file" \
+ "global/system_id_file = \"$SIDFILE\""
+# create a vg
+vgcreate $vg1 "$dev1"
+# normal vgs sees the vg
+vgs >err
+grep $vg1 err
+# change the local system_id, making the vg foreign
+echo "$SID2" > $SIDFILE
+# normal vgs doesn't see the vg
+vgs >err
+not grep $vg1 err
+# using --foreign we can see the vg
+vgs --foreign >err
+grep $vg1 err
+# add the first system_id to extra_system_ids so we can see the vg
+echo "local {" > $LVMLOCAL
+echo " extra_system_ids = [ \"$SID1\" ]" >> $LVMLOCAL
+echo "}" >> $LVMLOCAL
+vgs >err
+grep $vg1 err
+vgremove $vg1
+rm -f $SIDFILE
+rm -f $LVMLOCAL
+
+# vgcreate --systemid "" creates a vg without a system_id even if source is set
+SID1=sidfoofile1
+SIDFILE=etc/lvm_test.conf
+rm -f $SIDFILE
+echo "$SID1" > $SIDFILE
+aux lvmconf "global/system_id_source = file" \
+ "global/system_id_file = \"$SIDFILE\""
+# create a vg
+vgcreate --systemid "" $vg1 "$dev1"
+# normal vgs sees the vg
+vgs >err
+grep $vg1 err
+# our system_id is not displayed for the vg
+vgs -o+systemid >err
+not grep $SID1 err
+vgremove $vg1
+rm -f $SIDFILE
+
+# vgchange --systemid "" clears the system_id on owned vg
+SID1=sidfoofile1
+SIDFILE=etc/lvm_test.conf
+rm -f $SIDFILE
+echo "$SID1" > $SIDFILE
+aux lvmconf "global/system_id_source = file" \
+ "global/system_id_file = \"$SIDFILE\""
+# create a vg
+vgcreate $vg1 "$dev1"
+# normal vgs sees the vg
+vgs >err
+grep $vg1 err
+# the vg has our system_id
+vgs -o+systemid >err
+grep $SID1 err
+# clear the system_id
+vgchange --yes --systemid "" $vg1
+# normal vgs sees the vg
+vgs >err
+grep $vg1 err
+# the vg does not have our system_id
+vgs -o+systemid >err
+not grep $SID1 err
+vgremove $vg1
+rm -f $SIDFILE
+
+# vgchange --systemid does not set the system_id on foreign vg
+SID1=sidfoofile1
+SID2=sidfoofile2
+SIDFILE=etc/lvm_test.conf
+rm -f $LVMLOCAL
+rm -f $SIDFILE
+echo "$SID1" > $SIDFILE
+aux lvmconf "global/system_id_source = file" \
+ "global/system_id_file = \"$SIDFILE\""
+# create a vg
+vgcreate $vg1 "$dev1"
+# normal vgs sees the vg
+vgs >err
+grep $vg1 err
+# change the local system_id, making the vg foreign
+echo "$SID2" > $SIDFILE
+# normal vgs doesn't see the vg
+vgs >err
+not grep $vg1 err
+# using --foreign we can see the vg
+vgs --foreign >err
+grep $vg1 err
+# cannot clear the system_id of the foreign vg
+not vgchange --yes --systemid "" $vg1
+# cannot set the system_id of the foreign vg
+not vgchange --yes --systemid foo $vg1
+# change our system_id back so we can remove the vg
+echo "$SID1" > $SIDFILE
+vgremove $vg1
+rm -f $SIDFILE
+
+# vgcfgbackup backs up foreign vg with --foreign
+SID1=sidfoofile1
+SID2=sidfoofile2
+SIDFILE=etc/lvm_test.conf
+rm -f $LVMLOCAL
+rm -f $SIDFILE
+echo "$SID1" > $SIDFILE
+aux lvmconf "global/system_id_source = file" \
+ "global/system_id_file = \"$SIDFILE\""
+# create a vg
+vgcreate $vg1 "$dev1"
+# normal vgs sees the vg
+vgs >err
+grep $vg1 err
+# change the local system_id, making the vg foreign
+echo "$SID2" > $SIDFILE
+# normal vgs doesn't see the vg
+vgs >err
+not grep $vg1 err
+# using --foreign we can back up the vg
+not vgcfgbackup $vg1
+vgcfgbackup --foreign $vg1
+# change our system_id back so we can remove the vg
+echo "$SID1" > $SIDFILE
+vgremove $vg1
+rm -f $SIDFILE
+
+
+
+# Test handling of bad system_id source configurations
+# The commands should proceed without a system_id.
+# Look at the warning/error messages.
+
+# vgcreate with source machineid, where no $etc/machine-id file exists
+if [ ! -e $etc/machine-id ]; then
+SID=""
+aux lvmconf "global/system_id_source = machineid"
+vgcreate $vg1 "$dev1" 2>&1 | tee err
+vgs -o+systemid $vg1
+check vg_field $vg1 systemid $SID
+grep "No system ID found from system_id_source" err
+vgremove $vg1
+fi
+
+# vgcreate with source uname, but uname is localhost
+# TODO: don't want to change the hostname on the test machine...
+
+# vgcreate with source lvmlocal, but no lvmlocal.conf file
+SID=""
+rm -f $LVMLOCAL
+aux lvmconf "global/system_id_source = lvmlocal"
+vgcreate $vg1 "$dev1" 2>&1 | tee err
+vgs -o+systemid $vg1
+check vg_field $vg1 systemid $SID
+grep "No system ID found from system_id_source" err
+vgremove $vg1
+rm -f $LVMLOCAL
+
+# vgcreate with source lvmlocal, but no system_id = "x" entry
+SID=""
+LVMLOCAL=etc/lvmlocal.conf
+rm -f $LVMLOCAL
+echo "local {" > $LVMLOCAL
+# echo " system_id = $SID" >> $LVMLOCAL
+echo "}" >> $LVMLOCAL
+aux lvmconf "global/system_id_source = lvmlocal"
+vgcreate $vg1 "$dev1" 2>&1 | tee err
+vgs -o+systemid $vg1
+check vg_field $vg1 systemid $SID
+grep "No system ID found from system_id_source" err
+vgremove $vg1
+rm -f $LVMLOCAL
+
+# vgcreate with source lvmlocal, and empty string system_id = ""
+SID=""
+LVMLOCAL=etc/lvmlocal.conf
+rm -f $LVMLOCAL
+echo "local {" > $LVMLOCAL
+echo " system_id = \"\"" >> $LVMLOCAL
+echo "}" >> $LVMLOCAL
+aux lvmconf "global/system_id_source = lvmlocal"
+vgcreate $vg1 "$dev1" 2>&1 | tee err
+vgs -o+systemid $vg1
+check vg_field $vg1 systemid $SID
+grep "No system ID found from system_id_source" err
+vgremove $vg1
+rm -f $LVMLOCAL
+
+# vgcreate with source file, but no system_id_file config
+SID=""
+SIDFILE=etc/lvm_test.conf
+rm -f $SIDFILE
+aux lvmconf "global/system_id_source = file"
+vgcreate $vg1 "$dev1" 2>&1 | tee err
+vgs -o+systemid $vg1
+check vg_field $vg1 systemid $SID
+grep "No system ID found from system_id_source" err
+vgremove $vg1
+rm -f $SIDFILE
+
+# vgcreate with source file, but system_id_file does not exist
+SID=""
+SIDFILE=etc/lvm_test.conf
+rm -f $SIDFILE
+aux lvmconf "global/system_id_source = file" \
+ "global/system_id_file = \"$SIDFILE\""
+vgcreate $vg1 "$dev1" 2>&1 | tee err
+vgs -o+systemid $vg1
+check vg_field $vg1 systemid $SID
+grep "No system ID found from system_id_source" err
+vgremove $vg1
+rm -f $SIDFILE
+
+
+# Test cases where lvmetad cache of a foreign VG are out of date
+# because the foreign owner has changed the VG.
+
+test ! -e LOCAL_LVMETAD && exit 0
+
+# When a foreign vg is newer on disk than in lvmetad, using --foreign
+# should find the newer version. This simulates a foreign host changing
+# foreign vg by turning off lvmetad when we create an lv in the vg.
+SID1=sidfoofile1
+SID2=sidfoofile2
+SIDFILE=etc/lvm_test.conf
+rm -f $SIDFILE
+echo "$SID1" > $SIDFILE
+aux lvmconf "global/system_id_source = file" \
+ "global/system_id_file = \"$SIDFILE\""
+# create a vg with an lv
+vgcreate $vg1 "$dev1"
+lvcreate -n $lv1 -l 2 -an $vg1
+# normal vgs sees the vg and lv
+vgs >err
+grep $vg1 err
+check lv_exists $vg1 $lv1
+# go around lvmetad to create another lv in the vg,
+# forcing the lvmetad copy to be older than on disk.
+aux lvmconf 'global/use_lvmetad = 0'
+lvcreate -n $lv2 -l 2 -an $vg1
+aux lvmconf 'global/use_lvmetad = 1'
+# verify that the second lv is not in lvmetad
+lvs $vg1 >err
+grep $lv1 err
+not grep $lv2 err
+# change our system_id, making the vg foreign
+echo "$SID2" > $SIDFILE
+vgs >err
+not grep $vg1 err
+# using --foreign, we will get the latest vg from disk
+lvs --foreign $vg1 >err
+grep $vg1 err
+grep $lv1 err
+grep $lv2 err
+# change our system_id back to match the vg so it's not foreign
+echo "$SID1" > $SIDFILE
+lvremove $vg1/$lv1
+lvremove $vg1/$lv2
+vgremove $vg1
+rm -f $SIDFILE
+
+# vgimport should find the exported vg on disk even though
+# lvmetad's copy of the vg shows it's foreign.
+SID1=sidfoofile1
+SID2=sidfoofile2
+SIDFILE=etc/lvm_test.conf
+rm -f $SIDFILE
+echo "$SID1" > $SIDFILE
+aux lvmconf "global/system_id_source = file" \
+ "global/system_id_file = \"$SIDFILE\""
+# create a vg with an lv
+vgcreate $vg1 "$dev1"
+lvcreate -n $lv1 -l 2 -an $vg1
+# normal vgs sees the vg and lv
+vgs >err
+grep $vg1 err
+check lv_exists $vg1 $lv1
+# go around lvmetad to export the vg so that lvmetad still
+# has the original vg owned by SID1 in its cache
+aux lvmconf 'global/use_lvmetad = 0'
+vgexport $vg1
+aux lvmconf 'global/use_lvmetad = 1'
+# change the local system_id so the lvmetad copy of the vg is foreign
+echo "$SID2" > $SIDFILE
+# verify that lvmetad thinks the vg is foreign
+# (don't use --foreign to verify this because that will cause
+# the lvmetad cache to be updated, which we don't want yet)
+not vgs $vg1
+# attempt to import the vg that has been exported, but
+# which lvmetad thinks is foreign
+vgimport $vg1
+# verify that the imported vg has our system_id
+vgs -o+systemid $vg1 >err
+grep $vg1 err
+grep $SID2 err
+check lv_exists $vg1 $lv1
+lvremove $vg1/$lv1
+vgremove $vg1
+rm -f $SIDFILE
+
+# pvscan --cache should cause the latest version of a foreign VG to be
+# cached in lvmetad. Without the --cache option, pvscan will see the old
+# version of the VG.
+SID1=sidfoofile1
+SID2=sidfoofile2
+SIDFILE=etc/lvm_test.conf
+rm -f $SIDFILE
+echo "$SID1" > $SIDFILE
+aux lvmconf "global/system_id_source = file" \
+ "global/system_id_file = \"$SIDFILE\""
+# create a vg with an lv
+vgcreate $vg1 "$dev1"
+lvcreate -n $lv1 -l 2 -an $vg1
+# normal vgs sees the vg and lv
+vgs >err
+grep $vg1 err
+check lv_exists $vg1 $lv1
+# go around lvmetad to create another lv in the vg,
+# forcing the lvmetad copy to be older than on disk.
+aux lvmconf 'global/use_lvmetad = 0'
+lvcreate -n $lv2 -l 2 -an $vg1
+aux lvmconf 'global/use_lvmetad = 1'
+# verify that the second lv is not in lvmetad
+lvs $vg1 >err
+grep $lv1 err
+not grep $lv2 err
+# verify that after pvscan without --cache, lvmetad still
+# reports the old version
+pvscan
+lvs $vg1 >err
+grep $lv1 err
+not grep $lv2 err
+# change our system_id, making the vg foreign
+echo "$SID2" > $SIDFILE
+not vgs $vg1 >err
+not grep $vg1 err
+# use pvscan --cache to update the foreign vg in lvmetad
+pvscan --cache
+not vgs $vg1 >err
+not grep $vg1 err
+# change our system_id back to SID1 so we can check that
+# lvmetad has the latest copy of the vg (without having
+# to use --foreign to check)
+echo "$SID1" > $SIDFILE
+vgs $vg1 >err
+grep $vg1 err
+lvs $vg1 >err
+grep $lv1 err
+grep $lv2 err
+lvremove $vg1/$lv1
+lvremove $vg1/$lv2
+vgremove $vg1
+rm -f $SIDFILE
+
+# repeat the same test for vgscan instead of pvscan
+SID1=sidfoofile1
+SID2=sidfoofile2
+SIDFILE=etc/lvm_test.conf
+rm -f $SIDFILE
+echo "$SID1" > $SIDFILE
+aux lvmconf "global/system_id_source = file" \
+ "global/system_id_file = \"$SIDFILE\""
+# create a vg with an lv
+vgcreate $vg1 "$dev1"
+lvcreate -n $lv1 -l 2 -an $vg1
+# normal vgs sees the vg and lv
+vgs >err
+grep $vg1 err
+check lv_exists $vg1 $lv1
+# go around lvmetad to create another lv in the vg,
+# forcing the lvmetad copy to be older than on disk.
+aux lvmconf 'global/use_lvmetad = 0'
+lvcreate -n $lv2 -l 2 -an $vg1
+aux lvmconf 'global/use_lvmetad = 1'
+# verify that the second lv is not in lvmetad
+lvs $vg1 >err
+grep $lv1 err
+not grep $lv2 err
+# verify that after vgscan without --cache, lvmetad still
+# reports the old version
+vgscan
+lvs $vg1 >err
+grep $lv1 err
+not grep $lv2 err
+# change our system_id, making the vg foreign
+echo "$SID2" > $SIDFILE
+not vgs $vg1 >err
+not grep $vg1 err
+# use vgscan --cache to update the foreign vg in lvmetad
+vgscan --cache
+not vgs $vg1 >err
+not grep $vg1 err
+# change our system_id back to SID1 so we can check that
+# lvmetad has the latest copy of the vg (without having
+# to use --foreign to check)
+echo "$SID1" > $SIDFILE
+vgs $vg1 >err
+grep $vg1 err
+lvs $vg1 >err
+grep $lv1 err
+grep $lv2 err
+lvremove $vg1/$lv1
+lvremove $vg1/$lv2
+vgremove $vg1
+rm -f $SIDFILE
+
+
diff --git a/test/shell/tags.sh b/test/shell/tags.sh
index bc66d15c7..37d738fd4 100644
--- a/test/shell/tags.sh
+++ b/test/shell/tags.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_pvs 4
# vgcreate with --addtag
diff --git a/test/shell/test-partition.sh b/test/shell/test-partition.sh
index af04b4ef5..0cdaa8705 100644
--- a/test/shell/test-partition.sh
+++ b/test/shell/test-partition.sh
@@ -19,6 +19,8 @@ LVM_TEST_CONFIG_DEVICES="types = [\"device-mapper\", 142]"
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
which sfdisk || skip
aux prepare_pvs 1 30
@@ -26,6 +28,7 @@ aux prepare_pvs 1 30
pvs "$dev1"
# create small partition table
-echo "1 2" | sfdisk "$dev1"
+echo "1 2" | sfdisk --force "$dev1"
-pvs "$dev1"
+aux notify_lvmetad "$dev1"
+not pvs "$dev1"
diff --git a/test/shell/thin-autoumount-dmeventd.sh b/test/shell/thin-autoumount-dmeventd.sh
index a77ad082c..5f1fc6b5c 100644
--- a/test/shell/thin-autoumount-dmeventd.sh
+++ b/test/shell/thin-autoumount-dmeventd.sh
@@ -11,6 +11,8 @@
# no automatic extensions, just umount
+export LVM_TEST_THIN_REPAIR_CMD=${LVM_TEST_THIN_REPAIR_CMD-/bin/false}
+
is_dir_mounted_()
{
cat /proc/mounts | sed 's:\\040: :g' | grep "$1"
@@ -18,6 +20,8 @@ is_dir_mounted_()
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
#
# Main
#
diff --git a/test/shell/thin-defaults.sh b/test/shell/thin-defaults.sh
index 3a681a1f1..3e49390a6 100644
--- a/test/shell/thin-defaults.sh
+++ b/test/shell/thin-defaults.sh
@@ -11,8 +11,12 @@
# test defaults entered through lvm.conf
+export LVM_TEST_THIN_REPAIR_CMD=${LVM_TEST_THIN_REPAIR_CMD-/bin/false}
+
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
#
# Main
#
diff --git a/test/shell/thin-merge.sh b/test/shell/thin-merge.sh
index d3275ddd0..ef53496db 100644
--- a/test/shell/thin-merge.sh
+++ b/test/shell/thin-merge.sh
@@ -11,6 +11,8 @@
# test merge of thin snapshot
+export LVM_TEST_THIN_REPAIR_CMD=${LVM_TEST_THIN_REPAIR_CMD-/bin/false}
+
. lib/inittest
MKFS=mkfs.ext2
diff --git a/test/shell/thin-overprovisioning.sh b/test/shell/thin-overprovisioning.sh
new file mode 100644
index 000000000..ac0b96cdb
--- /dev/null
+++ b/test/shell/thin-overprovisioning.sh
@@ -0,0 +1,71 @@
+#!/bin/sh
+# Copyright (C) 2015 Red Hat, Inc. All rights reserved.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU General Public License v.2.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+# Test warns when thin pool is overprovisioned
+
+export LVM_TEST_THIN_REPAIR_CMD=${LVM_TEST_THIN_REPAIR_CMD-/bin/false}
+
+. lib/inittest
+
+aux have_thin 1 3 0 || skip
+
+# 2PVs by 32M
+aux prepare_vg 2 33
+
+lvcreate -L32 -T $vg/pool
+
+# leave 12M free space
+lvcreate -an -n $lv1 -L16 $vg 2>&1 | tee out
+vgs $vg
+
+lvcreate -n thin1 -V30 $vg/pool 2>&1 | tee out
+not grep "WARNING: Sum" out
+
+# Pool gets overprovisioned
+lvcreate -an -n thin2 -V4 $vg/pool 2>&1 | tee out
+grep "WARNING: Sum" out
+grep "amount of free space in volume group (12.00 MiB)" out
+
+# Eat all space in VG
+lvcreate -an -n $lv2 -L12 $vg 2>&1 | tee out
+grep "WARNING: Sum" out
+grep "no free space in volume group" out
+
+lvcreate -an -n thin3 -V1G $vg/pool 2>&1 | tee out
+grep "WARNING: Sum" out
+grep "the size of whole volume group" out
+
+lvremove -ff $vg/thin2 $vg/thin3 $vg/$lv2
+
+# Create 2nd thin pool in a VG
+
+lvcreate -L4 -T $vg/pool2
+lvcreate -V4 -n thin2 $vg/pool2 2>&1 | tee out
+not grep "WARNING: Sum" out
+
+lvcreate -an -V4 -n thin3 $vg/pool2 2>&1 | tee out
+grep "WARNING: Sum of all thin volume sizes (38.00 MiB)" out
+grep "free space in volume group (6.00 MiB)" out
+
+lvcreate -an -L6 -n $lv3 $vg 2>&1 | tee out
+grep "no free space in volume group" out
+
+lvremove -ff $vg/thin2 $vg/thin3
+
+lvcreate -an -V4 -n thin2 $vg/pool2 2>&1 | tee out
+not grep "WARNING: Sum" out
+
+# Check if resize notices problem
+lvextend -L+8 $vg/thin2
+
+vgs $vg
+
+vgremove -ff $vg
diff --git a/test/shell/thin-restore.sh b/test/shell/thin-restore.sh
index fd4118f21..3516e2d4b 100644
--- a/test/shell/thin-restore.sh
+++ b/test/shell/thin-restore.sh
@@ -11,8 +11,12 @@
# test restore operation of thin pool metadata
+export LVM_TEST_THIN_REPAIR_CMD=${LVM_TEST_THIN_REPAIR_CMD-/bin/false}
+
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
#
# Main
#
diff --git a/test/shell/thin-vglock.sh b/test/shell/thin-vglock.sh
index fe1ee1115..d06c62cad 100644
--- a/test/shell/thin-vglock.sh
+++ b/test/shell/thin-vglock.sh
@@ -12,6 +12,8 @@
# Test locking works and doesn't update metadata
# RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1063542
+export LVM_TEST_THIN_REPAIR_CMD=${LVM_TEST_THIN_REPAIR_CMD-/bin/false}
+
. lib/inittest
MKFS=mkfs.ext2
diff --git a/test/shell/thin-volume-list.sh b/test/shell/thin-volume-list.sh
index 32d13b8e0..c3dab2576 100644
--- a/test/shell/thin-volume-list.sh
+++ b/test/shell/thin-volume-list.sh
@@ -11,8 +11,12 @@
# test pool behaviour when volume_list masks activation
+export LVM_TEST_THIN_REPAIR_CMD=${LVM_TEST_THIN_REPAIR_CMD-/bin/false}
+
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
#
# Main
#
diff --git a/test/shell/topology-support.sh b/test/shell/topology-support.sh
index caebb3d86..ebb72200a 100644
--- a/test/shell/topology-support.sh
+++ b/test/shell/topology-support.sh
@@ -1,5 +1,5 @@
#!/bin/sh
-# Copyright (C) 2010 Red Hat, Inc. All rights reserved.
+# Copyright (C) 2010-2015 Red Hat, Inc. All rights reserved.
#
# This copyrighted material is made available to anyone wishing to use,
# modify, copy, or redistribute it subject to the terms and conditions
@@ -11,21 +11,9 @@
. lib/inittest
-which mkfs.ext3 || skip
-
-check_logical_block_size() {
- # Verify logical_block_size - requires Linux >= 2.6.31
- SYSFS_LOGICAL_BLOCK_SIZE="/sys/block/$(basename $(< SCSI_DEBUG_DEV))/queue/logical_block_size"
- test -f "$SYSFS_LOGICAL_BLOCK_SIZE" || return 0
- test "$(< $SYSFS_LOGICAL_BLOCK_SIZE)" -eq "$1" # ACTUAL_LOGICAL_BLOCK_SIZE
-}
+test -e LOCAL_LVMPOLLD && skip
-check_optimal_io_size() {
- # Verify optimal_io_size
- SYSFS_OPTIMAL_IO_SIZE="/sys/block/$(basename $(< SCSI_DEBUG_DEV))/queue/optimal_io_size"
- test -f "$SYSFS_OPTIMAL_IO_SIZE" || return 0
- test "$(< $SYSFS_OPTIMAL_IO_SIZE)" -eq "$1" # ACTUAL_OPTIMAL_IO_SIZE
-}
+which mkfs.ext3 || skip
lvdev_() {
echo "$DM_DEV_DIR/$1/$2"
@@ -38,9 +26,11 @@ test_snapshot_mount() {
mount "$(lvdev_ $vg $lv1)" test_mnt
lvcreate -L4M -n $lv2 -s $vg/$lv1
umount test_mnt
+ aux udev_wait
# mount the origin
mount "$(lvdev_ $vg $lv1)" test_mnt
umount test_mnt
+ aux udev_wait
# mount the snapshot
mount "$(lvdev_ $vg $lv2)" test_mnt
umount test_mnt
@@ -55,23 +45,18 @@ NUM_DEVS=1
PER_DEV_SIZE=34
DEV_SIZE=$(($NUM_DEVS*$PER_DEV_SIZE))
-# Test that kernel supports topology
-aux prepare_scsi_debug_dev $DEV_SIZE || skip
-
-if [ ! -e /sys/block/$(basename $(< SCSI_DEBUG_DEV))/alignment_offset ] ; then
- aux cleanup_scsi_debug_dev
- skip
-fi
-aux cleanup_scsi_debug_dev
-
# ---------------------------------------------
# Create "desktop-class" 4K drive
# (logical_block_size=512, physical_block_size=4096, alignment_offset=0):
LOGICAL_BLOCK_SIZE=512
aux prepare_scsi_debug_dev $DEV_SIZE \
sector_size=$LOGICAL_BLOCK_SIZE physblk_exp=3
-check_logical_block_size $LOGICAL_BLOCK_SIZE
-
+# Test that kernel supports topology
+if [ ! -e /sys/block/$(basename $(< SCSI_DEBUG_DEV))/alignment_offset ] ; then
+ aux cleanup_scsi_debug_dev
+ skip
+fi
+check sysfs "$(< SCSI_DEBUG_DEV)" queue/logical_block_size $LOGICAL_BLOCK_SIZE
aux prepare_pvs $NUM_DEVS $PER_DEV_SIZE
get_devs
@@ -87,7 +72,7 @@ aux cleanup_scsi_debug_dev
LOGICAL_BLOCK_SIZE=512
aux prepare_scsi_debug_dev $DEV_SIZE \
sector_size=$LOGICAL_BLOCK_SIZE physblk_exp=3 lowest_aligned=7
-check_logical_block_size $LOGICAL_BLOCK_SIZE
+check sysfs "$(< SCSI_DEBUG_DEV)" queue/logical_block_size $LOGICAL_BLOCK_SIZE
aux prepare_pvs $NUM_DEVS $PER_DEV_SIZE
vgcreate $vg "${DEVICES[@]}"
@@ -102,7 +87,7 @@ aux cleanup_scsi_debug_dev
LOGICAL_BLOCK_SIZE=4096
aux prepare_scsi_debug_dev $DEV_SIZE \
sector_size=$LOGICAL_BLOCK_SIZE
-check_logical_block_size $LOGICAL_BLOCK_SIZE
+check sysfs "$(< SCSI_DEBUG_DEV)" queue/logical_block_size $LOGICAL_BLOCK_SIZE
aux prepare_pvs $NUM_DEVS $PER_DEV_SIZE
vgcreate $vg "${DEVICES[@]}"
@@ -111,6 +96,9 @@ vgremove $vg
aux cleanup_scsi_debug_dev
+# scsi_debug option opt_blks appeared in Oct 2010
+aux kernel_at_least 2 6 37 || exit 0
+
# ---------------------------------------------
# Create "enterprise-class" 512 drive w/ HW raid stripe_size = 768K
# (logical_block_size=512, physical_block_size=512, alignment_offset=0):
@@ -118,10 +106,15 @@ aux cleanup_scsi_debug_dev
LOGICAL_BLOCK_SIZE=512
aux prepare_scsi_debug_dev $DEV_SIZE \
sector_size=$LOGICAL_BLOCK_SIZE opt_blks=1536
-check_logical_block_size $LOGICAL_BLOCK_SIZE
-check_optimal_io_size 786432
+
+check sysfs "$(< SCSI_DEBUG_DEV)" queue/logical_block_size $LOGICAL_BLOCK_SIZE
+check sysfs "$(< SCSI_DEBUG_DEV)" queue/optimal_io_size 786432
aux prepare_pvs 1 $PER_DEV_SIZE
-check pv_field "${DEVICES[@]}" pe_start 768.00k
+
+# Kernel (3.19) could provide wrong results - in this case skip
+# test with incorrect result - lvm2 can't figure out good values.
+check sysfs "$dev1" queue/optimal_io_size 786432 || SHOULD=should
+$SHOULD check pv_field "${DEVICES[@]}" pe_start 768.00k
aux cleanup_scsi_debug_dev
diff --git a/test/shell/unknown-segment.sh b/test/shell/unknown-segment.sh
index b07e1b5f2..a1d3696c3 100644
--- a/test/shell/unknown-segment.sh
+++ b/test/shell/unknown-segment.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_vg 4
lvcreate -an -Zn -l 1 -n $lv1 $vg
diff --git a/test/shell/unlost-pv.sh b/test/shell/unlost-pv.sh
index 980c45179..76bf8a1c4 100644
--- a/test/shell/unlost-pv.sh
+++ b/test/shell/unlost-pv.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
check_() {
# vgscan needs --cache option for direct scan if lvmetad is used
test -e LOCAL_LVMETAD && cache="--cache"
@@ -43,4 +45,14 @@ check_
test -e LOCAL_LVMETAD && lvremove $vg/boo # FIXME trigger a write :-(
check_ not
+aux disable_dev "$dev1"
+vgreduce --removemissing --force $vg
+aux enable_dev "$dev1"
+
+vgscan 2>&1 | tee out
+grep 'Removing PV' out
+
+vgs 2>&1 | tee out
+not grep 'Removing PV' out
+
vgremove -ff $vg
diff --git a/test/shell/vg-name-from-env.sh b/test/shell/vg-name-from-env.sh
index 67ad5c558..a3d1b9ab5 100644
--- a/test/shell/vg-name-from-env.sh
+++ b/test/shell/vg-name-from-env.sh
@@ -9,6 +9,8 @@ test_description='Test the vg name for an lv from env var'
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_devs 2
pvcreate "$dev1"
diff --git a/test/shell/vgcfgbackup-usage.sh b/test/shell/vgcfgbackup-usage.sh
index eb3698397..b0faf3863 100644
--- a/test/shell/vgcfgbackup-usage.sh
+++ b/test/shell/vgcfgbackup-usage.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_pvs 4
pvcreate --metadatacopies 0 "$dev4"
diff --git a/test/shell/vgchange-many.sh b/test/shell/vgchange-many.sh
index 16e051682..7b3d557c1 100644
--- a/test/shell/vgchange-many.sh
+++ b/test/shell/vgchange-many.sh
@@ -12,10 +12,14 @@
# Check perfomance of activation and deactivation
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
# FIXME: lvmetad fails with i.e. 1500 device on memory failure...
# Number of LVs to create
DEVICES=1000
+# On low-memory boxes let's not stress too much
+test "$(aux total_mem)" -gt 524288 || DEVICES=256
aux prepare_pvs 1 400
diff --git a/test/shell/vgchange-maxlv.sh b/test/shell/vgchange-maxlv.sh
index 35ce5d6c1..613d985ee 100644
--- a/test/shell/vgchange-maxlv.sh
+++ b/test/shell/vgchange-maxlv.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_dmeventd
aux prepare_pvs 3
diff --git a/test/shell/vgchange-partial.sh b/test/shell/vgchange-partial.sh
index 7fd984016..f9c3ac02e 100644
--- a/test/shell/vgchange-partial.sh
+++ b/test/shell/vgchange-partial.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_vg 2
aux disable_dev "$dev1"
diff --git a/test/shell/vgchange-sysinit.sh b/test/shell/vgchange-sysinit.sh
index 4debf42cc..8450d63b0 100644
--- a/test/shell/vgchange-sysinit.sh
+++ b/test/shell/vgchange-sysinit.sh
@@ -12,6 +12,7 @@
. lib/inittest
test -e LOCAL_CLVMD && skip
+test -e LOCAL_LVMPOLLD && skip
which mkfs.ext3 || skip
diff --git a/test/shell/vgchange-usage.sh b/test/shell/vgchange-usage.sh
index e564337d8..077b447f7 100644
--- a/test/shell/vgchange-usage.sh
+++ b/test/shell/vgchange-usage.sh
@@ -13,6 +13,8 @@ test_description='Exercise some vgchange diagnostics'
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_pvs 4
pvcreate --metadatacopies 0 "$dev1"
@@ -85,7 +87,7 @@ vgremove -ff $vg
# set cluster bit
vgcreate -cn $vg "$dev1" "$dev2" "$dev3"
# check prompt to change cluster bit without giving explicit vg name
-fail vgchange -cy |& tee out
+fail vgchange -cy 2>&1 | tee out
grep "y/n" out
check vg_attr_bit cluster $vg "-"
@@ -113,9 +115,9 @@ else
# can't switch with active LV
vgchange --yes -cy $vg
fail vgchange --yes -cy $vg
- fail vgs $vg |& tee out
+ fail vgs $vg 2>&1 | tee out
grep "Skipping clustered volume group" out
- vgs --ignoreskippedcluster $vg |& tee out
+ vgs --ignoreskippedcluster $vg 2>&1 | tee out
not grep "Skipping clustered volume group" out
# reset back to non-clustered VG with disabled locking
vgchange -cn $vg --config 'global{locking_type=0}' $vg
diff --git a/test/shell/vgck.sh b/test/shell/vgck.sh
index 629b6e7fd..a4930aae0 100644
--- a/test/shell/vgck.sh
+++ b/test/shell/vgck.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_vg 3
lvcreate -n blabla -L 1 $vg
diff --git a/test/shell/vgcreate-many-pvs.sh b/test/shell/vgcreate-many-pvs.sh
new file mode 100644
index 000000000..bb78cafa7
--- /dev/null
+++ b/test/shell/vgcreate-many-pvs.sh
@@ -0,0 +1,64 @@
+#!/bin/sh
+# Copyright (C) 2015 Red Hat, Inc. All rights reserved.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU General Public License v.2.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+. lib/inittest
+
+test -e LOCAL_LVMPOLLD && skip
+
+#
+# Test to exercise larger number of PVs in a VG
+# Related to https://bugzilla.redhat.com/show_bug.cgi?id=736027
+#
+# Original measured times of the whole test case before
+# and with the acceleration patch from my bare metal hw
+# (Lenovo T61, 2.2GHz, 4G RAM, rawhide 2015-03-06 with ndebug kernel):
+#
+# export LVM_TEST_PVS=300
+#
+# make check_local ~52sec (U:29s, S:13s)
+# make check_lvmetad ~20sec (U: 4s, S: 5s)
+#
+# With patch from 2015-03-06:
+#
+# make check_local ~30sec (U:10s, S:12s)
+# make check_lvmetad ~20sec (U: 4s, S: 5s)
+#
+
+# TODO: extend test suite to monitor performance and report regressions...
+
+# Use just 100 to get 'decent' speed on slow boxes
+LVM_TEST_PVS=${LVM_TEST_PVS:-100}
+
+#aux prepare_devs $LVM_TEST_PVS 8
+#vgcreate $vg $(< DEVICES)
+
+# prepare_vg is now directly using steps above
+aux prepare_vg $LVM_TEST_PVS
+
+# Check we have decent speed with typical commands
+vgs
+
+lvs
+
+pvs
+
+lvcreate -l1 -n $lv1 $vg
+
+lvremove -f $vg/$lv1
+
+vgremove -ff $vg
+
+#
+# TODO Turn this into another test case:
+#
+#for i in $(seq 1 $LVM_TEST_PVS); do
+# vgcreate ${vg}$i "$DM_DEV_DIR/mapper/${PREFIX}pv$i"
+#done
diff --git a/test/shell/vgcreate-usage.sh b/test/shell/vgcreate-usage.sh
index a1d178d39..df0094c40 100644
--- a/test/shell/vgcreate-usage.sh
+++ b/test/shell/vgcreate-usage.sh
@@ -13,6 +13,8 @@ test_description='Exercise some vgcreate diagnostics'
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_devs 3
pvcreate "$dev1" "$dev2"
pvcreate --metadatacopies 0 "$dev3"
diff --git a/test/shell/vgextend-restoremissing.sh b/test/shell/vgextend-restoremissing.sh
index 7b7748fd7..d8de96d23 100644
--- a/test/shell/vgextend-restoremissing.sh
+++ b/test/shell/vgextend-restoremissing.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_vg 3
lvcreate -an -Zn --type mirror -m 1 -l 1 -n mirror $vg
@@ -19,7 +21,7 @@ lvcreate -l 1 -n lv1 $vg "$dev1"
# vgextend require vgname
invalid vgextend
# --metadatacopies => use --pvmetadatacopies
-invalid vgextend --metadatacopies 3 $vg "$dev1" |& tee out
+invalid vgextend --metadatacopies 3 $vg "$dev1" 2>&1 | tee out
grep -- "use --pvmetadatacopies" out
# VG name should exist
diff --git a/test/shell/vgextend-usage.sh b/test/shell/vgextend-usage.sh
index 93401f0c7..65665bcaf 100644
--- a/test/shell/vgextend-usage.sh
+++ b/test/shell/vgextend-usage.sh
@@ -15,6 +15,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_devs 5
for mdatype in 1 2
diff --git a/test/shell/vgimportclone.sh b/test/shell/vgimportclone.sh
index c3d7e523d..0b1af384e 100644
--- a/test/shell/vgimportclone.sh
+++ b/test/shell/vgimportclone.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_devs 2
vgcreate --metadatasize 128k $vg1 "$dev1"
diff --git a/test/shell/vgmerge-operation.sh b/test/shell/vgmerge-operation.sh
index 261953eef..42976c2b5 100644
--- a/test/shell/vgmerge-operation.sh
+++ b/test/shell/vgmerge-operation.sh
@@ -13,6 +13,8 @@ test_description='Test vgmerge operation'
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_pvs 4 64
# 'vgmerge succeeds with single linear LV in source VG'
diff --git a/test/shell/vgmerge-usage.sh b/test/shell/vgmerge-usage.sh
index d8fa09c15..981a964ee 100644
--- a/test/shell/vgmerge-usage.sh
+++ b/test/shell/vgmerge-usage.sh
@@ -13,6 +13,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_pvs 4
# 'vgmerge normal operation'
diff --git a/test/shell/vgreduce-usage.sh b/test/shell/vgreduce-usage.sh
index 52c5f36cf..9f5539576 100644
--- a/test/shell/vgreduce-usage.sh
+++ b/test/shell/vgreduce-usage.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_devs 4
for mdatype in 1 2
diff --git a/test/shell/vgremove-corrupt-vg.sh b/test/shell/vgremove-corrupt-vg.sh
index f709e4eb1..dc3c9dbac 100644
--- a/test/shell/vgremove-corrupt-vg.sh
+++ b/test/shell/vgremove-corrupt-vg.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_vg 3
lvcreate -n blabla -L 1 $vg -an --zero n
diff --git a/test/shell/vgrename-usage.sh b/test/shell/vgrename-usage.sh
index de1b9332b..59c339766 100644
--- a/test/shell/vgrename-usage.sh
+++ b/test/shell/vgrename-usage.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_devs 4
pvcreate "$dev1" "$dev2"
pvcreate --metadatacopies 0 "$dev3" "$dev4"
diff --git a/test/shell/vgsplit-operation.sh b/test/shell/vgsplit-operation.sh
index 8081b3821..975e8733c 100644
--- a/test/shell/vgsplit-operation.sh
+++ b/test/shell/vgsplit-operation.sh
@@ -11,8 +11,13 @@
# Test vgsplit operation, including different LV types
+# disable lvmetad logging as it bogs down test systems
+export LVM_TEST_LVMETAD_DEBUG_OPTS=${LVM_TEST_LVMETAD_DEBUG_OPTS-}
+
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
COMM() {
LAST_TEST="$@"
}
@@ -116,9 +121,12 @@ COMM "vgsplit correctly splits mirror (log+leg on same dev) into $i VG ($j args)
lvremove -f $vg2/$lv1
vgremove -f $vg1 $vg2
+# Can't use mirrored log without cmirrord
+# TODO: Should work for inactive device, needs some fixes....
+if test ! -e LOCAL_CLVMD ; then
COMM "vgsplit correctly splits mirror LV with mirrored log into $i VG ($j args)"
- create_vg_ -c n $vg1 "$dev1" "$dev2" "$dev3" "$dev4"
- test $i = existing && create_vg_ -c n $vg2 "$dev5"
+ create_vg_ $vg1 "$dev1" "$dev2" "$dev3" "$dev4"
+ test $i = existing && create_vg_ $vg2 "$dev5"
lvcreate -an -Zn -l 64 --mirrorlog mirrored --type mirror -m1 -n $lv1 $vg1 \
"$dev1" "$dev2" "$dev3" "$dev4"
@@ -143,8 +151,8 @@ COMM "vgsplit correctly splits mirror LV with mirrored log into $i VG ($j args)"
# RHBZ 875903
COMM "vgsplit correctly splits mirror LV with mirrored log on same devs into $i VG ($j args)"
- create_vg_ -c n $vg1 "$dev1" "$dev2" "$dev3" "$dev4"
- test $i = existing && create_vg_ -c n $vg2 "$dev5"
+ create_vg_ $vg1 "$dev1" "$dev2" "$dev3" "$dev4"
+ test $i = existing && create_vg_ $vg2 "$dev5"
lvcreate -an -Zn -l 64 --mirrorlog mirrored --type mirror -m1 -n $lv1 $vg1 \
"$dev1" "$dev2"
@@ -163,6 +171,7 @@ COMM "vgsplit correctly splits mirror LV with mirrored log on same devs into $i
fi
lvremove -f $vg2/$lv1
vgremove -f $vg1 $vg2
+fi
COMM "vgsplit correctly splits origin and snapshot LV into $i VG ($j args)"
create_vg_ $vg1 "$dev1" "$dev2"
diff --git a/test/shell/vgsplit-raid.sh b/test/shell/vgsplit-raid.sh
index a7c9687f6..2fe734f9b 100644
--- a/test/shell/vgsplit-raid.sh
+++ b/test/shell/vgsplit-raid.sh
@@ -13,6 +13,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
COMM() {
LAST_TEST="$@"
}
diff --git a/test/shell/vgsplit-stacked.sh b/test/shell/vgsplit-stacked.sh
index 42010fc6b..6a04f07a4 100644
--- a/test/shell/vgsplit-stacked.sh
+++ b/test/shell/vgsplit-stacked.sh
@@ -11,6 +11,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux extend_filter_LVMTEST
aux prepare_pvs 3
diff --git a/test/shell/vgsplit-thin.sh b/test/shell/vgsplit-thin.sh
index 7b06132fd..754f8ba3b 100644
--- a/test/shell/vgsplit-thin.sh
+++ b/test/shell/vgsplit-thin.sh
@@ -11,8 +11,12 @@
# Test vgsplit command options for validity
+export LVM_TEST_THIN_REPAIR_CMD=${LVM_TEST_THIN_REPAIR_CMD-/bin/false}
+
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux have_thin 1 0 0 || skip
aux prepare_devs 5
diff --git a/test/shell/vgsplit-usage.sh b/test/shell/vgsplit-usage.sh
index fe999f90c..b0422d535 100644
--- a/test/shell/vgsplit-usage.sh
+++ b/test/shell/vgsplit-usage.sh
@@ -13,6 +13,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
aux prepare_devs 5
for mdatype in 1 2
diff --git a/test/shell/zero-usage.sh b/test/shell/zero-usage.sh
index 046b3f5fe..df9052942 100644
--- a/test/shell/zero-usage.sh
+++ b/test/shell/zero-usage.sh
@@ -13,6 +13,8 @@
. lib/inittest
+test -e LOCAL_LVMPOLLD && skip
+
which md5sum || skip
aux prepare_pvs 1
diff --git a/tools/.gitignore b/tools/.gitignore
index d06b2d832..2e7b3320a 100644
--- a/tools/.gitignore
+++ b/tools/.gitignore
@@ -1,3 +1,4 @@
.commands
dmsetup
+dmstats
lvm
diff --git a/tools/Makefile.in b/tools/Makefile.in
index 1a8db36e5..d6e54f0ac 100644
--- a/tools/Makefile.in
+++ b/tools/Makefile.in
@@ -21,6 +21,7 @@ SOURCES =\
formats.c \
lvchange.c \
lvconvert.c \
+ lvconvert_poll.c \
lvcreate.c \
lvdisplay.c \
lvextend.c \
@@ -38,6 +39,7 @@ SOURCES =\
pvcreate.c \
pvdisplay.c \
pvmove.c \
+ pvmove_poll.c \
pvremove.c \
pvresize.c \
pvscan.c \
@@ -57,6 +59,7 @@ SOURCES =\
vgimport.c \
vgmerge.c \
vgmknodes.c \
+ lvpoll.c \
vgreduce.c \
vgremove.c \
vgrename.c \
@@ -94,7 +97,8 @@ LIB_VERSION = $(LIB_VERSION_LVM)
CLEAN_TARGETS = liblvm2cmd.$(LIB_SUFFIX) $(TARGETS_DM) \
liblvm2cmd.$(LIB_SUFFIX).$(LIB_VERSION) lvm-static.o \
- liblvm2cmd-static.a dmsetup.static lvm.static
+ liblvm2cmd-static.a dmsetup.static lvm.static \
+ $(LDDEPS) .exported_symbols_generated
ifeq ("@CMDLIB@", "yes")
TARGETS += liblvm2cmd.$(LIB_SUFFIX).$(LIB_VERSION)
@@ -110,8 +114,6 @@ LVMLIBS += -ldevmapper
EXPORTED_HEADER = $(srcdir)/lvm2cmd.h
EXPORTED_FN_PREFIX = lvm2
-DEFS += -DLVM_SHARED_PATH=\"$(exec_prefix)/sbin/lvm\"
-
CFLOW_LIST = lvmcmdlib.c lvm2cmd.c
CFLOW_LIST_TARGET = liblvm2cmd.cflow
CFLOW_TARGET = lvm
@@ -133,6 +135,7 @@ dmsetup.static: dmsetup.o $(interfacebuilddir)/libdevmapper.a
all: device-mapper
CFLAGS_lvm.o += $(EXTRA_EXEC_CFLAGS)
+CFLAGS_lvmcmdline.o += $(VALGRIND_CFLAGS)
lvm: $(OBJECTS) lvm.o $(top_builddir)/lib/liblvm-internal.a
$(CC) $(CFLAGS) $(LDFLAGS) $(EXTRA_EXEC_LDFLAGS) $(ELDFLAGS) -o $@ $(OBJECTS) lvm.o \
@@ -165,7 +168,7 @@ liblvm2cmd.$(LIB_SUFFIX).$(LIB_VERSION): liblvm2cmd.$(LIB_SUFFIX)
.commands: $(srcdir)/commands.h $(srcdir)/cmdnames.h Makefile
$(CC) -E -P $(srcdir)/cmdnames.h 2> /dev/null | \
- egrep -v '^ *(|#.*|devtypes|dumpconfig|formats|help|pvdata|segtypes|tags|version) *$$' > .commands
+ egrep -v '^ *(|#.*|config|devtypes|dumpconfig|formats|help|lvpoll|pvdata|segtypes|systemid|tags|version) *$$' > .commands
ifneq ("$(CFLOW_CMD)", "")
CFLOW_SOURCES = $(addprefix $(srcdir)/, $(SOURCES))
@@ -201,9 +204,11 @@ install_tools_static: lvm.static
install_dmsetup_dynamic: dmsetup
$(INSTALL_PROGRAM) -D $< $(sbindir)/$(<F)
+ $(LN_S) -f $(<F) $(sbindir)/dmstats
install_dmsetup_static: dmsetup.static
$(INSTALL_PROGRAM) -D $< $(staticdir)/$(<F)
+ $(LN_S) -f $(<F) $(sbindir)/dmstats
install_device-mapper: $(INSTALL_DMSETUP_TARGETS)
diff --git a/tools/args.h b/tools/args.h
index 1abe0ebdc..6fa900f97 100644
--- a/tools/args.h
+++ b/tools/args.h
@@ -39,13 +39,23 @@ arg(deltag_ARG, '\0', "deltag", tag_arg, ARG_GROUPABLE)
arg(detachprofile_ARG, '\0', "detachprofile", NULL, 0)
arg(discards_ARG, '\0', "discards", discards_arg, 0)
arg(driverloaded_ARG, '\0', "driverloaded", yes_no_arg, 0)
+arg(duplicate_ARG, '\0', "duplicate", NULL, ARG_COUNTABLE)
+arg(unduplicate_ARG, '\0', "unduplicate", NULL, ARG_COUNTABLE)
+arg(errorwhenfull_ARG, '\0', "errorwhenfull", yes_no_arg, 0)
arg(force_long_ARG, '\0', "force", NULL, ARG_COUNTABLE)
+arg(foreign_ARG, '\0', "foreign", NULL, 0)
+arg(handlemissingpvs_ARG, '\0', "handlemissingpvs", NULL, 0)
arg(ignoreadvanced_ARG, '\0', "ignoreadvanced", NULL, 0)
+arg(ignorelocal_ARG, '\0', "ignorelocal", NULL, 0)
arg(ignorelockingfailure_ARG, '\0', "ignorelockingfailure", NULL, 0)
arg(ignoremonitoring_ARG, '\0', "ignoremonitoring", NULL, 0)
arg(ignoreskippedcluster_ARG, '\0', "ignoreskippedcluster", NULL, 0)
arg(ignoreunsupported_ARG, '\0', "ignoreunsupported", NULL, 0)
arg(labelsector_ARG, '\0', "labelsector", int_arg, 0)
+arg(lockopt_ARG, '\0', "lockopt", string_arg, 0)
+arg(lockstart_ARG, '\0', "lockstart", NULL, 0)
+arg(lockstop_ARG, '\0', "lockstop", NULL, 0)
+arg(locktype_ARG, '\0', "locktype", locktype_arg, 0)
arg(maxrecoveryrate_ARG, '\0', "maxrecoveryrate", size_kb_arg, 0)
arg(merge_ARG, '\0', "merge", NULL, 0)
arg(mergedconfig_ARG, '\0', "mergedconfig", NULL, 0)
@@ -69,6 +79,7 @@ arg(noudevsync_ARG, '\0', "noudevsync", NULL, 0)
arg(originname_ARG, '\0', "originname", string_arg, 0)
arg(physicalvolumesize_ARG, '\0', "setphysicalvolumesize", size_mb_arg, 0)
arg(poll_ARG, '\0', "poll", yes_no_arg, 0)
+arg(polloperation_ARG, '\0', "polloperation", string_arg, 0)
arg(pooldatasize_ARG, '\0', "pooldatasize", size_mb_arg, 0)
arg(poolmetadata_ARG, '\0', "poolmetadata", string_arg, 0)
arg(poolmetadatasize_ARG, '\0', "poolmetadatasize", size_mb_arg, 0)
@@ -91,13 +102,17 @@ arg(resync_ARG, '\0', "resync", NULL, 0)
arg(rows_ARG, '\0', "rows", NULL, 0)
arg(segments_ARG, '\0', "segments", NULL, 0)
arg(separator_ARG, '\0', "separator", string_arg, 0)
+arg(shared_ARG, '\0', "shared", NULL, 0)
arg(split_ARG, '\0', "split", NULL, 0)
arg(splitcache_ARG, '\0', "splitcache", NULL, 0)
arg(splitmirrors_ARG, '\0', "splitmirrors", int_arg, 0)
arg(splitsnapshot_ARG, '\0', "splitsnapshot", NULL, 0)
+arg(showdeprecated_ARG, '\0', "showdeprecated", NULL, 0)
+arg(showunsupported_ARG, '\0', "showunsupported", NULL, 0)
arg(stripes_long_ARG, '\0', "stripes", int_arg, 0)
arg(syncaction_ARG, '\0', "syncaction", string_arg, 0) /* FIXME Use custom validation fn */
arg(sysinit_ARG, '\0', "sysinit", NULL, 0)
+arg(systemid_ARG, '\0', "systemid", string_arg, 0)
arg(thinpool_ARG, '\0', "thinpool", string_arg, 0)
arg(trackchanges_ARG, '\0', "trackchanges", NULL, 0)
arg(trustcache_ARG, '\0', "trustcache", NULL, 0)
@@ -106,14 +121,17 @@ arg(unbuffered_ARG, '\0', "unbuffered", NULL, 0)
arg(uncache_ARG, '\0', "uncache", NULL, 0)
arg(cachepolicy_ARG, '\0', "cachepolicy", string_arg, 0)
arg(cachesettings_ARG, '\0', "cachesettings", string_arg, ARG_GROUPABLE)
+arg(unconfigured_ARG, '\0', "unconfigured", NULL, 0)
arg(units_ARG, '\0', "units", string_arg, 0)
arg(unquoted_ARG, '\0', "unquoted", NULL, 0)
-arg(use_policies_ARG, '\0', "use-policies", NULL, 0)
+arg(use_policies_ARG, '\0', "usepolicies", NULL, 0)
arg(validate_ARG, '\0', "validate", NULL, 0)
arg(version_ARG, '\0', "version", NULL, 0)
arg(vgmetadatacopies_ARG, '\0', "vgmetadatacopies", metadatacopies_arg, 0)
arg(virtualoriginsize_ARG, '\0', "virtualoriginsize", size_mb_arg, 0)
+arg(withsummary_ARG, '\0', "withsummary", NULL, 0)
arg(withcomments_ARG, '\0', "withcomments", NULL, 0)
+arg(withspaces_ARG, '\0', "withspaces", NULL, 0)
arg(withversions_ARG, '\0', "withversions", NULL, 0)
arg(writebehind_ARG, '\0', "writebehind", int_arg, 0)
arg(writemostly_ARG, '\0', "writemostly", string_arg, ARG_GROUPABLE)
diff --git a/tools/commands.h b/tools/commands.h
index 8b6b60716..e090409ec 100644
--- a/tools/commands.h
+++ b/tools/commands.h
@@ -28,9 +28,39 @@ xx(e2fsadm,
extents_ARG, size_ARG, nofsck_ARG, test_ARG)
*********/
+xx(config,
+ "Display and manipulate configuration information",
+ PERMITTED_READ_ONLY | NO_METADATA_PROCESSING,
+ "config\n"
+ "\t[-f|--file filename]\n"
+ "\t[--type {current|default|diff|full|list|missing|new|profilable|profilable-command|profilable-metadata}\n"
+ "\t[--atversion version]]\n"
+ "\t[--ignoreadvanced]\n"
+ "\t[--ignoreunsupported]\n"
+ "\t[--ignorelocal]\n"
+ "\t[-l|--list]\n"
+ "\t[--config ConfigurationString]\n"
+ "\t[--commandprofile ProfileName]\n"
+ "\t[--profile ProfileName]\n"
+ "\t[--metadataprofile ProfileName]\n"
+ "\t[--mergedconfig]\n"
+ "\t[--showdeprecated]\n"
+ "\t[--showunsupported]\n"
+ "\t[--validate]\n"
+ "\t[--withsummary]\n"
+ "\t[--withcomments]\n"
+ "\t[--withspaces]\n"
+ "\t[--unconfigured]\n"
+ "\t[--withversions]\n"
+ "\t[ConfigurationNode...]\n",
+ atversion_ARG, configtype_ARG, file_ARG, ignoreadvanced_ARG,
+ ignoreunsupported_ARG, ignorelocal_ARG, list_ARG, mergedconfig_ARG, metadataprofile_ARG,
+ showdeprecated_ARG, showunsupported_ARG, validate_ARG, withsummary_ARG, withcomments_ARG,
+ withspaces_ARG, unconfigured_ARG, withversions_ARG)
+
xx(devtypes,
"Display recognised built-in block device types",
- PERMITTED_READ_ONLY,
+ PERMITTED_READ_ONLY | NO_METADATA_PROCESSING,
"devtypes\n"
"\t[--aligned]\n"
"\t[--binary]\n"
@@ -55,35 +85,43 @@ xx(devtypes,
unbuffered_ARG, unquoted_ARG)
xx(dumpconfig,
- "Dump configuration",
- PERMITTED_READ_ONLY,
+ "Display and manipulate configuration information",
+ PERMITTED_READ_ONLY | NO_METADATA_PROCESSING,
"dumpconfig\n"
"\t[-f|--file filename]\n"
- "\t[--type {current|default|diff|missing|new|profilable|profilable-command|profilable-metadata}\n"
+ "\t[--type {current|default|diff|full|list|missing|new|profilable|profilable-command|profilable-metadata}\n"
"\t[--atversion version]]\n"
"\t[--ignoreadvanced]\n"
"\t[--ignoreunsupported]\n"
+ "\t[--ignorelocal]\n"
+ "\t[-l|--list]\n"
"\t[--config ConfigurationString]\n"
"\t[--commandprofile ProfileName]\n"
"\t[--profile ProfileName]\n"
"\t[--metadataprofile ProfileName]\n"
"\t[--mergedconfig]\n"
+ "\t[--showdeprecated]\n"
+ "\t[--showunsupported]\n"
"\t[--validate]\n"
+ "\t[--withsummary]\n"
"\t[--withcomments]\n"
+ "\t[--withspaces]\n"
+ "\t[--unconfigured]\n"
"\t[--withversions]\n"
"\t[ConfigurationNode...]\n",
atversion_ARG, configtype_ARG, file_ARG, ignoreadvanced_ARG,
- ignoreunsupported_ARG, mergedconfig_ARG, metadataprofile_ARG,
- validate_ARG, withcomments_ARG, withversions_ARG)
+ ignoreunsupported_ARG, ignorelocal_ARG, list_ARG, mergedconfig_ARG, metadataprofile_ARG,
+ showdeprecated_ARG, showunsupported_ARG, validate_ARG, withsummary_ARG, withcomments_ARG,
+ withspaces_ARG, unconfigured_ARG, withversions_ARG)
xx(formats,
"List available metadata formats",
- PERMITTED_READ_ONLY,
+ PERMITTED_READ_ONLY | NO_METADATA_PROCESSING,
"formats\n")
xx(help,
"Display help for commands",
- PERMITTED_READ_ONLY,
+ PERMITTED_READ_ONLY | NO_METADATA_PROCESSING,
"help <command>\n")
/*********
@@ -111,6 +149,7 @@ xx(lvchange,
"\t[-d|--debug]\n"
"\t[--deltag Tag]\n"
"\t[--detachprofile]\n"
+ "\t[--errorwhenfull {y|n}]\n"
"\t[-f|--force]\n"
"\t[-h|--help]\n"
"\t[--discards {ignore|nopassdown|passdown}]\n"
@@ -134,6 +173,7 @@ xx(lvchange,
"\t[-r|--readahead ReadAheadSectors|auto|none]\n"
"\t[--refresh]\n"
"\t[--resync]\n"
+ "\t[-S|--select Selection]\n"
"\t[--sysinit]\n"
"\t[-t|--test]\n"
"\t[-v|--verbose]\n"
@@ -143,16 +183,16 @@ xx(lvchange,
"\tLogicalVolume[Path] [LogicalVolume[Path]...]\n",
activationmode_ARG, addtag_ARG, alloc_ARG, autobackup_ARG, activate_ARG,
- available_ARG,
- cachepolicy_ARG, cachesettings_ARG, contiguous_ARG, deltag_ARG, discards_ARG, detachprofile_ARG, force_ARG,
+ available_ARG, cachepolicy_ARG, cachesettings_ARG, contiguous_ARG, deltag_ARG,
+ discards_ARG, detachprofile_ARG, errorwhenfull_ARG, force_ARG,
ignorelockingfailure_ARG, ignoremonitoring_ARG, ignoreactivationskip_ARG,
ignoreskippedcluster_ARG, major_ARG, metadataprofile_ARG, minor_ARG,
monitor_ARG, minrecoveryrate_ARG, maxrecoveryrate_ARG, noudevsync_ARG,
partial_ARG, permission_ARG, persistent_ARG, poll_ARG,
raidminrecoveryrate_ARG, raidmaxrecoveryrate_ARG, raidsyncaction_ARG,
raidwritebehind_ARG, raidwritemostly_ARG, readahead_ARG, resync_ARG,
- refresh_ARG, setactivationskip_ARG, syncaction_ARG, sysinit_ARG, test_ARG,
- writebehind_ARG, writemostly_ARG, zero_ARG)
+ refresh_ARG, select_ARG, setactivationskip_ARG, syncaction_ARG, sysinit_ARG,
+ test_ARG, writebehind_ARG, writemostly_ARG, zero_ARG)
#define COMMON_OPTS \
"\t[--commandprofile ProfileName] [-d|--debug] [-h|-?|--help]\n" \
@@ -169,6 +209,7 @@ xx(lvconvert,
"\t[-R|--regionsize MirrorLogRegionSize]\n"
"\t[--alloc AllocationPolicy]\n"
"\t[-b|--background]\n"
+ "\t[--duplicate/{--unduplicate --name DuplicatedSubLogicalVolumeName}]\n"
"\t[-f|--force]\n"
"\t[-i|--interval seconds]\n"
"\t[--stripes Stripes [-I|--stripesize StripeSize]]\n"
@@ -235,6 +276,8 @@ xx(lvconvert,
"[--type cache[-pool]|-H|--cache]\n"
"\t[--cachepool CacheDataLogicalVolume[Path]]\n"
"\t[--cachemode CacheMode]\n"
+ "\t[--cachepolicy policy]\n"
+ "\t[--cachesettings key=value]\n"
"\t[--chunksize size]\n"
"\t[--poolmetadataspare {y|n}]]\n"
"\t[{--poolmetadata CacheMetadataLogicalVolume[Path] |\n"
@@ -242,8 +285,10 @@ xx(lvconvert,
COMMON_OPTS
"\t[Cache|CacheDataPool]LogicalVolume[Path] [PhysicalVolumePath...]\n\n",
- alloc_ARG, background_ARG, cache_ARG, cachemode_ARG, cachepool_ARG, chunksize_ARG,
- corelog_ARG, discards_ARG, force_ARG, interval_ARG, merge_ARG, mirrorlog_ARG,
+ alloc_ARG, background_ARG, cache_ARG, cachemode_ARG,
+ cachepool_ARG, cachepolicy_ARG, cachesettings_ARG, chunksize_ARG,
+ corelog_ARG, discards_ARG, duplicate_ARG, unduplicate_ARG,
+ force_ARG, interval_ARG, merge_ARG, mirrorlog_ARG,
mirrors_ARG, name_ARG, noudevsync_ARG, originname_ARG, poolmetadata_ARG,
poolmetadatasize_ARG, poolmetadataspare_ARG, readahead_ARG, regionsize_ARG,
repair_ARG, replace_ARG, snapshot_ARG,
@@ -261,12 +306,15 @@ xx(lvcreate,
"\t[--alloc AllocationPolicy]\n"
"\t[-H|--cache\n"
"\t [--cachemode {writeback|writethrough}]\n"
+ "\t [--cachepolicy policy]\n"
+ "\t [--cachesettings key=value]\n"
"\t[--cachepool CachePoolLogicalVolume{Name|Path}]\n"
"\t[-c|--chunksize ChunkSize]\n"
"\t[-C|--contiguous {y|n}]\n"
"\t[--commandprofile ProfileName]\n"
"\t[-d|--debug]\n"
"\t[-h|-?|--help]\n"
+ "\t[--errorwhenfull {y|n}]\n"
"\t[--ignoremonitoring]\n"
"\t[--monitor {y|n}]\n"
"\t[-i|--stripes Stripes [-I|--stripesize StripeSize]]\n"
@@ -339,7 +387,7 @@ xx(lvcreate,
addtag_ARG, alloc_ARG, autobackup_ARG, activate_ARG, available_ARG,
cache_ARG, cachemode_ARG, cachepool_ARG, cachepolicy_ARG, cachesettings_ARG,
- chunksize_ARG, contiguous_ARG, corelog_ARG, discards_ARG,
+ chunksize_ARG, contiguous_ARG, corelog_ARG, discards_ARG, errorwhenfull_ARG,
extents_ARG, ignoreactivationskip_ARG, ignoremonitoring_ARG, major_ARG,
metadataprofile_ARG, minor_ARG, mirrorlog_ARG, mirrors_ARG, monitor_ARG,
minrecoveryrate_ARG, maxrecoveryrate_ARG, name_ARG, nosync_ARG,
@@ -353,12 +401,13 @@ xx(lvcreate,
xx(lvdisplay,
"Display information about a logical volume",
- PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT,
+ PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT | LOCKD_VG_SH,
"lvdisplay\n"
"\t[-a|--all]\n"
"\t[-c|--colon]\n"
"\t[--commandprofile ProfileName]\n"
"\t[-d|--debug]\n"
+ "\t[--foreign]\n"
"\t[-h|--help]\n"
"\t[--ignorelockingfailure]\n"
"\t[--ignoreskippedcluster]\n"
@@ -366,6 +415,7 @@ xx(lvdisplay,
"\t[--nosuffix]\n"
"\t[-P|--partial]\n"
"\t[--readonly]\n"
+ "\t[-S|--select Selection]\n"
"\t[--units hHbBsSkKmMgGtTpPeE]\n"
"\t[-v|--verbose]\n"
"\t[--version]\n"
@@ -377,6 +427,7 @@ xx(lvdisplay,
"\t[--binary]\n"
"\t[--commandprofile ProfileName]\n"
"\t[-d|--debug]\n"
+ "\t[--foreign]\n"
"\t[-h|--help]\n"
"\t[--ignorelockingfailure]\n"
"\t[--ignoreskippedcluster]\n"
@@ -395,10 +446,10 @@ xx(lvdisplay,
"\t[--version]\n"
"\t[LogicalVolume[Path] [LogicalVolume[Path]...]]\n",
- aligned_ARG, all_ARG, binary_ARG, colon_ARG, columns_ARG,
+ aligned_ARG, all_ARG, binary_ARG, colon_ARG, columns_ARG, foreign_ARG,
ignorelockingfailure_ARG, ignoreskippedcluster_ARG, maps_ARG,
noheadings_ARG, nosuffix_ARG, options_ARG, sort_ARG, partial_ARG,
- readonly_ARG, segments_ARG, select_ARG, separator_ARG,
+ readonly_ARG, segments_ARG, select_ARG, separator_ARG, shared_ARG,
unbuffered_ARG, units_ARG)
xx(lvextend,
@@ -445,6 +496,36 @@ xx(lvmchange,
reset_ARG)
+xx(lvmconfig,
+ "Display and manipulate configuration information",
+ PERMITTED_READ_ONLY | NO_METADATA_PROCESSING,
+ "lvmconfig\n"
+ "\t[-f|--file filename]\n"
+ "\t[--type {current|default|diff|full|list|missing|new|profilable|profilable-command|profilable-metadata}\n"
+ "\t[--atversion version]]\n"
+ "\t[--ignoreadvanced]\n"
+ "\t[--ignoreunsupported]\n"
+ "\t[--ignorelocal]\n"
+ "\t[-l|--list]\n"
+ "\t[--config ConfigurationString]\n"
+ "\t[--commandprofile ProfileName]\n"
+ "\t[--profile ProfileName]\n"
+ "\t[--metadataprofile ProfileName]\n"
+ "\t[--mergedconfig]\n"
+ "\t[--showdeprecated]\n"
+ "\t[--showunsupported]\n"
+ "\t[--validate]\n"
+ "\t[--withsummary]\n"
+ "\t[--withcomments]\n"
+ "\t[--withspaces]\n"
+ "\t[--unconfigured]\n"
+ "\t[--withversions]\n"
+ "\t[ConfigurationNode...]\n",
+ atversion_ARG, configtype_ARG, file_ARG, ignoreadvanced_ARG,
+ ignoreunsupported_ARG, ignorelocal_ARG, list_ARG, mergedconfig_ARG, metadataprofile_ARG,
+ showdeprecated_ARG, showunsupported_ARG, validate_ARG, withsummary_ARG, withcomments_ARG,
+ withspaces_ARG, unconfigured_ARG, withversions_ARG)
+
xx(lvmdiskscan,
"List devices that may be used as physical volumes",
PERMITTED_READ_ONLY | ENABLE_ALL_DEVS,
@@ -509,7 +590,7 @@ xx(lvreduce,
xx(lvremove,
"Remove logical volume(s) from the system",
- 0,
+ ALL_VGS_IS_DEFAULT, /* all VGs only with --select */
"lvremove\n"
"\t[-A|--autobackup y|n]\n"
"\t[--commandprofile ProfileName]\n"
@@ -517,12 +598,13 @@ xx(lvremove,
"\t[-f|--force]\n"
"\t[-h|--help]\n"
"\t[--noudevsync]\n"
+ "\t[-S|--select Selection]\n"
"\t[-t|--test]\n"
"\t[-v|--verbose]\n"
"\t[--version]\n"
"\tLogicalVolume[Path] [LogicalVolume[Path]...]\n",
- autobackup_ARG, force_ARG, noudevsync_ARG, test_ARG)
+ autobackup_ARG, force_ARG, noudevsync_ARG, select_ARG, test_ARG)
xx(lvrename,
"Rename a logical volume",
@@ -571,13 +653,14 @@ xx(lvresize,
xx(lvs,
"Display information about logical volumes",
- PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT,
+ PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT | LOCKD_VG_SH,
"lvs\n"
"\t[-a|--all]\n"
"\t[--aligned]\n"
"\t[--binary]\n"
"\t[--commandprofile ProfileName]\n"
"\t[-d|--debug]\n"
+ "\t[--foreign]\n"
"\t[-h|--help]\n"
"\t[--ignorelockingfailure]\n"
"\t[--ignoreskippedcluster]\n"
@@ -600,15 +683,15 @@ xx(lvs,
"\t[--version]\n"
"\t[LogicalVolume[Path] [LogicalVolume[Path]...]]\n",
- aligned_ARG, all_ARG, binary_ARG, ignorelockingfailure_ARG,
+ aligned_ARG, all_ARG, binary_ARG, foreign_ARG, ignorelockingfailure_ARG,
ignoreskippedcluster_ARG, nameprefixes_ARG, noheadings_ARG,
nolocking_ARG, nosuffix_ARG, options_ARG, partial_ARG,
- readonly_ARG, rows_ARG, segments_ARG, select_ARG, separator_ARG,
+ readonly_ARG, rows_ARG, segments_ARG, select_ARG, separator_ARG, shared_ARG,
sort_ARG, trustcache_ARG, unbuffered_ARG, units_ARG, unquoted_ARG)
xx(lvscan,
"List all logical volumes in all volume groups",
- PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT,
+ PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT | LOCKD_VG_SH,
"lvscan\n"
"\t[-a|--all]\n"
"\t[-b|--blockdevice]\n"
@@ -635,10 +718,12 @@ xx(pvchange,
"\t[-d|--debug]\n"
"\t[-f|--force]\n"
"\t[-h|--help]\n"
+ "\t[--ignoreskippedcluster]\n"
+ "\t[--metadataignore y|n]\n"
+ "\t[-S|--select Selection]\n"
"\t[-t|--test]\n"
"\t[-u|--uuid]\n"
"\t[-x|--allocatable y|n]\n"
- "\t[--metadataignore y|n]\n"
"\t[-v|--verbose]\n"
"\t[--addtag Tag]\n"
"\t[--deltag Tag]\n"
@@ -646,7 +731,8 @@ xx(pvchange,
"\t[PhysicalVolumePath...]\n",
all_ARG, allocatable_ARG, allocation_ARG, autobackup_ARG, deltag_ARG,
- addtag_ARG, force_ARG, metadataignore_ARG, test_ARG, uuid_ARG)
+ addtag_ARG, force_ARG, ignoreskippedcluster_ARG, metadataignore_ARG,
+ select_ARG, test_ARG, uuid_ARG)
xx(pvresize,
"Resize physical volume(s)",
@@ -665,7 +751,7 @@ xx(pvresize,
xx(pvck,
"Check the consistency of physical volume(s)",
- 0,
+ LOCKD_VG_SH,
"pvck "
"\t[--commandprofile ProfileName]\n"
"\t[-d|--debug]\n"
@@ -731,17 +817,19 @@ xx(pvdata,
xx(pvdisplay,
"Display various attributes of physical volume(s)",
- CACHE_VGMETADATA | PERMITTED_READ_ONLY | ENABLE_ALL_DEVS,
+ CACHE_VGMETADATA | PERMITTED_READ_ONLY | ENABLE_ALL_DEVS | LOCKD_VG_SH,
"pvdisplay\n"
"\t[-c|--colon]\n"
"\t[--commandprofile ProfileName]\n"
"\t[-d|--debug]\n"
+ "\t[--foreign]\n"
"\t[-h|--help]\n"
"\t[--ignorelockingfailure]\n"
"\t[--ignoreskippedcluster]\n"
"\t[-m|--maps]\n"
"\t[--nosuffix]\n"
"\t[--readonly]\n"
+ "\t[-S|--select Selection]\n"
"\t[-s|--short]\n"
"\t[--units hHbBsSkKmMgGtTpPeE]\n"
"\t[-v|--verbose]\n"
@@ -754,6 +842,7 @@ xx(pvdisplay,
"\t[--binary]\n"
"\t[--commandprofile ProfileName]\n"
"\t[-d|--debug]\n"
+ "\t[--foreign]\n"
"\t[-h|--help]\n"
"\t[--ignorelockingfailure]\n"
"\t[--ignoreskippedcluster]\n"
@@ -770,10 +859,10 @@ xx(pvdisplay,
"\t[--version]\n"
"\t[PhysicalVolumePath [PhysicalVolumePath...]]\n",
- aligned_ARG, all_ARG, binary_ARG, colon_ARG, columns_ARG,
+ aligned_ARG, all_ARG, binary_ARG, colon_ARG, columns_ARG, foreign_ARG,
ignorelockingfailure_ARG, ignoreskippedcluster_ARG, maps_ARG,
noheadings_ARG, nosuffix_ARG, options_ARG, readonly_ARG,
- select_ARG, separator_ARG, short_ARG, sort_ARG, unbuffered_ARG,
+ select_ARG, separator_ARG, shared_ARG, short_ARG, sort_ARG, unbuffered_ARG,
units_ARG)
xx(pvmove,
@@ -801,6 +890,24 @@ xx(pvmove,
abort_ARG, alloc_ARG, atomic_ARG, autobackup_ARG, background_ARG,
interval_ARG, name_ARG, noudevsync_ARG, test_ARG)
+xx(lvpoll,
+ "Continue already initiated poll operation on a logical volume",
+ 0,
+ "\t[--abort]\n"
+ "\t[-A|--autobackup {y|n}]\n"
+ "\t[--commandprofile ProfileName]\n"
+ "\t[-d|--debug]\n "
+ "\t[-h|-?|--help]\n"
+ "\t[--handlemissingpvs]\n"
+ "\t[-i|--interval seconds]\n"
+ "\t[--polloperation]\n"
+ "\t[-t|--test]\n "
+ "\t[-v|--verbose]\n "
+ "\t[--version]\n",
+
+ abort_ARG, autobackup_ARG, handlemissingpvs_ARG, interval_ARG, polloperation_ARG,
+ test_ARG)
+
xx(pvremove,
"Remove LVM label(s) from physical volume(s)",
0,
@@ -819,13 +926,14 @@ xx(pvremove,
xx(pvs,
"Display information about physical volumes",
- CACHE_VGMETADATA | PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT | ENABLE_ALL_DEVS,
+ CACHE_VGMETADATA | PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT | ENABLE_ALL_DEVS | LOCKD_VG_SH,
"pvs\n"
"\t[-a|--all]\n"
"\t[--aligned]\n"
"\t[--binary]\n"
"\t[--commandprofile ProfileName]\n"
"\t[-d|--debug]\n"
+ "\t[--foreign]\n"
"\t[-h|-?|--help]\n"
"\t[--ignorelockingfailure]\n"
"\t[--ignoreskippedcluster]\n"
@@ -848,15 +956,15 @@ xx(pvs,
"\t[--version]\n"
"\t[PhysicalVolume [PhysicalVolume...]]\n",
- aligned_ARG, all_ARG, binary_ARG, ignorelockingfailure_ARG,
+ aligned_ARG, all_ARG, binary_ARG, foreign_ARG, ignorelockingfailure_ARG,
ignoreskippedcluster_ARG, nameprefixes_ARG, noheadings_ARG, nolocking_ARG,
nosuffix_ARG, options_ARG, partial_ARG, readonly_ARG, rows_ARG,
- segments_ARG, select_ARG, separator_ARG, sort_ARG, trustcache_ARG,
+ segments_ARG, select_ARG, separator_ARG, shared_ARG, sort_ARG, trustcache_ARG,
unbuffered_ARG, units_ARG, unquoted_ARG)
xx(pvscan,
"List all physical volumes",
- PERMITTED_READ_ONLY,
+ PERMITTED_READ_ONLY | LOCKD_VG_SH,
"pvscan\n"
"\t[-b|--background]\n"
"\t[--cache [-a|--activate ay] [ DevicePath | -j|--major major --minor minor]...]\n"
@@ -878,21 +986,27 @@ xx(pvscan,
xx(segtypes,
"List available segment types",
- PERMITTED_READ_ONLY,
+ PERMITTED_READ_ONLY | NO_METADATA_PROCESSING,
"segtypes\n")
+xx(systemid,
+ "Display the system ID, if any, currently set on this host",
+ PERMITTED_READ_ONLY | NO_METADATA_PROCESSING,
+ "systemid\n")
+
xx(tags,
"List tags defined on this host",
- PERMITTED_READ_ONLY,
+ PERMITTED_READ_ONLY | NO_METADATA_PROCESSING,
"tags\n")
xx(vgcfgbackup,
"Backup volume group configuration(s)",
- PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT,
+ PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT | LOCKD_VG_SH,
"vgcfgbackup\n"
"\t[--commandprofile ProfileName]\n"
"\t[-d|--debug]\n"
"\t[-f|--file filename]\n"
+ "\t[--foreign]\n"
"\t[-h|-?|--help]\n"
"\t[--ignorelockingfailure]\n"
"\t[-P|--partial]\n"
@@ -901,7 +1015,7 @@ xx(vgcfgbackup,
"\t[--version]\n"
"\t[VolumeGroupName...]\n",
- file_ARG, ignorelockingfailure_ARG, partial_ARG, readonly_ARG)
+ file_ARG, foreign_ARG, ignorelockingfailure_ARG, partial_ARG, readonly_ARG)
xx(vgcfgrestore,
"Restore volume group configuration",
@@ -942,7 +1056,9 @@ xx(vgchange,
"\t[--poll {y|n}]\n"
"\t[--noudevsync]\n"
"\t[--refresh]\n"
+ "\t[-S|--select Selection]\n"
"\t[--sysinit]\n"
+ "\t[--systemid SystemID]\n"
"\t[-t|--test]\n"
"\t[-u|--uuid]\n"
"\t[-v|--verbose]\n"
@@ -959,17 +1075,18 @@ xx(vgchange,
"\t[VolumeGroupName...]\n",
activationmode_ARG, addtag_ARG, alloc_ARG, allocation_ARG, autobackup_ARG,
- activate_ARG,
- available_ARG, clustered_ARG, deltag_ARG, detachprofile_ARG,
+ activate_ARG, available_ARG, clustered_ARG, deltag_ARG, detachprofile_ARG,
ignoreactivationskip_ARG, ignorelockingfailure_ARG, ignoremonitoring_ARG,
ignoreskippedcluster_ARG, logicalvolume_ARG, maxphysicalvolumes_ARG,
metadataprofile_ARG, monitor_ARG, noudevsync_ARG, metadatacopies_ARG,
vgmetadatacopies_ARG, partial_ARG, physicalextentsize_ARG, poll_ARG,
- refresh_ARG, resizeable_ARG, resizable_ARG, sysinit_ARG, test_ARG, uuid_ARG)
+ refresh_ARG, resizeable_ARG, resizable_ARG, select_ARG, sysinit_ARG,
+ systemid_ARG, test_ARG, uuid_ARG, lockstart_ARG, lockstop_ARG, locktype_ARG, lockopt_ARG,
+ force_ARG)
xx(vgck,
"Check the consistency of volume group(s)",
- ALL_VGS_IS_DEFAULT,
+ ALL_VGS_IS_DEFAULT | LOCKD_VG_SH,
"vgck "
"\t[--commandprofile ProfileName]\n"
"\t[-d|--debug]\n"
@@ -1016,6 +1133,7 @@ xx(vgcreate,
"\t[--[vg]metadatacopies #copies]\n"
"\t[-p|--maxphysicalvolumes MaxPhysicalVolumes]\n"
"\t[-s|--physicalextentsize PhysicalExtentSize[bBsSkKmMgGtTpPeE]]\n"
+ "\t[--systemid SystemID]\n"
"\t[-t|--test]\n"
"\t[-v|--verbose]\n"
"\t[--version]\n"
@@ -1027,22 +1145,25 @@ xx(vgcreate,
maxphysicalvolumes_ARG, metadataprofile_ARG, metadatatype_ARG,
physicalextentsize_ARG, test_ARG, force_ARG, zero_ARG, labelsector_ARG,
metadatasize_ARG, pvmetadatacopies_ARG, metadatacopies_ARG,
- vgmetadatacopies_ARG, dataalignment_ARG, dataalignmentoffset_ARG)
+ vgmetadatacopies_ARG, dataalignment_ARG, dataalignmentoffset_ARG,
+ shared_ARG, systemid_ARG, locktype_ARG, lockopt_ARG)
xx(vgdisplay,
"Display volume group information",
- PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT,
+ PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT | LOCKD_VG_SH,
"vgdisplay\n"
"\t[-A|--activevolumegroups]\n"
"\t[-c|--colon | -s|--short | -v|--verbose]\n"
"\t[--commandprofile ProfileName]\n"
"\t[-d|--debug]\n"
+ "\t[--foreign]\n"
"\t[-h|--help]\n"
"\t[--ignorelockingfailure]\n"
"\t[--ignoreskippedcluster]\n"
"\t[--nosuffix]\n"
"\t[-P|--partial]\n"
"\t[--readonly]\n"
+ "\t[-S|--select Selection]\n"
"\t[--units hHbBsSkKmMgGtTpPeE]\n"
"\t[--version]\n"
"\t[VolumeGroupName [VolumeGroupName...]]\n"
@@ -1052,6 +1173,7 @@ xx(vgdisplay,
"\t[--binary]\n"
"\t[--commandprofile ProfileName]\n"
"\t[-d|--debug]\n"
+ "\t[--foreign]\n"
"\t[-h|--help]\n"
"\t[--ignorelockingfailure]\n"
"\t[--ignoreskippedcluster]\n"
@@ -1070,9 +1192,9 @@ xx(vgdisplay,
"\t[VolumeGroupName [VolumeGroupName...]]\n",
activevolumegroups_ARG, aligned_ARG, binary_ARG, colon_ARG, columns_ARG,
- ignorelockingfailure_ARG, ignoreskippedcluster_ARG, noheadings_ARG,
- nosuffix_ARG, options_ARG, partial_ARG, readonly_ARG, select_ARG,
- short_ARG, separator_ARG, sort_ARG, unbuffered_ARG, units_ARG)
+ foreign_ARG, ignorelockingfailure_ARG, ignoreskippedcluster_ARG,
+ noheadings_ARG, nosuffix_ARG, options_ARG, partial_ARG, readonly_ARG,
+ select_ARG, shared_ARG, short_ARG, separator_ARG, sort_ARG, unbuffered_ARG, units_ARG)
xx(vgexport,
"Unregister volume group(s) from the system",
@@ -1082,11 +1204,12 @@ xx(vgexport,
"\t[--commandprofile ProfileName]\n"
"\t[-d|--debug]\n"
"\t[-h|--help]\n"
+ "\t[-S|--select Selection]\n"
"\t[-v|--verbose]\n"
"\t[--version]\n"
"\tVolumeGroupName [VolumeGroupName...]\n",
- all_ARG, test_ARG)
+ all_ARG, select_ARG, test_ARG)
xx(vgextend,
"Add physical volumes to a volume group",
@@ -1120,12 +1243,13 @@ xx(vgimport,
"\t[-d|--debug]\n"
"\t[-f|--force]\n"
"\t[-h|--help]\n"
+ "\t[-S|--select Selection]\n"
"\t[-t|--test]\n"
"\t[-v|--verbose]\n"
"\t[--version]\n"
"\tVolumeGroupName...\n",
- all_ARG, force_ARG, test_ARG)
+ all_ARG, force_ARG, select_ARG, test_ARG)
xx(vgmerge,
"Merge volume groups",
@@ -1181,19 +1305,20 @@ xx(vgreduce,
xx(vgremove,
"Remove volume group(s)",
- 0,
+ ALL_VGS_IS_DEFAULT, /* all VGs only with select */
"vgremove\n"
"\t[--commandprofile ProfileName]\n"
"\t[-d|--debug]\n"
"\t[-f|--force]\n"
"\t[-h|--help]\n"
"\t[--noudevsync]\n"
+ "\t[-S|--select Selection]\n"
"\t[-t|--test]\n"
"\t[-v|--verbose]\n"
"\t[--version]\n"
"\tVolumeGroupName [VolumeGroupName...]\n",
- force_ARG, noudevsync_ARG, test_ARG)
+ force_ARG, noudevsync_ARG, select_ARG, test_ARG)
xx(vgrename,
"Rename a volume group",
@@ -1213,13 +1338,14 @@ xx(vgrename,
xx(vgs,
"Display information about volume groups",
- PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT,
+ PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT | LOCKD_VG_SH,
"vgs\n"
"\t[--aligned]\n"
"\t[--binary]\n"
"\t[-a|--all]\n"
"\t[--commandprofile ProfileName]\n"
"\t[-d|--debug]\n"
+ "\t[--foreign]\n"
"\t[-h|--help]\n"
"\t[--ignorelockingfailure]\n"
"\t[--ignoreskippedcluster]\n"
@@ -1241,15 +1367,15 @@ xx(vgs,
"\t[--version]\n"
"\t[VolumeGroupName [VolumeGroupName...]]\n",
- aligned_ARG, all_ARG, binary_ARG, ignorelockingfailure_ARG,
+ aligned_ARG, all_ARG, binary_ARG, foreign_ARG, ignorelockingfailure_ARG,
ignoreskippedcluster_ARG, nameprefixes_ARG, noheadings_ARG,
nolocking_ARG, nosuffix_ARG, options_ARG, partial_ARG,
- readonly_ARG, rows_ARG, select_ARG, separator_ARG, sort_ARG,
+ readonly_ARG, rows_ARG, select_ARG, separator_ARG, shared_ARG, sort_ARG,
trustcache_ARG, unbuffered_ARG, units_ARG, unquoted_ARG)
xx(vgscan,
"Search for all volume groups",
- PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT,
+ PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT | LOCKD_VG_SH,
"vgscan "
"\t[--cache]\n"
"\t[--commandprofile ProfileName]\n"
@@ -1290,5 +1416,5 @@ xx(vgsplit,
xx(version,
"Display software and driver version information",
- PERMITTED_READ_ONLY,
+ PERMITTED_READ_ONLY | NO_METADATA_PROCESSING,
"version\n")
diff --git a/tools/dmsetup.c b/tools/dmsetup.c
index 4202dbbb4..45d4173d6 100644
--- a/tools/dmsetup.c
+++ b/tools/dmsetup.c
@@ -15,23 +15,13 @@
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
-
-#define _GNU_SOURCE
-#define _FILE_OFFSET_BITS 64
-
-#include "configure.h"
+#include "tool.h"
#include "dm-logging.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
#include <ctype.h>
#include <dirent.h>
-#include <errno.h>
-#include <unistd.h>
#include <sys/wait.h>
-#include <unistd.h>
#include <sys/param.h>
#include <locale.h>
#include <langinfo.h>
@@ -58,7 +48,11 @@
# include <sys/ioctl.h>
#endif
-#if HAVE_TERMIOS_H
+#ifdef HAVE_SYS_TIMERFD_H
+# include <sys/timerfd.h>
+#endif
+
+#ifdef HAVE_TERMIOS_H
# include <termios.h>
#endif
@@ -77,10 +71,10 @@ extern char *optarg;
#ifndef TEMP_FAILURE_RETRY
# define TEMP_FAILURE_RETRY(expression) \
- (__extension__ \
- ({ long int __result; \
- do __result = (long int) (expression); \
- while (__result == -1L && errno == EINTR); \
+ (__extension__ \
+ ({ long int __result; \
+ do __result = (long int) (expression); \
+ while (__result == -1L && errno == EINTR); \
__result; }))
#endif
@@ -111,6 +105,50 @@ extern char *optarg;
#define err(msg, x...) fprintf(stderr, msg "\n", ##x)
+/* program_id used for dmstats-managed statistics regions */
+#define DM_STATS_PROGRAM_ID "dmstats"
+
+/*
+ * Basic commands this code implements.
+ */
+typedef enum {
+ DMSETUP_CMD = 0,
+ LOSETUP_CMD = 1,
+ DMLOSETUP_CMD = 2,
+ DMSTATS_CMD = 3,
+ DMSETUP_STATS_CMD = 4,
+ DEVMAP_NAME_CMD = 5
+} cmd_name_t;
+
+typedef enum {
+ DMSETUP_TYPE = 0,
+ LOSETUP_TYPE = 1,
+ STATS_TYPE = 2,
+ DEVMAP_NAME_TYPE = 3
+} cmd_type_t;
+
+#define DMSETUP_CMD_NAME "dmsetup"
+#define LOSETUP_CMD_NAME "losetup"
+#define DMLOSETUP_CMD_NAME "dmlosetup"
+#define DMSTATS_CMD_NAME "dmstats"
+#define DMSETUP_STATS_CMD_NAME "dmsetup stats"
+#define DEVMAP_NAME_CMD_NAME "devmap_name"
+
+static const struct {
+ cmd_name_t command;
+ const char name[14];
+ cmd_type_t type;
+} _base_commands[] = {
+ { DMSETUP_CMD, DMSETUP_CMD_NAME, DMSETUP_TYPE },
+ { LOSETUP_CMD, LOSETUP_CMD_NAME, LOSETUP_TYPE },
+ { DMLOSETUP_CMD, DMLOSETUP_CMD_NAME, LOSETUP_TYPE },
+ { DMSTATS_CMD, DMSTATS_CMD_NAME, STATS_TYPE },
+ { DMSETUP_STATS_CMD, DMSETUP_STATS_CMD_NAME, STATS_TYPE },
+ { DEVMAP_NAME_CMD, DEVMAP_NAME_CMD_NAME, DEVMAP_NAME_TYPE },
+};
+
+static const int _num_base_commands = DM_ARRAY_SIZE(_base_commands);
+
/*
* We have only very simple switches ATM.
*/
@@ -118,8 +156,16 @@ enum {
READ_ONLY = 0,
ADD_NODE_ON_CREATE_ARG,
ADD_NODE_ON_RESUME_ARG,
+ ALL_DEVICES_ARG,
+ ALL_PROGRAMS_ARG,
+ ALL_REGIONS_ARG,
+ AREAS_ARG,
+ AREA_SIZE_ARG,
+ AUX_DATA_ARG,
CHECKS_ARG,
+ CLEAR_ARG,
COLS_ARG,
+ COUNT_ARG,
DEFERRED_ARG,
SELECT_ARG,
EXEC_ARG,
@@ -127,6 +173,8 @@ enum {
GID_ARG,
HELP_ARG,
INACTIVE_ARG,
+ INTERVAL_ARG,
+ LENGTH_ARG,
MANGLENAME_ARG,
MAJOR_ARG,
MINOR_ARG,
@@ -136,23 +184,30 @@ enum {
NOHEADINGS_ARG,
NOLOCKFS_ARG,
NOOPENCOUNT_ARG,
+ NOSUFFIX_ARG,
NOTABLE_ARG,
UDEVCOOKIE_ARG,
NOUDEVRULES_ARG,
NOUDEVSYNC_ARG,
OPTIONS_ARG,
+ PROGRAM_ID_ARG,
+ RAW_ARG,
READAHEAD_ARG,
+ REGION_ID_ARG,
RETRY_ARG,
ROWS_ARG,
SEPARATOR_ARG,
SETUUID_ARG,
SHOWKEYS_ARG,
SORT_ARG,
+ START_ARG,
TABLE_ARG,
TARGET_ARG,
+ SEGMENTS_ARG,
TREE_ARG,
UID_ARG,
UNBUFFERED_ARG,
+ UNITS_ARG,
UNQUOTED_ARG,
UUID_ARG,
VERBOSE_ARG,
@@ -167,7 +222,9 @@ typedef enum {
DR_INFO = 2,
DR_DEPS = 4,
DR_TREE = 8, /* Complete dependency tree required */
- DR_NAME = 16
+ DR_NAME = 16,
+ DR_STATS = 32, /* Requires populated stats handle. */
+ DR_STATS_META = 64, /* Requires listed stats handle. */
} report_type_t;
typedef enum {
@@ -176,6 +233,8 @@ typedef enum {
DN_MAP /* Map name (for dm devices only, equal to DN_BLK otherwise) */
} dev_name_t;
+static cmd_name_t _base_command = DMSETUP_CMD; /* Default command is 'dmsetup' */
+static cmd_type_t _base_command_type = DMSETUP_TYPE;
static int _switches[NUM_SWITCHES];
static int _int_args[NUM_SWITCHES];
static char *_string_args[NUM_SWITCHES];
@@ -183,7 +242,8 @@ static int _num_devices;
static char *_uuid;
static char *_table;
static char *_target;
-static char *_command;
+static char *_command_to_exec; /* --exec <command> */
+static const char *_command; /* dmsetup <command> */
static uint32_t _read_ahead_flags;
static uint32_t _udev_cookie;
static int _udev_only;
@@ -191,13 +251,33 @@ static struct dm_tree *_dtree;
static struct dm_report *_report;
static report_type_t _report_type;
static dev_name_t _dev_name_type;
+static uint32_t _count = 1; /* count of repeating reports */
+static struct dm_timestamp *_initial_timestamp = NULL;
+static uint64_t _disp_factor = 512; /* display sizes in sectors */
+static char _disp_units = 's';
+const char *_program_id = DM_STATS_PROGRAM_ID; /* program_id used for reports. */
+static int _stats_report_by_areas = 1; /* output per-area info for stats reports. */
+
+/* report timekeeping */
+static struct dm_timestamp *_cycle_timestamp = NULL;
+static uint64_t _interval = 0; /* configured interval in nsecs */
+static uint64_t _new_interval = 0; /* flag top-of-interval */
+static uint64_t _last_interval = 0; /* approx. measured interval in nsecs */
+static int _timer_fd = -1; /* timerfd file descriptor. */
+
+/* Invalid fd value used to signal end-of-reporting. */
+#define TIMER_STOPPED -2
+
+#define NSEC_PER_USEC UINT64_C(1000)
+#define NSEC_PER_MSEC UINT64_C(1000000)
+#define NSEC_PER_SEC UINT64_C(1000000000)
/*
* Commands
*/
struct command;
-#define CMD_ARGS const struct command *cmd, int argc, char **argv, struct dm_names *names, int multiple_devices
+#define CMD_ARGS const struct command *cmd, const char *subcommand, int argc, char **argv, struct dm_names *names, int multiple_devices
typedef int (*command_fn) (CMD_ARGS);
struct command {
@@ -206,6 +286,7 @@ struct command {
int min_args;
int max_args;
int repeatable_cmd; /* Repeat to process device list? */
+ int has_subcommands; /* Command implements sub-commands. */
command_fn fn;
};
@@ -296,10 +377,10 @@ static int _parse_file(struct dm_task *dmt, const char *file)
}
struct dm_split_name {
- char *subsystem;
- char *vg_name;
- char *lv_name;
- char *lv_layer;
+ char *subsystem;
+ char *vg_name;
+ char *lv_name;
+ char *lv_layer;
};
struct dmsetup_report_obj {
@@ -308,8 +389,27 @@ struct dmsetup_report_obj {
struct dm_task *deps_task;
struct dm_tree_node *tree_node;
struct dm_split_name *split_name;
+ struct dm_stats *stats;
};
+static int _task_run(struct dm_task *dmt)
+{
+ int r;
+ uint64_t delta;
+
+ if (_initial_timestamp)
+ dm_task_set_record_timestamp(dmt);
+
+ r = dm_task_run(dmt);
+
+ if (_initial_timestamp) {
+ delta = dm_timestamp_delta(dm_task_get_ioctl_timestamp(dmt), _initial_timestamp);
+ log_debug("Timestamp: %7" PRIu64 ".%09" PRIu64 " seconds", delta / NSEC_PER_SEC, delta % NSEC_PER_SEC);
+ }
+
+ return r;
+}
+
static struct dm_task *_get_deps_task(int major, int minor)
{
struct dm_task *dmt;
@@ -331,7 +431,7 @@ static struct dm_task *_get_deps_task(int major, int minor)
if (_switches[CHECKS_ARG] && !dm_task_enable_checks(dmt))
goto err;
- if (!dm_task_run(dmt))
+ if (!_task_run(dmt))
goto err;
if (!dm_task_get_info(dmt, &info))
@@ -392,7 +492,7 @@ static struct dm_split_name *_get_split_name(const char *uuid, const char *name,
if (!strcmp(split_name->subsystem, "LVM") &&
(!(split_name->vg_name = dm_strdup(name)) ||
!dm_split_lvm_name(NULL, NULL, &split_name->vg_name,
- &split_name->lv_name, &split_name->lv_layer)))
+ &split_name->lv_name, &split_name->lv_layer)))
log_error("Failed to allocate memory to split LVM name "
"into components.");
@@ -412,9 +512,286 @@ static void _destroy_split_name(struct dm_split_name *split_name)
dm_free(split_name);
}
+/*
+ * Stats clock:
+ *
+ * Use either Linux timerfds or usleep to implement the reporting
+ * interval wait.
+ *
+ * _start_timer() - Start the timer running.
+ * _do_timer_wait() - Wait until the beginning of the next interval.
+ *
+ * _update_interval_times() - Update timestamps and interval estimate.
+ */
+
+/*
+ * Return the current interval number counting upwards from one.
+ */
+static uint64_t _interval_num(void)
+{
+ return 1 + (uint64_t) _int_args[COUNT_ARG] - _count;
+}
+
+#ifdef HAVE_SYS_TIMERFD_H
+static int _start_timerfd_timer(void)
+{
+ struct itimerspec interval_timer;
+ time_t secs;
+ long nsecs;
+
+ log_debug("Using timerfd for interval timekeeping.");
+
+ /* timer running? */
+ if (_timer_fd != -1)
+ return 1;
+
+ memset(&interval_timer, 0, sizeof(interval_timer));
+
+ /* Use CLOCK_MONOTONIC to avoid warp on RTC adjustments. */
+ if ((_timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC)) < 0) {
+ log_error("Could not create timer: %s", strerror(errno));
+ return 0;
+ }
+
+ secs = (time_t) _interval / NSEC_PER_SEC;
+ nsecs = (long) _interval % NSEC_PER_SEC;
+
+ /* Must set interval and value to create an armed periodic timer. */
+ interval_timer.it_interval.tv_sec = secs;
+ interval_timer.it_interval.tv_nsec = nsecs;
+ interval_timer.it_value.tv_sec = secs;
+ interval_timer.it_value.tv_nsec = nsecs;
+
+ log_debug("Setting interval timer to: " FMTu64 "s %ldns", (uint64_t)secs, nsecs);
+ if (timerfd_settime(_timer_fd, 0, &interval_timer, NULL)) {
+ log_error("Could not set interval timer: %s", strerror(errno));
+ return 0;
+ }
+ return 1;
+}
+
+static int _do_timerfd_wait(void)
+{
+ uint64_t expired;
+ ssize_t bytes;
+
+ if (_timer_fd < 0)
+ return 0;
+
+ /* read on timerfd returns a uint64_t in host byte order. */
+ bytes = read(_timer_fd, &expired, sizeof(expired));
+
+ if (bytes < 0) {
+ /* EBADF from invalid timerfd or EINVAL from too small buffer. */
+ log_error("Interval timer wait failed: %s",
+ strerror(errno));
+ return 0;
+ }
+
+ /* read(2) on a timerfd descriptor is guaranteed to return 8 bytes. */
+ if (bytes != 8)
+ log_error("Unexpected byte count on timerfd read: " FMTssize_t, bytes);
+
+ /* FIXME: attempt to rebase clock? */
+ if (expired > 1)
+ log_warn("WARNING: Try increasing --interval ("FMTu64
+ " missed timer events).", expired - 1);
+
+ /* Signal that a new interval has begun. */
+ _new_interval = 1;
+
+ /* Final interval? */
+ if (_count == 2) {
+ if (close(_timer_fd))
+ stack;
+ /* Tell _update_interval_times() to shut down. */
+ _timer_fd = TIMER_STOPPED;
+ }
+
+ return 1;
+}
+
+static int _start_timer(void)
+{
+ return _start_timerfd_timer();
+}
+
+static int _do_timer_wait(void)
+{
+ return _do_timerfd_wait();
+}
+
+#else /* !HAVE_SYS_TIMERFD_H */
+static int _start_usleep_timer(void)
+{
+ log_debug("Using usleep for interval timekeeping.");
+ return 1;
+}
+
+static int _do_usleep_wait(void)
+{
+ static struct dm_timestamp *_last_sleep, *_now = NULL;
+ uint64_t this_interval;
+ int64_t delta_t;
+
+ /*
+ * Report clock: compensate for time spent in userspace and stats
+ * message ioctls by keeping track of the last wake time and
+ * adjusting the sleep interval accordingly.
+ */
+ if (!_last_sleep && !_now) {
+ if (!(_last_sleep = dm_timestamp_alloc()))
+ goto_out;
+ if (!(_now = dm_timestamp_alloc()))
+ goto_out;
+ dm_timestamp_get(_now);
+ this_interval = _interval;
+ log_error("Using "FMTu64" as first interval.", this_interval);
+ } else {
+ dm_timestamp_get(_now);
+ delta_t = dm_timestamp_delta(_now, _last_sleep);
+ log_debug("Interval timer delta_t: "FMTi64, delta_t);
+
+ /* FIXME: usleep timer drift over large counts. */
+
+ /* adjust for time spent populating and reporting */
+ this_interval = 2 * _interval - delta_t;
+ log_debug("Using "FMTu64" as interval.", this_interval);
+ }
+
+ /* Signal that a new interval has begun. */
+ _new_interval = 1;
+ dm_timestamp_copy(_last_sleep, _now);
+
+ if (usleep(this_interval / NSEC_PER_USEC)) {
+ if (errno == EINTR)
+ log_error("Report interval interrupted by signal.");
+ if (errno == EINVAL)
+ log_error("Report interval too short.");
+ goto out;
+ }
+
+ if(_count == 2) {
+ dm_timestamp_destroy(_last_sleep);
+ dm_timestamp_destroy(_now);
+ }
+
+ return 1;
+out:
+ return 0;
+}
+
+static int _start_timer(void)
+{
+ return _start_usleep_timer();
+}
+
+static int _do_timer_wait(void)
+{
+ return _do_usleep_wait();
+}
+
+#endif /* HAVE_SYS_TIMERFD_H */
+
+static int _update_interval_times(void)
+{
+ static struct dm_timestamp *this_timestamp = NULL;
+ uint64_t delta_t, interval_num = _interval_num();
+ int r = 0;
+
+ /*
+ * Clock shutdown for exit - nothing to do.
+ */
+ if (_timer_fd == TIMER_STOPPED && !_cycle_timestamp)
+ return 1;
+
+ /*
+ * Current timestamp. If _new_interval is set this is used as
+ * the new cycle start timestamp.
+ */
+ if (!this_timestamp) {
+ if (!(this_timestamp = dm_timestamp_alloc()))
+ return_0;
+ }
+
+ /*
+ * Take cycle timestamp as close as possible to ioctl return.
+ *
+ * FIXME: use per-region timestamp deltas for interval estimate.
+ */
+ if (!dm_timestamp_get(this_timestamp))
+ goto_out;
+
+ /*
+ * Stats clock: maintain a single timestamp taken just after the
+ * call to dm_stats_populate() and take a delta between the current
+ * and last value to determine the sampling interval.
+ *
+ * A new interval is started when the _new_interval flag is set
+ * on return from _do_report_wait().
+ *
+ * The first interval is treated as a special case: since the
+ * time since the last clear of the counters is unknown (no
+ * previous timestamp exists) the duration is assumed to be the
+ * configured value.
+ */
+ if (_cycle_timestamp)
+ /* Current delta_t: time from start of cycle to now. */
+ delta_t = dm_timestamp_delta(this_timestamp, _cycle_timestamp);
+ else {
+ _cycle_timestamp = dm_timestamp_alloc();
+ if (!_cycle_timestamp) {
+ log_error("Could not allocate timestamp object.");
+ goto out;
+ }
+
+ /* Pretend we have the configured interval. */
+ delta_t = _interval;
+
+ /* start the first cycle */
+ log_debug("Beginning first interval");
+ _new_interval = 1;
+ }
+
+ log_debug("Interval #%-4"PRIu64" time delta: %12"
+ PRIu64"ns", interval_num, delta_t);
+
+ if (_new_interval) {
+ /* Update timestamp and interval and clear _new_interval */
+ dm_timestamp_copy(_cycle_timestamp, this_timestamp);
+ _last_interval = delta_t;
+ _new_interval = 0;
+
+ /*
+ * Log interval duration and current error.
+ */
+ log_debug("Interval #%-5"PRIu64" current err: %12"PRIi64"ns",
+ interval_num, ((int64_t)_last_interval - (int64_t)_interval));
+ log_debug("End interval #%-9"PRIu64" duration: %12"PRIu64"ns",
+ interval_num, _last_interval);
+ }
+
+ r = 1;
+
+out:
+ if (!r || _timer_fd == TIMER_STOPPED) {
+ /* The _cycle_timestamp has not yet been allocated if we
+ * fail to obtain this_timestamp on the first interval.
+ */
+ if (_cycle_timestamp)
+ dm_timestamp_destroy(_cycle_timestamp);
+ dm_timestamp_destroy(this_timestamp);
+
+ /* Clear timestamp pointers to signal shutdown. */
+ _cycle_timestamp = this_timestamp = NULL;
+ }
+ return r;
+}
+
static int _display_info_cols(struct dm_task *dmt, struct dm_info *info)
{
struct dmsetup_report_obj obj;
+
int r = 0;
if (!info->exists) {
@@ -426,6 +803,7 @@ static int _display_info_cols(struct dm_task *dmt, struct dm_info *info)
obj.info = info;
obj.deps_task = NULL;
obj.split_name = NULL;
+ obj.stats = NULL;
if (_report_type & DR_TREE)
if (!(obj.tree_node = dm_tree_find_node(_dtree, info->major, info->minor))) {
@@ -444,9 +822,56 @@ static int _display_info_cols(struct dm_task *dmt, struct dm_info *info)
dm_task_get_name(dmt), '-')))
goto_out;
- if (!dm_report_object(_report, &obj))
- goto_out;
+ /*
+ * Obtain statistics for the current reporting object and set
+ * the interval estimate used for stats rate conversion.
+ */
+ if (_report_type & DR_STATS) {
+ if (!(obj.stats = dm_stats_create(DM_STATS_PROGRAM_ID)))
+ goto_out;
+
+ dm_stats_bind_devno(obj.stats, info->major, info->minor);
+
+ if (!dm_stats_populate(obj.stats, _program_id, DM_STATS_REGIONS_ALL))
+ goto out;
+
+ /* Update timestamps and handle end-of-interval accounting. */
+ _update_interval_times();
+
+ log_debug("Adjusted sample interval duration: %12"PRIu64"ns", _last_interval);
+ /* use measured approximation for calculations */
+ dm_stats_set_sampling_interval_ns(obj.stats, _last_interval);
+ }
+
+ /* Only a dm_stats_list is needed for DR_STATS_META reports. */
+ if (!obj.stats && (_report_type & DR_STATS_META)) {
+ if (!(obj.stats = dm_stats_create(DM_STATS_PROGRAM_ID)))
+ goto_out;
+
+ dm_stats_bind_devno(obj.stats, info->major, info->minor);
+
+ if (!dm_stats_list(obj.stats, _program_id))
+ goto out;
+
+ /* No regions to report */
+ if (!dm_stats_get_nr_regions(obj.stats))
+ goto out;
+ }
+ /*
+ * Walk any statistics regions contained in the current
+ * reporting object: for objects with a NULL stats handle,
+ * or a handle containing no registered regions, this loop
+ * always executes exactly once.
+ */
+ dm_stats_walk_do(obj.stats) {
+ if (!dm_report_object(_report, &obj))
+ goto_out;
+ if (_stats_report_by_areas)
+ dm_stats_walk_next(obj.stats);
+ else
+ dm_stats_walk_next_region(obj.stats);
+ } dm_stats_walk_while(obj.stats);
r = 1;
out:
@@ -454,6 +879,8 @@ static int _display_info_cols(struct dm_task *dmt, struct dm_info *info)
dm_task_destroy(obj.deps_task);
if (obj.split_name)
_destroy_split_name(obj.split_name);
+ if (obj.stats)
+ dm_stats_destroy(obj.stats);
return r;
}
@@ -566,20 +993,20 @@ static int _load(CMD_ARGS)
}
if (!_switches[UUID_ARG] && !_switches[MAJOR_ARG]) {
- if (argc == 1) {
+ if (!argc) {
err("Please specify device.\n");
return 0;
}
- name = argv[1];
+ name = argv[0];
argc--;
argv++;
- } else if (argc > 2) {
+ } else if (argc > 1) {
err("Too many command line arguments.\n");
return 0;
}
- if (argc == 2)
- file = argv[1];
+ if (argc == 1)
+ file = argv[0];
if (!(dmt = dm_task_create(DM_DEVICE_RELOAD)))
return 0;
@@ -602,7 +1029,7 @@ static int _load(CMD_ARGS)
if (_switches[CHECKS_ARG] && !dm_task_enable_checks(dmt))
goto out;
- if (!dm_task_run(dmt))
+ if (!_task_run(dmt))
goto out;
r = 1;
@@ -624,13 +1051,13 @@ static int _create(CMD_ARGS)
uint32_t cookie = 0;
uint16_t udev_flags = 0;
- if (argc == 3)
- file = argv[2];
+ if (argc == 2)
+ file = argv[1];
if (!(dmt = dm_task_create(DM_DEVICE_CREATE)))
return 0;
- if (!dm_task_set_name(dmt, argv[1]))
+ if (!dm_task_set_name(dmt, argv[0]))
goto out;
if (_switches[UUID_ARG] && !dm_task_set_uuid(dmt, _uuid))
@@ -679,7 +1106,7 @@ static int _create(CMD_ARGS)
goto out;
if (!_set_task_add_node(dmt))
- goto out;
+ goto out;
if (_udev_cookie)
cookie = _udev_cookie;
@@ -688,7 +1115,7 @@ static int _create(CMD_ARGS)
udev_flags |= DM_UDEV_DISABLE_LIBRARY_FALLBACK;
if (!dm_task_set_cookie(dmt, &cookie, udev_flags) ||
- !dm_task_run(dmt))
+ !_task_run(dmt))
goto out;
r = 1;
@@ -744,7 +1171,7 @@ static int _do_rename(const char *name, const char *new_name, const char *new_uu
udev_flags |= DM_UDEV_DISABLE_LIBRARY_FALLBACK;
if (!dm_task_set_cookie(dmt, &cookie, udev_flags) ||
- !dm_task_run(dmt))
+ !_task_run(dmt))
goto out;
r = 1;
@@ -760,7 +1187,7 @@ static int _do_rename(const char *name, const char *new_name, const char *new_uu
static int _rename(CMD_ARGS)
{
- const char *name = (argc == 3) ? argv[1] : NULL;
+ const char *name = (argc == 2) ? argv[0] : NULL;
return _switches[SETUUID_ARG] ? _do_rename(name, NULL, argv[argc - 1]) :
_do_rename(name, argv[argc - 1], NULL);
@@ -784,22 +1211,22 @@ static int _message(CMD_ARGS)
if (!_set_task_device(dmt, NULL, 0))
goto out;
} else {
- if (!_set_task_device(dmt, argv[1], 0))
+ if (!_set_task_device(dmt, argv[0], 0))
goto out;
argc--;
argv++;
}
- sector = strtoull(argv[1], &endptr, 10);
- if (*endptr || endptr == argv[1]) {
+ sector = strtoull(argv[0], &endptr, 10);
+ if (*endptr || endptr == argv[0]) {
err("invalid sector");
goto out;
}
if (!dm_task_set_sector(dmt, sector))
goto out;
- argc -= 2;
- argv += 2;
+ argc--;
+ argv++;
if (argc <= 0)
err("No message supplied.\n");
@@ -834,7 +1261,7 @@ static int _message(CMD_ARGS)
if (_switches[CHECKS_ARG] && !dm_task_enable_checks(dmt))
goto out;
- if (!dm_task_run(dmt))
+ if (!_task_run(dmt))
goto out;
if ((response = dm_task_get_message_response(dmt))) {
@@ -864,13 +1291,13 @@ static int _setgeometry(CMD_ARGS)
if (!_set_task_device(dmt, NULL, 0))
goto out;
} else {
- if (!_set_task_device(dmt, argv[1], 0))
+ if (!_set_task_device(dmt, argv[0], 0))
goto out;
argc--;
argv++;
}
- if (!dm_task_set_geometry(dmt, argv[1], argv[2], argv[3], argv[4]))
+ if (!dm_task_set_geometry(dmt, argv[0], argv[1], argv[2], argv[3]))
goto out;
if (_switches[NOOPENCOUNT_ARG] && !dm_task_no_open_count(dmt))
@@ -883,7 +1310,7 @@ static int _setgeometry(CMD_ARGS)
goto out;
/* run the task */
- if (!dm_task_run(dmt))
+ if (!_task_run(dmt))
goto out;
r = 1;
@@ -899,9 +1326,9 @@ static int _splitname(CMD_ARGS)
struct dmsetup_report_obj obj = { NULL };
int r;
- if (!(obj.split_name = _get_split_name((argc == 3) ? argv[2] : "LVM",
- argv[1], '\0')))
- return_0;
+ if (!(obj.split_name = _get_split_name((argc == 2) ? argv[1] : "LVM",
+ argv[0], '\0')))
+ return_0;
r = dm_report_object(_report, &obj);
_destroy_split_name(obj.split_name);
@@ -914,6 +1341,7 @@ static uint32_t _get_cookie_value(const char *str_value)
unsigned long int value;
char *p;
+ errno = 0;
if (!(value = strtoul(str_value, &p, 0)) ||
*p ||
(value == ULONG_MAX && errno == ERANGE) ||
@@ -939,7 +1367,7 @@ static int _udevflags(CMD_ARGS)
"PRIMARY_SOURCE",
0};
- if (!(cookie = _get_cookie_value(argv[1])))
+ if (!(cookie = _get_cookie_value(argv[0])))
return 0;
flags = cookie >> DM_UDEV_FLAGS_SHIFT;
@@ -971,7 +1399,7 @@ static int _udevcomplete(CMD_ARGS)
{
uint32_t cookie;
- if (!(cookie = _get_cookie_value(argv[1])))
+ if (!(cookie = _get_cookie_value(argv[0])))
return 0;
/*
@@ -1096,7 +1524,7 @@ static int _udevcreatecookie(CMD_ARGS)
static int _udevreleasecookie(CMD_ARGS)
{
- if (argv[1] && !(_udev_cookie = _get_cookie_value(argv[1])))
+ if (argv[0] && !(_udev_cookie = _get_cookie_value(argv[0])))
return 0;
if (!_udev_cookie) {
@@ -1146,7 +1574,7 @@ static int _udevcomplete_all(CMD_ARGS)
unsigned age = 0;
time_t t;
- if (argc == 2 && (sscanf(argv[1], "%u", &age) != 1)) {
+ if (argc == 1 && (sscanf(argv[0], "%u", &age) != 1)) {
log_error("Failed to read age_in_minutes parameter.");
return 0;
}
@@ -1185,7 +1613,7 @@ static int _udevcomplete_all(CMD_ARGS)
if (semctl(sid, 0, IPC_RMID, 0) < 0) {
log_error("Could not cleanup notification semaphore "
"with semid %d and cookie value "
- "%" PRIu32 " (0x%" PRIx32 ")", sid,
+ FMTu32 " (0x" FMTx32 ")", sid,
sdata.sem_perm.__key, sdata.sem_perm.__key);
continue;
}
@@ -1195,7 +1623,7 @@ static int _udevcomplete_all(CMD_ARGS)
}
log_print("%d semaphores with keys prefixed by "
- "%" PRIu16 " (0x%" PRIx16 ") destroyed. %d skipped.",
+ FMTu16 " (0x" FMTx16 ") destroyed. %d skipped.",
counter, DM_COOKIE_MAGIC, DM_COOKIE_MAGIC, skipped);
return 1;
@@ -1257,6 +1685,12 @@ static int _version(CMD_ARGS)
printf("Driver version: %s\n", version);
+ /* don't output column headings for 'dmstats version'. */
+ if (_report) {
+ dm_report_free(_report);
+ _report = NULL;
+ }
+
return 1;
}
@@ -1296,7 +1730,7 @@ static int _simple(int task, const char *name, uint32_t event_nr, int display)
/* FIXME: needs to coperate with udev */
if (!_set_task_add_node(dmt))
- goto out;
+ goto out;
if (_switches[READAHEAD_ARG] &&
!dm_task_set_read_ahead(dmt, _int_args[READAHEAD_ARG],
@@ -1322,7 +1756,7 @@ static int _simple(int task, const char *name, uint32_t event_nr, int display)
if (_switches[DEFERRED_ARG] && (task == DM_DEVICE_REMOVE || task == DM_DEVICE_REMOVE_ALL))
dm_task_deferred_remove(dmt);
- r = dm_task_run(dmt);
+ r = _task_run(dmt);
out:
if (!_udev_cookie && udev_wait_flag)
@@ -1338,17 +1772,17 @@ static int _simple(int task, const char *name, uint32_t event_nr, int display)
static int _suspend(CMD_ARGS)
{
- return _simple(DM_DEVICE_SUSPEND, argc > 1 ? argv[1] : NULL, 0, 1);
+ return _simple(DM_DEVICE_SUSPEND, argc ? argv[0] : NULL, 0, 1);
}
static int _resume(CMD_ARGS)
{
- return _simple(DM_DEVICE_RESUME, argc > 1 ? argv[1] : NULL, 0, 1);
+ return _simple(DM_DEVICE_RESUME, argc ? argv[0] : NULL, 0, 1);
}
static int _clear(CMD_ARGS)
{
- return _simple(DM_DEVICE_CLEAR, argc > 1 ? argv[1] : NULL, 0, 1);
+ return _simple(DM_DEVICE_CLEAR, argc ? argv[0] : NULL, 0, 1);
}
static int _wait(CMD_ARGS)
@@ -1356,19 +1790,19 @@ static int _wait(CMD_ARGS)
const char *name = NULL;
if (!_switches[UUID_ARG] && !_switches[MAJOR_ARG]) {
- if (argc == 1) {
+ if (!argc) {
err("No device specified.");
return 0;
}
- name = argv[1];
+ name = argv[0];
argc--, argv++;
}
return _simple(DM_DEVICE_WAITEVENT, name,
- (argc > 1) ? (uint32_t) atoi(argv[argc - 1]) : 0, 1);
+ (argc) ? (uint32_t) atoi(argv[argc - 1]) : 0, 1);
}
-static int _process_all(const struct command *cmd, int argc, char **argv, int silent,
+static int _process_all(const struct command *cmd, const char *subcommand, int argc, char **argv, int silent,
int (*fn) (CMD_ARGS))
{
int r = 1;
@@ -1383,7 +1817,7 @@ static int _process_all(const struct command *cmd, int argc, char **argv, int si
if (_switches[CHECKS_ARG] && !dm_task_enable_checks(dmt))
goto out;
- if (!dm_task_run(dmt)) {
+ if (!_task_run(dmt)) {
r = 0;
goto out;
}
@@ -1401,7 +1835,7 @@ static int _process_all(const struct command *cmd, int argc, char **argv, int si
do {
names = (struct dm_names *)((char *) names + next);
- if (!fn(cmd, argc, argv, names, 1))
+ if (!fn(cmd, subcommand, argc, argv, names, 1))
r = 0;
next = names->next;
} while (next);
@@ -1434,7 +1868,7 @@ static uint64_t _get_device_size(const char *name)
if (_switches[CHECKS_ARG] && !dm_task_enable_checks(dmt))
goto out;
- if (!dm_task_run(dmt))
+ if (!_task_run(dmt))
goto out;
if (!dm_task_get_info(dmt, &info) || !info.exists)
@@ -1458,7 +1892,7 @@ static int _error_device(CMD_ARGS)
uint64_t size;
int r = 0;
- name = names ? names->name : argv[1];
+ name = names ? names->name : argv[0];
size = _get_device_size(name);
@@ -1483,7 +1917,7 @@ static int _error_device(CMD_ARGS)
if (_switches[CHECKS_ARG] && !dm_task_enable_checks(dmt))
goto error;
- if (!dm_task_run(dmt))
+ if (!_task_run(dmt))
goto error;
if (!_simple(DM_DEVICE_RESUME, name, 0, 0)) {
@@ -1500,7 +1934,7 @@ error:
static int _remove(CMD_ARGS)
{
- if (_switches[FORCE_ARG] && argc > 1) {
+ if (_switches[FORCE_ARG] && argc) {
/*
* 'remove --force' option is doing 2 operations on the same device
* this is not compatible with the use of --udevcookie/DM_UDEV_COOKIE.
@@ -1508,10 +1942,10 @@ static int _remove(CMD_ARGS)
*/
if (_udev_cookie)
log_warn("WARNING: Use of cookie and --force is not compatible.");
- (void) _error_device(cmd, argc, argv, NULL, 0);
+ (void) _error_device(cmd, NULL, argc, argv, NULL, 0);
}
- return _simple(DM_DEVICE_REMOVE, argc > 1 ? argv[1] : NULL, 0, 0);
+ return _simple(DM_DEVICE_REMOVE, argc ? argv[0] : NULL, 0, 0);
}
static int _count_devices(CMD_ARGS)
@@ -1532,17 +1966,17 @@ static int _remove_all(CMD_ARGS)
return r;
_num_devices = 0;
- r |= _process_all(cmd, argc, argv, 1, _count_devices);
+ r |= _process_all(cmd, NULL, argc, argv, 1, _count_devices);
/* No devices left? */
if (!_num_devices)
return r;
- r |= _process_all(cmd, argc, argv, 1, _error_device);
+ r |= _process_all(cmd, NULL, argc, argv, 1, _error_device);
r |= _simple(DM_DEVICE_REMOVE_ALL, "", 0, 0) | dm_mknodes(NULL);
_num_devices = 0;
- r |= _process_all(cmd, argc, argv, 1, _count_devices);
+ r |= _process_all(cmd, NULL, argc, argv, 1, _count_devices);
if (!_num_devices)
return r;
@@ -1561,7 +1995,7 @@ static void _display_dev(struct dm_task *dmt, const char *name)
static int _mknodes(CMD_ARGS)
{
- return dm_mknodes(argc > 1 ? argv[1] : NULL);
+ return dm_mknodes(argc ? argv[0] : NULL);
}
static int _exec_command(const char *name)
@@ -1584,7 +2018,7 @@ static int _exec_command(const char *name)
return 0;
if (!argc) {
- c = _command;
+ c = _command_to_exec;
while (argc < ARGS_MAX) {
while (*c && isspace(*c))
c++;
@@ -1640,9 +2074,9 @@ static int _status(CMD_ARGS)
if (names)
name = names->name;
else {
- if (argc == 1 && !_switches[UUID_ARG] && !_switches[MAJOR_ARG])
- return _process_all(cmd, argc, argv, 0, _status);
- name = argv[1];
+ if (!argc && !_switches[UUID_ARG] && !_switches[MAJOR_ARG])
+ return _process_all(cmd, NULL, argc, argv, 0, _status);
+ name = argv[0];
}
if (!strcmp(cmd->name, "table"))
@@ -1671,7 +2105,7 @@ static int _status(CMD_ARGS)
if (_switches[NOFLUSH_ARG] && !dm_task_no_flush(dmt))
goto out;
- if (!dm_task_run(dmt))
+ if (!_task_run(dmt))
goto out;
if (!dm_task_get_info(dmt, &info) || !info.exists)
@@ -1689,11 +2123,11 @@ static int _status(CMD_ARGS)
(!target_type || strcmp(target_type, _target)))
continue;
if (ls_only) {
- if (!_switches[EXEC_ARG] || !_command ||
+ if (!_switches[EXEC_ARG] || !_command_to_exec ||
_switches[VERBOSE_ARG])
_display_dev(dmt, name);
next = NULL;
- } else if (!_switches[EXEC_ARG] || !_command ||
+ } else if (!_switches[EXEC_ARG] || !_command_to_exec ||
_switches[VERBOSE_ARG]) {
if (!matched && _switches[VERBOSE_ARG])
_display_info(dmt);
@@ -1712,7 +2146,7 @@ static int _status(CMD_ARGS)
while (*c && *c != ' ')
*c++ = '0';
}
- printf("%" PRIu64 " %" PRIu64 " %s %s",
+ printf(FMTu64 " " FMTu64 " %s %s",
start, length, target_type, params);
}
printf("\n");
@@ -1723,7 +2157,7 @@ static int _status(CMD_ARGS)
if (multiple_devices && _switches[VERBOSE_ARG] && matched && !ls_only)
printf("\n");
- if (matched && _switches[EXEC_ARG] && _command && !_exec_command(name))
+ if (matched && _switches[EXEC_ARG] && _command_to_exec && !_exec_command(name))
goto out;
r = 1;
@@ -1747,7 +2181,7 @@ static int _targets(CMD_ARGS)
if (_switches[CHECKS_ARG] && !dm_task_enable_checks(dmt))
goto out;
- if (!dm_task_run(dmt))
+ if (!_task_run(dmt))
goto out;
target = dm_task_get_versions(dmt);
@@ -1779,9 +2213,9 @@ static int _info(CMD_ARGS)
if (names)
name = names->name;
else {
- if (argc == 1 && !_switches[UUID_ARG] && !_switches[MAJOR_ARG])
- return _process_all(cmd, argc, argv, 0, _info);
- name = argv[1];
+ if (!argc && !_switches[UUID_ARG] && !_switches[MAJOR_ARG])
+ return _process_all(cmd, NULL, argc, argv, 0, _info);
+ name = argv[0];
}
if (!(dmt = dm_task_create(DM_DEVICE_INFO)))
@@ -1799,7 +2233,7 @@ static int _info(CMD_ARGS)
if (_switches[CHECKS_ARG] && !dm_task_enable_checks(dmt))
goto out;
- if (!dm_task_run(dmt))
+ if (!_task_run(dmt))
goto out;
r = _display_info(dmt);
@@ -1823,9 +2257,9 @@ static int _deps(CMD_ARGS)
if (names)
name = names->name;
else {
- if (argc == 1 && !_switches[UUID_ARG] && !_switches[MAJOR_ARG])
- return _process_all(cmd, argc, argv, 0, _deps);
- name = argv[1];
+ if (!argc && !_switches[UUID_ARG] && !_switches[MAJOR_ARG])
+ return _process_all(cmd, NULL, argc, argv, 0, _deps);
+ name = argv[0];
}
if (!(dmt = dm_task_create(DM_DEVICE_DEPS)))
@@ -1843,7 +2277,7 @@ static int _deps(CMD_ARGS)
if (_switches[CHECKS_ARG] && !dm_task_enable_checks(dmt))
goto out;
- if (!dm_task_run(dmt))
+ if (!_task_run(dmt))
goto out;
if (!dm_task_get_info(dmt, &info))
@@ -2247,7 +2681,7 @@ static int _build_whole_deptree(const struct command *cmd)
if (!(_dtree = dm_tree_create()))
return 0;
- if (!_process_all(cmd, 0, NULL, 0, _add_dep))
+ if (!_process_all(cmd, NULL, 0, NULL, 0, _add_dep))
return 0;
return 1;
@@ -2289,6 +2723,15 @@ static int _uint32_disp(struct dm_report *rh,
return dm_report_field_uint32(rh, field, &value);
}
+static int _show_units(void)
+{
+ /* --nosuffix overrides --units */
+ if (_switches[NOSUFFIX_ARG])
+ return 0;
+
+ return (_int_args[UNITS_ARG]) ? 1 : 0;
+}
+
static int _dm_name_disp(struct dm_report *rh,
struct dm_pool *mem __attribute__((unused)),
struct dm_report_field *field, const void *data,
@@ -2750,6 +3193,698 @@ static int _dm_lv_layer_name_disp(struct dm_report *rh,
return dm_report_field_string(rh, field, (const char *const *) data);
}
+/**
+ * All _dm_stats_*_disp functions for basic counters are identical:
+ * obtain the value for the current region and area and pass it to
+ * dm_report_field_uint64().
+ */
+#define MK_STATS_COUNTER_DISP_FN(counter) \
+static int _dm_stats_ ## counter ## _disp(struct dm_report *rh, \
+ struct dm_pool *mem __attribute__((unused)), \
+ struct dm_report_field *field, const void *data, \
+ void *private __attribute__((unused))) \
+{ \
+ const struct dm_stats *dms = (const struct dm_stats *) data; \
+ uint64_t value = dm_stats_get_ ## counter(dms, DM_STATS_REGION_CURRENT, \
+ DM_STATS_AREA_CURRENT); \
+ return dm_report_field_uint64(rh, field, &value); \
+}
+
+MK_STATS_COUNTER_DISP_FN(reads)
+MK_STATS_COUNTER_DISP_FN(reads_merged)
+MK_STATS_COUNTER_DISP_FN(read_sectors)
+MK_STATS_COUNTER_DISP_FN(read_nsecs)
+MK_STATS_COUNTER_DISP_FN(writes)
+MK_STATS_COUNTER_DISP_FN(writes_merged)
+MK_STATS_COUNTER_DISP_FN(write_sectors)
+MK_STATS_COUNTER_DISP_FN(write_nsecs)
+MK_STATS_COUNTER_DISP_FN(io_in_progress)
+MK_STATS_COUNTER_DISP_FN(io_nsecs)
+MK_STATS_COUNTER_DISP_FN(weighted_io_nsecs)
+MK_STATS_COUNTER_DISP_FN(total_read_nsecs)
+MK_STATS_COUNTER_DISP_FN(total_write_nsecs)
+#undef MK_STATS_COUNTER_DISP_FN
+
+static int _dm_stats_region_id_disp(struct dm_report *rh,
+ struct dm_pool *mem __attribute__((unused)),
+ struct dm_report_field *field, const void *data,
+ void *private __attribute__((unused)))
+{
+ const struct dm_stats *dms = (const struct dm_stats *) data;
+ uint64_t region_id = dm_stats_get_current_region(dms);
+ return dm_report_field_uint64(rh, field, &region_id);
+}
+
+static int _dm_stats_region_start_disp(struct dm_report *rh,
+ struct dm_pool *mem __attribute__((unused)),
+ struct dm_report_field *field, const void *data,
+ void *private __attribute__((unused)))
+{
+ const struct dm_stats *dms = (const struct dm_stats *) data;
+ uint64_t region_start;
+ const char *repstr;
+ double *sortval;
+ char units = _disp_units;
+ uint64_t factor = _disp_factor;
+
+ if (!dm_stats_get_current_region_start(dms, &region_start))
+ return_0;
+
+ if (!(repstr = dm_size_to_string(mem, region_start, units, 1, factor,
+ _show_units(), DM_SIZE_UNIT)))
+ return_0;
+
+ if (!(sortval = dm_pool_alloc(mem, sizeof(uint64_t))))
+ return_0;
+
+ *sortval = (double) region_start;
+
+ dm_report_field_set_value(field, repstr, sortval);
+ return 1;
+}
+
+static int _dm_stats_region_len_disp(struct dm_report *rh,
+ struct dm_pool *mem __attribute__((unused)),
+ struct dm_report_field *field, const void *data,
+ void *private __attribute__((unused)))
+{
+ const struct dm_stats *dms = (const struct dm_stats *) data;
+ uint64_t region_length;
+ const char *repstr;
+ double *sortval;
+ char units = _disp_units;
+ uint64_t factor = _disp_factor;
+
+ if (!dm_stats_get_current_region_len(dms, &region_length))
+ return_0;
+
+ if (!(repstr = dm_size_to_string(mem, region_length, units, 1, factor,
+ _show_units(), DM_SIZE_UNIT)))
+ return_0;
+
+ if (!(sortval = dm_pool_alloc(mem, sizeof(uint64_t))))
+ return_0;
+
+ *sortval = (double) region_length;
+
+ dm_report_field_set_value(field, repstr, sortval);
+ return 1;
+}
+
+static int _dm_stats_area_id_disp(struct dm_report *rh,
+ struct dm_pool *mem __attribute__((unused)),
+ struct dm_report_field *field, const void *data,
+ void *private __attribute__((unused)))
+{
+ const struct dm_stats *dms = (const struct dm_stats *) data;
+ uint64_t area_id = dm_stats_get_current_area(dms);
+ return dm_report_field_uint64(rh, field, &area_id);
+}
+
+static int _dm_stats_area_start_disp(struct dm_report *rh,
+ struct dm_pool *mem __attribute__((unused)),
+ struct dm_report_field *field, const void *data,
+ void *private __attribute__((unused)))
+{
+ const struct dm_stats *dms = (const struct dm_stats *) data;
+ uint64_t area_start;
+ const char *repstr;
+ double *sortval;
+ char units = _disp_units;
+ uint64_t factor = _disp_factor;
+
+ if (!dm_stats_get_current_area_start(dms, &area_start))
+ return_0;
+
+ if (!(repstr = dm_size_to_string(mem, area_start, units, 1, factor,
+ _show_units(), DM_SIZE_UNIT)))
+ return_0;
+
+ if (!(sortval = dm_pool_alloc(mem, sizeof(uint64_t))))
+ return_0;
+
+ *sortval = (double) area_start;
+
+ dm_report_field_set_value(field, repstr, sortval);
+ return 1;
+}
+
+static int _dm_stats_area_offset_disp(struct dm_report *rh,
+ struct dm_pool *mem __attribute__((unused)),
+ struct dm_report_field *field, const void *data,
+ void *private __attribute__((unused)))
+{
+ const struct dm_stats *dms = (const struct dm_stats *) data;
+ uint64_t area_offset;
+ const char *repstr;
+ double *sortval;
+ char units = _disp_units;
+ uint64_t factor = _disp_factor;
+
+ if (!dm_stats_get_current_area_offset(dms, &area_offset))
+ return_0;
+
+ if (!(repstr = dm_size_to_string(mem, area_offset, units, 1, factor,
+ _show_units(), DM_SIZE_UNIT)))
+ return_0;
+
+ if (!(sortval = dm_pool_alloc(mem, sizeof(uint64_t))))
+ return_0;
+
+ *sortval = (double) area_offset;
+
+ dm_report_field_set_value(field, repstr, sortval);
+ return 1;
+}
+
+static int _dm_stats_area_len_disp(struct dm_report *rh,
+ struct dm_pool *mem __attribute__((unused)),
+ struct dm_report_field *field, const void *data,
+ void *private __attribute__((unused)))
+{
+ const struct dm_stats *dms = (const struct dm_stats *) data;
+ uint64_t area_len;
+ const char *repstr;
+ double *sortval;
+ char units = _disp_units;
+ uint64_t factor = _disp_factor;
+
+ if (!dm_stats_get_current_area_len(dms, &area_len))
+ return_0;
+
+ if (!(repstr = dm_size_to_string(mem, area_len, units, 1, factor,
+ _show_units(), DM_SIZE_UNIT)))
+ return_0;
+
+ if (!(sortval = dm_pool_alloc(mem, sizeof(uint64_t))))
+ return_0;
+
+ *sortval = (double) area_len;
+
+ dm_report_field_set_value(field, repstr, sortval);
+ return 1;
+}
+
+static int _dm_stats_area_count_disp(struct dm_report *rh,
+ struct dm_pool *mem __attribute__((unused)),
+ struct dm_report_field *field, const void *data,
+ void *private __attribute__((unused)))
+{
+ const struct dm_stats *dms = (const struct dm_stats *) data;
+ uint64_t area_count, region;
+
+ region = dm_stats_get_current_region(dms);
+ if (!(area_count = dm_stats_get_region_nr_areas(dms, region)))
+ return_0;
+
+ return dm_report_field_uint64(rh, field, &area_count);
+}
+
+static int _dm_stats_program_id_disp(struct dm_report *rh,
+ struct dm_pool *mem __attribute__((unused)),
+ struct dm_report_field *field, const void *data,
+ void *private __attribute__((unused)))
+{
+ const struct dm_stats *dms = (const struct dm_stats *) data;
+ const char *program_id;
+ if (!(program_id = dm_stats_get_current_region_program_id(dms)))
+ return_0;
+ return dm_report_field_string(rh, field, (const char * const *) &program_id);
+}
+
+static int _dm_stats_aux_data_disp(struct dm_report *rh,
+ struct dm_pool *mem __attribute__((unused)),
+ struct dm_report_field *field, const void *data,
+ void *private __attribute__((unused)))
+{
+ const struct dm_stats *dms = (const struct dm_stats *) data;
+ const char *aux_data;
+ if (!(aux_data = dm_stats_get_current_region_aux_data(dms)))
+ return_0;
+ return dm_report_field_string(rh, field, (const char * const *) &aux_data);
+}
+
+static int _dm_stats_rrqm_disp(struct dm_report *rh,
+ struct dm_pool *mem __attribute__((unused)),
+ struct dm_report_field *field, const void *data,
+ void *private __attribute__((unused)))
+{
+ const struct dm_stats *dms = (const struct dm_stats *) data;
+ char buf[64];
+ char *repstr;
+ double *sortval, rrqm;
+
+ if (!dm_stats_get_rd_merges_per_sec(dms, &rrqm,
+ DM_STATS_REGION_CURRENT,
+ DM_STATS_AREA_CURRENT))
+ return_0;
+
+ if (!dm_snprintf(buf, sizeof(buf), "%.2f", rrqm))
+ return_0;
+
+ if (!(repstr = dm_pool_strdup(mem, buf)))
+ return_0;
+
+ if (!(sortval = dm_pool_alloc(mem, sizeof(uint64_t))))
+ return_0;
+
+ *sortval = rrqm;
+
+ dm_report_field_set_value(field, repstr, sortval);
+ return 1;
+
+}
+
+static int _dm_stats_wrqm_disp(struct dm_report *rh,
+ struct dm_pool *mem __attribute__((unused)),
+ struct dm_report_field *field, const void *data,
+ void *private __attribute__((unused)))
+{
+ const struct dm_stats *dms = (const struct dm_stats *) data;
+ char buf[64];
+ char *repstr;
+ double *sortval, wrqm;
+
+ if (!dm_stats_get_wr_merges_per_sec(dms, &wrqm,
+ DM_STATS_REGION_CURRENT,
+ DM_STATS_AREA_CURRENT))
+ return_0;
+
+ if (!dm_snprintf(buf, sizeof(buf), "%.2f", wrqm))
+ return_0;
+
+ if (!(repstr = dm_pool_strdup(mem, buf)))
+ return_0;
+
+ if (!(sortval = dm_pool_alloc(mem, sizeof(uint64_t))))
+ return_0;
+
+ *sortval = wrqm;
+
+ dm_report_field_set_value(field, repstr, sortval);
+ return 1;
+
+}
+
+static int _dm_stats_rs_disp(struct dm_report *rh,
+ struct dm_pool *mem __attribute__((unused)),
+ struct dm_report_field *field, const void *data,
+ void *private __attribute__((unused)))
+{
+ const struct dm_stats *dms = (const struct dm_stats *) data;
+ char buf[64];
+ char *repstr;
+ double *sortval, rs;
+
+ if (!dm_stats_get_reads_per_sec(dms, &rs,
+ DM_STATS_REGION_CURRENT,
+ DM_STATS_AREA_CURRENT))
+ return_0;
+
+ if (!dm_snprintf(buf, sizeof(buf), "%.2f", rs))
+ return_0;
+
+ if (!(repstr = dm_pool_strdup(mem, buf)))
+ return_0;
+
+ if (!(sortval = dm_pool_alloc(mem, sizeof(uint64_t))))
+ return_0;
+
+ *sortval = rs;
+
+ dm_report_field_set_value(field, repstr, sortval);
+ return 1;
+
+}
+
+static int _dm_stats_ws_disp(struct dm_report *rh,
+ struct dm_pool *mem __attribute__((unused)),
+ struct dm_report_field *field, const void *data,
+ void *private __attribute__((unused)))
+{
+ const struct dm_stats *dms = (const struct dm_stats *) data;
+ char buf[64];
+ char *repstr;
+ double *sortval, ws;
+
+ if (!dm_stats_get_writes_per_sec(dms, &ws,
+ DM_STATS_REGION_CURRENT,
+ DM_STATS_AREA_CURRENT))
+ return_0;
+
+ if (!dm_snprintf(buf, sizeof(buf), "%.2f", ws))
+ return_0;
+
+ if (!(repstr = dm_pool_strdup(mem, buf)))
+ return_0;
+
+ if (!(sortval = dm_pool_alloc(mem, sizeof(uint64_t))))
+ return_0;
+
+ *sortval = ws;
+
+ dm_report_field_set_value(field, repstr, sortval);
+ return 1;
+
+}
+
+static int _dm_stats_read_secs_disp(struct dm_report *rh,
+ struct dm_pool *mem __attribute__((unused)),
+ struct dm_report_field *field, const void *data,
+ void *private __attribute__((unused)))
+{
+ const struct dm_stats *dms = (const struct dm_stats *) data;
+ const char *repstr;
+ double *sortval, rsec;
+ char units = _disp_units;
+ uint64_t factor = _disp_factor;
+
+ if (!dm_stats_get_read_sectors_per_sec(dms, &rsec,
+ DM_STATS_REGION_CURRENT,
+ DM_STATS_AREA_CURRENT))
+ return_0;
+
+ if (!(repstr = dm_size_to_string(mem, (uint64_t) rsec, units, 1,
+ factor, _show_units(), DM_SIZE_UNIT)))
+
+ return_0;
+
+ if (!(sortval = dm_pool_alloc(mem, sizeof(uint64_t))))
+ return_0;
+
+ *sortval = rsec;
+
+ dm_report_field_set_value(field, repstr, sortval);
+ return 1;
+}
+
+static int _dm_stats_write_secs_disp(struct dm_report *rh,
+ struct dm_pool *mem __attribute__((unused)),
+ struct dm_report_field *field, const void *data,
+ void *private __attribute__((unused)))
+{
+ const struct dm_stats *dms = (const struct dm_stats *) data;
+ const char *repstr;
+ double *sortval, wsec;
+ char units = _disp_units;
+ uint64_t factor = _disp_factor;
+
+ if (!dm_stats_get_write_sectors_per_sec(dms, &wsec,
+ DM_STATS_REGION_CURRENT,
+ DM_STATS_AREA_CURRENT))
+ return_0;
+
+ if (!(repstr = dm_size_to_string(mem, (uint64_t) wsec, units, 1,
+ factor, _show_units(), DM_SIZE_UNIT)))
+ return_0;
+
+ if (!(sortval = dm_pool_alloc(mem, sizeof(uint64_t))))
+ return_0;
+
+ *sortval = wsec;
+
+ dm_report_field_set_value(field, repstr, sortval);
+ return 1;
+}
+
+static int _dm_stats_arqsz_disp(struct dm_report *rh,
+ struct dm_pool *mem __attribute__((unused)),
+ struct dm_report_field *field, const void *data,
+ void *private __attribute__((unused)))
+{
+ const struct dm_stats *dms = (const struct dm_stats *) data;
+ const char *repstr;
+ double *sortval, arqsz;
+ char units = _disp_units;
+ uint64_t factor = _disp_factor;
+
+ if (!dm_stats_get_average_request_size(dms, &arqsz,
+ DM_STATS_REGION_CURRENT,
+ DM_STATS_AREA_CURRENT))
+ return_0;
+
+
+ if (!(repstr = dm_size_to_string(mem, (uint64_t) arqsz, units, 1,
+ factor, _show_units(), DM_SIZE_UNIT)))
+
+ return_0;
+
+ if (!(sortval = dm_pool_alloc(mem, sizeof(uint64_t))))
+ return_0;
+
+ *sortval = arqsz;
+
+ dm_report_field_set_value(field, repstr, sortval);
+ return 1;
+}
+
+static int _dm_stats_qusz_disp(struct dm_report *rh,
+ struct dm_pool *mem __attribute__((unused)),
+ struct dm_report_field *field, const void *data,
+ void *private __attribute__((unused)))
+{
+ const struct dm_stats *dms = (const struct dm_stats *) data;
+ char buf[64];
+ char *repstr;
+ double *sortval, qusz;
+
+ if (!dm_stats_get_average_queue_size(dms, &qusz,
+ DM_STATS_REGION_CURRENT,
+ DM_STATS_AREA_CURRENT))
+ return_0;
+
+ if (!dm_snprintf(buf, sizeof(buf), "%.2f", qusz))
+ return_0;
+
+ if (!(repstr = dm_pool_strdup(mem, buf)))
+ return_0;
+
+ if (!(sortval = dm_pool_alloc(mem, sizeof(uint64_t))))
+ return_0;
+
+ *sortval = qusz;
+
+ dm_report_field_set_value(field, repstr, sortval);
+ return 1;
+}
+
+static int _dm_stats_await_disp(struct dm_report *rh,
+ struct dm_pool *mem __attribute__((unused)),
+ struct dm_report_field *field, const void *data,
+ void *private __attribute__((unused)))
+{
+ const struct dm_stats *dms = (const struct dm_stats *) data;
+ char buf[64];
+ char *repstr;
+ double *sortval, await;
+
+ if (!dm_stats_get_average_wait_time(dms, &await,
+ DM_STATS_REGION_CURRENT,
+ DM_STATS_AREA_CURRENT))
+ return_0;
+
+ /* FIXME: make scale configurable */
+ /* display in msecs */
+ await /= NSEC_PER_MSEC;
+
+ if (!dm_snprintf(buf, sizeof(buf), "%.2f", await))
+ return_0;
+
+ if (!(repstr = dm_pool_strdup(mem, buf)))
+ return_0;
+
+ if (!(sortval = dm_pool_alloc(mem, sizeof(uint64_t))))
+ return_0;
+
+ *sortval = await;
+
+ dm_report_field_set_value(field, repstr, sortval);
+ return 1;
+}
+
+static int _dm_stats_r_await_disp(struct dm_report *rh,
+ struct dm_pool *mem __attribute__((unused)),
+ struct dm_report_field *field, const void *data,
+ void *private __attribute__((unused)))
+{
+ const struct dm_stats *dms = (const struct dm_stats *) data;
+ char buf[64];
+ char *repstr;
+ double *sortval, r_await;
+
+ if (!dm_stats_get_average_rd_wait_time(dms, &r_await,
+ DM_STATS_REGION_CURRENT,
+ DM_STATS_AREA_CURRENT))
+ return_0;
+
+ /* FIXME: make scale configurable */
+ /* display in msecs */
+ r_await /= NSEC_PER_MSEC;
+
+ if (!dm_snprintf(buf, sizeof(buf), "%.2f", r_await))
+ return_0;
+
+ if (!(repstr = dm_pool_strdup(mem, buf)))
+ return_0;
+
+ if (!(sortval = dm_pool_alloc(mem, sizeof(uint64_t))))
+ return_0;
+
+ *sortval = r_await;
+
+ dm_report_field_set_value(field, repstr, sortval);
+ return 1;
+}
+
+static int _dm_stats_w_await_disp(struct dm_report *rh,
+ struct dm_pool *mem __attribute__((unused)),
+ struct dm_report_field *field, const void *data,
+ void *private __attribute__((unused)))
+{
+ const struct dm_stats *dms = (const struct dm_stats *) data;
+ char buf[64];
+ char *repstr;
+ double *sortval, w_await;
+
+ if (!dm_stats_get_average_wr_wait_time(dms, &w_await,
+ DM_STATS_REGION_CURRENT,
+ DM_STATS_AREA_CURRENT))
+ return_0;
+
+ /* FIXME: make scale configurable */
+ /* display in msecs */
+ w_await /= NSEC_PER_MSEC;
+
+ if (!dm_snprintf(buf, sizeof(buf), "%.2f", w_await))
+ return_0;
+
+ if (!(repstr = dm_pool_strdup(mem, buf)))
+ return_0;
+
+ if (!(sortval = dm_pool_alloc(mem, sizeof(uint64_t))))
+ return_0;
+
+ *sortval = w_await;
+
+ dm_report_field_set_value(field, repstr, sortval);
+ return 1;
+}
+
+static int _dm_stats_tput_disp(struct dm_report *rh,
+ struct dm_pool *mem __attribute__((unused)),
+ struct dm_report_field *field, const void *data,
+ void *private __attribute__((unused)))
+{
+ const struct dm_stats *dms = (const struct dm_stats *) data;
+ char buf[64];
+ char *repstr;
+ double *sortval, tput;
+
+ if (!dm_stats_get_throughput(dms, &tput,
+ DM_STATS_REGION_CURRENT,
+ DM_STATS_AREA_CURRENT))
+ return_0;
+
+ if (!dm_snprintf(buf, sizeof(buf), "%.2f", tput))
+ return_0;
+
+ if (!(repstr = dm_pool_strdup(mem, buf)))
+ return_0;
+
+ if (!(sortval = dm_pool_alloc(mem, sizeof(uint64_t))))
+ return_0;
+
+ *sortval = tput;
+
+ dm_report_field_set_value(field, repstr, sortval);
+ return 1;
+}
+
+static int _dm_stats_svctm_disp(struct dm_report *rh,
+ struct dm_pool *mem __attribute__((unused)),
+ struct dm_report_field *field, const void *data,
+ void *private __attribute__((unused)))
+{
+ const struct dm_stats *dms = (const struct dm_stats *) data;
+ char buf[64];
+ char *repstr;
+ double *sortval, svctm;
+
+ if (!dm_stats_get_service_time(dms, &svctm,
+ DM_STATS_REGION_CURRENT,
+ DM_STATS_AREA_CURRENT))
+ return_0;
+
+ /* FIXME: make scale configurable */
+ /* display in msecs */
+ svctm /= NSEC_PER_MSEC;
+
+ if (!dm_snprintf(buf, sizeof(buf), "%.2f", svctm))
+ return_0;
+
+ if (!(repstr = dm_pool_strdup(mem, buf)))
+ return_0;
+
+ if (!(sortval = dm_pool_alloc(mem, sizeof(uint64_t))))
+ return_0;
+
+ *sortval = svctm;
+
+ dm_report_field_set_value(field, repstr, sortval);
+ return 1;
+
+}
+
+static int _dm_stats_util_disp(struct dm_report *rh,
+ struct dm_pool *mem __attribute__((unused)),
+ struct dm_report_field *field, const void *data,
+ void *private __attribute__((unused)))
+{
+ const struct dm_stats *dms = (const struct dm_stats *) data;
+ dm_percent_t util;
+
+ if (!dm_stats_get_utilization(dms, &util,
+ DM_STATS_REGION_CURRENT,
+ DM_STATS_AREA_CURRENT))
+ return_0;
+
+ dm_report_field_percent(rh, field, &util);
+ return 1;
+}
+
+static int _dm_stats_sample_interval_ns_disp(struct dm_report *rh,
+ struct dm_pool *mem __attribute__((unused)),
+ struct dm_report_field *field, const void *data,
+ void *private __attribute__((unused)))
+{
+ /* FIXME: use internal interval estimate when supported by libdm */
+ return dm_report_field_uint64(rh, field, &_last_interval);
+}
+
+static int _dm_stats_sample_interval_disp(struct dm_report *rh,
+ struct dm_pool *mem __attribute__((unused)),
+ struct dm_report_field *field, const void *data,
+ void *private __attribute__((unused)))
+{
+ char buf[64];
+ char *repstr;
+ double *sortval;
+
+ if (!(sortval = dm_pool_alloc(mem, sizeof(*sortval))))
+ return_0;
+
+ *sortval = (double)_last_interval / (double) NSEC_PER_SEC;
+
+ if (!dm_snprintf(buf, sizeof(buf), "%2.6f", *sortval))
+ return_0;
+
+ if (!(repstr = dm_pool_strdup(mem, buf)))
+ return_0;
+
+ dm_report_field_set_value(field, repstr, sortval);
+ return 1;
+}
+
static void *_task_get_obj(void *obj)
{
return ((struct dmsetup_report_obj *)obj)->task;
@@ -2775,20 +3910,29 @@ static void *_split_name_get_obj(void *obj)
return ((struct dmsetup_report_obj *)obj)->split_name;
}
+static void *_stats_get_obj(void *obj)
+{
+ return ((struct dmsetup_report_obj *)obj)->stats;
+}
+
static const struct dm_report_object_type _report_types[] = {
- { DR_TASK, "Mapped Device Name", "", _task_get_obj },
- { DR_INFO, "Mapped Device Information", "", _info_get_obj },
- { DR_DEPS, "Mapped Device Relationship Information", "", _deps_get_obj },
- { DR_TREE, "Mapped Device Relationship Information", "", _tree_get_obj },
- { DR_NAME, "Mapped Device Name Components", "", _split_name_get_obj },
- { 0, "", "", NULL },
+ { DR_TASK, "Mapped Device Name", "name_", _task_get_obj },
+ { DR_INFO, "Mapped Device Information", "info_", _info_get_obj },
+ { DR_DEPS, "Mapped Device Relationship Information", "deps_", _deps_get_obj },
+ { DR_TREE, "Mapped Device Relationship Information", "tree_", _tree_get_obj },
+ { DR_NAME, "Mapped Device Name Components", "splitname_", _split_name_get_obj },
+ { DR_STATS, "Mapped Device Statistics","stats_", _stats_get_obj },
+ { DR_STATS_META, "Mapped Device Statistics Region Information","region_", _stats_get_obj },
+ { 0, "", "", NULL }
};
/* Column definitions */
+/* N.B. Field names must not contain the substring 'help' as this will disable --count. */
#define OFFSET_OF(strct, field) (((char*)&((struct strct*)0)->field) - (char*)0)
#define STR (DM_REPORT_FIELD_TYPE_STRING)
#define NUM (DM_REPORT_FIELD_TYPE_NUMBER)
#define SIZ (DM_REPORT_FIELD_TYPE_SIZE)
+#define TIM (DM_REPORT_FIELD_TYPE_TIME)
#define FIELD_O(type, strct, sorttype, head, field, width, func, id, desc) {DR_ ## type, sorttype, OFFSET_OF(strct, field), width, id, head, &_ ## func ## _disp, desc},
#define FIELD_F(type, sorttype, head, width, func, id, desc) {DR_ ## type, sorttype, 0, width, id, head, &_ ## func ## _disp, desc},
@@ -2802,7 +3946,7 @@ FIELD_F(TASK, STR, "MangledUUID", 32, dm_mangled_uuid, "mangled_uuid", "Mangled
FIELD_F(TASK, STR, "UnmangledUUID", 32, dm_unmangled_uuid, "unmangled_uuid", "Unmangled unique (optional) identifier for mapped device.")
/* FIXME Next one should be INFO */
-FIELD_F(TASK, NUM, "RAhead", 6, dm_read_ahead, "read_ahead", "Read ahead in sectors.")
+FIELD_F(TASK, NUM, "RAhead", 6, dm_read_ahead, "read_ahead", "Read ahead value.")
FIELD_F(INFO, STR, "BlkDevName", 16, dm_blk_name, "blkdevname", "Name of block device.")
FIELD_F(INFO, STR, "Stat", 4, dm_info_status, "attr", "(L)ive, (I)nactive, (s)uspended, (r)ead-only, read-(w)rite.")
@@ -2830,22 +3974,88 @@ FIELD_O(NAME, dm_split_name, STR, "VG", vg_name, 4, dm_vg_name, "vg_name", "LVM
FIELD_O(NAME, dm_split_name, STR, "LV", lv_name, 4, dm_lv_name, "lv_name", "LVM Logical Volume name.")
FIELD_O(NAME, dm_split_name, STR, "LVLayer", lv_layer, 7, dm_lv_layer_name, "lv_layer", "LVM device layer.")
+/* basic stats counters */
+FIELD_F(STATS, NUM, "Reads", 5, dm_stats_reads, "reads", "Number of reads completed.")
+FIELD_F(STATS, NUM, "RdMrges", 5, dm_stats_reads_merged, "reads_merged", "Number of reads merged.")
+FIELD_F(STATS, NUM, "RdSectors", 5, dm_stats_read_sectors, "read_sectors", "Number of sectors read.")
+FIELD_F(STATS, NUM, "RdNsec", 5, dm_stats_read_nsecs, "read_nsecs", "Time spent reading.")
+FIELD_F(STATS, NUM, "Writes", 5, dm_stats_writes, "writes", "Number of writes completed.")
+FIELD_F(STATS, NUM, "WrMerges", 5, dm_stats_writes_merged, "writes_merged", "Number of writes merged.")
+FIELD_F(STATS, NUM, "WrSectors", 5, dm_stats_write_sectors, "write_sectors", "Number of sectors written.")
+FIELD_F(STATS, NUM, "WrNsec", 5, dm_stats_write_nsecs, "write_nsecs", "Time spent writing.")
+FIELD_F(STATS, NUM, "InProgress", 5, dm_stats_io_in_progress, "in_progress", "Number of I/Os currently in progress.")
+FIELD_F(STATS, NUM, "IoNsec", 5, dm_stats_io_nsecs, "io_nsecs", "Time spent doing I/O.")
+FIELD_F(STATS, NUM, "WtIoNsec", 5, dm_stats_weighted_io_nsecs, "weighted_io_nsecs", "Weighted time spent doing I/O.")
+FIELD_F(STATS, NUM, "TotalRdNsec", 5, dm_stats_total_read_nsecs, "total_rd_nsecs", "Total time spent reading.")
+FIELD_F(STATS, NUM, "TotalWrNsec", 5, dm_stats_total_write_nsecs, "total_wr_nsecs", "Total time spent writing.")
+
+/* Stats derived metrics */
+FIELD_F(STATS, NUM, "RRqM/s", 5, dm_stats_rrqm, "rrqm", "Read requests merged per second.")
+FIELD_F(STATS, NUM, "WRqM/s", 5, dm_stats_wrqm, "wrqm", "Write requests merged per second.")
+FIELD_F(STATS, NUM, "R/s", 5, dm_stats_rs, "rs", "Reads per second.")
+FIELD_F(STATS, NUM, "W/s", 5, dm_stats_ws, "ws", "Writes per second.")
+FIELD_F(STATS, NUM, "RSz/s", 5, dm_stats_read_secs, "rsize_sec", "Size of data read per second.")
+FIELD_F(STATS, NUM, "WSz/s", 5, dm_stats_write_secs, "wsize_sec", "Size of data written per second.")
+FIELD_F(STATS, NUM, "AvRqSz", 5, dm_stats_arqsz, "arqsz", "Average request size.")
+FIELD_F(STATS, NUM, "QSize", 5, dm_stats_qusz, "qusz", "Average queue size.")
+FIELD_F(STATS, NUM, "AWait", 5, dm_stats_await, "await", "Average wait time.")
+FIELD_F(STATS, NUM, "RdAWait", 5, dm_stats_r_await, "r_await", "Average read wait time.")
+FIELD_F(STATS, NUM, "WrAWait", 5, dm_stats_w_await, "w_await", "Average write wait time.")
+FIELD_F(STATS, NUM, "TPut", 5, dm_stats_tput, "tput", "Throughput.")
+FIELD_F(STATS, NUM, "SvcTm", 5, dm_stats_svctm, "svctm", "Service time.")
+FIELD_F(STATS, NUM, "Util%", 5, dm_stats_util, "util", "Utilization.")
+
+/* Stats interval duration estimates */
+FIELD_F(STATS, NUM, "IntervalNSec", 10, dm_stats_sample_interval_ns, "interval_ns", "Sampling interval in nanoseconds.")
+FIELD_F(STATS, NUM, "Interval", 8, dm_stats_sample_interval, "interval", "Sampling interval.")
+
+/* Stats report meta-fields */
+FIELD_F(STATS_META, NUM, "RgID", 4, dm_stats_region_id, "region_id", "Region ID.")
+FIELD_F(STATS_META, SIZ, "RStart", 5, dm_stats_region_start, "region_start", "Region start.")
+FIELD_F(STATS_META, SIZ, "RSize", 5, dm_stats_region_len, "region_len", "Region length.")
+FIELD_F(STATS_META, NUM, "ArID", 4, dm_stats_area_id, "area_id", "Area ID.")
+FIELD_F(STATS_META, SIZ, "AStart", 5, dm_stats_area_start, "area_start", "Area offset from start of device.")
+FIELD_F(STATS_META, SIZ, "ASize", 5, dm_stats_area_len, "area_len", "Area length.")
+FIELD_F(STATS_META, SIZ, "AOff", 5, dm_stats_area_offset, "area_offset", "Area offset from start of region.")
+FIELD_F(STATS_META, NUM, "#Areas", 3, dm_stats_area_count, "area_count", "Area count.")
+FIELD_F(STATS_META, STR, "ProgID", 6, dm_stats_program_id, "program_id", "Program ID.")
+FIELD_F(STATS_META, STR, "AuxDat", 6, dm_stats_aux_data, "aux_data", "Auxiliary data.")
{0, 0, 0, 0, "", "", NULL, NULL},
/* *INDENT-ON* */
};
+#undef FIELD_O
+#undef FIELD_F
+
#undef STR
#undef NUM
#undef SIZ
-#undef FIELD_O
-#undef FIELD_F
static const char *default_report_options = "name,major,minor,attr,open,segments,events,uuid";
static const char *splitname_report_options = "vg_name,lv_name,lv_layer";
-static int _report_init(const struct command *cmd)
+/* Stats counters & derived metrics. */
+#define RD_COUNTERS "reads,reads_merged,read_sectors,read_nsecs,total_rd_nsecs"
+#define WR_COUNTERS "writes,writes_merged,write_sectors,write_nsecs,total_wr_nsecs"
+#define IO_COUNTERS "in_progress,io_nsecs,weighted_io_nsecs"
+#define METRICS "rrqm,wrqm,rs,ws,rsize_sec,wsize_sec,arqsz,qusz,util,await,r_await,w_await"
+#define COUNTERS RD_COUNTERS "," WR_COUNTERS "," IO_COUNTERS
+
+/* Device, region and area metadata. */
+#define STATS_DEV_INFO "name,region_id"
+#define STATS_AREA_INFO STATS_DEV_INFO ",region_start,region_len,area_count,area_id,area_start,area_len"
+#define STATS_REGION_INFO STATS_DEV_INFO ",region_start,region_len,area_count,area_len"
+
+/* Default stats report options. */
+static const char *_stats_default_report_options = STATS_DEV_INFO ",area_id,area_start,area_len," METRICS;
+static const char *_stats_raw_report_options = STATS_DEV_INFO ",area_id,area_start,area_len," COUNTERS;
+static const char *_stats_list_options = STATS_REGION_INFO ",program_id";
+static const char *_stats_area_list_options = STATS_AREA_INFO ",program_id";
+
+static int _report_init(const struct command *cmd, const char *subcommand)
{
char *options = (char *) default_report_options;
+ char *opt_fields = NULL; /* optional fields from command line */
const char *keys = "";
const char *separator = " ";
const char *selection = NULL;
@@ -2855,8 +4065,30 @@ static int _report_init(const struct command *cmd)
size_t len = 0;
int r = 0;
- if (cmd && !strcmp(cmd->name, "splitname"))
+ if (cmd && !strcmp(cmd->name, "splitname")) {
options = (char *) splitname_report_options;
+ _report_type |= DR_NAME;
+ }
+
+ if (cmd && !strcmp(cmd->name, "stats")) {
+ _report_type |= DR_STATS_META;
+ if (!strcmp(subcommand, "list"))
+ options = (char *) ((_switches[VERBOSE_ARG])
+ ? _stats_area_list_options
+ : _stats_list_options);
+ else {
+ options = (char *) ((!_switches[RAW_ARG])
+ ? _stats_default_report_options
+ : _stats_raw_report_options);
+
+ _report_type |= DR_STATS;
+ }
+ }
+
+ if (cmd && !strcmp(cmd->name, "list")) {
+ options = (char *) _stats_list_options;
+ _report_type |= DR_STATS_META;
+ }
/* emulate old dmsetup behaviour */
if (_switches[NOHEADINGS_ARG]) {
@@ -2880,21 +4112,31 @@ static int _report_init(const struct command *cmd)
}
if (_switches[OPTIONS_ARG] && _string_args[OPTIONS_ARG]) {
+ /* Count & interval forbidden for help. */
+ /* FIXME Detect "help" correctly and exit */
+ if (strstr(_string_args[OPTIONS_ARG], "help")) {
+ _switches[COUNT_ARG] = 0;
+ _count = 1;
+ _switches[INTERVAL_ARG] = 0;
+ headings = 0;
+ }
+
if (*_string_args[OPTIONS_ARG] != '+')
options = _string_args[OPTIONS_ARG];
else {
- len = strlen(default_report_options) +
- strlen(_string_args[OPTIONS_ARG]) + 1;
- if (!(options = dm_malloc(len))) {
+ char *tmpopts;
+ opt_fields = _string_args[OPTIONS_ARG] + 1;
+ len = strlen(options) + strlen(opt_fields) + 2;
+ if (!(tmpopts = dm_malloc(len))) {
err("Failed to allocate option string.");
return 0;
}
- if (dm_snprintf(options, len, "%s,%s",
- default_report_options,
- &_string_args[OPTIONS_ARG][1]) < 0) {
- err("snprintf failed");
- goto out;
+ if (dm_snprintf(tmpopts, len, "%s,%s",
+ options, opt_fields) < 0) {
+ dm_free(tmpopts);
+ return 0;
}
+ options = tmpopts;
}
}
@@ -2943,6 +4185,11 @@ static int _report_init(const struct command *cmd)
goto out;
}
+ if (!_switches[INTERVAL_ARG])
+ _int_args[INTERVAL_ARG] = 1; /* 1s default. */
+
+ _interval = NSEC_PER_SEC * (uint64_t) _int_args[INTERVAL_ARG];
+
if (field_prefixes)
dm_report_set_output_field_name_prefix(_report, "dm_");
@@ -2961,12 +4208,12 @@ out:
static int _ls(CMD_ARGS)
{
if ((_switches[TARGET_ARG] && _target) ||
- (_switches[EXEC_ARG] && _command))
- return _status(cmd, argc, argv, NULL, 0);
+ (_switches[EXEC_ARG] && _command_to_exec))
+ return _status(cmd, NULL, argc, argv, NULL, 0);
else if ((_switches[TREE_ARG]))
- return _display_tree(cmd, 0, NULL, NULL, 0);
+ return _display_tree(cmd, NULL, 0, NULL, NULL, 0);
else
- return _process_all(cmd, argc, argv, 0, _display_name);
+ return _process_all(cmd, NULL, argc, argv, 0, _display_name);
}
static int _mangle(CMD_ARGS)
@@ -2981,9 +4228,9 @@ static int _mangle(CMD_ARGS)
if (names)
name = names->name;
else {
- if (argc == 1 && !_switches[UUID_ARG] && !_switches[MAJOR_ARG])
- return _process_all(cmd, argc, argv, 0, _mangle);
- name = argv[1];
+ if (!argc && !_switches[UUID_ARG] && !_switches[MAJOR_ARG])
+ return _process_all(cmd, NULL, argc, argv, 0, _mangle);
+ name = argv[0];
}
if (!(dmt = dm_task_create(DM_DEVICE_STATUS)))
@@ -2995,7 +4242,7 @@ static int _mangle(CMD_ARGS)
if (!_switches[CHECKS_ARG] && !dm_task_enable_checks(dmt))
goto out;
- if (!dm_task_run(dmt))
+ if (!_task_run(dmt))
goto out;
if (!dm_task_get_info(dmt, &info) || !info.exists)
@@ -3060,64 +4307,672 @@ out:
return r;
}
-static int _help(CMD_ARGS);
+static int _stats(CMD_ARGS);
+static int _bind_stats_device(struct dm_stats *dms, const char *name)
+{
+ if (name && !dm_stats_bind_name(dms, name))
+ return 0;
+ else if (_switches[UUID_ARG] && !dm_stats_bind_uuid(dms, _uuid))
+ return 0;
+ else if (_switches[MAJOR_ARG] && _switches[MINOR_ARG]
+ && !dm_stats_bind_devno(dms, _int_args[MAJOR_ARG],
+ _int_args[MINOR_ARG]))
+ return 0;
+
+ return 1;
+}
+
+static int _stats_clear_regions(struct dm_stats *dms, uint64_t region_id)
+{
+ int allregions = (region_id == DM_STATS_REGIONS_ALL);
+
+ if (!dm_stats_list(dms, NULL))
+ goto_out;
+
+ if (!dm_stats_get_nr_regions(dms))
+ goto done;
+
+ dm_stats_walk_do(dms) {
+ if (allregions)
+ region_id = dm_stats_get_current_region(dms);
+
+ if (!dm_stats_region_present(dms, region_id)) {
+ log_error("No such region: %"PRIu64".", region_id);
+ goto out;
+ }
+ if (!dm_stats_clear_region(dms, region_id)) {
+ log_error("Clearing statistics region %"PRIu64" failed.",
+ region_id);
+ goto out;
+ }
+ log_info("Cleared statistics region %"PRIu64".", region_id);
+ dm_stats_walk_next_region(dms);
+ } dm_stats_walk_while(dms);
+done:
+ return 1;
+
+out:
+ return 0;
+}
+
+static int _stats_clear(CMD_ARGS)
+{
+ struct dm_stats *dms;
+ uint64_t region_id;
+ char *name = NULL;
+ int allregions = _switches[ALL_REGIONS_ARG];
+
+ /* clear does not use a report */
+ if (_report) {
+ dm_report_free(_report);
+ _report = NULL;
+ }
+
+ if (!_switches[REGION_ID_ARG] && !_switches[ALL_REGIONS_ARG]) {
+ err("Please specify a --regionid or use --allregions.");
+ return 0;
+ }
+
+ if (names)
+ name = names->name;
+ else {
+ if (!argc && !_switches[UUID_ARG] && !_switches[MAJOR_ARG])
+ return _process_all(cmd, subcommand, argc, argv, 0, _stats_clear);
+ name = argv[0];
+ }
+
+ region_id = (allregions) ? DM_STATS_REGIONS_ALL
+ : (uint64_t) _int_args[REGION_ID_ARG];
+
+ dms = dm_stats_create(DM_STATS_PROGRAM_ID);
+
+ if (!_bind_stats_device(dms, name))
+ goto_out;
+
+ if (!_stats_clear_regions(dms, region_id))
+ goto_out;
+
+ dm_stats_destroy(dms);
+ return 1;
+
+out:
+ dm_stats_destroy(dms);
+ return 0;
+}
+
+static uint64_t _factor_from_units(char *argptr, char *unit_type)
+{
+ return dm_units_to_factor(argptr, unit_type, 0, NULL);
+}
+
/*
 * Parse a start, length, or area size argument in bytes from a string
 * using optional units as supported by _factor_from_units(). A bare
 * number with no suffix is interpreted as a count of 512b sectors.
 * On success *size holds the value in bytes and 1 is returned; on a
 * parse failure *size is zeroed and 0 is returned.
 */
static int _size_from_string(char *argptr, uint64_t *size, const char *name)
{
	char *after_digits = NULL, unit_type;
	uint64_t multiplier;

	if (!argptr)
		return 0;

	*size = strtoull(argptr, &after_digits, 10);
	if (after_digits == argptr) {
		/* No digits were consumed: not a number at all. */
		*size = 0;
		log_error("Invalid %s argument: \"%s\"",
			  name, (*argptr) ? argptr : "");
		return 0;
	}

	/* No suffix: treat the value as 512-byte sectors. */
	if (!*after_digits) {
		*size *= 512;
		return 1;
	}

	/*
	 * NOTE(review): an unrecognised suffix yields a zero multiplier
	 * and is silently ignored (value used unscaled) — confirm this
	 * is intended rather than an error path.
	 */
	multiplier = _factor_from_units(after_digits, &unit_type);
	if (multiplier)
		*size *= multiplier;

	return 1;
}
+
/*
 * FIXME: expose this from libdm-stats
 *
 * Translate an --areas/--areasize step value into the number of areas
 * a region of len sectors will contain: a negative step encodes a
 * direct area count (--areas); a positive step is an area size in
 * sectors (--areasize); zero (or an empty region) means one area.
 */
static uint64_t _nr_areas_from_step(uint64_t len, int64_t step)
{
	uint64_t area_size;

	/* Default is one area. */
	if (!step || !len)
		return 1;

	/* --areas: the count was given directly (negated). */
	if (step < 0)
		return (uint64_t) -step;

	/* --areasize: ceil(len / step) areas of step sectors each. */
	area_size = (uint64_t) step;
	return len / area_size + !!(len % area_size);
}
+
+/*
+ * Create a single region starting at start and spanning len sectors,
+ * or, if the segments argument is no-zero create one region for each
+ * segment present in the mapped device. Passing zero for segments,
+ * start, and length will create a single segment spanning the whole
+ * device.
+ */
+static int _do_stats_create_regions(struct dm_stats *dms,
+ const char *name, uint64_t start,
+ uint64_t len, int64_t step,
+ int segments,
+ const char *program_id,
+ const char *aux_data)
+{
+ uint64_t this_start = 0, this_len = len, region_id = UINT64_C(0);
+ char *target_type, *params; /* unused */
+ struct dm_task *dmt;
+ struct dm_info info;
+ void *next = NULL;
+ const char *devname = NULL;
+ int r = 0;
+
+ if (!(dmt = dm_task_create(DM_DEVICE_TABLE))) {
+ dm_stats_destroy(dms);
+ return 0;
+ }
+
+ if (!_set_task_device(dmt, name, 0))
+ goto out;
+
+ if (!dm_task_no_open_count(dmt))
+ goto out;
+
+ if (_switches[CHECKS_ARG] && !dm_task_enable_checks(dmt))
+ goto out;
+
+ if (!_task_run(dmt))
+ goto out;
+
+ if (!dm_task_get_info(dmt, &info) || !info.exists)
+ goto out;
+
+ if (!(devname = dm_task_get_name(dmt)))
+ goto out;
+
+ do {
+ uint64_t segment_start, segment_len;
+ next = dm_get_next_target(dmt, next, &segment_start, &segment_len,
+ &target_type, &params);
+
+ /* Accumulate whole-device size for nr_areas calculation. */
+ if (!segments && !len)
+ this_len += segment_len;
+
+ /* Segments or whole-device. */
+ if (segments || !next) {
+ /*
+ * this_start and this_len hold the start and length in
+ * sectors of the to-be-created region: this is either the
+ * segment start/len (for --segments), the value of the
+ * --start/--length arguments, or 0/0 for a default
+ * whole-device region).
+ */
+ this_start = (segments) ? segment_start : start;
+ this_len = (segments) ? segment_len : this_len;
+ if (!dm_stats_create_region(dms, &region_id,
+ this_start, this_len, step,
+ program_id, aux_data)) {
+ log_error("%s: Could not create statistics region.",
+ devname);
+ goto out;
+ }
+
+ printf("%s: Created new region with "FMTu64" area(s) as "
+ "region ID "FMTu64"\n", devname,
+ _nr_areas_from_step(this_len, step), region_id);
+ }
+ } while (next);
+ r = 1;
+
+out:
+ dm_task_destroy(dmt);
+ dm_stats_destroy(dms);
+ return r;
+}
+
+static int _stats_create(CMD_ARGS)
+{
+ struct dm_stats *dms;
+ const char *name, *aux_data = "", *program_id = DM_STATS_PROGRAM_ID;
+ uint64_t start = 0, len = 0, areas = 0, area_size = 0;
+ int64_t step = 0;
+
+ /* create does not use a report */
+ if (_report) {
+ dm_report_free(_report);
+ _report = NULL;
+ }
+
+ if (_switches[ALL_REGIONS_ARG]) {
+ log_error("Cannot use --allregions with create.");
+ return 0;
+ }
+
+ if (_switches[ALL_PROGRAMS_ARG]) {
+ log_error("Cannot use --allprograms with create.");
+ return 0;
+ }
+
+ if (_switches[AREAS_ARG] && _switches[AREA_SIZE_ARG]) {
+ log_error("Please specify one of --areas and --areasize.");
+ return 0;
+ }
+
+ if (_switches[PROGRAM_ID_ARG]
+ && !strlen(_string_args[PROGRAM_ID_ARG]) && !_switches[FORCE_ARG]) {
+ log_error("Creating a region with no program "
+ "id requires --force.");
+ return 0;
+ }
+
+ if (names)
+ name = names->name;
+ else {
+ if (!argc && !_switches[UUID_ARG] && !_switches[MAJOR_ARG]) {
+ if (!_switches[ALL_DEVICES_ARG]) {
+ log_error("Please specify device(s) or use "
+ "--alldevices.");
+ return 0;
+ }
+ return _process_all(cmd, subcommand, argc, argv, 0, _stats_create);
+ }
+ name = argv[0];
+ }
+
+ if (_switches[AREAS_ARG])
+ areas = (uint64_t) _int_args[AREAS_ARG];
+
+ if (_switches[AREA_SIZE_ARG])
+ if (!_size_from_string(_string_args[AREA_SIZE_ARG],
+ &area_size, "areasize"))
+ return 0;
+
+ areas = (areas) ? areas : 1;
+ /* bytes to sectors or -(areas): promote to signed before conversion */
+ step = (area_size) ? ((int64_t) area_size / 512) : -((int64_t) areas);
+
+ if (_switches[START_ARG]) {
+ if (!_size_from_string(_string_args[START_ARG],
+ &start, "start"))
+ return 0;
+ }
+
+ /* bytes to sectors */
+ start /= 512;
+
+ if (_switches[LENGTH_ARG]) {
+ if (!_size_from_string(_string_args[LENGTH_ARG],
+ &len, "length"))
+ return 0;
+ }
+
+ /* bytes to sectors */
+ len /= 512;
+
+ if (_switches[PROGRAM_ID_ARG])
+ program_id = _string_args[PROGRAM_ID_ARG];
+ if (!strlen(program_id) && !_switches[FORCE_ARG])
+ program_id = DM_STATS_PROGRAM_ID;
+
+ if (_switches[AUX_DATA_ARG])
+ aux_data = _string_args[AUX_DATA_ARG];
+
+ dms = dm_stats_create(DM_STATS_PROGRAM_ID);
+ if (!_bind_stats_device(dms, name))
+ goto_out;
+
+ if (!strlen(program_id))
+ /* force creation of a region with no id */
+ dm_stats_set_program_id(dms, 1, NULL);
+
+ return _do_stats_create_regions(dms, name, start, len, step,
+ _switches[SEGMENTS_ARG],
+ program_id, aux_data);
+
+out:
+ dm_stats_destroy(dms);
+ return 0;
+}
+
+static int _stats_delete(CMD_ARGS)
+{
+ struct dm_stats *dms;
+ uint64_t region_id;
+ char *name = NULL;
+ const char *program_id = DM_STATS_PROGRAM_ID;
+ int allregions = _switches[ALL_REGIONS_ARG];
+
+ /* delete does not use a report */
+ if (_report) {
+ dm_report_free(_report);
+ _report = NULL;
+ }
+
+ if (!_switches[REGION_ID_ARG] && !allregions) {
+ err("Please specify a --regionid or use --allregions.");
+ return 0;
+ }
+
+ if (names)
+ name = names->name;
+ else {
+ if (!argc && !_switches[UUID_ARG] && !_switches[MAJOR_ARG]) {
+ if (!_switches[ALL_DEVICES_ARG]) {
+ log_error("Please specify device(s) or use "
+ "--alldevices.");
+ return 0;
+ }
+ return _process_all(cmd, subcommand, argc, argv, 0, _stats_delete);
+ }
+ name = argv[0];
+ }
+
+ if (_switches[ALL_PROGRAMS_ARG])
+ program_id = DM_STATS_ALL_PROGRAMS;
+
+ region_id = (uint64_t) _int_args[REGION_ID_ARG];
+
+ dms = dm_stats_create(program_id);
+
+ if (!_bind_stats_device(dms, name))
+ goto_out;
+
+ if (allregions && !dm_stats_list(dms, program_id))
+ goto_out;
+
+ if (allregions && !dm_stats_get_nr_regions(dms))
+ /* no regions present */
+ goto done;
+
+ dm_stats_walk_do(dms) {
+ if (_switches[ALL_REGIONS_ARG])
+ region_id = dm_stats_get_current_region(dms);
+ if (!dm_stats_delete_region(dms, region_id)) {
+ log_error("Could not delete statistics region.");
+ goto out;
+ }
+ log_info("Deleted statistics region %" PRIu64, region_id);
+ dm_stats_walk_next_region(dms);
+ } dm_stats_walk_while(dms);
+
+done:
+ dm_stats_destroy(dms);
+ return 1;
+
+out:
+ dm_stats_destroy(dms);
+ return 0;
+}
+
+static int _stats_print(CMD_ARGS)
+{
+ struct dm_stats *dms;
+ char *name, *stbuff = NULL;
+ uint64_t region_id;
+ unsigned clear = (unsigned) _switches[CLEAR_ARG];
+ int allregions = _switches[ALL_REGIONS_ARG];
+
+ /* print does not use a report */
+ if (_report) {
+ dm_report_free(_report);
+ _report = NULL;
+ }
+
+ if (!_switches[REGION_ID_ARG] && !allregions) {
+ err("Please specify a --regionid or use --allregions.");
+ return 0;
+ }
+
+ if (names)
+ name = names->name;
+ else {
+ if (!argc && !_switches[UUID_ARG] && !_switches[MAJOR_ARG])
+ return _process_all(cmd, subcommand, argc, argv, 0, _stats_print);
+ name = argv[0];
+ }
+
+ region_id = (uint64_t) _int_args[REGION_ID_ARG];
+
+ dms = dm_stats_create(DM_STATS_PROGRAM_ID);
+
+ if (!_bind_stats_device(dms, name))
+ goto_out;
+
+ if (!dm_stats_list(dms, NULL))
+ goto_out;
+
+ if (allregions && !dm_stats_get_nr_regions(dms))
+ goto done;
+
+ dm_stats_walk_do(dms) {
+ if (_switches[ALL_REGIONS_ARG])
+ region_id = dm_stats_get_current_region(dms);
+
+ if (!dm_stats_region_present(dms, region_id)) {
+ log_error("No such region: %"PRIu64".", region_id);
+ goto out;
+ }
+
+ /*FIXME: line control for large regions */
+ if (!(stbuff = dm_stats_print_region(dms, region_id, 0, 0, clear))) {
+ log_error("Could not print statistics region.");
+ goto out;
+ }
+
+ printf("%s", stbuff);
+
+ dm_stats_buffer_destroy(dms, stbuff);
+ dm_stats_walk_next_region(dms);
+
+ } dm_stats_walk_while(dms);
+
+done:
+ dm_stats_destroy(dms);
+ return 1;
+
+out:
+ dm_stats_destroy(dms);
+ return 0;
+}
+
+static int _stats_report(CMD_ARGS)
+{
+ int r = 0;
+
+ struct dm_task *dmt;
+ char *name = NULL;
+
+ if (_switches[PROGRAM_ID_ARG])
+ _program_id = _string_args[PROGRAM_ID_ARG];
+
+ if (_switches[ALL_PROGRAMS_ARG])
+ _program_id = "";
+
+ if (!_switches[VERBOSE_ARG] && !strcmp(subcommand, "list"))
+ _stats_report_by_areas = 0;
+
+ if (names)
+ name = names->name;
+ else {
+ if (!argc && !_switches[UUID_ARG] && !_switches[MAJOR_ARG])
+ return _process_all(cmd, subcommand, argc, argv, 0, _info);
+ name = argv[0];
+ }
+
+ if (!(dmt = dm_task_create(DM_DEVICE_INFO)))
+ return 0;
+
+ if (!_set_task_device(dmt, name, 0))
+ goto out;
+
+ if (_switches[CHECKS_ARG] && !dm_task_enable_checks(dmt))
+ goto out;
+
+ if (!_task_run(dmt))
+ goto out;
+
+ r = _display_info(dmt);
+
+ out:
+ dm_task_destroy(dmt);
+ if (!r && _report) {
+ dm_report_free(_report);
+ _report = NULL;
+ }
+ return r;
+}
+
+/*
+ * Command dispatch tables and usage.
+ */
+static int _stats_help(CMD_ARGS);
/*
- * Dispatch table
+ * dmsetup stats <cmd> [options] [device_name]
+ * dmstats <cmd> [options] [device_name]
+ *
+ * clear [--regionid id] <device_name>
+ * create [--areas nr_areas] [--areasize size]
+ * [ [--start start] [--length len] | [--segments]]
+ * [--auxdata data] [--programid id] [<device_name>]
+ * delete [--regionid] <device_name>
+ * delete_all [--programid id]
+ * list [--programid id] [<device_name>]
+ * print [--clear] [--programid id] [--regionid id] [<device_name>]
+ * report [--interval seconds] [--count count] [--units units] [--regionid id]
+ * [--programid id] [<device>]
*/
-static struct command _commands[] = {
- {"help", "[-c|-C|--columns]", 0, 0, 0, _help},
- {"create", "<dev_name> [-j|--major <major> -m|--minor <minor>]\n"
- "\t [-U|--uid <uid>] [-G|--gid <gid>] [-M|--mode <octal_mode>]\n"
- "\t [-u|uuid <uuid>] [{--addnodeonresume|--addnodeoncreate}]\n"
- "\t [--notable | --table <table> | <table_file>]",
- 1, 2,0, _create},
- {"remove", "[-f|--force] [--deferred] <device>", 0, -1, 1, _remove},
- {"remove_all", "[-f|--force]", 0, 0, 0, _remove_all},
- {"suspend", "[--noflush] <device>", 0, -1, 1, _suspend},
- {"resume", "<device> [{--addnodeonresume|--addnodeoncreate}]", 0, -1, 1, _resume},
- {"load", "<device> [<table_file>]", 0, 2, 0, _load},
- {"clear", "<device>", 0, -1, 1, _clear},
- {"reload", "<device> [<table_file>]", 0, 2, 0, _load},
- {"wipe_table", "<device>", 0, -1, 1, _error_device},
- {"rename", "<device> [--setuuid] <new_name_or_uuid>", 1, 2, 0, _rename},
- {"message", "<device> <sector> <message>", 2, -1, 0, _message},
- {"ls", "[--target <target_type>] [--exec <command>] [-o options] [--tree]", 0, 0, 0, _ls},
- {"info", "[<device>]", 0, -1, 1, _info},
- {"deps", "[-o options] [<device>]", 0, -1, 1, _deps},
- {"status", "[<device>] [--noflush] [--target <target_type>]", 0, -1, 1, _status},
- {"table", "[<device>] [--target <target_type>] [--showkeys]", 0, -1, 1, _status},
- {"wait", "<device> [<event_nr>] [--noflush]", 0, 2, 0, _wait},
- {"mknodes", "[<device>]", 0, -1, 1, _mknodes},
- {"mangle", "[<device>]", 0, -1, 1, _mangle},
- {"udevcreatecookie", "", 0, 0, 0, _udevcreatecookie},
- {"udevreleasecookie", "[<cookie>]", 0, 1, 0, _udevreleasecookie},
- {"udevflags", "<cookie>", 1, 1, 0, _udevflags},
- {"udevcomplete", "<cookie>", 1, 1, 0, _udevcomplete},
- {"udevcomplete_all", "<age_in_minutes>", 0, 1, 0, _udevcomplete_all},
- {"udevcookies", "", 0, 0, 0, _udevcookies},
- {"targets", "", 0, 0, 0, _targets},
- {"version", "", 0, 0, 0, _version},
- {"setgeometry", "<device> <cyl> <head> <sect> <start>", 5, 5, 0, _setgeometry},
- {"splitname", "<device> [<subsystem>]", 1, 2, 0, _splitname},
- {NULL, NULL, 0, 0, 0, NULL}
+
+#define AREA_OPTS "[--areas <nr_areas>] [--areasize <size>] "
+#define CREATE_OPTS "[--start <start> [--length <len>]]\n\t\t" AREA_OPTS
+#define ID_OPTS "[--programid <id>] [--auxdata <data> ] "
+#define SELECT_OPTS "[--programid <id>] [--regionid <id>] "
+#define PRINT_OPTS "[--clear] " SELECT_OPTS
+#define REPORT_OPTS "[--interval <seconds>] [--count <cnt>]\n\t\t[--units <u>]" SELECT_OPTS
+
+static struct command _stats_subcommands[] = {
+ {"help", "", 0, 0, 0, 0, _stats_help},
+ {"clear", "--regionid <id> [<device>]", 0, -1, 1, 0, _stats_clear},
+ {"create", CREATE_OPTS "\n\t\t" ID_OPTS "[<device>]", 0, -1, 1, 0, _stats_create},
+ {"delete", "--regionid <id> <device>", 1, -1, 1, 0, _stats_delete},
+ {"list", "[--programid <id>] [<device>]", 0, -1, 1, 0, _stats_report},
+ {"print", PRINT_OPTS "[<device>]", 0, -1, 1, 0, _stats_print},
+ {"report", REPORT_OPTS "[<device>]", 0, -1, 1, 0, _stats_report},
+ {"version", "", 0, -1, 1, 0, _version},
+ {NULL, NULL, 0, 0, 0, 0, NULL}
};
-static void _usage(FILE *out)
+#undef AREA_OPTS
+#undef CREATE_OPTS
+#undef ID_OPTS
+#undef PRINT_OPTS
+#undef REPORT_OPTS
+#undef SELECT_OPTS
+
+static int _dmsetup_help(CMD_ARGS);
+
+static struct command _dmsetup_commands[] = {
+ {"help", "[-c|-C|--columns]", 0, 0, 0, 0, _dmsetup_help},
+ {"create", "<dev_name>\n"
+ "\t [-j|--major <major> -m|--minor <minor>]\n"
+ "\t [-U|--uid <uid>] [-G|--gid <gid>] [-M|--mode <octal_mode>]\n"
+ "\t [-u|uuid <uuid>] [{--addnodeonresume|--addnodeoncreate}]\n"
+ "\t [--notable | --table <table> | <table_file>]", 1, 2, 0, 0, _create},
+ {"remove", "[-f|--force] [--deferred] <device>", 0, -1, 1, 0, _remove},
+ {"remove_all", "[-f|--force]", 0, 0, 0, 0, _remove_all},
+ {"suspend", "[--noflush] <device>", 0, -1, 1, 0, _suspend},
+ {"resume", "<device> [{--addnodeonresume|--addnodeoncreate}]", 0, -1, 1, 0, _resume},
+ {"load", "<device> [<table_file>]", 0, 2, 0, 0, _load},
+ {"clear", "<device>", 0, -1, 1, 0, _clear},
+ {"reload", "<device> [<table_file>]", 0, 2, 0, 0, _load},
+ {"wipe_table", "<device>", 1, -1, 1, 0, _error_device},
+ {"rename", "<device> [--setuuid] <new_name_or_uuid>", 1, 2, 0, 0, _rename},
+ {"message", "<device> <sector> <message>", 2, -1, 0, 0, _message},
+ {"ls", "[--target <target_type>] [--exec <command>] [-o options] [--tree]", 0, 0, 0, 0, _ls},
+ {"info", "[<device>]", 0, -1, 1, 0, _info},
+ {"deps", "[-o options] [<device>]", 0, -1, 1, 0, _deps},
+ {"stats", "<command> [<options>] [<devices>]", 1, -1, 1, 1, _stats},
+ {"status", "[<device>] [--noflush] [--target <target_type>]", 0, -1, 1, 0, _status},
+ {"table", "[<device>] [--target <target_type>] [--showkeys]", 0, -1, 1, 0, _status},
+ {"wait", "<device> [<event_nr>] [--noflush]", 0, 2, 0, 0, _wait},
+ {"mknodes", "[<device>]", 0, -1, 1, 0, _mknodes},
+ {"mangle", "[<device>]", 0, -1, 1, 0, _mangle},
+ {"udevcreatecookie", "", 0, 0, 0, 0, _udevcreatecookie},
+ {"udevreleasecookie", "[<cookie>]", 0, 1, 0, 0, _udevreleasecookie},
+ {"udevflags", "<cookie>", 1, 1, 0, 0, _udevflags},
+ {"udevcomplete", "<cookie>", 1, 1, 0, 0, _udevcomplete},
+ {"udevcomplete_all", "<age_in_minutes>", 0, 1, 0, 0, _udevcomplete_all},
+ {"udevcookies", "", 0, 0, 0, 0, _udevcookies},
+ {"targets", "", 0, 0, 0, 0, _targets},
+ {"version", "", 0, 0, 0, 0, _version},
+ {"setgeometry", "<device> <cyl> <head> <sect> <start>", 5, 5, 0, 0, _setgeometry},
+ {"splitname", "<device> [<subsystem>]", 1, 2, 0, 0, _splitname},
+ {NULL, NULL, 0, 0, 0, 0, NULL}
+};
+
+/*
+ * Usage and help text.
+ */
+
+static void _devmap_name_usage(FILE *out)
+{
+ fprintf(out, "Usage: " DEVMAP_NAME_CMD_NAME " <major> <minor>\n\n");
+}
+
+static void _stats_usage(FILE *out)
{
int i;
fprintf(out, "Usage:\n\n");
- fprintf(out, "dmsetup [--version] [-h|--help [-c|-C|--columns]]\n"
- " [--checks] [--manglename <mangling_mode>] [-v|--verbose [-v|--verbose ...]]\n"
+ fprintf(out, "%s\n", _base_commands[_base_command].name);
+ fprintf(out, " [-h|--help]\n");
+ fprintf(out, " [-v|--verbose [-v|--verbose ...]]\n");
+ fprintf(out, " [--areas <nr_areas>] [--areasize <size>]\n");
+ fprintf(out, " [--auxdata <data>] [--clear]\n");
+ fprintf(out, " [--count <count>] [--interval <seconds>]\n");
+ fprintf(out, " [-o <fields>] [-O|--sort <sort_fields>]\n");
+ fprintf(out, " [--programid <id>]\n");
+ fprintf(out, " [--start <start>] [--length <length>]\n");
+ fprintf(out, " [--segments] [--units <units>]\n\n");
+
+ for (i = 0; _stats_subcommands[i].name; i++)
+ fprintf(out, "\t%s %s\n", _stats_subcommands[i].name, _stats_subcommands[i].help);
+
+ fprintf(out, "<device> may be device name or -u <uuid> or "
+ "-j <major> -m <minor>\n");
+ fprintf(out, "<fields> are comma-separated. Use 'help -c' for list.\n");
+ fprintf(out, "\n");
+}
+
+static void _dmsetup_usage(FILE *out)
+{
+ int i;
+
+ fprintf(out, "Usage:\n\n");
+ fprintf(out, "%s\n"
+ " [--version] [-h|--help [-c|-C|--columns]]\n"
+ " [-v|--verbose [-v|--verbose ...]]\n"
+ " [--checks] [--manglename <mangling_mode>]\n"
" [-r|--readonly] [--noopencount] [--nolockfs] [--inactive]\n"
" [--udevcookie [cookie]] [--noudevrules] [--noudevsync] [--verifyudev]\n"
" [-y|--yes] [--readahead [+]<sectors>|auto|none] [--retry]\n"
" [-c|-C|--columns] [-o <fields>] [-O|--sort <sort_fields>]\n"
" [-S|--select <selection>] [--nameprefixes] [--noheadings]\n"
- " [--separator <separator>]\n\n");
- for (i = 0; _commands[i].name; i++)
- fprintf(out, "\t%s %s\n", _commands[i].name, _commands[i].help);
+ " [--separator <separator>]\n\n",
+ _base_commands[_base_command].name);
+
+ for (i = 0; _dmsetup_commands[i].name; i++)
+ fprintf(out, "\t%s %s\n", _dmsetup_commands[i].name, _dmsetup_commands[i].help);
+
fprintf(out, "\n<device> may be device name or -u <uuid> or "
"-j <major> -m <minor>\n");
fprintf(out, "<mangling_mode> is one of 'none', 'auto' and 'hex'.\n");
@@ -3132,11 +4987,50 @@ static void _usage(FILE *out)
/* Usage summary for the losetup compatibility binary. */
static void _losetup_usage(FILE *out)
{
	const char *cmd_name = _base_commands[_base_command].name;

	fprintf(out, "Usage:\n\n");
	fprintf(out,
		"%s [-d|-a] [-e encryption] "
		"[-o offset] [-f|loop_device] [file]\n\n",
		cmd_name);
}
+
+static void _usage(FILE *out)
+{
+ switch (_base_commands[_base_command].type) {
+ case DMSETUP_TYPE:
+ return _dmsetup_usage(out);
+ case LOSETUP_TYPE:
+ return _losetup_usage(out);
+ case STATS_TYPE:
+ return _stats_usage(out);
+ case DEVMAP_NAME_TYPE:
+ return _devmap_name_usage(out);
+ }
}
-static int _help(CMD_ARGS)
+static int _stats_help(CMD_ARGS)
+{
+ _usage(stderr);
+
+ if (_switches[COLS_ARG] || (argc && !strcmp(argv[0], "report"))) {
+ _switches[OPTIONS_ARG] = 1;
+ _string_args[OPTIONS_ARG] = (char *) "help";
+ _switches[SORT_ARG] = 0;
+
+ if (_report) {
+ dm_report_free(_report);
+ _report = NULL;
+ }
+
+ (void) _report_init(cmd, "help");
+ if (_report) {
+ dm_report_free(_report);
+ _report = NULL;
+ }
+ }
+
+ return 1;
+}
+
+static int _dmsetup_help(CMD_ARGS)
{
_usage(stderr);
@@ -3149,23 +5043,70 @@ static int _help(CMD_ARGS)
dm_report_free(_report);
_report = NULL;
}
- (void) _report_init(cmd);
+ (void) _report_init(cmd, "");
+ if (_report) {
+ dm_report_free(_report);
+ _report = NULL;
+ }
}
return 1;
}
-static struct command *_find_command(const char *name)
+static const struct command *_find_command(const struct command *commands,
+ const char *name)
{
int i;
- for (i = 0; _commands[i].name; i++)
- if (!strcmp(_commands[i].name, name))
- return _commands + i;
+ for (i = 0; commands[i].name; i++)
+ if (!strcmp(commands[i].name, name))
+ return commands + i;
return NULL;
}
+static const struct command *_find_dmsetup_command(const char *name)
+{
+ return _find_command(_dmsetup_commands, name);
+}
+
+static const struct command *_find_stats_subcommand(const char *name)
+{
+ return _find_command(_stats_subcommands, name);
+}
+
+static int _stats(CMD_ARGS)
+{
+ const struct command *stats_cmd;
+
+ if (!(stats_cmd = _find_stats_subcommand(subcommand))) {
+ log_error("Unknown stats command.");
+ _stats_help(stats_cmd, NULL, argc, argv, NULL, multiple_devices);
+ return 0;
+ }
+
+ if (_switches[ALL_PROGRAMS_ARG] && _switches[PROGRAM_ID_ARG]) {
+ log_error("Please supply one of --allprograms and --programid");
+ return 0;
+ }
+
+ if (_switches[ALL_REGIONS_ARG] && _switches[REGION_ID_ARG]) {
+ log_error("Please supply one of --allregions and --regionid");
+ return 0;
+ }
+
+ /*
+ * Pass the sub-command through to allow a single function to be
+ * used to implement several distinct sub-commands (e.g. 'report'
+ * and 'list' share a single implementation.
+ */
+ if (!stats_cmd->fn(stats_cmd, subcommand, argc, argv, NULL,
+ multiple_devices))
+ return 0;
+
+ return 1;
+}
+
static int _process_tree_options(const char *options)
{
const char *s, *end;
@@ -3325,7 +5266,7 @@ static int _loop_table(char *table, size_t tlen, char *file,
sectors = size >> SECTOR_SHIFT;
if (_switches[VERBOSE_ARG])
- fprintf(stderr, "losetup: set loop size to %llukB "
+ fprintf(stderr, LOSETUP_CMD_NAME ": set loop size to %llukB "
"(%llu sectors)\n", (long long unsigned) sectors >> 1,
(long long unsigned) sectors);
@@ -3338,7 +5279,7 @@ static int _loop_table(char *table, size_t tlen, char *file,
#endif
if (close(fd))
- log_sys_error("close", file);
+ log_sys_error("close", file);
if (dm_snprintf(table, tlen, "%llu %llu loop %s %llu\n", 0ULL,
(long long unsigned)sectors, file, (long long unsigned)off) < 0)
@@ -3356,7 +5297,7 @@ error:
return 0;
}
-static int _process_losetup_switches(const char *base, int *argc, char ***argv,
+static int _process_losetup_switches(const char *base, int *argcp, char ***argvp,
const char *dev_dir)
{
int c;
@@ -3373,7 +5314,7 @@ static int _process_losetup_switches(const char *base, int *argc, char ***argv,
optarg = 0;
optind = OPTIND_INIT;
- while ((c = GETOPTLONG_FN(*argc, *argv, "ade:fo:v",
+ while ((c = GETOPTLONG_FN(*argcp, *argvp, "ade:fo:v",
long_options, NULL)) != -1 ) {
if (c == ':' || c == '?')
return 0;
@@ -3391,8 +5332,8 @@ static int _process_losetup_switches(const char *base, int *argc, char ***argv,
_switches[VERBOSE_ARG]++;
}
- *argv += optind ;
- *argc -= optind ;
+ *argvp += optind ;
+ *argcp -= optind ;
if (encrypt_loop){
fprintf(stderr, "%s: Sorry, cryptoloop is not yet implemented "
@@ -3409,44 +5350,44 @@ static int _process_losetup_switches(const char *base, int *argc, char ***argv,
if (find) {
fprintf(stderr, "%s: Sorry, find is not yet implemented "
"in this version.\n", base);
- if (!*argc)
+ if (!*argcp)
return 0;
}
- if (!*argc) {
+ if (!*argcp) {
fprintf(stderr, "%s: Please specify loop_device.\n", base);
- _losetup_usage(stderr);
+ _usage(stderr);
return 0;
}
- if (!(device_name = parse_loop_device_name((*argv)[0], dev_dir))) {
+ if (!(device_name = parse_loop_device_name((*argvp)[0], dev_dir))) {
fprintf(stderr, "%s: Could not parse loop_device %s\n",
- base, (*argv)[0]);
- _losetup_usage(stderr);
+ base, (*argvp)[0]);
+ _usage(stderr);
return 0;
}
if (delete) {
- *argc = 2;
+ *argcp = 1;
- (*argv)[1] = device_name;
- (*argv)[0] = (char *) "remove";
+ (*argvp)[0] = device_name;
+ _command = "remove";
return 1;
}
- if (*argc != 2) {
+ if (*argcp != 2) {
fprintf(stderr, "%s: Too few arguments\n", base);
- _losetup_usage(stderr);
+ _usage(stderr);
dm_free(device_name);
return 0;
}
/* FIXME move these to make them available to native dmsetup */
- if (!(loop_file = _get_abspath((*argv)[(find) ? 0 : 1]))) {
+ if (!(loop_file = _get_abspath((*argvp)[(find) ? 0 : 1]))) {
fprintf(stderr, "%s: Could not parse loop file name %s\n",
- base, (*argv)[1]);
- _losetup_usage(stderr);
+ base, (*argvp)[1]);
+ _usage(stderr);
dm_free(device_name);
return 0;
}
@@ -3454,14 +5395,15 @@ static int _process_losetup_switches(const char *base, int *argc, char ***argv,
_table = dm_malloc(LOOP_TABLE_SIZE);
if (!_table ||
!_loop_table(_table, (size_t) LOOP_TABLE_SIZE, loop_file, device_name, offset)) {
- fprintf(stderr, "Could not build device-mapper table for %s\n", (*argv)[0]);
+ fprintf(stderr, "Could not build device-mapper table for %s\n", (*argvp)[0]);
dm_free(device_name);
return 0;
}
_switches[TABLE_ARG]++;
- (*argv)[0] = (char *) "create";
- (*argv)[1] = device_name ;
+ _command = "create";
+ (*argvp)[0] = device_name ;
+ *argcp = 1;
return 1;
}
@@ -3510,18 +5452,26 @@ static int _process_options(const char *options)
return 1;
}
-static int _process_switches(int *argc, char ***argv, const char *dev_dir)
+static int _process_switches(int *argcp, char ***argvp, const char *dev_dir)
{
const char *base;
char *namebase, *s;
static int ind;
- int c, r;
+ int c, r, i;
#ifdef HAVE_GETOPTLONG
static struct option long_options[] = {
{"readonly", 0, &ind, READ_ONLY},
+ {"alldevices", 0, &ind, ALL_DEVICES_ARG},
+ {"allprograms", 0, &ind, ALL_PROGRAMS_ARG},
+ {"allregions", 0, &ind, ALL_REGIONS_ARG},
+ {"areas", 1, &ind, AREAS_ARG},
+ {"areasize", 1, &ind, AREA_SIZE_ARG},
+ {"auxdata", 1, &ind, AUX_DATA_ARG},
{"checks", 0, &ind, CHECKS_ARG},
+ {"clear", 0, &ind, CLEAR_ARG},
{"columns", 0, &ind, COLS_ARG},
+ {"count", 1, &ind, COUNT_ARG},
{"deferred", 0, &ind, DEFERRED_ARG},
{"select", 1, &ind, SELECT_ARG},
{"exec", 1, &ind, EXEC_ARG},
@@ -3529,6 +5479,8 @@ static int _process_switches(int *argc, char ***argv, const char *dev_dir)
{"gid", 1, &ind, GID_ARG},
{"help", 0, &ind, HELP_ARG},
{"inactive", 0, &ind, INACTIVE_ARG},
+ {"interval", 1, &ind, INTERVAL_ARG},
+ {"length", 1, &ind, LENGTH_ARG},
{"manglename", 1, &ind, MANGLENAME_ARG},
{"major", 1, &ind, MAJOR_ARG},
{"minor", 1, &ind, MINOR_ARG},
@@ -3538,22 +5490,29 @@ static int _process_switches(int *argc, char ***argv, const char *dev_dir)
{"noheadings", 0, &ind, NOHEADINGS_ARG},
{"nolockfs", 0, &ind, NOLOCKFS_ARG},
{"noopencount", 0, &ind, NOOPENCOUNT_ARG},
+ {"nosuffix", 0, &ind, NOSUFFIX_ARG},
{"notable", 0, &ind, NOTABLE_ARG},
{"udevcookie", 1, &ind, UDEVCOOKIE_ARG},
{"noudevrules", 0, &ind, NOUDEVRULES_ARG},
{"noudevsync", 0, &ind, NOUDEVSYNC_ARG},
{"options", 1, &ind, OPTIONS_ARG},
+ {"programid", 1, &ind, PROGRAM_ID_ARG},
+ {"raw", 0, &ind, RAW_ARG},
{"readahead", 1, &ind, READAHEAD_ARG},
+ {"regionid", 1, &ind, REGION_ID_ARG},
{"retry", 0, &ind, RETRY_ARG},
{"rows", 0, &ind, ROWS_ARG},
+ {"segments", 0, &ind, SEGMENTS_ARG},
{"separator", 1, &ind, SEPARATOR_ARG},
{"setuuid", 0, &ind, SETUUID_ARG},
{"showkeys", 0, &ind, SHOWKEYS_ARG},
{"sort", 1, &ind, SORT_ARG},
+ {"start", 1, &ind, START_ARG},
{"table", 1, &ind, TABLE_ARG},
{"target", 1, &ind, TARGET_ARG},
{"tree", 0, &ind, TREE_ARG},
{"uid", 1, &ind, UID_ARG},
+ {"units", 1, &ind, UNITS_ARG},
{"uuid", 1, &ind, UUID_ARG},
{"unbuffered", 0, &ind, UNBUFFERED_ARG},
{"unquoted", 0, &ind, UNQUOTED_ARG},
@@ -3576,14 +5535,25 @@ static int _process_switches(int *argc, char ***argv, const char *dev_dir)
memset(&_int_args, 0, sizeof(_int_args));
_read_ahead_flags = 0;
- if (!(namebase = strdup((*argv)[0]))) {
+ if (!(namebase = strdup((*argvp)[0]))) {
fprintf(stderr, "Failed to duplicate name.\n");
return 0;
}
+
base = dm_basename(namebase);
- if (!strcmp(base, "devmap_name")) {
- free(namebase);
+ i = 0;
+ do {
+ if (!strcmp(base, _base_commands[i].name)) {
+ _base_command = _base_commands[i].command;
+ _base_command_type = _base_commands[i].type;
+ break;
+ }
+ } while (++i < _num_base_commands);
+
+ free(namebase);
+
+ if (_base_command_type == DEVMAP_NAME_TYPE) {
_switches[COLS_ARG]++;
_switches[NOHEADINGS_ARG]++;
_switches[OPTIONS_ARG]++;
@@ -3591,48 +5561,72 @@ static int _process_switches(int *argc, char ***argv, const char *dev_dir)
_switches[MINOR_ARG]++;
_string_args[OPTIONS_ARG] = (char *) "name";
- if (*argc == 3) {
- _int_args[MAJOR_ARG] = atoi((*argv)[1]);
- _int_args[MINOR_ARG] = atoi((*argv)[2]);
- *argc -= 2;
- *argv += 2;
- } else if ((*argc == 2) &&
- (2 == sscanf((*argv)[1], "%i:%i",
+ if (*argcp == 3) {
+ _int_args[MAJOR_ARG] = atoi((*argvp)[1]);
+ _int_args[MINOR_ARG] = atoi((*argvp)[2]);
+ *argcp -= 2;
+ *argvp += 2;
+ } else if ((*argcp == 2) &&
+ (2 == sscanf((*argvp)[1], "%i:%i",
&_int_args[MAJOR_ARG],
&_int_args[MINOR_ARG]))) {
- *argc -= 1;
- *argv += 1;
+ *argcp -= 1;
+ *argvp += 1;
} else {
- fprintf(stderr, "Usage: devmap_name <major> <minor>\n");
+ _usage(stderr);
return 0;
}
- (*argv)[0] = (char *) "info";
+ _command = "info";
+ (*argvp)++;
+ (*argcp)--;
+
return 1;
}
- if (!strcmp(base, "losetup") || !strcmp(base, "dmlosetup")){
- r = _process_losetup_switches(base, argc, argv, dev_dir);
- free(namebase);
+ if (_base_command_type == LOSETUP_TYPE) {
+ r = _process_losetup_switches(_base_commands[_base_command].name, argcp, argvp, dev_dir);
return r;
}
- free(namebase);
-
optarg = 0;
optind = OPTIND_INIT;
- while ((ind = -1, c = GETOPTLONG_FN(*argc, *argv, "cCfG:hj:m:M:no:O:rS:u:U:vy",
+ while ((ind = -1, c = GETOPTLONG_FN(*argcp, *argvp, "cCfG:hj:m:M:no:O:rS:u:U:vy",
long_options, NULL)) != -1) {
+ if (ind == ALL_DEVICES_ARG)
+ _switches[ALL_DEVICES_ARG]++;
+ if (ind == ALL_PROGRAMS_ARG)
+ _switches[ALL_PROGRAMS_ARG]++;
+ if (ind == ALL_REGIONS_ARG)
+ _switches[ALL_REGIONS_ARG]++;
+ if (ind == AREAS_ARG) {
+ _switches[AREAS_ARG]++;
+ _int_args[AREAS_ARG] = atoi(optarg);
+ }
+ if (ind == AREA_SIZE_ARG) {
+ _switches[AREA_SIZE_ARG]++;
+ _string_args[AREA_SIZE_ARG] = optarg;
+ }
+ if (ind == AUX_DATA_ARG) {
+ _switches[AUX_DATA_ARG]++;
+ _string_args[AUX_DATA_ARG] = optarg;
+ }
if (c == ':' || c == '?')
return 0;
if (c == 'h' || ind == HELP_ARG)
_switches[HELP_ARG]++;
+ if (ind == CLEAR_ARG)
+ _switches[CLEAR_ARG]++;
if (c == 'c' || c == 'C' || ind == COLS_ARG)
_switches[COLS_ARG]++;
if (c == 'f' || ind == FORCE_ARG)
_switches[FORCE_ARG]++;
if (c == 'r' || ind == READ_ONLY)
_switches[READ_ONLY]++;
+ if (ind == LENGTH_ARG) {
+ _switches[LENGTH_ARG]++;
+ _string_args[LENGTH_ARG] = optarg;
+ }
if (c == 'j' || ind == MAJOR_ARG) {
_switches[MAJOR_ARG]++;
_int_args[MAJOR_ARG] = atoi(optarg);
@@ -3641,16 +5635,32 @@ static int _process_switches(int *argc, char ***argv, const char *dev_dir)
_switches[MINOR_ARG]++;
_int_args[MINOR_ARG] = atoi(optarg);
}
+ if (ind == NOSUFFIX_ARG)
+ _switches[NOSUFFIX_ARG]++;
if (c == 'n' || ind == NOTABLE_ARG)
_switches[NOTABLE_ARG]++;
if (c == 'o' || ind == OPTIONS_ARG) {
_switches[OPTIONS_ARG]++;
_string_args[OPTIONS_ARG] = optarg;
}
+ if (ind == PROGRAM_ID_ARG) {
+ _switches[PROGRAM_ID_ARG]++;
+ _string_args[PROGRAM_ID_ARG] = optarg;
+ }
+ if (ind == RAW_ARG)
+ _switches[RAW_ARG]++;
+ if (ind == REGION_ID_ARG) {
+ _switches[REGION_ID_ARG]++;
+ _int_args[REGION_ID_ARG] = atoi(optarg);
+ }
if (ind == SEPARATOR_ARG) {
_switches[SEPARATOR_ARG]++;
_string_args[SEPARATOR_ARG] = optarg;
}
+ if (ind == UNITS_ARG) {
+ _switches[UNITS_ARG]++;
+ _string_args[UNITS_ARG] = optarg;
+ }
if (c == 'O' || ind == SORT_ARG) {
_switches[SORT_ARG]++;
_string_args[SORT_ARG] = optarg;
@@ -3659,6 +5669,10 @@ static int _process_switches(int *argc, char ***argv, const char *dev_dir)
_switches[SELECT_ARG]++;
_string_args[SELECT_ARG] = optarg;
}
+ if (ind == START_ARG) {
+ _switches[START_ARG]++;
+ _string_args[START_ARG] = optarg;
+ }
if (c == 'v' || ind == VERBOSE_ARG)
_switches[VERBOSE_ARG]++;
if (c == 'u' || ind == UUID_ARG) {
@@ -3673,6 +5687,14 @@ static int _process_switches(int *argc, char ***argv, const char *dev_dir)
_switches[ADD_NODE_ON_CREATE_ARG]++;
if (ind == CHECKS_ARG)
_switches[CHECKS_ARG]++;
+ if (ind == COUNT_ARG) {
+ _switches[COUNT_ARG]++;
+ _int_args[COUNT_ARG] = atoi(optarg);
+ if (_int_args[COUNT_ARG] < 0) {
+ log_error("Count must be zero or greater.");
+ return 0;
+ }
+ }
if (ind == UDEVCOOKIE_ARG) {
_switches[UDEVCOOKIE_ARG]++;
_udev_cookie = _get_cookie_value(optarg);
@@ -3700,14 +5722,24 @@ static int _process_switches(int *argc, char ***argv, const char *dev_dir)
_switches[DEFERRED_ARG]++;
if (ind == EXEC_ARG) {
_switches[EXEC_ARG]++;
- _command = optarg;
+ _command_to_exec = optarg;
}
if (ind == TARGET_ARG) {
_switches[TARGET_ARG]++;
_target = optarg;
}
+ if (ind == SEGMENTS_ARG)
+ _switches[SEGMENTS_ARG]++;
if (ind == INACTIVE_ARG)
_switches[INACTIVE_ARG]++;
+ if (ind == INTERVAL_ARG) {
+ _switches[INTERVAL_ARG]++;
+ _int_args[INTERVAL_ARG] = atoi(optarg);
+ if (_int_args[INTERVAL_ARG] <= 0) {
+ log_error("Interval must be a positive integer.");
+ return 0;
+ }
+ }
if (ind == MANGLENAME_ARG) {
_switches[MANGLENAME_ARG]++;
if (!strcasecmp(optarg, "none"))
@@ -3775,8 +5807,17 @@ static int _process_switches(int *argc, char ***argv, const char *dev_dir)
_switches[VERSION_ARG]++;
}
- if (_switches[VERBOSE_ARG] > 1)
+ if (_switches[VERBOSE_ARG] > 1) {
dm_log_init_verbose(_switches[VERBOSE_ARG] - 1);
+ if (_switches[VERBOSE_ARG] > 2) {
+ if (!(_initial_timestamp = dm_timestamp_alloc()))
+ stack;
+ else if (!dm_timestamp_get(_initial_timestamp))
+ stack;
+ else
+ log_debug("Timestamp: 0.000000000 seconds");
+ }
+ }
if ((_switches[MAJOR_ARG] && !_switches[MINOR_ARG]) ||
(!_switches[MAJOR_ARG] && _switches[MINOR_ARG])) {
@@ -3795,16 +5836,51 @@ static int _process_switches(int *argc, char ***argv, const char *dev_dir)
return 0;
}
- *argv += optind;
- *argc -= optind;
+ *argvp += optind;
+ *argcp -= optind;
+
+ if (!*argcp)
+ _command = NULL;
+ else if (!strcmp((*argvp)[0], "stats")) {
+ _base_command = DMSETUP_STATS_CMD;
+ _base_command_type = STATS_TYPE;
+ _command = "stats";
+ (*argvp)++;
+ (*argcp)--;
+ } else if (_base_command == DMSTATS_CMD) {
+ _command = "stats";
+ } else if (*argcp) {
+ _command = (*argvp)[0];
+ (*argvp)++;
+ (*argcp)--;
+ }
+
+ return 1;
+}
+
+static int _perform_command_for_all_repeatable_args(CMD_ARGS)
+{
+ do {
+ if (!cmd->fn(cmd, subcommand, argc, argv++, NULL, multiple_devices)) {
+ fprintf(stderr, "Command failed\n");
+ return 0;
+ }
+ } while (cmd->repeatable_cmd && argc-- > 1);
+
return 1;
}
+static int _do_report_wait(void)
+{
+ return _do_timer_wait();
+}
+
int main(int argc, char **argv)
{
- int r = 1;
+ int ret = 1, r;
const char *dev_dir;
const struct command *cmd;
+ const char *subcommand = NULL;
int multiple_devices;
(void) setlocale(LC_ALL, "");
@@ -3824,31 +5900,45 @@ int main(int argc, char **argv)
}
if (_switches[HELP_ARG]) {
- if ((cmd = _find_command("help")))
- goto doit;
- goto unknown;
+ switch (_base_command_type) {
+ case STATS_TYPE:
+ if ((cmd = _find_stats_subcommand("help")))
+ goto doit;
+ goto unknown;
+ default:
+ if ((cmd = _find_dmsetup_command("help")))
+ goto doit;
+ goto unknown;
+ }
}
if (_switches[VERSION_ARG]) {
- if ((cmd = _find_command("version")))
- goto doit;
- goto unknown;
+ switch (_base_command_type) {
+ case STATS_TYPE:
+ if ((cmd = _find_stats_subcommand("version")))
+ goto doit;
+ goto unknown;
+ default:
+ if ((cmd = _find_dmsetup_command("version")))
+ goto doit;
+ goto unknown;
+ }
}
- if (argc == 0) {
+ if (!_command) {
_usage(stderr);
goto out;
}
- if (!(cmd = _find_command(argv[0]))) {
+ if (!(cmd = _find_dmsetup_command(_command))) {
unknown:
fprintf(stderr, "Unknown command\n");
_usage(stderr);
goto out;
}
- if (argc < cmd->min_args + 1 ||
- (cmd->max_args >= 0 && argc > cmd->max_args + 1)) {
+ if (argc < cmd->min_args ||
+ (cmd->max_args >= 0 && argc > cmd->max_args)) {
fprintf(stderr, "Incorrect number of arguments\n");
_usage(stderr);
goto out;
@@ -3857,6 +5947,14 @@ unknown:
if (!_switches[COLS_ARG] && !strcmp(cmd->name, "splitname"))
_switches[COLS_ARG]++;
+ if (!strcmp(cmd->name, "stats")) {
+ _switches[COLS_ARG]++;
+ if (!_switches[UNITS_ARG]) {
+ _switches[UNITS_ARG]++;
+ _string_args[UNITS_ARG] = (char *) "h";
+ }
+ }
+
if (!strcmp(cmd->name, "mangle"))
dm_set_name_mangling_mode(DM_STRING_MANGLING_NONE);
@@ -3865,43 +5963,81 @@ unknown:
goto out;
}
- if (_switches[COLS_ARG]) {
- if (!_report_init(cmd))
- goto out;
- if (!_report) {
- if (!strcmp(cmd->name, "info"))
- r = 0; /* info -c -o help */
+#ifdef UDEV_SYNC_SUPPORT
+ if (!_set_up_udev_support(dev_dir))
+ goto out;
+#endif
+
+ /*
+ * Extract subcommand?
+ * dmsetup <command> <subcommand> [args...]
+ */
+ if (cmd->has_subcommands) {
+ subcommand = argv[0];
+ argc--, argv++;
+ } else
+ subcommand = (char *) "";
+
+ if (_switches[COLS_ARG] && !_report_init(cmd, subcommand))
+ goto out;
+
+ if (_switches[COUNT_ARG])
+ _count = ((uint32_t)_int_args[COUNT_ARG]) ? : UINT32_MAX;
+ else if (_switches[INTERVAL_ARG])
+ _count = UINT32_MAX;
+
+ if (_switches[UNITS_ARG]) {
+ _disp_factor = _factor_from_units(_string_args[UNITS_ARG],
+ &_disp_units);
+ if (!_disp_factor) {
+ log_error("Invalid --units argument.");
goto out;
}
}
- #ifdef UDEV_SYNC_SUPPORT
- if (!_set_up_udev_support(dev_dir))
- goto out;
- #endif
+ /* Start interval timer. */
+ if (_count > 1)
+ if (!_start_timer())
+ goto_out;
+
+doit:
+ multiple_devices = (cmd->repeatable_cmd && argc != 1 &&
+ (argc || (!_switches[UUID_ARG] && !_switches[MAJOR_ARG])));
- doit:
- multiple_devices = (cmd->repeatable_cmd && argc != 2 &&
- (argc != 1 || (!_switches[UUID_ARG] && !_switches[MAJOR_ARG])));
do {
- if (!cmd->fn(cmd, argc--, argv++, NULL, multiple_devices)) {
- fprintf(stderr, "Command failed\n");
- goto out;
+ r = _perform_command_for_all_repeatable_args(cmd, subcommand, argc, argv, NULL, multiple_devices);
+ if (_report) {
+ /* only output headings for repeating reports */
+ if (_int_args[COUNT_ARG] != 1 && !dm_report_is_empty(_report))
+ dm_report_column_headings(_report);
+ dm_report_output(_report);
+
+ if (_count > 1 && r) {
+ printf("\n");
+ /* wait for --interval and update timestamps */
+ if (!_do_report_wait())
+ goto_out;
+ }
}
- } while (cmd->repeatable_cmd && argc > 1);
- r = 0;
+ if (!r)
+ goto_out;
+ } while (--_count);
+
+ /* Success */
+ ret = 0;
out:
- if (_report) {
- dm_report_output(_report);
+ if (_report)
dm_report_free(_report);
- }
if (_dtree)
dm_tree_free(_dtree);
dm_free(_table);
- return r;
+ if (_initial_timestamp)
+ dm_timestamp_destroy(_initial_timestamp);
+
+ return ret;
}
diff --git a/tools/dumpconfig.c b/tools/dumpconfig.c
index 09f2119e5..c9c5bf9ba 100644
--- a/tools/dumpconfig.c
+++ b/tools/dumpconfig.c
@@ -50,6 +50,9 @@ static int _do_def_check(struct config_def_tree_spec *spec,
handle->check_diff = 0;
}
+ handle->ignoreunsupported = spec->ignoreunsupported;
+ handle->ignoreadvanced = spec->ignoreadvanced;
+
config_def_check(handle);
*cft_check_handle = handle;
@@ -88,7 +91,7 @@ static int _config_validate(struct cmd_context *cmd, struct dm_config_tree *cft)
int dumpconfig(struct cmd_context *cmd, int argc, char **argv)
{
const char *file = arg_str_value(cmd, file_ARG, NULL);
- const char *type = arg_str_value(cmd, configtype_ARG, "current");
+ const char *type = arg_str_value(cmd, configtype_ARG, arg_count(cmd, list_ARG) ? "list" : "current");
struct config_def_tree_spec tree_spec = {0};
struct dm_config_tree *cft = NULL;
struct cft_check_handle *cft_check_handle = NULL;
@@ -102,30 +105,71 @@ int dumpconfig(struct cmd_context *cmd, int argc, char **argv)
return EINVALID_CMD_LINE;
}
- if (arg_count(cmd, atversion_ARG) && !arg_count(cmd, configtype_ARG)) {
- log_error("--atversion requires --type");
+ if (arg_count(cmd, configtype_ARG) && arg_count(cmd, list_ARG)) {
+ log_error("Only one of --type and --list permitted.");
+ return EINVALID_CMD_LINE;
+ }
+
+ if (arg_count(cmd, atversion_ARG) && !arg_count(cmd, configtype_ARG) &&
+ !arg_count(cmd, list_ARG)) {
+ log_error("--atversion requires --type or --list");
return EINVALID_CMD_LINE;
}
if (arg_count(cmd, ignoreadvanced_ARG))
tree_spec.ignoreadvanced = 1;
- if (arg_count(cmd, ignoreunsupported_ARG))
+ if (arg_count(cmd, ignoreunsupported_ARG)) {
+ if (arg_count(cmd, showunsupported_ARG)) {
+ log_error("Only one of --ignoreunsupported and --showunsupported permitted.");
+ return EINVALID_CMD_LINE;
+ }
+ tree_spec.ignoreunsupported = 1;
+ } else if (arg_count(cmd, showunsupported_ARG)) {
+ tree_spec.ignoreunsupported = 0;
+ } else if (strcmp(type, "current") && strcmp(type, "diff")) {
+ /*
+ * By default hide unsupported settings
+ * for all display types except "current"
+ * and "diff".
+ */
tree_spec.ignoreunsupported = 1;
+ }
+
+ if (strcmp(type, "current") && strcmp(type, "diff")) {
+ /*
+ * By default hide deprecated settings
+ * for all display types except "current"
+ * and "diff" unless --showdeprecated is set.
+ *
+ * N.B. Deprecated settings are visible if
+ * --atversion is used with a version that
+ * is lower than the version in which the
+ * setting was deprecated.
+ */
+ if (!arg_count(cmd, showdeprecated_ARG))
+ tree_spec.ignoredeprecated = 1;
+ }
- if (!strcmp(type, "current")) {
+ if (arg_count(cmd, ignorelocal_ARG))
+ tree_spec.ignorelocal = 1;
+
+ if (!strcmp(type, "current") || !strcmp(type, "full")) {
if (arg_count(cmd, atversion_ARG)) {
- log_error("--atversion has no effect with --type current");
+ log_error("--atversion has no effect with --type %s", type);
return EINVALID_CMD_LINE;
}
- if ((tree_spec.ignoreadvanced || tree_spec.ignoreunsupported)) {
+ if ((arg_count(cmd, ignoreunsupported_ARG) ||
+ arg_count(cmd, ignoreadvanced_ARG)) &&
+ !strcmp(type, "current")) {
+ /* FIXME: allow these even for --type current */
log_error("--ignoreadvanced and --ignoreunsupported has "
"no effect with --type current");
return EINVALID_CMD_LINE;
}
} else if (arg_count(cmd, mergedconfig_ARG)) {
- log_error("--mergedconfig has no effect without --type current");
+ log_error("--mergedconfig has no effect without --type current or --type full");
return EINVALID_CMD_LINE;
}
@@ -148,7 +192,7 @@ int dumpconfig(struct cmd_context *cmd, int argc, char **argv)
* Set the 'cft' to work with based on whether we need the plain
* config tree or merged config tree cascade if --mergedconfig is used.
*/
- if (arg_count(cmd, mergedconfig_ARG) && cmd->cft->cascade) {
+ if ((arg_count(cmd, mergedconfig_ARG) || !strcmp(type, "full")) && cmd->cft->cascade) {
if (!_merge_config_cascade(cmd, cmd->cft, &cft)) {
log_error("Failed to merge configuration.");
r = ECMD_FAILED;
@@ -156,6 +200,7 @@ int dumpconfig(struct cmd_context *cmd, int argc, char **argv)
}
} else
cft = cmd->cft;
+ tree_spec.current_cft = cft;
if (arg_count(cmd, validate_ARG)) {
if (_config_validate(cmd, cft)) {
@@ -168,7 +213,20 @@ int dumpconfig(struct cmd_context *cmd, int argc, char **argv)
}
}
- if (!strcmp(type, "current")) {
+ if (!strcmp(type, "list") || arg_count(cmd, list_ARG)) {
+ tree_spec.type = CFG_DEF_TREE_LIST;
+ if (arg_count(cmd, withcomments_ARG)) {
+ log_error("--withcomments has no effect with --type list");
+ return EINVALID_CMD_LINE;
+ }
+ /* list type does not require status check */
+ } else if (!strcmp(type, "full")) {
+ tree_spec.type = CFG_DEF_TREE_FULL;
+ if (!_do_def_check(&tree_spec, cft, &cft_check_handle)) {
+ r = ECMD_FAILED;
+ goto_out;
+ }
+ } else if (!strcmp(type, "current")) {
tree_spec.type = CFG_DEF_TREE_CURRENT;
if (!_do_def_check(&tree_spec, cft, &cft_check_handle)) {
r = ECMD_FAILED;
@@ -211,18 +269,25 @@ int dumpconfig(struct cmd_context *cmd, int argc, char **argv)
}
else {
log_error("Incorrect type of configuration specified. "
- "Expected one of: current, default, missing, new, "
- "profilable, profilable-command, profilable-metadata.");
+ "Expected one of: current, default, diff, full, list, missing, "
+ "new, profilable, profilable-command, profilable-metadata.");
r = EINVALID_CMD_LINE;
goto out;
}
+ if (arg_count(cmd, withsummary_ARG) || arg_count(cmd, list_ARG))
+ tree_spec.withsummary = 1;
if (arg_count(cmd, withcomments_ARG))
tree_spec.withcomments = 1;
+ if (arg_count(cmd, unconfigured_ARG))
+ tree_spec.unconfigured = 1;
if (arg_count(cmd, withversions_ARG))
tree_spec.withversions = 1;
+ if (arg_count(cmd, withspaces_ARG))
+ tree_spec.withspaces = 1;
+
if (cft_check_handle)
tree_spec.check_status = cft_check_handle->status;
@@ -238,8 +303,17 @@ int dumpconfig(struct cmd_context *cmd, int argc, char **argv)
r = ECMD_FAILED;
}
out:
+ if (tree_spec.current_cft && (tree_spec.current_cft != cft) &&
+ (tree_spec.current_cft != cmd->cft))
+ /*
+ * This happens in case of CFG_DEF_TREE_FULL where we
+ * have merged explicitly defined config trees and also
+ * we have used default tree.
+ */
+ dm_config_destroy(tree_spec.current_cft);
+
if (cft && (cft != cmd->cft))
- dm_pool_destroy(cft->mem);
+ dm_config_destroy(cft);
else if (profile)
remove_config_tree_by_source(cmd, CONFIG_PROFILE_COMMAND);
@@ -250,3 +324,13 @@ out:
return r;
}
+
+int config(struct cmd_context *cmd, int argc, char **argv)
+{
+ return dumpconfig(cmd, argc, argv);
+}
+
+int lvmconfig(struct cmd_context *cmd, int argc, char **argv)
+{
+ return dumpconfig(cmd, argc, argv);
+}
diff --git a/tools/lvchange.c b/tools/lvchange.c
index eab9c8f91..a9d72bf13 100644
--- a/tools/lvchange.c
+++ b/tools/lvchange.c
@@ -14,36 +14,53 @@
*/
#include "tools.h"
+
#include "memlock.h"
-static int lvchange_permission(struct cmd_context *cmd,
- struct logical_volume *lv)
+static int _lvchange_permission(struct cmd_context *cmd,
+ struct logical_volume *lv)
{
uint32_t lv_access;
struct lvinfo info;
+ unsigned info_obtained = 0;
lv_access = arg_uint_value(cmd, permission_ARG, 0);
- if ((lv_access & LVM_WRITE) && (lv->status & LVM_WRITE)) {
- log_error("Logical volume \"%s\" is already writable",
- lv->name);
+ if (lv_is_external_origin(lv)) {
+ log_error("Cannot change permissions of external origin "
+ "\"%s\".", lv->name);
return 0;
}
if (!(lv_access & LVM_WRITE) && !(lv->status & LVM_WRITE)) {
+ /* Refresh if it's read-only in metadata but read-write in kernel */
+ if (lv_info(cmd, lv, 0, &info, 0, 0) &&
+ (info_obtained = 1, info.exists) && !info.read_only) {
+ log_print_unless_silent("Logical volume \"%s\" is already read-only. Refreshing kernel state.",
+ lv->name);
+ return lv_refresh(cmd, lv);
+ }
log_error("Logical volume \"%s\" is already read only",
lv->name);
return 0;
}
- if (lv_is_external_origin(lv)) {
- log_error("Cannot change permissions of external origin "
- "\"%s\".", lv->name);
+ if ((lv_access & LVM_WRITE) && (lv->status & LVM_WRITE)) {
+ /* Refresh if it's read-write in metadata but read-only in kernel */
+ if (lv_info(cmd, lv, 0, &info, 0, 0) &&
+ (info_obtained = 1, info.exists) && info.read_only) {
+ log_print_unless_silent("Logical volume \"%s\" is already writable. Refreshing kernel state.",
+ lv->name);
+ return lv_refresh(cmd, lv);
+ }
+
+ log_error("Logical volume \"%s\" is already writable",
+ lv->name);
return 0;
}
if (lv_is_mirrored(lv) && vg_is_clustered(lv->vg) &&
- lv_info(cmd, lv, 0, &info, 0, 0) && info.exists) {
+ (info_obtained || lv_info(cmd, lv, 0, &info, 0, 0)) && info.exists) {
log_error("Cannot change permissions of mirror \"%s\" "
"while active.", lv->name);
return 0;
@@ -79,8 +96,8 @@ static int lvchange_permission(struct cmd_context *cmd,
return 1;
}
-static int lvchange_pool_update(struct cmd_context *cmd,
- struct logical_volume *lv)
+static int _lvchange_pool_update(struct cmd_context *cmd,
+ struct logical_volume *lv)
{
int update = 0;
unsigned val;
@@ -126,8 +143,8 @@ static int lvchange_pool_update(struct cmd_context *cmd,
return 1;
}
-static int lvchange_monitoring(struct cmd_context *cmd,
- struct logical_volume *lv)
+static int _lvchange_monitoring(struct cmd_context *cmd,
+ struct logical_volume *lv)
{
struct lvinfo info;
@@ -148,8 +165,8 @@ static int lvchange_monitoring(struct cmd_context *cmd,
return 1;
}
-static int lvchange_background_polling(struct cmd_context *cmd,
- struct logical_volume *lv)
+static int _lvchange_background_polling(struct cmd_context *cmd,
+ struct logical_volume *lv)
{
struct lvinfo info;
@@ -170,6 +187,19 @@ static int _lvchange_activate(struct cmd_context *cmd, struct logical_volume *lv
activate = (activation_change_t) arg_uint_value(cmd, activate_ARG, CHANGE_AY);
+ /*
+ * We can get here in the odd case where an LV is already active in
+ * a foreign VG, which allows the VG to be accessed by lvchange -a
+ * so the LV can be deactivated.
+ */
+ if (lv->vg->system_id && lv->vg->system_id[0] &&
+ cmd->system_id && cmd->system_id[0] &&
+ strcmp(lv->vg->system_id, cmd->system_id) &&
+ is_change_activating(activate)) {
+ log_error("Cannot activate LVs in a foreign VG.");
+ return ECMD_FAILED;
+ }
+
if (lv_activation_skip(lv, activate, arg_count(cmd, ignoreactivationskip_ARG)))
return 1;
@@ -183,6 +213,19 @@ static int _lvchange_activate(struct cmd_context *cmd, struct logical_volume *lv
if (!lv_change_activate(cmd, lv, activate))
return_0;
+ /*
+ * FIXME: lvchange should defer background polling in a similar
+ * way as vgchange does. First activate all relevant LVs
+ * initate background polling later (for all actually
+ * activated LVs). So we can avoid duplicate background
+ * polling for pvmove (2 or more locked LVs on single pvmove
+ * LV)
+ */
+ if (background_polling() && is_change_activating(activate) &&
+ (lv_is_pvmove(lv) || lv_is_locked(lv) || lv_is_converting(lv) ||
+ lv_is_merging(lv)))
+ lv_spawn_background_polling(cmd, lv);
+
return 1;
}
@@ -245,7 +288,7 @@ static int attach_metadata_devices(struct lv_segment *seg, struct dm_list *list)
*
* Suspend and resume a logical volume.
*/
-static int lvchange_refresh(struct cmd_context *cmd, struct logical_volume *lv)
+static int _lvchange_refresh(struct cmd_context *cmd, struct logical_volume *lv)
{
log_verbose("Refreshing logical volume \"%s\" (if active)", lv->name);
@@ -273,7 +316,7 @@ static int _reactivate_lv(struct logical_volume *lv,
*
* Force a mirror or RAID array to undergo a complete initializing resync.
*/
-static int lvchange_resync(struct cmd_context *cmd, struct logical_volume *lv)
+static int _lvchange_resync(struct cmd_context *cmd, struct logical_volume *lv)
{
int active = 0;
int exclusive = 0;
@@ -373,7 +416,7 @@ static int lvchange_resync(struct cmd_context *cmd, struct logical_volume *lv)
if (!_reactivate_lv(lv, active, exclusive)) {
log_error("Failed to reactivate %s to resynchronize "
- "mirror", lv->name);
+ SEG_TYPE_NAME_MIRROR, lv->name);
return 0;
}
@@ -430,7 +473,12 @@ static int lvchange_resync(struct cmd_context *cmd, struct logical_volume *lv)
}
}
- sync_local_dev_names(lv->vg->cmd); /* Wait until devices are away */
+ /* Wait until devices are away */
+ if (!sync_local_dev_names(lv->vg->cmd)) {
+ log_error("Failed to sync local devices after updating %s",
+ display_lvname(lv));
+ return 0;
+ }
/* Put metadata sub-LVs back in place */
if (!attach_metadata_devices(seg, &device_list)) {
@@ -456,7 +504,7 @@ static int lvchange_resync(struct cmd_context *cmd, struct logical_volume *lv)
return 1;
}
-static int lvchange_alloc(struct cmd_context *cmd, struct logical_volume *lv)
+static int _lvchange_alloc(struct cmd_context *cmd, struct logical_volume *lv)
{
int want_contiguous = arg_int_value(cmd, contiguous_ARG, 0);
alloc_policy_t alloc = (alloc_policy_t)
@@ -487,8 +535,30 @@ static int lvchange_alloc(struct cmd_context *cmd, struct logical_volume *lv)
return 1;
}
-static int lvchange_readahead(struct cmd_context *cmd,
- struct logical_volume *lv)
+static int _lvchange_errorwhenfull(struct cmd_context *cmd,
+ struct logical_volume *lv)
+{
+ unsigned ewf = arg_int_value(cmd, errorwhenfull_ARG, 0);
+
+ if (ewf == lv_is_error_when_full(lv)) {
+ log_error("Error when full is already %sset for %s.",
+ (ewf) ? "" : "un", display_lvname(lv));
+ return 0;
+ }
+
+ if (ewf)
+ lv->status |= LV_ERROR_WHEN_FULL;
+ else
+ lv->status &= ~LV_ERROR_WHEN_FULL;
+
+ if (!lv_update_and_reload(lv))
+ return_0;
+
+ return 1;
+}
+
+static int _lvchange_readahead(struct cmd_context *cmd,
+ struct logical_volume *lv)
{
unsigned read_ahead = 0;
unsigned pagesize = (unsigned) lvm_getpagesize() >> SECTOR_SHIFT;
@@ -532,11 +602,14 @@ static int lvchange_readahead(struct cmd_context *cmd,
return 1;
}
-static int lvchange_persistent(struct cmd_context *cmd,
- struct logical_volume *lv)
+static int _lvchange_persistent(struct cmd_context *cmd,
+ struct logical_volume *lv)
{
enum activation_change activate = CHANGE_AN;
+ /* The LV lock in lvmlockd should remain as it is. */
+ cmd->lockd_lv_disable = 1;
+
if (!get_and_validate_major_minor(cmd, lv->vg->fid->fmt,
&lv->major, &lv->minor))
return_0;
@@ -605,31 +678,33 @@ static int lvchange_persistent(struct cmd_context *cmd,
return 1;
}
-static int lvchange_cachepolicy(struct cmd_context *cmd, struct logical_volume *lv)
+static int _lvchange_cachepolicy(struct cmd_context *cmd, struct logical_volume *lv)
{
- struct dm_config_tree *policy = NULL;
+ const char *name;
+ struct dm_config_tree *settings = NULL;
int r = 0;
if (!lv_is_cache(lv) && !lv_is_cache_pool(lv)) {
log_error("LV %s is not a cache LV.", lv->name);
log_error("Only cache or cache pool devices can have --cachepolicy set.");
- goto_out;
+ goto out;
}
- if (!(policy = get_cachepolicy_params(cmd)))
+ if (!get_cache_params(cmd, NULL, &name, &settings))
goto_out;
- if (!lv_cache_setpolicy(lv, policy))
+ if (!cache_set_policy(first_seg(lv), name, settings))
goto_out;
if (!lv_update_and_reload(lv))
goto_out;
r = 1;
out:
- if (policy)
- dm_config_destroy(policy);
+ if (settings)
+ dm_config_destroy(settings);
+
return r;
}
-static int lvchange_tag(struct cmd_context *cmd, struct logical_volume *lv, int arg)
+static int _lvchange_tag(struct cmd_context *cmd, struct logical_volume *lv, int arg)
{
if (!change_tag(cmd, NULL, lv, NULL, arg))
return_0;
@@ -645,7 +720,7 @@ static int lvchange_tag(struct cmd_context *cmd, struct logical_volume *lv, int
return 1;
}
-static int lvchange_writemostly(struct logical_volume *lv)
+static int _lvchange_writemostly(struct logical_volume *lv)
{
int s, pv_count, i = 0;
char **pv_names;
@@ -745,7 +820,7 @@ static int lvchange_writemostly(struct logical_volume *lv)
return 1;
}
-static int lvchange_recovery_rate(struct logical_volume *lv)
+static int _lvchange_recovery_rate(struct logical_volume *lv)
{
struct cmd_context *cmd = lv->vg->cmd;
struct lv_segment *raid_seg = first_seg(lv);
@@ -776,7 +851,7 @@ static int lvchange_recovery_rate(struct logical_volume *lv)
return 1;
}
-static int lvchange_profile(struct logical_volume *lv)
+static int _lvchange_profile(struct logical_volume *lv)
{
const char *old_profile_name, *new_profile_name;
struct profile *new_profile;
@@ -807,7 +882,7 @@ static int lvchange_profile(struct logical_volume *lv)
return 1;
}
-static int lvchange_activation_skip(struct logical_volume *lv)
+static int _lvchange_activation_skip(struct logical_volume *lv)
{
int skip = arg_int_value(lv->vg->cmd, setactivationskip_ARG, 0);
@@ -826,7 +901,7 @@ static int lvchange_activation_skip(struct logical_volume *lv)
static int _lvchange_single(struct cmd_context *cmd, struct logical_volume *lv,
- void *handle __attribute__((unused)))
+ struct processing_handle *handle __attribute__((unused)))
{
int doit = 0, docmds = 0;
struct logical_volume *origin;
@@ -883,7 +958,11 @@ static int _lvchange_single(struct cmd_context *cmd, struct logical_volume *lv,
!arg_count(cmd, setactivationskip_ARG))
/* Rest can be changed for stacked thin pool meta/data volumes */
;
+#if 1
+ else if (!lv_is_raid(lv) && !lv_is_visible(lv) && !lv_is_virtual_origin(lv)) {
+#else
else if (!lv_is_visible(lv) && !lv_is_virtual_origin(lv)) {
+#endif
log_error("Unable to change internal LV %s directly",
lv->name);
return ECMD_FAILED;
@@ -910,6 +989,32 @@ static int _lvchange_single(struct cmd_context *cmd, struct logical_volume *lv,
}
}
+ if (arg_is_set(cmd, errorwhenfull_ARG) && !lv_is_thin_pool(lv)) {
+ log_error("Option --errorwhenfull is only supported with thin pools.");
+ return ECMD_FAILED;
+ }
+
+ if (arg_count(cmd, persistent_ARG) && lv_is_pool(lv)) {
+ log_error("Persistent major and minor numbers are not supported with pools.");
+ return ECMD_FAILED;
+ }
+
+ if (!arg_count(cmd, activate_ARG) && !arg_count(cmd, refresh_ARG)) {
+ /*
+ * If a persistent lv lock already exists from activation
+ * (with the needed mode or higher), this will be a no-op.
+ * Otherwise, the lv lock will be taken as non-persistent
+ * and released when this command exits.
+ *
+ * FIXME: use "sh" if the options imply that the lvchange
+ * operation does not modify the LV.
+ */
+ if (!lockd_lv(cmd, lv, "ex", 0)) {
+ stack;
+ return ECMD_FAILED;
+ }
+ }
+
/*
* FIXME: DEFAULT_BACKGROUND_POLLING should be "unspecified".
* If --poll is explicitly provided use it; otherwise polling
@@ -927,7 +1032,7 @@ static int _lvchange_single(struct cmd_context *cmd, struct logical_volume *lv,
if (arg_count(cmd, permission_ARG)) {
if (!archive(lv->vg))
return_ECMD_FAILED;
- doit += lvchange_permission(cmd, lv);
+ doit += _lvchange_permission(cmd, lv);
docmds++;
}
@@ -935,7 +1040,15 @@ static int _lvchange_single(struct cmd_context *cmd, struct logical_volume *lv,
if (arg_count(cmd, contiguous_ARG) || arg_count(cmd, alloc_ARG)) {
if (!archive(lv->vg))
return_ECMD_FAILED;
- doit += lvchange_alloc(cmd, lv);
+ doit += _lvchange_alloc(cmd, lv);
+ docmds++;
+ }
+
+ /* error when full change */
+ if (arg_count(cmd, errorwhenfull_ARG)) {
+ if (!archive(lv->vg))
+ return_ECMD_FAILED;
+ doit += _lvchange_errorwhenfull(cmd, lv);
docmds++;
}
@@ -943,7 +1056,7 @@ static int _lvchange_single(struct cmd_context *cmd, struct logical_volume *lv,
if (arg_count(cmd, readahead_ARG)) {
if (!archive(lv->vg))
return_ECMD_FAILED;
- doit += lvchange_readahead(cmd, lv);
+ doit += _lvchange_readahead(cmd, lv);
docmds++;
}
@@ -951,7 +1064,7 @@ static int _lvchange_single(struct cmd_context *cmd, struct logical_volume *lv,
if (arg_count(cmd, persistent_ARG)) {
if (!archive(lv->vg))
return_ECMD_FAILED;
- doit += lvchange_persistent(cmd, lv);
+ doit += _lvchange_persistent(cmd, lv);
docmds++;
if (sigint_caught())
return_ECMD_FAILED;
@@ -961,7 +1074,7 @@ static int _lvchange_single(struct cmd_context *cmd, struct logical_volume *lv,
arg_count(cmd, zero_ARG)) {
if (!archive(lv->vg))
return_ECMD_FAILED;
- doit += lvchange_pool_update(cmd, lv);
+ doit += _lvchange_pool_update(cmd, lv);
docmds++;
}
@@ -969,7 +1082,7 @@ static int _lvchange_single(struct cmd_context *cmd, struct logical_volume *lv,
if (arg_count(cmd, addtag_ARG)) {
if (!archive(lv->vg))
return_ECMD_FAILED;
- doit += lvchange_tag(cmd, lv, addtag_ARG);
+ doit += _lvchange_tag(cmd, lv, addtag_ARG);
docmds++;
}
@@ -977,7 +1090,7 @@ static int _lvchange_single(struct cmd_context *cmd, struct logical_volume *lv,
if (arg_count(cmd, deltag_ARG)) {
if (!archive(lv->vg))
return_ECMD_FAILED;
- doit += lvchange_tag(cmd, lv, deltag_ARG);
+ doit += _lvchange_tag(cmd, lv, deltag_ARG);
docmds++;
}
@@ -985,7 +1098,7 @@ static int _lvchange_single(struct cmd_context *cmd, struct logical_volume *lv,
if (arg_count(cmd, writemostly_ARG) || arg_count(cmd, writebehind_ARG)) {
if (!archive(lv->vg))
return_ECMD_FAILED;
- doit += lvchange_writemostly(lv);
+ doit += _lvchange_writemostly(lv);
docmds++;
}
@@ -994,7 +1107,7 @@ static int _lvchange_single(struct cmd_context *cmd, struct logical_volume *lv,
arg_count(cmd, maxrecoveryrate_ARG)) {
if (!archive(lv->vg))
return_ECMD_FAILED;
- doit += lvchange_recovery_rate(lv);
+ doit += _lvchange_recovery_rate(lv);
docmds++;
}
@@ -1003,21 +1116,21 @@ static int _lvchange_single(struct cmd_context *cmd, struct logical_volume *lv,
arg_count(cmd, detachprofile_ARG)) {
if (!archive(lv->vg))
return_ECMD_FAILED;
- doit += lvchange_profile(lv);
+ doit += _lvchange_profile(lv);
docmds++;
}
if (arg_count(cmd, setactivationskip_ARG)) {
if (!archive(lv->vg))
return_ECMD_FAILED;
- doit += lvchange_activation_skip(lv);
+ doit += _lvchange_activation_skip(lv);
docmds++;
}
if (arg_count(cmd, cachepolicy_ARG) || arg_count(cmd, cachesettings_ARG)) {
if (!archive(lv->vg))
return_ECMD_FAILED;
- doit += lvchange_cachepolicy(cmd, lv);
+ doit += _lvchange_cachepolicy(cmd, lv);
docmds++;
}
@@ -1025,7 +1138,7 @@ static int _lvchange_single(struct cmd_context *cmd, struct logical_volume *lv,
log_print_unless_silent("Logical volume \"%s\" changed.", lv->name);
if (arg_count(cmd, resync_ARG) &&
- !lvchange_resync(cmd, lv))
+ !_lvchange_resync(cmd, lv))
return_ECMD_FAILED;
if (arg_count(cmd, syncaction_ARG) &&
@@ -1037,15 +1150,15 @@ static int _lvchange_single(struct cmd_context *cmd, struct logical_volume *lv,
if (!_lvchange_activate(cmd, lv))
return_ECMD_FAILED;
} else if (arg_count(cmd, refresh_ARG)) {
- if (!lvchange_refresh(cmd, lv))
+ if (!_lvchange_refresh(cmd, lv))
return_ECMD_FAILED;
} else {
if (arg_count(cmd, monitor_ARG) &&
- !lvchange_monitoring(cmd, lv))
+ !_lvchange_monitoring(cmd, lv))
return_ECMD_FAILED;
if (arg_count(cmd, poll_ARG) &&
- !lvchange_background_polling(cmd, lv))
+ !_lvchange_background_polling(cmd, lv))
return_ECMD_FAILED;
}
@@ -1076,6 +1189,7 @@ int lvchange(struct cmd_context *cmd, int argc, char **argv)
int update_partial_unsafe =
arg_count(cmd, alloc_ARG) ||
arg_count(cmd, discards_ARG) ||
+ arg_count(cmd, errorwhenfull_ARG) ||
arg_count(cmd, minrecoveryrate_ARG) ||
arg_count(cmd, maxrecoveryrate_ARG) ||
arg_count(cmd, resync_ARG) ||
@@ -1088,8 +1202,8 @@ int lvchange(struct cmd_context *cmd, int argc, char **argv)
int update = update_partial_safe || update_partial_unsafe;
if (!update &&
- !arg_count(cmd, activate_ARG) && !arg_count(cmd, refresh_ARG) &&
- !arg_count(cmd, monitor_ARG) && !arg_count(cmd, poll_ARG)) {
+ !arg_count(cmd, activate_ARG) && !arg_count(cmd, refresh_ARG) &&
+ !arg_count(cmd, monitor_ARG) && !arg_count(cmd, poll_ARG)) {
log_error("Need 1 or more of -a, -C, -M, -p, -r, -Z, "
"--resync, --refresh, --alloc, --addtag, --deltag, "
"--monitor, --poll or --discards");
@@ -1116,8 +1230,8 @@ int lvchange(struct cmd_context *cmd, int argc, char **argv)
if (!update || !update_partial_unsafe)
cmd->handles_missing_pvs = 1;
- if (!argc) {
- log_error("Please give logical volume path(s)");
+ if (!argc && !arg_is_set(cmd, select_ARG)) {
+ log_error("Please give logical volume path(s) or use --select for selection.");
return EINVALID_CMD_LINE;
}
@@ -1171,6 +1285,26 @@ int lvchange(struct cmd_context *cmd, int argc, char **argv)
}
}
+ /*
+ * Include foreign VGs that contain active LVs.
+ * That shouldn't happen in general, but if it does by some
+ * mistake, then we want to allow those LVs to be deactivated.
+ */
+ if (arg_is_set(cmd, activate_ARG))
+ cmd->include_active_foreign_vgs = 1;
+
+ /*
+ * The default vg lock mode for lvchange is ex, but these options
+ * are cases where lvchange does not modify the vg, so they can use
+ * the sh lock mode.
+ */
+ if (arg_count(cmd, activate_ARG) || arg_count(cmd, refresh_ARG)) {
+ cmd->lockd_vg_default_sh = 1;
+ /* Allow deactivating if locks fail. */
+ if (is_change_activating((activation_change_t)arg_uint_value(cmd, activate_ARG, CHANGE_AY)))
+ cmd->lockd_vg_enforce_sh = 1;
+ }
+
return process_each_lv(cmd, argc, argv,
update ? READ_FOR_UPDATE : 0, NULL,
&_lvchange_single);
diff --git a/tools/lvconvert.c b/tools/lvconvert.c
index 49196cb9c..51d459417 100644
--- a/tools/lvconvert.c
+++ b/tools/lvconvert.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2005-2014 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2005-2015 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
@@ -13,17 +13,10 @@
*/
#include "tools.h"
+
#include "polldaemon.h"
#include "lv_alloc.h"
-
-/* HM FIXME: REMOVEME: devel output */
-#ifdef USE_PFL
-#define PFL() printf("%s %u\n", __func__, __LINE__);
-#define PFLA(format, arg...) printf("%s %u " format "\n", __func__, __LINE__, arg);
-#else
-#define PFL()
-#define PFLA(format, arg...)
-#endif
+#include "lvconvert_poll.h"
struct lvconvert_params {
int cache;
@@ -57,7 +50,9 @@ struct lvconvert_params {
uint32_t stripes;
uint32_t stripe_size;
uint32_t read_ahead;
- uint64_t feature_flags; /* cache_pool */
+ const char *cache_mode; /* cache */
+ const char *policy_name; /* cache */
+ struct dm_config_tree *policy_settings; /* cache */
const struct segment_type *segtype;
unsigned target_attr;
@@ -73,6 +68,7 @@ struct lvconvert_params {
struct dm_list *replace_pvh;
struct logical_volume *lv_to_poll;
+ struct dm_list idls;
uint32_t pool_metadata_extents;
int passed_args;
@@ -85,6 +81,13 @@ struct lvconvert_params {
thin_discards_t discards;
};
+struct convert_poll_id_list {
+ struct dm_list list;
+ struct poll_operation_id *id;
+ unsigned is_merging_origin:1;
+ unsigned is_merging_origin_thin:1;
+};
+
static int _lvconvert_validate_names(struct lvconvert_params *lp)
{
int i, j;
@@ -237,22 +240,11 @@ static int _check_conversion_type(struct cmd_context *cmd, const char *type_str)
if (!type_str || !*type_str)
return 1;
-#if 0
- /* HM FIXME: TESTME to convert from raid1 to mirror */
- if (!strcmp(type_str, "mirror")) {
- if (!arg_count(cmd, mirrors_ARG)) {
- log_error("--type mirror requires -m/--mirrors");
- return 0;
- }
- return 1;
- }
-#endif
-
/* FIXME: Check thin-pool and thin more thoroughly! */
if (!strcmp(type_str, "snapshot") ||
!strcmp(type_str, "linear") ||
!strcmp(type_str, "striped") ||
- !strcmp(type_str, "mirror") || /* HM FIXME: TESTME to convert raid1 to mirror */
+ !strcmp(type_str, SEG_TYPE_NAME_MIRROR) ||
!strncmp(type_str, "raid", 4) ||
!strcmp(type_str, "cache-pool") || !strcmp(type_str, "cache") ||
!strcmp(type_str, "thin-pool") || !strcmp(type_str, "thin"))
@@ -269,8 +261,9 @@ static int _snapshot_type_requested(struct cmd_context *cmd, const char *type_st
/* mirror/raid* (1,10,4,5,6 and their variants) reshape */
static int _mirror_or_raid_type_requested(struct cmd_context *cmd, const char *type_str) {
return (arg_count(cmd, mirrors_ARG) ||
+ arg_count(cmd, regionsize_ARG) ||
!strncmp(type_str, "raid", 4) ||
- !strcmp(type_str, "mirror"));
+ !strcmp(type_str, SEG_TYPE_NAME_MIRROR));
}
static int _read_pool_params(struct cmd_context *cmd, int *pargc, char ***pargv,
@@ -304,21 +297,14 @@ static int _read_pool_params(struct cmd_context *cmd, int *pargc, char ***pargv,
} else if (!strcmp(type_str, "thin-pool"))
thinpool = 1;
- if (cachepool) {
- const char *cachemode = arg_str_value(cmd, cachemode_ARG, NULL);
- if (!cachemode)
- cachemode = find_config_tree_str(cmd, allocation_cache_pool_cachemode_CFG, NULL);
-
- if (!set_cache_pool_feature(&lp->feature_flags, cachemode))
- return_0;
- } else {
- if (arg_from_list_is_set(cmd, "is valid only with cache pools",
- cachepool_ARG, cachemode_ARG, -1))
- return_0;
- if (lp->cache) {
- log_error("--cache requires --cachepool.");
- return 0;
- }
+ if (lp->cache && !cachepool) {
+ log_error("--cache requires --cachepool.");
+ return 0;
+ }
+ if ((lp->cache || cachepool) &&
+ !get_cache_params(cmd, &lp->cache_mode, &lp->policy_name, &lp->policy_settings)) {
+ log_error("Failed to parse cache policy and/or settings.");
+ return 0;
}
if (thinpool) {
@@ -329,7 +315,8 @@ static int _read_pool_params(struct cmd_context *cmd, int *pargc, char ***pargv,
discards_ARG, originname_ARG, thinpool_ARG,
zero_ARG, -1))
return_0;
- if (lp->thin) {
+ if (lp->thin &&
+ !arg_count(cmd, duplicate_ARG)) {
log_error("--thin requires --thinpool.");
return 0;
}
@@ -393,7 +380,6 @@ static int _read_params(struct cmd_context *cmd, int argc, char **argv,
if (!_check_conversion_type(cmd, type_str))
return_0;
-
#if 1
/* FIXME: TESTME */
if (arg_count(cmd, type_ARG) &&
@@ -539,14 +525,35 @@ static int _read_params(struct cmd_context *cmd, int argc, char **argv,
lp->keep_mimages = 1;
lp->mirrors = arg_uint_value(cmd, splitmirrors_ARG, 0);
lp->mirrors_sign = SIGN_MINUS;
- } else if (arg_count(cmd, name_ARG)) {
+ /* --unduplicate given -> check for sub lv name or mirrors/stripes/stripe_size/region_size provided */
+ } else if (arg_count(cmd, unduplicate_ARG)) {
+ int ac = arg_count(cmd, mirrors_ARG) +
+ arg_count(cmd, stripes_ARG) +
+ arg_count(cmd, stripesize_ARG) +
+ arg_count(cmd, regionsize_ARG);
+
+PFL();
+ if (arg_count(cmd, name_ARG)) {
+ if (ac) {
+ log_error("Can't provide any mirrors/stripes/stripesize/regionsize option with --name");
+ return 0;
+ }
+
+ lp->lv_split_name = arg_str_value(cmd, name_ARG, NULL);
+PFLA("lp->lv_split_name=%s", lp->lv_split_name);
+
+ }
+
+ } else if (arg_count(cmd, name_ARG) &&
+ !arg_count(cmd, duplicate_ARG)) {
log_error("The 'name' argument is only valid"
" with --splitmirrors");
return 0;
}
if (arg_count(cmd, merge_ARG)) {
- if ((argc == 1) && strstr(argv[0], "_rimage_"))
+ if ((argc == 1) &&
+ (strstr(argv[0], "_rimage_") || strstr(argv[0], "_dup_")))
lp->merge_mirror = 1;
else
lp->merge = 1;
@@ -725,253 +732,100 @@ static int _read_params(struct cmd_context *cmd, int argc, char **argv,
return 1;
}
-static struct volume_group *_get_lvconvert_vg(struct cmd_context *cmd,
- const char *name,
- const char *uuid __attribute__((unused)))
-{
- dev_close_all();
-
- if (name && !strchr(name, '/'))
- return vg_read_for_update(cmd, name, NULL, 0);
-
- /* 'name' is the full LV name; must extract_vgname() */
- return vg_read_for_update(cmd, extract_vgname(cmd, name),
- NULL, 0);
-}
-
-static struct logical_volume *_get_lvconvert_lv(struct cmd_context *cmd __attribute__((unused)),
- struct volume_group *vg,
- const char *name,
- const char *uuid,
- uint64_t lv_type __attribute__((unused)))
-{
- struct logical_volume *lv = find_lv(vg, name);
-
- if (!lv || (uuid && strcmp(uuid, (char *)&lv->lvid)))
- return NULL;
-
- return lv;
-}
-
-static int _finish_lvconvert_mirror(struct cmd_context *cmd,
- struct volume_group *vg,
- struct logical_volume *lv,
- struct dm_list *lvs_changed __attribute__((unused)))
-{
- if (!lv_is_converting(lv))
- return 1;
-
- if (!collapse_mirrored_lv(lv)) {
- log_error("Failed to remove temporary sync layer.");
- return 0;
- }
-
- lv->status &= ~CONVERTING;
-
- if (!lv_update_and_reload(lv))
- return_0;
+static struct poll_functions _lvconvert_mirror_fns = {
+ .poll_progress = poll_mirror_progress,
+ .finish_copy = lvconvert_mirror_finish,
+};
- log_print_unless_silent("Logical volume %s converted.", lv->name);
+static struct poll_functions _lvconvert_merge_fns = {
+ .poll_progress = poll_merge_progress,
+ .finish_copy = lvconvert_merge_finish,
+};
- return 1;
-}
+static struct poll_functions _lvconvert_thin_merge_fns = {
+ .poll_progress = poll_thin_merge_progress,
+ .finish_copy = lvconvert_merge_finish,
+};
-/* Swap lvid and LV names */
-static int _swap_lv_identifiers(struct cmd_context *cmd,
- struct logical_volume *a, struct logical_volume *b)
+static void _destroy_id(struct cmd_context *cmd, struct poll_operation_id *id)
{
- union lvid lvid;
- const char *name;
-
- lvid = a->lvid;
- a->lvid = b->lvid;
- b->lvid = lvid;
-
- name = a->name;
- a->name = b->name;
- if (!lv_rename_update(cmd, b, name, 0))
- return_0;
-
- return 1;
-}
+ if (!id)
+ return;
-static void _move_lv_attributes(struct logical_volume *to, struct logical_volume *from)
-{
- /* Maybe move this code into _finish_thin_merge() */
- to->status = from->status; // FIXME maybe some masking ?
- to->alloc = from->alloc;
- to->profile = from->profile;
- to->read_ahead = from->read_ahead;
- to->major = from->major;
- to->minor = from->minor;
- to->timestamp = from->timestamp;
- to->hostname = from->hostname;
-
- /* Move tags */
- dm_list_init(&to->tags);
- dm_list_splice(&to->tags, &from->tags);
-
- /* Anything else to preserve? */
+ dm_pool_free(cmd->mem, (void *)id);
}
-/* Finalise merging of lv into merge_lv */
-static int _finish_thin_merge(struct cmd_context *cmd,
- struct logical_volume *merge_lv,
- struct logical_volume *lv)
+static struct poll_operation_id *_create_id(struct cmd_context *cmd,
+ const char *vg_name,
+ const char *lv_name,
+ const char *uuid)
{
- if (!_swap_lv_identifiers(cmd, merge_lv, lv)) {
- log_error("Failed to swap %s with merging %s.",
- lv->name, merge_lv->name);
- return 0;
+ char lv_full_name[NAME_LEN];
+ struct poll_operation_id *id = dm_pool_alloc(cmd->mem, sizeof(struct poll_operation_id));
+ if (!id) {
+ log_error("Poll operation ID allocation failed.");
+ return NULL;
}
- /* Preserve origins' attributes */
- _move_lv_attributes(lv, merge_lv);
-
- /* Removed LV has to be visible */
- if (!lv_remove_single(cmd, merge_lv, DONT_PROMPT, 1))
- return_0;
-
- return 1;
-}
-
-static int _finish_lvconvert_merge(struct cmd_context *cmd,
- struct volume_group *vg,
- struct logical_volume *lv,
- struct dm_list *lvs_changed __attribute__((unused)))
-{
- struct lv_segment *snap_seg = find_snapshot(lv);
-
- if (!lv_is_merging_origin(lv)) {
- log_error("Logical volume %s has no merging snapshot.", lv->name);
- return 0;
+ if (dm_snprintf(lv_full_name, sizeof(lv_full_name), "%s/%s", vg_name, lv_name) < 0) {
+ log_error(INTERNAL_ERROR "Name \"%s/%s\" is too long.", vg_name, lv_name);
+ _destroy_id(cmd, id);
+ return NULL;
}
- log_print_unless_silent("Merge of snapshot into logical volume %s has finished.", lv->name);
-
- if (seg_is_thin_volume(snap_seg)) {
- clear_snapshot_merge(lv);
+ id->display_name = dm_pool_strdup(cmd->mem, lv_full_name);
+ id->vg_name = vg_name ? dm_pool_strdup(cmd->mem, vg_name) : NULL;
+ id->lv_name = id->display_name ? strchr(id->display_name, '/') + 1 : NULL;
+ id->uuid = uuid ? dm_pool_strdup(cmd->mem, uuid) : NULL;
- if (!_finish_thin_merge(cmd, lv, snap_seg->lv))
- return_0;
-
- } else if (!lv_remove_single(cmd, snap_seg->cow, DONT_PROMPT, 0)) {
- log_error("Could not remove snapshot %s merged into %s.",
- snap_seg->cow->name, lv->name);
- return 0;
+ if (!id->vg_name || !id->lv_name || !id->display_name || !id->uuid) {
+ log_error("Failed to copy one or more poll operation ID members.");
+ _destroy_id(cmd, id);
+ id = NULL;
}
- return 1;
+ return id;
}
-static progress_t _poll_merge_progress(struct cmd_context *cmd,
- struct logical_volume *lv,
- const char *name __attribute__((unused)),
- struct daemon_parms *parms)
+static int _lvconvert_poll_by_id(struct cmd_context *cmd, struct poll_operation_id *id,
+ unsigned background,
+ int is_merging_origin,
+ int is_merging_origin_thin)
{
- dm_percent_t percent = DM_PERCENT_0;
-
- if (!lv_is_merging_origin(lv) ||
- !lv_snapshot_percent(lv, &percent)) {
- log_error("%s: Failed query for merging percentage. Aborting merge.", lv->name);
- return PROGRESS_CHECK_FAILED;
- } else if (percent == DM_PERCENT_INVALID) {
- log_error("%s: Merging snapshot invalidated. Aborting merge.", lv->name);
- return PROGRESS_CHECK_FAILED;
- } else if (percent == LVM_PERCENT_MERGE_FAILED) {
- log_error("%s: Merge failed. Retry merge or inspect manually.", lv->name);
- return PROGRESS_CHECK_FAILED;
- }
-
- if (parms->progress_display)
- log_print_unless_silent("%s: %s: %.1f%%", lv->name, parms->progress_title,
- 100.0 - dm_percent_to_float(percent));
+ if (is_merging_origin)
+ return poll_daemon(cmd, background,
+ (MERGING | (is_merging_origin_thin ? THIN_VOLUME : SNAPSHOT)),
+ is_merging_origin_thin ? &_lvconvert_thin_merge_fns : &_lvconvert_merge_fns,
+ "Merged", id);
else
- log_verbose("%s: %s: %.1f%%", lv->name, parms->progress_title,
- 100.0 - dm_percent_to_float(percent));
-
- if (percent == DM_PERCENT_0)
- return PROGRESS_FINISHED_ALL;
-
- return PROGRESS_UNFINISHED;
+ return poll_daemon(cmd, background, CONVERTING,
+ &_lvconvert_mirror_fns, "Converted", id);
}
-static progress_t _poll_thin_merge_progress(struct cmd_context *cmd,
- struct logical_volume *lv,
- const char *name __attribute__((unused)),
- struct daemon_parms *parms)
-{
- uint32_t device_id;
-
- if (!lv_thin_device_id(lv, &device_id)) {
- stack;
- return PROGRESS_CHECK_FAILED;
- }
-
- /*
- * There is no need to poll more than once,
- * a thin snapshot merge is immediate.
- */
-
- if (device_id != find_snapshot(lv)->device_id) {
- log_error("LV %s is not merged.", lv->name);
- return PROGRESS_CHECK_FAILED;
- }
-
- return PROGRESS_FINISHED_ALL; /* Merging happend */
-}
-
-static struct poll_functions _lvconvert_mirror_fns = {
- .get_copy_vg = _get_lvconvert_vg,
- .get_copy_lv = _get_lvconvert_lv,
- .poll_progress = poll_mirror_progress,
- .finish_copy = _finish_lvconvert_mirror,
-};
-
-static struct poll_functions _lvconvert_merge_fns = {
- .get_copy_vg = _get_lvconvert_vg,
- .get_copy_lv = _get_lvconvert_lv,
- .poll_progress = _poll_merge_progress,
- .finish_copy = _finish_lvconvert_merge,
-};
-
-static struct poll_functions _lvconvert_thin_merge_fns = {
- .get_copy_vg = _get_lvconvert_vg,
- .get_copy_lv = _get_lvconvert_lv,
- .poll_progress = _poll_thin_merge_progress,
- .finish_copy = _finish_lvconvert_merge,
-};
-
int lvconvert_poll(struct cmd_context *cmd, struct logical_volume *lv,
unsigned background)
{
- /*
- * FIXME allocate an "object key" structure with split
- * out members (vg_name, lv_name, uuid, etc) and pass that
- * around the lvconvert and polldaemon code
- * - will avoid needless work, e.g. extract_vgname()
- * - unfortunately there are enough overloaded "name" dragons in
- * the polldaemon, lvconvert, pvmove code that a comprehensive
- * audit/rework is needed
- */
- char uuid[sizeof(lv->lvid)];
- char lv_full_name[NAME_LEN];
+ int r;
+ struct poll_operation_id *id = _create_id(cmd, lv->vg->name, lv->name, lv->lvid.s);
+ int is_merging_origin = 0;
+ int is_merging_origin_thin = 0;
- if (dm_snprintf(lv_full_name, sizeof(lv_full_name), "%s/%s", lv->vg->name, lv->name) < 0) {
- log_error(INTERNAL_ERROR "Name \"%s/%s\" is too long.", lv->vg->name, lv->name);
+ if (!id) {
+ log_error("Failed to allocate poll identifier for lvconvert.");
return ECMD_FAILED;
}
- memcpy(uuid, &lv->lvid, sizeof(lv->lvid));
+ /* FIXME: check this in polling instead */
+ if (lv_is_merging_origin(lv)) {
+ is_merging_origin = 1;
+ is_merging_origin_thin = seg_is_thin_volume(find_snapshot(lv));
+ }
+
+ r = _lvconvert_poll_by_id(cmd, id, background, is_merging_origin, is_merging_origin_thin);
- if (lv_is_merging_origin(lv))
- return poll_daemon(cmd, lv_full_name, uuid, background, 0,
- seg_is_thin_volume(find_snapshot(lv)) ?
- &_lvconvert_thin_merge_fns : &_lvconvert_merge_fns,
- "Merged");
+ _destroy_id(cmd, id);
- return poll_daemon(cmd, lv_full_name, uuid, background, 0,
- &_lvconvert_mirror_fns, "Converted");
+ return r;
}
static int _insert_lvconvert_layer(struct cmd_context *cmd,
@@ -1033,10 +887,10 @@ static int _failed_mirrors_count(struct logical_volume *lv)
ret += _failed_mirrors_count(seg_lv(lvseg, s));
else if (seg_lv(lvseg, s)->status & PARTIAL_LV)
++ ret;
- else if (seg_type(lvseg, s) == AREA_PV &&
- is_missing_pv(seg_pv(lvseg, s)))
- ++ret;
}
+ else if (seg_type(lvseg, s) == AREA_PV &&
+ is_missing_pv(seg_pv(lvseg, s)))
+ ++ret;
}
}
@@ -1238,7 +1092,8 @@ static int _lv_update_log_type(struct cmd_context *cmd,
if (old_log_count < log_count) {
region_size = adjusted_mirror_region_size(lv->vg->extent_size,
lv->le_count,
- region_size, 0);
+ region_size, 0,
+ vg_is_clustered(lv->vg));
if (!add_mirror_log(cmd, original_lv, log_count,
region_size, operable_pvs, alloc))
@@ -1323,6 +1178,12 @@ static int _lvconvert_mirrors_parse_params(struct cmd_context *cmd,
*old_mimage_count = lv_mirror_count(lv);
*old_log_count = _get_log_count(lv);
+ if (is_lockd_type(lv->vg->lock_type) && arg_count(cmd, splitmirrors_ARG)) {
+ /* FIXME: we need to create a lock for the new LV. */
+ log_error("Unable to split mirrors in VG with lock_type %s", lv->vg->lock_type);
+ return 0;
+ }
+
/*
* Collapsing a stack of mirrors:
*
@@ -1455,7 +1316,8 @@ static int _lvconvert_mirrors_aux(struct cmd_context *cmd,
region_size = adjusted_mirror_region_size(lv->vg->extent_size,
lv->le_count,
- lp->region_size, 0);
+ lp->region_size, 0,
+ vg_is_clustered(lv->vg));
if (!operable_pvs)
operable_pvs = lp->pvh;
@@ -1834,58 +1696,6 @@ static int _lvconvert_mirrors(struct cmd_context *cmd,
return 1;
}
-static int _is_valid_raid_conversion(const struct segment_type *from_segtype,
- const struct segment_type *to_segtype)
-{
- if (from_segtype == to_segtype)
- return 1;
-
-PFLA("from_segtype=%s to_segtype=%s\n", from_segtype->name, to_segtype->name);
- /* From striped to mirror or vice-versa */
- if (segtype_is_striped(from_segtype) &&
- segtype_is_mirror(to_segtype))
- return 1;
-
- if (segtype_is_mirror(from_segtype) &&
- segtype_is_striped(to_segtype))
- return 1;
-
- /* From striped to raid0 or vice-versa */
- if (segtype_is_striped(from_segtype) &&
- segtype_is_raid(to_segtype))
- return 1;
-
- if (segtype_is_raid(from_segtype) &&
- (segtype_is_linear(to_segtype) || segtype_is_striped(to_segtype)))
- return 1;
-
- /* From linear to raid1 or vice-versa */
- if (segtype_is_linear(from_segtype) &&
- segtype_is_raid1(to_segtype))
- return 1;
-
- if (segtype_is_raid1(from_segtype) &&
- segtype_is_linear(to_segtype))
- return 1;
-
- /* From mirror to raid1 */
- if (segtype_is_mirror(from_segtype) &&
- segtype_is_raid1(to_segtype))
- return 1;
-
- /* From mirror to raid1 */
- if (segtype_is_raid1(from_segtype) &&
- segtype_is_mirror(to_segtype))
- return 1;
-
- /* From raid to raid */
- if (segtype_is_raid(from_segtype) &&
- segtype_is_raid(to_segtype))
- return 1;
-
- return 0;
-}
-
static void _lvconvert_raid_repair_ask(struct cmd_context *cmd,
struct lvconvert_params *lp,
int *replace_dev)
@@ -1915,38 +1725,29 @@ static void _lvconvert_raid_repair_ask(struct cmd_context *cmd,
static int _lvconvert_raid(struct logical_volume *lv, struct lvconvert_params *lp)
{
- int replace = 0, image_count = 0;
+ int data_copies, image_count = 0, replace = 0;
struct dm_list *failed_pvs;
struct cmd_context *cmd = lv->vg->cmd;
struct lv_segment *seg = first_seg(lv);
dm_percent_t sync_percent;
- if (!arg_count(cmd, type_ARG))
- lp->segtype = seg->segtype;
-PFLA("stripes_ARG=%u stripes_long_ARG=%u", arg_count(lv->vg->cmd, stripes_ARG), arg_count(lv->vg->cmd, stripes_long_ARG));
-
/* -mN can change image count for mirror/raid1 and linear (converting it to mirror/raid1) */
/* -m0 can change raid0 with one stripe and raid4/5 with 2 to linear */
if (arg_count(cmd, mirrors_ARG) &&
!seg_is_linear(seg) &&
+ !seg_is_raid(seg) &&
!seg_is_mirrored(seg) &&
- !(seg_is_any_raid0(seg) && seg->area_count == 1) &&
- !(seg_is_raid4(seg) && seg->area_count == 2) &&
- !(seg_is_any_raid5(seg) && seg->area_count == 2)) {
+ !seg_is_thin_volume(seg) &&
+ !seg_is_striped(seg)) {
log_error("'--mirrors/-m' is not compatible with %s",
lvseg_name(seg));
return 0;
}
+#if 0
if (!_lvconvert_validate_thin(lv, lp))
return_0;
-
- if (!_is_valid_raid_conversion(seg->segtype, lp->segtype)) {
- log_error("Unable to convert %s/%s from %s to %s",
- lv->vg->name, lv->name,
- lvseg_name(seg), lp->segtype->name);
- return 0;
- }
+#endif
/* Change number of RAID1 images */
if (arg_count(cmd, mirrors_ARG) || arg_count(cmd, splitmirrors_ARG)) {
@@ -1974,40 +1775,97 @@ PFLA("image_count=%u\n", image_count);
return lv_raid_merge(lv);
if (arg_count(cmd, trackchanges_ARG))
- return lv_raid_split_and_track(lv, lp->pvh);
+ return lv_raid_split_and_track(lv, lp->yes, lp->lv_split_name, lp->pvh);
if (arg_count(cmd, splitmirrors_ARG))
- return lv_raid_split(lv, lp->lv_split_name,
- image_count, lp->pvh);
+ return lv_raid_split(lv, lp->yes, lp->lv_split_name, image_count, lp->pvh);
-PFLA("lp->segtype=%s\n", lp->segtype->name);
- if ((seg_is_linear(seg) || seg_is_striped(seg) || seg_is_mirrored(seg) || lv_is_raid(lv)) &&
+ if ((seg_is_linear(seg) || seg_is_striped(seg) || seg_is_thin(seg) || seg_is_mirror(seg) || seg_is_raid(seg)) &&
(arg_count(cmd, type_ARG) ||
- image_count ||
- arg_count(cmd, stripes_ARG) ||
- arg_count(cmd, stripes_long_ARG) ||
- arg_count(cmd, stripesize_ARG))) {
- unsigned stripes = (arg_count(cmd, stripes_ARG) || arg_count(cmd, stripes_long_ARG)) ? lp->stripes : 0;
+ arg_is_set(cmd, mirrors_ARG) ||
+ arg_is_set(cmd, regionsize_ARG) ||
+ arg_is_set(cmd, stripes_long_ARG) ||
+ arg_is_set(cmd, stripesize_ARG) ||
+ arg_is_set(cmd, duplicate_ARG) ||
+ arg_is_set(cmd, unduplicate_ARG))) {
+ unsigned stripes = 0;
unsigned stripe_size = arg_count(cmd, stripesize_ARG) ? lp->stripe_size : 0;
-PFLA("stripes=%u stripe_size=%u\n", stripes, stripe_size);
- if (seg_is_striped(seg))
- seg->region_size = lp->region_size;
+ if (arg_is_set(cmd, duplicate_ARG) &&
+ arg_is_set(cmd, unduplicate_ARG)) {
+ log_error("--duplicate and --unduplicate are mutually exclusive");
+ return 0;
+ }
+
+ /* Check for raid0 support if requested */
+ if ((segtype_is_any_raid0(seg->segtype) || segtype_is_any_raid0(lp->segtype)) &&
+ !(lp->target_attr & RAID_FEATURE_RAID0)) {
+ log_error("RAID module does not support RAID0.");
+ return 0;
+ }
/* Check for reshaping support if requested */
- if (((seg->segtype != lp->segtype && !strncmp(seg->segtype->name, lp->segtype->name, 5)) ||
- (stripes && stripes != seg->area_count - seg->segtype->parity_devs) ||
+ if (seg->segtype == lp->segtype &&
+ ((stripes && stripes != seg->area_count - seg->segtype->parity_devs) ||
(stripe_size && stripe_size != seg->stripe_size)) &&
!(lp->target_attr & RAID_FEATURE_RESHAPING)) {
log_error("RAID module does not support reshaping.");
return 0;
}
- return lv_raid_convert(lv, lp->segtype, lp->yes, lp->force, image_count, stripes, stripe_size, lp->pvh);
+ if (arg_count(cmd, stripes_long_ARG)) {
+ unsigned stripe_count = seg->area_count - seg->segtype->parity_devs;
+
+ stripes = lp->stripes;
+
+ switch (arg_sign_value(cmd, stripes_long_ARG, SIGN_NONE)) {
+ case SIGN_PLUS:
+ stripes += stripe_count;
+ break;
+ case SIGN_MINUS:
+ stripes -= stripe_count - stripes;
+ break;
+ case SIGN_NONE:
+ break;
+ }
+ }
+
+ /* Special case "linear", which is no individual segtype */
+ if (arg_count(cmd, type_ARG) &&
+ !strcmp(arg_str_value(cmd, type_ARG, NULL), "linear")) {
+ if (lp->mirrors || stripes > 1 || image_count > 1) {
+ log_error("--mirrors/--stripes invalid with linear");
+ return 0;
+ }
+
+ data_copies = stripes = 1;
+ stripe_size = 0;
+
+ } else
+ data_copies = arg_is_set(cmd, mirrors_ARG) ? lp->mirrors + 1 : -1;
+
+ if (segtype_is_thin(lp->segtype) && arg_count(cmd, name_ARG))
+ lp->pool_data_name = arg_str_value(cmd, name_ARG, NULL);
+
+PFLA("lp->region_size=%u lp->segtype=%s lp->pool_data_name=%s", lp->region_size, lp->segtype ? lp->segtype->name : "", lp->pool_data_name ?: "");
+PFLA("lp->region_size=%u lp->segtype=%s", lp->region_size, lp->segtype ? lp->segtype->name : "");
+PFLA("lp->pool_data_name=%s lp->lv_split_name=%s lp->lv_name=%s", lp->pool_data_name, lp->lv_split_name, lp->lv_name);
+ return lv_raid_convert(lv, (struct lv_raid_convert_params)
+ { .segtype = arg_count(cmd, type_ARG) ? (struct segment_type *) lp->segtype : NULL,
+ .yes = lp->yes,
+ .force = lp->force,
+ .duplicate = arg_is_set(cmd, duplicate_ARG),
+ .unduplicate = arg_is_set(cmd, unduplicate_ARG),
+ .data_copies = data_copies,
+ .region_size = arg_count(cmd, regionsize_ARG) ? lp->region_size : 0,
+ .stripes = stripes,
+ .stripe_size = stripe_size,
+ .lv_name = lp->pool_data_name ?: lp->lv_split_name,
+ .allocate_pvs= lp->pvh } );
}
if (arg_count(cmd, replace_ARG))
- return lv_raid_replace(lv, lp->replace_pvh, lp->pvh);
+ return lv_raid_replace(lv, lp->yes, lp->replace_pvh, lp->pvh);
if (arg_count(cmd, repair_ARG)) {
if (!lv_is_active_exclusive_locally(lv_lock_holder(lv))) {
@@ -2019,7 +1877,7 @@ PFLA("stripes=%u stripe_size=%u\n", stripes, stripe_size);
}
if (!seg_is_striped(seg) &&
- !seg_is_raid0(seg) &&
+ !seg_is_any_raid0(seg) &&
!lv_raid_percent(lv, &sync_percent)) {
log_error("Unable to determine sync status of %s/%s.",
lv->vg->name, lv->name);
@@ -2044,45 +1902,12 @@ PFLA("replace=%d", replace);
PFLA("replace=%d", replace);
if (replace) {
-
-
-#if 0
-{
- int r;
- struct pv_list *pvl;
- struct volume_group *vg;
-
-#if 0
- dm_list_iterate_items(pvl, &lv->vg->pvs);
- if (pvl->pv && pvl->pv->dev)
- lvmetad_pv_gone_by_dev(pvl->pv->dev, NULL);
- // lvmetad_pvscan_single(cmd, pvl->pv->dev, NULL);
-#endif
-#if 1
- r = lvmetad_pvscan_all_devs(cmd, NULL);
- log_warn("lvmetad_pvscan_all_devs returnd %d", r);
-#if 0
- vg = vg_read(cmd, lv->vg->name, NULL, 0);
- log_warn("vg_read returnd %p lv->vg==vg=%d", vg, lv->vg == vg);
- if (!vg)
- return 0;
-
- lv->vg = vg;
-#endif
-#else
- r = pvscan(cmd, 0, NULL);
- log_warn("pvscan returnd %d", r);
-#endif
-}
-#endif
-
-
if (!(failed_pvs = _failed_pv_list(lv->vg)))
return_0;
PFLA("dm_list_size(failed_pvs)=%u", dm_list_size(failed_pvs));
- if (!lv_raid_replace(lv, failed_pvs, lp->pvh)) {
+ if (!lv_raid_replace(lv, lp->yes, failed_pvs, lp->pvh)) {
log_error("Failed to replace faulty devices in"
" %s/%s.", lv->vg->name, lv->name);
return 0;
@@ -2139,6 +1964,12 @@ static int _lvconvert_splitsnapshot(struct cmd_context *cmd, struct logical_volu
return 0;
}
+ if (is_lockd_type(vg->lock_type)) {
+ /* FIXME: we need to create a lock for the new LV. */
+ log_error("Unable to split snapshots in VG with lock_type %s", vg->lock_type);
+ return 0;
+ }
+
if (!vg_check_status(vg, LVM_WRITE))
return_0;
@@ -2274,7 +2105,7 @@ static int _lvconvert_uncache(struct cmd_context *cmd,
return 0;
}
- if (!lv_remove_single(cmd, first_seg(lv)->pool_lv, lp->force, 0))
+ if (!lv_remove_single(cmd, first_seg(lv)->pool_lv, (force_t) lp->force, 0))
return_0;
log_print_unless_silent("Logical volume %s is not cached.", display_lvname(lv));
@@ -2557,7 +2388,7 @@ static int _lvconvert_merge_thin_snapshot(struct cmd_context *cmd,
* Both thin snapshot and origin are inactive,
* replace the origin LV with its snapshot LV.
*/
- if (!_finish_thin_merge(cmd, origin, lv))
+ if (!thin_merge_finish(cmd, origin, lv))
goto_out;
if (origin_is_active && !activate_lv(cmd, lv)) {
@@ -2600,7 +2431,7 @@ static int _lvconvert_pool_repair(struct cmd_context *cmd,
int ret = 0, status;
int args = 0;
const char *argv[19]; /* Max supported 10 args */
- char *split, *dm_name, *trans_id_str;
+ char *dm_name, *trans_id_str;
char meta_path[PATH_MAX];
char pms_path[PATH_MAX];
uint64_t trans_id;
@@ -2640,22 +2471,18 @@ static int _lvconvert_pool_repair(struct cmd_context *cmd,
return 0;
}
- if ((cn = find_config_tree_node(cmd, global_thin_repair_options_CFG, NULL))) {
- for (cv = cn->v; cv && args < 16; cv = cv->next) {
- if (cv->type != DM_CFG_STRING) {
- log_error("Invalid string in config file: "
- "global/thin_repair_options");
- return 0;
- }
- argv[++args] = cv->v.str;
- }
- } else {
- /* Use default options (no support for options with spaces) */
- if (!(split = dm_pool_strdup(cmd->mem, DEFAULT_THIN_REPAIR_OPTIONS))) {
- log_error("Failed to duplicate thin repair string.");
+ if (!(cn = find_config_tree_array(cmd, global_thin_repair_options_CFG, NULL))) {
+ log_error(INTERNAL_ERROR "Unable to find configuration for global/thin_repair_options");
+ return 0;
+ }
+
+ for (cv = cn->v; cv && args < 16; cv = cv->next) {
+ if (cv->type != DM_CFG_STRING) {
+ log_error("Invalid string in config file: "
+ "global/thin_repair_options");
return 0;
}
- args = dm_split_words(split, 16, 0, (char**) argv + 1);
+ argv[++args] = cv->v.str;
}
if (args == 10) {
@@ -2706,14 +2533,14 @@ static int _lvconvert_pool_repair(struct cmd_context *cmd,
* Scan only the 1st. line for transation id.
* Watch out, if the thin_dump format changes
*/
- if ((fgets(meta_path, sizeof(meta_path), f) > 0) &&
+ if (fgets(meta_path, sizeof(meta_path), f) &&
(trans_id_str = strstr(meta_path, "transaction=\"")) &&
- (sscanf(trans_id_str + 13, "%" PRIu64, &trans_id) == 1) &&
+ (sscanf(trans_id_str + 13, FMTu64, &trans_id) == 1) &&
(trans_id != first_seg(pool_lv)->transaction_id) &&
((trans_id - 1) != first_seg(pool_lv)->transaction_id))
- log_error("Transaction id %" PRIu64 " from pool \"%s/%s\" "
+ log_error("Transaction id " FMTu64 " from pool \"%s/%s\" "
"does not match repaired transaction id "
- "%" PRIu64 " from %s.",
+ FMTu64 " from %s.",
first_seg(pool_lv)->transaction_id,
pool_lv->vg->name, pool_lv->name, trans_id,
pms_path);
@@ -2762,7 +2589,7 @@ deactivate_pmslv:
if (!detach_pool_metadata_lv(first_seg(pool_lv), &mlv))
return_0;
- if (!_swap_lv_identifiers(cmd, mlv, pmslv))
+ if (!swap_lv_identifiers(cmd, mlv, pmslv))
return_0;
/* Used _pmspare will become _tmeta */
@@ -2827,6 +2654,22 @@ static int _lvconvert_thin(struct cmd_context *cmd,
return 0;
}
+ if (is_lockd_type(lv->vg->lock_type)) {
+ /*
+ * FIXME: external origins don't work in lockd VGs.
+ * Prior to the lvconvert, there's a lock associated with
+ * the uuid of the external origin LV. After the convert,
+ * that uuid belongs to the new thin LV, and a new LV with
+ * a new uuid exists as the non-thin, readonly external LV.
+ * We'd need to remove the lock for the previous uuid
+ * (the new thin LV will have no lock), and create a new
+ * lock for the new LV uuid used by the external LV.
+ */
+ log_error("Can't use lock_type %s LV as external origin.",
+ lv->vg->lock_type);
+ return 0;
+ }
+
dm_list_init(&lvc.tags);
if (!pool_supports_external_origin(first_seg(pool_lv), lv))
@@ -2854,7 +2697,7 @@ static int _lvconvert_thin(struct cmd_context *cmd,
* which could be easily removed by the user after i.e. power-off
*/
- if (!_swap_lv_identifiers(cmd, torigin_lv, lv)) {
+ if (!swap_lv_identifiers(cmd, torigin_lv, lv)) {
stack;
goto revert_new_lv;
}
@@ -2880,7 +2723,7 @@ static int _lvconvert_thin(struct cmd_context *cmd,
return 1;
deactivate_and_revert_new_lv:
- if (!_swap_lv_identifiers(cmd, torigin_lv, lv))
+ if (!swap_lv_identifiers(cmd, torigin_lv, lv))
stack;
if (!deactivate_lv(cmd, torigin_lv)) {
@@ -2944,6 +2787,12 @@ static int _lvconvert_pool(struct cmd_context *cmd,
struct logical_volume *data_lv;
struct logical_volume *metadata_lv = NULL;
struct logical_volume *pool_metadata_lv;
+ char *lockd_data_args = NULL;
+ char *lockd_meta_args = NULL;
+ char *lockd_data_name = NULL;
+ char *lockd_meta_name = NULL;
+ struct id lockd_data_id;
+ struct id lockd_meta_id;
char metadata_name[NAME_LEN], data_name[NAME_LEN];
int activate_pool;
@@ -2960,6 +2809,13 @@ static int _lvconvert_pool(struct cmd_context *cmd,
}
}
+ /* An existing LV needs to have its lock freed once it becomes a data LV. */
+ if (is_lockd_type(vg->lock_type) && !lv_is_pool(pool_lv) && pool_lv->lock_args) {
+ lockd_data_args = dm_pool_strdup(cmd->mem, pool_lv->lock_args);
+ lockd_data_name = dm_pool_strdup(cmd->mem, pool_lv->name);
+ memcpy(&lockd_data_id, &pool_lv->lvid.id[1], sizeof(struct id));
+ }
+
if (!lv_is_visible(pool_lv)) {
log_error("Can't convert internal LV %s.", display_lvname(pool_lv));
return 0;
@@ -3015,6 +2871,13 @@ static int _lvconvert_pool(struct cmd_context *cmd,
lp->pool_metadata_extents = lp->pool_metadata_lv->le_count;
metadata_lv = lp->pool_metadata_lv;
+ /* An existing LV needs to have its lock freed once it becomes a meta LV. */
+ if (is_lockd_type(vg->lock_type) && metadata_lv->lock_args) {
+ lockd_meta_args = dm_pool_strdup(cmd->mem, metadata_lv->lock_args);
+ lockd_meta_name = dm_pool_strdup(cmd->mem, metadata_lv->name);
+ memcpy(&lockd_meta_id, &metadata_lv->lvid.id[1], sizeof(struct id));
+ }
+
if (metadata_lv == pool_lv) {
log_error("Can't use same LV for pool data and metadata LV %s.",
display_lvname(metadata_lv));
@@ -3094,6 +2957,7 @@ static int _lvconvert_pool(struct cmd_context *cmd,
return 0;
}
+ lp->passed_args |= PASS_ARG_CHUNK_SIZE | PASS_ARG_DISCARDS | PASS_ARG_ZERO;
seg = first_seg(pool_lv);
/* Normally do NOT change chunk size when swapping */
@@ -3265,7 +3129,7 @@ static int _lvconvert_pool(struct cmd_context *cmd,
/* Allocate a new pool segment */
if (!(seg = alloc_lv_segment(lp->segtype, pool_lv, 0, data_lv->le_count, 0,
pool_lv->status, 0, NULL, 1,
- data_lv->le_count, 0, 0, 0, NULL)))
+ data_lv->le_count, 1, 0, 0, 0, NULL)))
return_0;
/* Add the new segment to the layer LV */
@@ -3276,6 +3140,27 @@ static int _lvconvert_pool(struct cmd_context *cmd,
if (!attach_pool_data_lv(seg, data_lv))
return_0;
+ /*
+ * Create a new lock for a thin pool LV. A cache pool LV has no lock.
+ * Locks are removed from existing LVs that are being converted to
+ * data and meta LVs (they are unlocked and deleted below.)
+ */
+ if (is_lockd_type(vg->lock_type)) {
+ if (segtype_is_cache_pool(lp->segtype)) {
+ data_lv->lock_args = NULL;
+ metadata_lv->lock_args = NULL;
+ } else {
+ data_lv->lock_args = NULL;
+ metadata_lv->lock_args = NULL;
+
+ if (!strcmp(vg->lock_type, "sanlock"))
+ pool_lv->lock_args = "pending";
+ else if (!strcmp(vg->lock_type, "dlm"))
+ pool_lv->lock_args = "dlm";
+ /* The lock_args will be set in vg_write(). */
+ }
+ }
+
/* FIXME: revert renamed LVs in fail path? */
/* FIXME: any common code with metadata/thin_manip.c extend_pool() ? */
@@ -3286,7 +3171,10 @@ mda_write:
seg->chunk_size = lp->chunk_size;
seg->discards = lp->discards;
seg->zero_new_blocks = lp->zero ? 1 : 0;
- seg->feature_flags = lp->feature_flags; /* cache-pool */
+
+ if ((lp->policy_name || lp->policy_settings) &&
+ !cache_set_policy(seg, lp->policy_name, lp->policy_settings))
+ return_0;
/* Rename deactivated metadata LV to have _tmeta suffix */
/* Implicit checks if metadata_lv is visible */
@@ -3309,6 +3197,11 @@ mda_write:
log_warn("WARNING: Pool zeroing and large %s chunk size slows down "
"provisioning.", display_size(cmd, seg->chunk_size));
+ if (activate_pool && !lockd_lv(cmd, pool_lv, "ex", LDLV_PERSISTENT)) {
+ log_error("Failed to lock pool LV %s/%s", vg->name, pool_lv->name);
+ goto out;
+ }
+
if (activate_pool &&
!activate_lv_excl(cmd, pool_lv)) {
log_error("Failed to activate pool logical volume %s.",
@@ -3333,6 +3226,22 @@ out:
(segtype_is_cache_pool(lp->segtype)) ?
"cache" : "thin");
+ /*
+ * Unlock and free the locks from existing LVs that became pool data
+ * and meta LVs.
+ */
+ if (lockd_data_name) {
+ if (!lockd_lv_name(cmd, vg, lockd_data_name, &lockd_data_id, lockd_data_args, "un", LDLV_PERSISTENT))
+ log_error("Failed to unlock pool data LV %s/%s", vg->name, lockd_data_name);
+ lockd_free_lv(cmd, vg, lockd_data_name, &lockd_data_id, lockd_data_args);
+ }
+
+ if (lockd_meta_name) {
+ if (!lockd_lv_name(cmd, vg, lockd_meta_name, &lockd_meta_id, lockd_meta_args, "un", LDLV_PERSISTENT))
+ log_error("Failed to unlock pool metadata LV %s/%s", vg->name, lockd_meta_name);
+ lockd_free_lv(cmd, vg, lockd_meta_name, &lockd_meta_id, lockd_meta_args);
+ }
+
return r;
#if 0
revert_new_lv:
@@ -3372,6 +3281,12 @@ static int _lvconvert_cache(struct cmd_context *cmd,
if (!(cache_lv = lv_cache_create(pool_lv, origin_lv)))
return_0;
+ if (!cache_set_mode(first_seg(cache_lv), lp->cache_mode))
+ return_0;
+
+ if (!cache_set_policy(first_seg(cache_lv), lp->policy_name, lp->policy_settings))
+ return_0;
+
if (!lv_update_and_reload(cache_lv))
return_0;
@@ -3492,22 +3407,23 @@ static int _lvconvert_single(struct cmd_context *cmd, struct logical_volume *lv,
} else if (lp->snapshot) {
if (!_lvconvert_snapshot(cmd, lv, lp))
return_ECMD_FAILED;
- } else if (segtype_is_pool(lp->segtype) || lp->thin || lp->cache) {
+ } else if (!arg_is_set(cmd, duplicate_ARG) &&
+ (segtype_is_pool(lp->segtype) || lp->thin || lp->cache)) {
if (!_lvconvert_pool(cmd, lv, lp))
return_ECMD_FAILED;
if ((lp->thin && !_lvconvert_thin(cmd, lv, lp)) ||
(lp->cache && !_lvconvert_cache(cmd, lv, lp)))
return_ECMD_FAILED;
- } else if (segtype_is_raid(lp->segtype) ||
- lv_is_raid(lv) || lp->merge_mirror) {
+ } else if (!segtype_is_raid(lp->segtype) && !seg_is_raid(first_seg(lv)) &&
+ (arg_count(cmd, mirrors_ARG) ||
+ arg_count(cmd, splitmirrors_ARG) ||
+ lv_is_mirrored(lv))) {
if (!archive(lv->vg))
return_ECMD_FAILED;
- if (!_lvconvert_raid(lv, lp)) {
- log_error("Failed to convert LV %s", lv->name);
+ if (!_lvconvert_mirrors(cmd, lv, lp))
return_ECMD_FAILED;
- }
if (!(failed_pvs = _failed_pv_list(lv->vg)))
return_ECMD_FAILED;
@@ -3515,14 +3431,19 @@ static int _lvconvert_single(struct cmd_context *cmd, struct logical_volume *lv,
/* If repairing and using policies, remove missing PVs from VG */
if (arg_count(cmd, repair_ARG) && arg_count(cmd, use_policies_ARG))
_remove_missing_empty_pv(lv->vg, failed_pvs);
- } else if (arg_count(cmd, mirrors_ARG) ||
- arg_count(cmd, splitmirrors_ARG) ||
- lv_is_mirrored(lv)) {
+ } else if (arg_is_set(cmd, duplicate_ARG) ||
+ arg_is_set(cmd, unduplicate_ARG) ||
+ segtype_is_raid(lp->segtype) ||
+segtype_is_striped(lp->segtype) ||
+(segtype_is_mirror(lp->segtype) && !arg_is_set(cmd, mirrorlog_ARG)) ||
+ lv_is_raid(lv) || lp->merge_mirror) {
if (!archive(lv->vg))
return_ECMD_FAILED;
- if (!_lvconvert_mirrors(cmd, lv, lp))
+ if (!_lvconvert_raid(lv, lp)) {
+ log_error("Failed to convert LV %s", lv->name);
return_ECMD_FAILED;
+ }
if (!(failed_pvs = _failed_pv_list(lv->vg)))
return_ECMD_FAILED;
@@ -3531,60 +3452,63 @@ static int _lvconvert_single(struct cmd_context *cmd, struct logical_volume *lv,
if (arg_count(cmd, repair_ARG) && arg_count(cmd, use_policies_ARG))
_remove_missing_empty_pv(lv->vg, failed_pvs);
} else
- log_error("Nothing to do");
+ log_error("Unsupported conversion");
return ECMD_PROCESSED;
}
-/*
- * FIXME move to toollib along with the rest of the drop/reacquire
- * VG locking that is used by _lvconvert_merge_single()
- */
-static struct logical_volume *get_vg_lock_and_logical_volume(struct cmd_context *cmd,
- const char *vg_name,
- const char *lv_name)
+static struct convert_poll_id_list* _convert_poll_id_list_create(struct cmd_context *cmd,
+ const struct logical_volume *lv)
{
- /*
- * Returns NULL if the requested LV doesn't exist;
- * otherwise the caller must release_vg(lv->vg)
- * - it is also up to the caller to unlock_vg() as needed
- */
- struct volume_group *vg;
- struct logical_volume* lv = NULL;
+ struct convert_poll_id_list *idl = (struct convert_poll_id_list *) dm_pool_alloc(cmd->mem, sizeof(struct convert_poll_id_list));
- vg = _get_lvconvert_vg(cmd, vg_name, NULL);
- if (vg_read_error(vg)) {
- release_vg(vg);
- return_NULL;
+ if (!idl) {
+ log_error("Convert poll ID list allocation failed.");
+ return NULL;
}
- if (!(lv = _get_lvconvert_lv(cmd, vg, lv_name, NULL, 0))) {
- log_error("Can't find LV %s in VG %s", lv_name, vg_name);
- unlock_and_release_vg(cmd, vg, vg_name);
- return NULL;
+ if (!(idl->id = _create_id(cmd, lv->vg->name, lv->name, lv->lvid.s))) {
+ dm_pool_free(cmd->mem, idl);
+ return_NULL;
}
- return lv;
+ idl->is_merging_origin = lv_is_merging_origin(lv);
+ idl->is_merging_origin_thin = idl->is_merging_origin && seg_is_thin_volume(find_snapshot(lv));
+
+ return idl;
}
-static int _poll_logical_volume(struct cmd_context *cmd, struct logical_volume *lv,
- int wait_completion)
+static int _convert_and_add_to_poll_list(struct cmd_context *cmd,
+ struct lvconvert_params *lp,
+ struct logical_volume *lv)
{
+ int ret;
struct lvinfo info;
+ struct convert_poll_id_list *idl;
- if (!lv_info(cmd, lv, 0, &info, 0, 0) || !info.exists) {
- log_print_unless_silent("Conversion starts after activation.");
- return ECMD_PROCESSED;
+ /* _lvconvert_single() call may alter the reference in lp->lv_to_poll */
+ if ((ret = _lvconvert_single(cmd, lv, lp)) != ECMD_PROCESSED)
+ stack;
+ else if (lp->need_polling) {
+ if (!lv_info(cmd, lp->lv_to_poll, 0, &info, 0, 0) || !info.exists)
+ log_print_unless_silent("Conversion starts after activation.");
+ else {
+ if (!(idl = _convert_poll_id_list_create(cmd, lp->lv_to_poll)))
+ return_ECMD_FAILED;
+ dm_list_add(&lp->idls, &idl->list);
+ }
}
- return lvconvert_poll(cmd, lv, wait_completion ? 0 : 1U);
+ return ret;
}
static int lvconvert_single(struct cmd_context *cmd, struct lvconvert_params *lp)
{
struct logical_volume *lv;
+ struct volume_group *vg;
int ret = ECMD_FAILED;
int saved_ignore_suspended_devices = ignore_suspended_devices();
+ uint32_t lockd_state = 0;
if (arg_count(cmd, repair_ARG)) {
init_ignore_suspended_devices(1);
@@ -3597,105 +3521,116 @@ static int lvconvert_single(struct cmd_context *cmd, struct lvconvert_params *lp
#endif
}
- if (!(lv = get_vg_lock_and_logical_volume(cmd, lp->vg_name, lp->lv_name)))
+ /* Unlock on error paths not required, it's automatic when command exits. */
+ if (!lockd_vg(cmd, lp->vg_name, "ex", 0, &lockd_state))
+ goto_out;
+
+ vg = vg_read(cmd, lp->vg_name, NULL, READ_FOR_UPDATE, lockd_state);
+ if (vg_read_error(vg)) {
+ release_vg(vg);
goto_out;
+ }
+
+ if (!(lv = find_lv(vg, lp->lv_name))) {
+ log_error("Can't find LV %s in VG %s", lp->lv_name, lp->vg_name);
+ unlock_and_release_vg(cmd, vg, lp->vg_name);
+ goto_out;
+ }
+
+ /*
+ * Request a transient lock. If the LV is active, it has a persistent
+ * lock already, and this request does nothing. If the LV is not
+ * active, this acquires a transient lock that will be released when
+ * the command exits.
+ */
+ if (!lockd_lv(cmd, lv, "ex", 0))
+ goto_bad;
/*
* lp->pvh holds the list of PVs available for allocation or removal
*/
if (lp->pv_count) {
- if (!(lp->pvh = create_pv_list(cmd->mem, lv->vg, lp->pv_count,
- lp->pvs, 0)))
+ if (!(lp->pvh = create_pv_list(cmd->mem, vg, lp->pv_count, lp->pvs, 0)))
goto_bad;
} else
- lp->pvh = &lv->vg->pvs;
+ lp->pvh = &vg->pvs;
if (lp->replace_pv_count &&
- !(lp->replace_pvh = create_pv_list(cmd->mem, lv->vg,
+ !(lp->replace_pvh = create_pv_list(cmd->mem, vg,
lp->replace_pv_count,
lp->replace_pvs, 0)))
goto_bad;
lp->lv_to_poll = lv;
- ret = _lvconvert_single(cmd, lv, lp);
+ ret = _convert_and_add_to_poll_list(cmd, lp, lv);
+
bad:
unlock_vg(cmd, lp->vg_name);
- if (ret == ECMD_PROCESSED && lp->need_polling)
- ret = _poll_logical_volume(cmd, lp->lv_to_poll,
- lp->wait_completion);
+ /* Unlock here so it's not held during polling. */
+ if (!lockd_vg(cmd, lp->vg_name, "un", 0, &lockd_state))
+ stack;
- release_vg(lv->vg);
+ release_vg(vg);
out:
init_ignore_suspended_devices(saved_ignore_suspended_devices);
return ret;
}
static int _lvconvert_merge_single(struct cmd_context *cmd, struct logical_volume *lv,
- void *handle)
+ struct processing_handle *handle)
{
- struct lvconvert_params *lp = handle;
- const char *vg_name;
- struct logical_volume *refreshed_lv;
- int ret;
+ struct lvconvert_params *lp = (struct lvconvert_params *) handle->custom_handle;
- /*
- * FIXME can't trust lv's VG to be current given that caller
- * is process_each_lv() -- _poll_logical_volume() may have
- * already updated the VG's metadata in an earlier iteration.
- * - preemptively drop the VG lock, as is needed for
- * _poll_logical_volume(), refresh LV (and VG in the process).
- */
- vg_name = lv->vg->name;
- unlock_vg(cmd, vg_name);
- refreshed_lv = get_vg_lock_and_logical_volume(cmd, vg_name, lv->name);
- if (!refreshed_lv) {
- log_error("ABORTING: Can't reread LV %s/%s", vg_name, lv->name);
- return ECMD_FAILED;
- }
-
- lp->lv_to_poll = refreshed_lv;
- if ((ret = _lvconvert_single(cmd, refreshed_lv, lp)) != ECMD_PROCESSED)
- stack;
-
- if (ret == ECMD_PROCESSED && lp->need_polling) {
- /*
- * Must drop VG lock, because lvconvert_poll() needs it,
- * then reacquire it after polling completes
- */
- unlock_vg(cmd, vg_name);
-
- if (!(ret = _poll_logical_volume(cmd, lp->lv_to_poll,
- lp->wait_completion)) != ECMD_PROCESSED)
- stack;
-
- /* use LCK_VG_WRITE to match lvconvert()'s READ_FOR_UPDATE */
- if (!lock_vol(cmd, vg_name, LCK_VG_WRITE, NULL)) {
- log_error("ABORTING: Can't relock VG for %s "
- "after polling finished", vg_name);
- ret = ECMD_FAILED;
- }
- }
-
- release_vg(refreshed_lv->vg);
+ lp->lv_to_poll = lv;
- return ret;
+ return _convert_and_add_to_poll_list(cmd, lp, lv);
}
int lvconvert(struct cmd_context * cmd, int argc, char **argv)
{
+ int poll_ret, ret;
+ struct convert_poll_id_list *idl;
struct lvconvert_params lp = {
.target_attr = ~0,
};
+ struct processing_handle *handle = NULL;
+
+ dm_list_init(&lp.idls);
+
+ if (!(handle = init_processing_handle(cmd))) {
+ log_error("Failed to initialize processing handle.");
+ ret = ECMD_FAILED;
+ goto out;
+ }
+
+ handle->custom_handle = &lp;
if (!_read_params(cmd, argc, argv, &lp)) {
- stack;
- return EINVALID_CMD_LINE;
+ ret = EINVALID_CMD_LINE;
+ goto_out;
}
if (lp.merge)
- return process_each_lv(cmd, argc, argv, READ_FOR_UPDATE, &lp,
- &_lvconvert_merge_single);
+ ret = process_each_lv(cmd, argc, argv, READ_FOR_UPDATE, handle,
+ &_lvconvert_merge_single);
+ else
+ ret = lvconvert_single(cmd, &lp);
- return lvconvert_single(cmd, &lp);
+ dm_list_iterate_items(idl, &lp.idls) {
+ poll_ret = _lvconvert_poll_by_id(cmd, idl->id,
+ lp.wait_completion ? 0 : 1U,
+ idl->is_merging_origin,
+ idl->is_merging_origin_thin);
+ if (poll_ret > ret)
+ ret = poll_ret;
+ }
+
+out:
+ if (!dm_list_empty(&lp.idls))
+ dm_pool_free(cmd->mem, dm_list_item(dm_list_first(&lp.idls), struct convert_poll_id_list));
+ if (lp.policy_settings)
+ dm_config_destroy(lp.policy_settings);
+ destroy_processing_handle(cmd, handle);
+ return ret;
}
diff --git a/tools/lvconvert_poll.c b/tools/lvconvert_poll.c
new file mode 100644
index 000000000..e3a370986
--- /dev/null
+++ b/tools/lvconvert_poll.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright (C) 2005-2015 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "tools.h"
+
+#include "lvconvert_poll.h"
+
+int lvconvert_mirror_finish(struct cmd_context *cmd,
+ struct volume_group *vg,
+ struct logical_volume *lv,
+ struct dm_list *lvs_changed __attribute__((unused)))
+{
+ if (!lv_is_converting(lv))
+ return 1;
+
+ if (!collapse_mirrored_lv(lv)) {
+ log_error("Failed to remove temporary sync layer.");
+ return 0;
+ }
+
+ lv->status &= ~CONVERTING;
+
+ if (!lv_update_and_reload(lv))
+ return_0;
+
+ log_print_unless_silent("Logical volume %s converted.", lv->name);
+
+ return 1;
+}
+
+/* Swap lvid and LV names */
+int swap_lv_identifiers(struct cmd_context *cmd,
+ struct logical_volume *a, struct logical_volume *b)
+{
+ union lvid lvid;
+ const char *name;
+
+ lvid = a->lvid;
+ a->lvid = b->lvid;
+ b->lvid = lvid;
+
+ name = a->name;
+ a->name = b->name;
+ if (!lv_rename_update(cmd, b, name, 0))
+ return_0;
+
+ return 1;
+}
+
+static void _move_lv_attributes(struct logical_volume *to, struct logical_volume *from)
+{
+ /* Maybe move this code into thin_merge_finish() */
+ to->status = from->status; // FIXME maybe some masking ?
+ to->alloc = from->alloc;
+ to->profile = from->profile;
+ to->read_ahead = from->read_ahead;
+ to->major = from->major;
+ to->minor = from->minor;
+ to->timestamp = from->timestamp;
+ to->hostname = from->hostname;
+
+ /* Move tags */
+ dm_list_init(&to->tags);
+ dm_list_splice(&to->tags, &from->tags);
+
+ /* Anything else to preserve? */
+}
+
+/* Finalise merging of lv into merge_lv */
+int thin_merge_finish(struct cmd_context *cmd,
+ struct logical_volume *merge_lv,
+ struct logical_volume *lv)
+{
+ if (!swap_lv_identifiers(cmd, merge_lv, lv)) {
+ log_error("Failed to swap %s with merging %s.",
+ lv->name, merge_lv->name);
+ return 0;
+ }
+
+ /* Preserve origins' attributes */
+ _move_lv_attributes(lv, merge_lv);
+
+ /* Removed LV has to be visible */
+ if (!lv_remove_single(cmd, merge_lv, DONT_PROMPT, 1))
+ return_0;
+
+ return 1;
+}
+
+int lvconvert_merge_finish(struct cmd_context *cmd,
+ struct volume_group *vg,
+ struct logical_volume *lv,
+ struct dm_list *lvs_changed __attribute__((unused)))
+{
+ struct lv_segment *snap_seg = find_snapshot(lv);
+
+ if (!lv_is_merging_origin(lv)) {
+ log_error("Logical volume %s has no merging snapshot.", lv->name);
+ return 0;
+ }
+
+ log_print_unless_silent("Merge of snapshot into logical volume %s has finished.", lv->name);
+
+ if (seg_is_thin_volume(snap_seg)) {
+ clear_snapshot_merge(lv);
+
+ if (!thin_merge_finish(cmd, lv, snap_seg->lv))
+ return_0;
+
+ } else if (!lv_remove_single(cmd, snap_seg->cow, DONT_PROMPT, 0)) {
+ log_error("Could not remove snapshot %s merged into %s.",
+ snap_seg->cow->name, lv->name);
+ return 0;
+ }
+
+ return 1;
+}
+
+progress_t poll_merge_progress(struct cmd_context *cmd,
+ struct logical_volume *lv,
+ const char *name __attribute__((unused)),
+ struct daemon_parms *parms)
+{
+ dm_percent_t percent = DM_PERCENT_0;
+
+ if (!lv_is_merging_origin(lv) ||
+ !lv_snapshot_percent(lv, &percent)) {
+ log_error("%s: Failed query for merging percentage. Aborting merge.", lv->name);
+ return PROGRESS_CHECK_FAILED;
+ } else if (percent == DM_PERCENT_INVALID) {
+ log_error("%s: Merging snapshot invalidated. Aborting merge.", lv->name);
+ return PROGRESS_CHECK_FAILED;
+ } else if (percent == LVM_PERCENT_MERGE_FAILED) {
+ log_error("%s: Merge failed. Retry merge or inspect manually.", lv->name);
+ return PROGRESS_CHECK_FAILED;
+ }
+
+ if (parms->progress_display)
+ log_print_unless_silent("%s: %s: %.1f%%", lv->name, parms->progress_title,
+ 100.0 - dm_percent_to_float(percent));
+ else
+ log_verbose("%s: %s: %.1f%%", lv->name, parms->progress_title,
+ 100.0 - dm_percent_to_float(percent));
+
+ if (percent == DM_PERCENT_0)
+ return PROGRESS_FINISHED_ALL;
+
+ return PROGRESS_UNFINISHED;
+}
+
+progress_t poll_thin_merge_progress(struct cmd_context *cmd,
+ struct logical_volume *lv,
+ const char *name __attribute__((unused)),
+ struct daemon_parms *parms)
+{
+ uint32_t device_id;
+
+ if (!lv_thin_device_id(lv, &device_id)) {
+ stack;
+ return PROGRESS_CHECK_FAILED;
+ }
+
+ /*
+ * There is no need to poll more than once,
+ * a thin snapshot merge is immediate.
+ */
+
+ if (device_id != find_snapshot(lv)->device_id) {
+ log_error("LV %s is not merged.", lv->name);
+ return PROGRESS_CHECK_FAILED;
+ }
+
+	return PROGRESS_FINISHED_ALL; /* Merging happened */
+}
diff --git a/tools/lvconvert_poll.h b/tools/lvconvert_poll.h
new file mode 100644
index 000000000..7c97ec870
--- /dev/null
+++ b/tools/lvconvert_poll.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2015 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _LVM_LVCONVERT_H
+#define _LVM_LVCONVERT_H
+
+#include "polldaemon.h"
+
+struct cmd_context;
+struct logical_volume;
+struct volume_group;
+
+int lvconvert_mirror_finish(struct cmd_context *cmd,
+ struct volume_group *vg,
+ struct logical_volume *lv,
+ struct dm_list *lvs_changed __attribute__((unused)));
+
+int swap_lv_identifiers(struct cmd_context *cmd,
+ struct logical_volume *a, struct logical_volume *b);
+
+int thin_merge_finish(struct cmd_context *cmd,
+ struct logical_volume *merge_lv,
+ struct logical_volume *lv);
+
+int lvconvert_merge_finish(struct cmd_context *cmd,
+ struct volume_group *vg,
+ struct logical_volume *lv,
+ struct dm_list *lvs_changed __attribute__((unused)));
+
+progress_t poll_merge_progress(struct cmd_context *cmd,
+ struct logical_volume *lv,
+ const char *name __attribute__((unused)),
+ struct daemon_parms *parms);
+
+progress_t poll_thin_merge_progress(struct cmd_context *cmd,
+ struct logical_volume *lv,
+ const char *name __attribute__((unused)),
+ struct daemon_parms *parms);
+
+#endif /* _LVM_LVCONVERT_H */
diff --git a/tools/lvcreate.c b/tools/lvcreate.c
index b79001765..a779aab51 100644
--- a/tools/lvcreate.c
+++ b/tools/lvcreate.c
@@ -17,15 +17,6 @@
#include <fcntl.h>
-/* HM FIXME: REMOVEME: devel output */
-#ifdef USE_PFL
-#define PFL() printf("%s %u\n", __func__, __LINE__);
-#define PFLA(format, arg...) printf("%s %u " format "\n", __func__, __LINE__, arg);
-#else
-#define PFL()
-#define PFLA(format, arg...)
-#endif
-
struct lvcreate_cmdline_params {
percent_type_t percent;
uint64_t size;
@@ -462,28 +453,52 @@ static int _read_mirror_params(struct cmd_context *cmd,
static int _read_raid_params(struct cmd_context *cmd,
struct lvcreate_params *lp)
{
- if ((lp->stripes < 2) && segtype_is_raid10(lp->segtype)) {
- if (arg_count(cmd, stripes_ARG)) {
- /* User supplied the bad argument */
- log_error("Segment type 'raid10' requires 2 or more stripes.");
- return 0;
+ if (seg_is_any_raid10(lp)) {
+ if (lp->stripes * lp->mirrors < 2) {
+ if (arg_count(cmd, stripes_ARG) || arg_count(cmd, mirrors_ARG)) {
+ /* User supplied the bad argument */
+ log_error("Segment type '%s' requires 3 or more devices.", lp->segtype->name);
+ return 0;
+ }
+
+ /* No stripe argument was given - default to 3 */
+ lp->stripes = 3;
+ log_warn("Defaulting to %u stripes with %s", lp->stripes, lp->segtype->name);
}
- /* No stripe argument was given - default to 2 */
- lp->stripes = 2;
- lp->stripe_size = find_config_tree_int(cmd, metadata_stripesize_CFG, NULL) * 2;
}
+ if (seg_is_raid01(lp)) {
+ if (lp->stripes < 2) {
+ lp->stripes = 2;
+ log_warn("Defaulting to %u stripes with %s", lp->stripes, lp->segtype->name);
+ }
+ if (lp->mirrors < 2) {
+ lp->mirrors = 2;
+ log_warn("Defaulting to %u mirrors with %s", lp->mirrors, lp->segtype->name);
+ }
+ }
+
+ if (!lp->stripe_size && !seg_is_raid1(lp))
+ lp->stripe_size = find_config_tree_int(cmd, metadata_stripesize_CFG, NULL) * 2;
+
/*
* RAID1 does not take a stripe arg
*/
if ((lp->stripes > 1) &&
- (seg_is_mirrored(lp) || segtype_is_raid1(lp->segtype)) &&
- !segtype_is_raid10(lp->segtype)) {
+ ((seg_is_mirrored(lp) && !seg_is_raid01(lp)) || seg_is_raid1(lp)) &&
+ !seg_is_any_raid10(lp)) {
log_error("Stripe argument cannot be used with segment type, %s",
lp->segtype->name);
return 0;
}
+ if (arg_count(cmd, mirrors_ARG) &&
+ !(seg_is_raid1(lp) ||seg_is_raid01(lp) || seg_is_any_raid10(lp))) {
+ log_error("Mirror argument cannot be used with segment type, %s",
+ lp->segtype->name);
+ return 0;
+ }
+
/* Rates are recorded in kiB/sec/disk, not sectors/sec/disk */
lp->min_recovery_rate = arg_uint_value(cmd, minrecoveryrate_ARG, 0) / 2;
lp->max_recovery_rate = arg_uint_value(cmd, maxrecoveryrate_ARG, 0) / 2;
@@ -513,6 +528,9 @@ static int _read_mirror_and_raid_params(struct cmd_context *cmd,
if (segtype_is_raid1(lp->segtype)) {
type = SEG_TYPE_NAME_RAID1;
max_images = DEFAULT_RAID_MAX_IMAGES;
+ } else if (segtype_is_any_raid10(lp->segtype)) {
+ type = lp->segtype->name;
+ max_images = DEFAULT_RAID_MAX_IMAGES;
} else {
type = "mirror";
max_images = DEFAULT_MIRROR_MAX_IMAGES;
@@ -524,15 +542,10 @@ static int _read_mirror_and_raid_params(struct cmd_context *cmd,
return 0;
}
- if (lp->mirrors > 2 &&
- segtype_is_raid10(lp->segtype)) {
- /*
- * FIXME: When RAID10 is no longer limited to
- * 2-way mirror, 'lv_mirror_count()'
- * must also change for RAID10.
- */
- log_error("RAID10 currently supports "
- "only 2-way mirroring (i.e. '-m 1')");
+PFLA("lp->mirrors=%u lp->stripes=%u", lp->mirrors, lp->stripes);
+ if (segtype_is_any_raid10(lp->segtype) &&
+ lp->mirrors > lp->stripes) {
+ log_error("RAID10 mirrors have to be less than stripes (i.e. -mN with N < #stripes)");
return 0;
}
@@ -547,12 +560,20 @@ static int _read_mirror_and_raid_params(struct cmd_context *cmd,
/* Default to 2 mirrored areas if '--type mirror|raid1|raid10' */
lp->mirrors = seg_is_mirrored(lp) ? 2 : 1;
-PFLA("mirrors=%u stripes=%u", lp->mirrors, lp->stripes);
- if (lp->stripes < 2 &&
- (segtype_is_any_raid0(lp->segtype) || segtype_is_raid10(lp->segtype)))
+PFLA("lp->mirrors=%u lp->stripes=%u", lp->mirrors, lp->stripes);
+ if (lp->stripes < 2 && segtype_is_any_raid0(lp->segtype))
if (arg_count(cmd, stripes_ARG)) {
/* User supplied the bad argument */
- log_error("Segment type 'raid(1)0' requires 2 or more stripes.");
+PFL();
+ log_error("Segment type '%s' requires 2 or more stripes.", lp->segtype->name);
+ return 0;
+ }
+
+ if (lp->stripes * lp->mirrors < 2 && segtype_is_raid10_near(lp->segtype))
+ if (arg_count(cmd, stripes_ARG) || arg_count(cmd, mirrors_ARG)) {
+ /* User supplied the bad arguments */
+PFL();
+ log_error("Segment type '%s' requires 2 or more stripes.", lp->segtype->name);
return 0;
}
@@ -570,26 +591,6 @@ PFLA("mirrors=%u stripes=%u", lp->mirrors, lp->stripes);
return 0;
}
- /*
- * RAID1 does not take a stripe arg
- */
- if ((lp->stripes > 1) &&
- (seg_is_mirrored(lp) || segtype_is_raid1(lp->segtype)) &&
- !segtype_is_any_raid0(lp->segtype) &&
- !segtype_is_raid10(lp->segtype)) {
- log_error("Stripe argument cannot be used with segment type, %s",
- lp->segtype->name);
- return 0;
- }
-
- if (arg_count(cmd, mirrors_ARG) && segtype_is_raid(lp->segtype) &&
- !segtype_is_raid1(lp->segtype) &&
- !segtype_is_raid10(lp->segtype)) {
- log_error("Mirror argument cannot be used with segment type, %s",
- lp->segtype->name);
- return 0;
- }
-
if (lp->region_size % (pagesize >> SECTOR_SHIFT)) {
log_error("Region size (%" PRIu32 ") must be a multiple of "
"machine memory page size (%d)",
@@ -609,22 +610,20 @@ PFLA("mirrors=%u stripes=%u", lp->mirrors, lp->stripes);
static int _read_cache_params(struct cmd_context *cmd,
struct lvcreate_params *lp)
{
- const char *cachemode;
-
if (!seg_is_cache(lp) && !seg_is_cache_pool(lp))
return 1;
- if (!(cachemode = arg_str_value(cmd, cachemode_ARG, NULL)))
- cachemode = find_config_tree_str(cmd, allocation_cache_pool_cachemode_CFG, NULL);
-
- if (!set_cache_pool_feature(&lp->feature_flags, cachemode))
+ if (!get_cache_params(cmd,
+ &lp->cache_mode,
+ &lp->policy_name,
+ &lp->policy_settings))
return_0;
return 1;
}
static int _read_activation_params(struct cmd_context *cmd,
- struct volume_group *vg,
+ struct volume_group *vg,
struct lvcreate_params *lp)
{
unsigned pagesize = lvm_getpagesize() >> SECTOR_SHIFT;
@@ -632,6 +631,14 @@ static int _read_activation_params(struct cmd_context *cmd,
lp->activate = (activation_change_t)
arg_uint_value(cmd, activate_ARG, CHANGE_AY);
+ /* Error when full */
+ if (arg_is_set(cmd, errorwhenfull_ARG)) {
+ lp->error_when_full = arg_uint_value(cmd, errorwhenfull_ARG, 0);
+ } else
+ lp->error_when_full =
+ seg_can_error_when_full(lp) &&
+ find_config_tree_bool(cmd, activation_error_when_full_CFG, NULL);
+
/* Read ahead */
lp->read_ahead = arg_uint_value(cmd, readahead_ARG,
cmd->default_settings.read_ahead);
@@ -906,6 +913,7 @@ static int _lvcreate_params(struct cmd_context *cmd,
SIZE_ARGS,
THIN_POOL_ARGS,
chunksize_ARG,
+ errorwhenfull_ARG,
snapshot_ARG,
thin_ARG,
virtualsize_ARG,
@@ -925,6 +933,7 @@ static int _lvcreate_params(struct cmd_context *cmd,
SIZE_ARGS,
chunksize_ARG,
discards_ARG,
+ errorwhenfull_ARG,
zero_ARG,
-1))
return_0;
@@ -991,6 +1000,12 @@ static int _lvcreate_params(struct cmd_context *cmd,
-1))
return_0;
+ if (!seg_can_error_when_full(lp) && !lp->create_pool &&
+ arg_is_set(cmd, errorwhenfull_ARG)) {
+ log_error("Segment type %s does not support --errorwhenfull.", lp->segtype->name);
+ return 0;
+ }
+
/* Basic segment type validation finished here */
if (activation() && lp->segtype->ops->target_present) {
@@ -1000,13 +1015,13 @@ static int _lvcreate_params(struct cmd_context *cmd,
return 0;
}
- if (segtype_is_raid0(lp->segtype) &&
+ if (segtype_is_any_raid0(lp->segtype) &&
!(lp->target_attr & RAID_FEATURE_RAID0)) {
log_error("RAID module does not support RAID0.");
return 0;
}
- if (segtype_is_raid10(lp->segtype) &&
+ if (segtype_is_any_raid10(lp->segtype) &&
!(lp->target_attr & RAID_FEATURE_RAID10)) {
log_error("RAID module does not support RAID10.");
return 0;
@@ -1068,13 +1083,6 @@ static int _lvcreate_params(struct cmd_context *cmd,
return 0;
}
- if ((arg_count(cmd, cachepolicy_ARG) || arg_count(cmd, cachesettings_ARG)) &&
- !(lp->cache_policy = get_cachepolicy_params(cmd)))
- {
- log_error("Failed to parse cache policy and/or settings.");
- return 0;
- }
-
dm_list_iterate_items(current_group, &cmd->arg_value_groups) {
if (!grouped_arg_is_set(current_group->arg_values, addtag_ARG))
continue;
@@ -1123,8 +1131,6 @@ static int _determine_cache_argument(struct volume_group *vg,
/* If cache args not given, use those from cache pool */
if (!arg_is_set(cmd, chunksize_ARG))
lp->chunk_size = first_seg(lv)->chunk_size;
- if (!arg_is_set(cmd, cachemode_ARG))
- lp->feature_flags = first_seg(lv)->feature_flags;
} else if (lv) {
/* Origin exists, create cache pool volume */
if (!validate_lv_cache_create_origin(lv))
@@ -1237,31 +1243,37 @@ static int _check_raid_parameters(struct volume_group *vg,
struct cmd_context *cmd = vg->cmd;
/*
- * If number of devices was not supplied, we can infer from
- * the PVs given.
+ * If number of devices was not supplied, limit
+ * so we ain't get too many stripes with many PVs
*/
if (!seg_is_mirrored(lp)) {
if (!arg_count(cmd, stripes_ARG) &&
- (devs > 2 * lp->segtype->parity_devs)) {
- lp->stripes = devs - lp->segtype->parity_devs;
-lp->stripes = 2; /* Or stripe bomb with many devs given */
- }
+ (devs > 2 * lp->segtype->parity_devs))
+ lp->stripes = devs < 3 ? devs : 3; /* Or stripe bomb with many allocatable PVs given */
if (!lp->stripe_size)
lp->stripe_size = find_config_tree_int(cmd, metadata_stripesize_CFG, NULL) * 2;
- if (lp->stripes < 2) { // <= lp->segtype->parity_devs) {
+ if (lp->stripes < lp->segtype->parity_devs) {
log_error("Number of stripes must be at least %d for %s",
- lp->segtype->parity_devs + 1,
+ lp->segtype->parity_devs,
lp->segtype->name);
return 0;
}
} else if (segtype_is_any_raid0(lp->segtype) ||
- segtype_is_raid10(lp->segtype)) {
+ segtype_is_any_raid10(lp->segtype)) {
+PFLA("lp->stripes=%u lp->mirrors=%u", lp->stripes, lp->mirrors);
+
+ if (!lp->mirrors && seg_is_any_raid10(lp))
+ lp->mirrors = 1;
+
if (!arg_count(cmd, stripes_ARG))
- lp->stripes = devs / lp->mirrors;
+ lp->stripes = devs < 3 ? devs : 3; /* Or stripe bomb with many allocatable PVs given */
+
+PFLA("lp->stripes=%u lp->mirrors=%u", lp->stripes, lp->mirrors);
+
if (lp->stripes < 2) {
- log_error("Unable to create RAID(1)0 LV,"
+ log_error("Unable to create RAID LV,"
" insufficient number of devices.");
return 0;
}
@@ -1474,9 +1486,10 @@ static int _validate_internal_thin_processing(const struct lvcreate_params *lp)
static void _destroy_lvcreate_params(struct lvcreate_params *lp)
{
- if (lp->cache_policy)
- dm_config_destroy(lp->cache_policy);
- lp->cache_policy = NULL;
+ if (lp->policy_settings) {
+ dm_config_destroy(lp->policy_settings);
+ lp->policy_settings = NULL;
+ }
}
int lvcreate(struct cmd_context *cmd, int argc, char **argv)
@@ -1488,19 +1501,24 @@ int lvcreate(struct cmd_context *cmd, int argc, char **argv)
};
struct lvcreate_cmdline_params lcp = { 0 };
struct volume_group *vg;
+ uint32_t lockd_state = 0;
if (!_lvcreate_params(cmd, argc, argv, &lp, &lcp)) {
stack;
return EINVALID_CMD_LINE;
}
+PFLA("lp.stripe_size=%u", lp.stripe_size);
if (!_check_pool_parameters(cmd, NULL, &lp, &lcp)) {
stack;
return EINVALID_CMD_LINE;
}
+ if (!lockd_vg(cmd, lp.vg_name, "ex", 0, &lockd_state))
+ return_ECMD_FAILED;
+
log_verbose("Finding volume group \"%s\"", lp.vg_name);
- vg = vg_read_for_update(cmd, lp.vg_name, NULL, 0);
+ vg = vg_read_for_update(cmd, lp.vg_name, NULL, 0, lockd_state);
if (vg_read_error(vg)) {
release_vg(vg);
return_ECMD_FAILED;
@@ -1545,6 +1563,13 @@ int lvcreate(struct cmd_context *cmd, int argc, char **argv)
lp.pool_name ? : "with generated name", lp.vg_name, lp.segtype->name);
}
+ if (vg->lock_type && !strcmp(vg->lock_type, "sanlock")) {
+ if (!handle_sanlock_lv(cmd, vg)) {
+ log_error("No space for sanlock lock, extend the internal lvmlock LV.");
+ goto_out;
+ }
+ }
+
if (seg_is_thin_volume(&lp))
log_verbose("Making thin LV %s in pool %s in VG %s%s%s using segtype %s",
lp.lv_name ? : "with generated name",
@@ -1552,6 +1577,9 @@ int lvcreate(struct cmd_context *cmd, int argc, char **argv)
lp.snapshot ? " as snapshot of " : "",
lp.snapshot ? lp.origin_name : "", lp.segtype->name);
+ if (is_lockd_type(vg->lock_type))
+ lp.needs_lockd_init = 1;
+
if (!lv_create_single(vg, &lp))
goto_out;
diff --git a/tools/lvdisplay.c b/tools/lvdisplay.c
index fb3420a56..7d900cc70 100644
--- a/tools/lvdisplay.c
+++ b/tools/lvdisplay.c
@@ -16,7 +16,7 @@
#include "tools.h"
static int _lvdisplay_single(struct cmd_context *cmd, struct logical_volume *lv,
- void *handle)
+ struct processing_handle *handle __attribute__ ((unused)))
{
if (!arg_count(cmd, all_ARG) && !lv_is_visible(lv))
return ECMD_PROCESSED;
@@ -24,7 +24,7 @@ static int _lvdisplay_single(struct cmd_context *cmd, struct logical_volume *lv,
if (arg_count(cmd, colon_ARG))
lvdisplay_colons(lv);
else {
- lvdisplay_full(cmd, lv, handle);
+ lvdisplay_full(cmd, lv, NULL);
if (arg_count(cmd, maps_ARG))
lvdisplay_segments(lv);
}
@@ -46,7 +46,6 @@ int lvdisplay(struct cmd_context *cmd, int argc, char **argv)
arg_count(cmd, binary_ARG) ||
arg_count(cmd, noheadings_ARG) ||
arg_count(cmd, options_ARG) ||
- arg_count(cmd, select_ARG) ||
arg_count(cmd, separator_ARG) ||
arg_count(cmd, sort_ARG) ||
arg_count(cmd, unbuffered_ARG)) {
diff --git a/tools/lvm-static.c b/tools/lvm-static.c
index 1be4c2416..920127bec 100644
--- a/tools/lvm-static.c
+++ b/tools/lvm-static.c
@@ -14,6 +14,7 @@
*/
#include "tools.h"
+
#include "lvm2cmdline.h"
int main(int argc, char **argv)
diff --git a/tools/lvm.c b/tools/lvm.c
index f9ab438b2..63cc0b4ad 100644
--- a/tools/lvm.c
+++ b/tools/lvm.c
@@ -14,6 +14,7 @@
*/
#include "tools.h"
+
#include "lvm2cmdline.h"
int main(int argc, char **argv)
diff --git a/tools/lvm2cmdline.h b/tools/lvm2cmdline.h
index 5c4889e1e..fe77d569a 100644
--- a/tools/lvm2cmdline.h
+++ b/tools/lvm2cmdline.h
@@ -31,7 +31,7 @@ int lvm2_main(int argc, char **argv);
void *cmdlib_lvm2_init(unsigned static_compile);
void lvm_fin(struct cmd_context *cmd);
-struct cmd_context *init_lvm(void);
+struct cmd_context *init_lvm(unsigned set_connections, unsigned set_filters);
void lvm_register_commands(void);
int lvm_split(char *str, int *argc, char **argv, int max);
int lvm_run_command(struct cmd_context *cmd, int argc, char **argv);
diff --git a/tools/lvmcmdlib.c b/tools/lvmcmdlib.c
index 6b641c239..26f160f88 100644
--- a/tools/lvmcmdlib.c
+++ b/tools/lvmcmdlib.c
@@ -14,6 +14,7 @@
*/
#include "tools.h"
+
#include "lvm2cmdline.h"
#include "label.h"
#include "memlock.h"
@@ -32,7 +33,7 @@ void *cmdlib_lvm2_init(unsigned static_compile)
lvm_register_commands();
init_is_static(static_compile);
- if (!(cmd = init_lvm()))
+ if (!(cmd = init_lvm(1, 1)))
return NULL;
return (void *) cmd;
diff --git a/tools/lvmcmdline.c b/tools/lvmcmdline.c
index 6149dbc52..b7809c727 100644
--- a/tools/lvmcmdline.c
+++ b/tools/lvmcmdline.c
@@ -14,9 +14,11 @@
*/
#include "tools.h"
+
#include "lvm2cmdline.h"
#include "label.h"
#include "lvm-version.h"
+#include "lvmlockd.h"
#include "stub.h"
#include "last-path-component.h"
@@ -29,6 +31,10 @@
#include <paths.h>
#include <locale.h>
+#ifdef HAVE_VALGRIND
+#include <valgrind.h>
+#endif
+
#ifdef HAVE_GETOPTLONG
# include <getopt.h>
# define GETOPTLONG_FN(a, b, c, d, e) getopt_long((a), (b), (c), (d), (e))
@@ -282,6 +288,12 @@ int activation_arg(struct cmd_context *cmd __attribute__((unused)), struct arg_v
av->ui_value = CHANGE_AEY;
}
+ else if (!strcmp(av->value, "s") || !strcmp(av->value, "sy") ||
+ !strcmp(av->value, "ys")) {
+ av->i_value = CHANGE_ASY;
+ av->ui_value = CHANGE_ASY;
+ }
+
else if (!strcmp(av->value, "y")) {
av->i_value = CHANGE_AY;
av->ui_value = CHANGE_AY;
@@ -615,6 +627,19 @@ int alloc_arg(struct cmd_context *cmd __attribute__((unused)), struct arg_values
return 1;
}
+int locktype_arg(struct cmd_context *cmd __attribute__((unused)), struct arg_values *av)
+{
+ lock_type_t lock_type;
+
+ av->sign = SIGN_NONE;
+
+ lock_type = get_lock_type_from_string(av->value);
+ if (lock_type == LOCK_TYPE_INVALID)
+ return 0;
+
+ return 1;
+}
+
int segtype_arg(struct cmd_context *cmd, struct arg_values *av)
{
struct segment_type *segtype;
@@ -973,6 +998,15 @@ static int _merge_synonym(struct cmd_context *cmd, int oldarg, int newarg)
return 1;
}
+int systemid(struct cmd_context *cmd __attribute__((unused)),
+ int argc __attribute__((unused)),
+ char **argv __attribute__((unused)))
+{
+ log_print("system ID: %s", cmd->system_id ? : "");
+
+ return ECMD_PROCESSED;
+}
+
int version(struct cmd_context *cmd __attribute__((unused)),
int argc __attribute__((unused)),
char **argv __attribute__((unused)))
@@ -1026,6 +1060,9 @@ static int _get_settings(struct cmd_context *cmd)
cmd->current_settings.backup = 0;
}
+ if (cmd->command->flags & LOCKD_VG_SH)
+ cmd->lockd_vg_default_sh = 1;
+
cmd->partial_activation = 0;
cmd->degraded_activation = 0;
activation_mode = find_config_tree_str(cmd, activation_mode_CFG, NULL);
@@ -1061,8 +1098,16 @@ static int _get_settings(struct cmd_context *cmd)
else
init_ignorelockingfailure(0);
- cmd->ignore_clustered_vgs = arg_count(cmd, ignoreskippedcluster_ARG) ? 1 : 0;
+ cmd->ignore_clustered_vgs = arg_is_set(cmd, ignoreskippedcluster_ARG);
+ cmd->include_foreign_vgs = arg_is_set(cmd, foreign_ARG) ? 1 : 0;
+ cmd->include_shared_vgs = arg_is_set(cmd, shared_ARG) ? 1 : 0;
+ /*
+ * This is set to zero by process_each which wants to print errors
+ * itself rather than having them printed in vg_read.
+ */
+ cmd->vg_read_print_access_error = 1;
+
if (!arg_count(cmd, sysinit_ARG))
lvmetad_connect_or_warn();
@@ -1232,23 +1277,47 @@ static const char *_copy_command_line(struct cmd_context *cmd, int argc, char **
static int _prepare_profiles(struct cmd_context *cmd)
{
+ static const char COMMAND_PROFILE_ENV_VAR_NAME[] = "LVM_COMMAND_PROFILE";
+ static const char _cmd_profile_arg_preferred_over_env_var_msg[] = "Giving "
+ "preference to command profile specified on command "
+ "line over the one specified via environment variable.";
static const char _failed_to_add_profile_msg[] = "Failed to add %s %s.";
static const char _failed_to_apply_profile_msg[] = "Failed to apply %s %s.";
static const char _command_profile_source_name[] = "command profile";
static const char _metadata_profile_source_name[] = "metadata profile";
static const char _setting_global_profile_msg[] = "Setting global %s \"%s\".";
+ const char *env_cmd_profile_name = NULL;
const char *name;
struct profile *profile;
config_source_t source;
const char *source_name;
+ /* Check whether default global command profile is set via env. var. */
+ if ((env_cmd_profile_name = getenv(COMMAND_PROFILE_ENV_VAR_NAME))) {
+ if (!*env_cmd_profile_name)
+ env_cmd_profile_name = NULL;
+ else
+ log_debug("Command profile '%s' requested via "
+ "environment variable.",
+ env_cmd_profile_name);
+ }
+
+ if (!arg_count(cmd, profile_ARG) &&
+ !arg_count(cmd, commandprofile_ARG) &&
+ !arg_count(cmd, metadataprofile_ARG) &&
+ !env_cmd_profile_name)
+ /* nothing to do */
+ return 1;
+
if (arg_count(cmd, profile_ARG)) {
/*
* If --profile is used with dumpconfig, it's used
* to dump the profile without the profile being applied.
*/
- if (!strcmp(cmd->command->name, "dumpconfig"))
+ if (!strcmp(cmd->command->name, "dumpconfig") ||
+ !strcmp(cmd->command->name, "lvmconfig") ||
+ !strcmp(cmd->command->name, "config"))
return 1;
/*
@@ -1274,6 +1343,15 @@ static int _prepare_profiles(struct cmd_context *cmd)
"--commandprofile allowed.");
return 0;
}
+ /*
+ * Prefer command profile specified on command
+ * line over the profile specified via
+ * COMMAND_PROFILE_ENV_VAR_NAME env. var.
+ */
+ if (env_cmd_profile_name) {
+ log_debug(_cmd_profile_arg_preferred_over_env_var_msg);
+ env_cmd_profile_name = NULL;
+ }
source = CONFIG_PROFILE_COMMAND;
source_name = _command_profile_source_name;
}
@@ -1301,8 +1379,18 @@ static int _prepare_profiles(struct cmd_context *cmd)
}
- if (arg_count(cmd, commandprofile_ARG)) {
- name = arg_str_value(cmd, commandprofile_ARG, NULL);
+ if (arg_count(cmd, commandprofile_ARG) || env_cmd_profile_name) {
+ if (arg_count(cmd, commandprofile_ARG)) {
+ /*
+ * Prefer command profile specified on command
+ * line over the profile specified via
+ * COMMAND_PROFILE_ENV_VAR_NAME env. var.
+ */
+ if (env_cmd_profile_name)
+ log_debug(_cmd_profile_arg_preferred_over_env_var_msg);
+ name = arg_str_value(cmd, commandprofile_ARG, NULL);
+ } else
+ name = env_cmd_profile_name;
source_name = _command_profile_source_name;
if (!(profile = add_profile(cmd, name, CONFIG_PROFILE_COMMAND))) {
@@ -1342,6 +1430,44 @@ static int _prepare_profiles(struct cmd_context *cmd)
return 1;
}
+static int _init_lvmlockd(struct cmd_context *cmd)
+{
+ const char *lvmlockd_socket;
+ int use_lvmlockd = find_config_tree_bool(cmd, global_use_lvmlockd_CFG, NULL);
+
+ if (use_lvmlockd && arg_count(cmd, nolocking_ARG)) {
+ /* --nolocking is only allowed with vgs/lvs/pvs commands */
+ cmd->lockd_gl_disable = 1;
+ cmd->lockd_vg_disable = 1;
+ cmd->lockd_lv_disable = 1;
+ return 1;
+ }
+
+ if (use_lvmlockd && locking_is_clustered()) {
+ log_error("ERROR: configuration setting use_lvmlockd cannot be used with clustered locking_type 3.");
+ return 0;
+ }
+
+ lvmlockd_disconnect(); /* start over when tool context is refreshed */
+ lvmlockd_socket = getenv("LVM_LVMLOCKD_SOCKET");
+ if (!lvmlockd_socket)
+ lvmlockd_socket = DEFAULT_RUN_DIR "/lvmlockd.socket";
+
+ lvmlockd_set_socket(lvmlockd_socket);
+ lvmlockd_set_use(use_lvmlockd);
+ if (use_lvmlockd) {
+ lvmlockd_init(cmd);
+ lvmlockd_connect();
+ }
+
+ return 1;
+}
+
+static int _cmd_no_meta_proc(struct cmd_context *cmd)
+{
+ return cmd->command->flags & NO_METADATA_PROCESSING;
+}
+
int lvm_run_command(struct cmd_context *cmd, int argc, char **argv)
{
struct dm_config_tree *config_string_cft;
@@ -1349,12 +1475,48 @@ int lvm_run_command(struct cmd_context *cmd, int argc, char **argv)
int ret = 0;
int locking_type;
int monitoring;
+ char *arg_new, *arg;
+ int i;
+ int skip_hyphens;
init_error_message_produced(0);
/* each command should start out with sigint flag cleared */
sigint_clear();
+ /* eliminate '-' from all options starting with -- */
+ for (i = 1; i < argc; i++) {
+
+ arg = argv[i];
+
+ if (*arg++ != '-' || *arg++ != '-')
+ continue;
+
+ /* If we reach "--" then stop. */
+ if (!*arg)
+ break;
+
+ arg_new = arg;
+ skip_hyphens = 1;
+ while (*arg) {
+ /* If we encounter '=', stop any further hyphen removal. */
+ if (*arg == '=')
+ skip_hyphens = 0;
+
+ /* Do we need to keep the next character? */
+ if (*arg != '-' || !skip_hyphens) {
+ if (arg_new != arg)
+ *arg_new = *arg;
+ ++arg_new;
+ }
+ arg++;
+ }
+
+ /* Terminate a shortened arg */
+ if (arg_new != arg)
+ *arg_new = '\0';
+ }
+
if (!(cmd->cmd_line = _copy_command_line(cmd, argc, argv)))
return_ECMD_FAILED;
@@ -1384,7 +1546,7 @@ int lvm_run_command(struct cmd_context *cmd, int argc, char **argv)
goto_out;
}
- if (arg_count(cmd, config_ARG) || !cmd->config_initialized || config_files_changed(cmd)) {
+ if (arg_count(cmd, config_ARG) || !cmd->initialized.config || config_files_changed(cmd)) {
/* Reinitialise various settings inc. logging, filters */
if (!refresh_toolcontext(cmd)) {
if ((config_string_cft = remove_config_tree_by_source(cmd, CONFIG_STRING)))
@@ -1394,12 +1556,14 @@ int lvm_run_command(struct cmd_context *cmd, int argc, char **argv)
}
}
- if (arg_count(cmd, profile_ARG) ||
- arg_count(cmd, commandprofile_ARG) ||
- arg_count(cmd, metadataprofile_ARG)) {
- if (!_prepare_profiles(cmd))
- return_ECMD_FAILED;
- }
+ if (!_prepare_profiles(cmd))
+ return_ECMD_FAILED;
+
+ if (!cmd->initialized.connections && !_cmd_no_meta_proc(cmd) && !init_connections(cmd))
+ return_ECMD_FAILED;
+
+ if (!cmd->initialized.filters && !_cmd_no_meta_proc(cmd) && !init_filters(cmd, 1))
+ return_ECMD_FAILED;
if (arg_count(cmd, readonly_ARG))
cmd->metadata_read_only = 1;
@@ -1415,6 +1579,7 @@ int lvm_run_command(struct cmd_context *cmd, int argc, char **argv)
init_dmeventd_monitor(monitoring);
log_debug("Processing: %s", cmd->cmd_line);
+ log_debug("system ID: %s", cmd->system_id ? : "");
#ifdef O_DIRECT_SUPPORT
log_debug("O_DIRECT will be used");
@@ -1433,8 +1598,22 @@ int lvm_run_command(struct cmd_context *cmd, int argc, char **argv)
goto out;
}
- if (arg_count(cmd, readonly_ARG)) {
- locking_type = 5;
+ if (_cmd_no_meta_proc(cmd))
+ locking_type = 0;
+ else if (arg_count(cmd, readonly_ARG)) {
+ if (find_config_tree_bool(cmd, global_use_lvmlockd_CFG, NULL)) {
+ /*
+ * FIXME: we could use locking_type 5 here if that didn't
+ * cause CLUSTERED to be set, which conflicts with using lvmlockd.
+ */
+ locking_type = 1;
+ cmd->lockd_gl_disable = 1;
+ cmd->lockd_vg_disable = 1;
+ cmd->lockd_lv_disable = 1;
+ } else {
+ locking_type = 5;
+ }
+
if (lvmetad_used()) {
lvmetad_set_active(cmd, 0);
log_verbose("Disabling use of lvmetad because read-only is set.");
@@ -1444,13 +1623,32 @@ int lvm_run_command(struct cmd_context *cmd, int argc, char **argv)
else
locking_type = -1;
- if (!init_locking(locking_type, cmd, arg_count(cmd, sysinit_ARG))) {
+ if (!init_locking(locking_type, cmd, _cmd_no_meta_proc(cmd) || arg_count(cmd, sysinit_ARG))) {
ret = ECMD_FAILED;
goto_out;
}
+ if (!_cmd_no_meta_proc(cmd) && !_init_lvmlockd(cmd)) {
+ ret = ECMD_FAILED;
+ goto_out;
+ }
+
+ /*
+ * Other hosts might have changed foreign VGs so enforce a rescan
+ * before processing any command using them.
+ */
+ if (cmd->include_foreign_vgs && lvmetad_used() &&
+ !lvmetad_pvscan_foreign_vgs(cmd, NULL)) {
+ log_error("Failed to scan devices.");
+ return ECMD_FAILED;
+ }
+
+ /*
+ * FIXME Break up into multiple functions.
+ */
ret = cmd->command->fn(cmd, argc, argv);
+ lvmlockd_disconnect();
fin_locking();
out:
@@ -1494,6 +1692,8 @@ int lvm_run_command(struct cmd_context *cmd, int argc, char **argv)
int lvm_return_code(int ret)
{
+ unlink_log_file(ret);
+
return (ret == ECMD_PROCESSED ? 0 : ret);
}
@@ -1644,6 +1844,13 @@ static int _close_stray_fds(const char *command)
struct dirent *dirent;
DIR *d;
+#ifdef HAVE_VALGRIND
+ if (RUNNING_ON_VALGRIND) {
+ log_debug("Skipping close of descriptors within valgrind execution.");
+ return 1;
+ }
+#endif
+
if (getenv("LVM_SUPPRESS_FD_WARNINGS"))
suppress_warnings = 1;
@@ -1679,7 +1886,7 @@ static int _close_stray_fds(const char *command)
return 1;
}
-struct cmd_context *init_lvm(void)
+struct cmd_context *init_lvm(unsigned set_connections, unsigned set_filters)
{
struct cmd_context *cmd;
@@ -1693,7 +1900,8 @@ struct cmd_context *init_lvm(void)
*/
dm_set_name_mangling_mode(DM_STRING_MANGLING_NONE);
- if (!(cmd = create_toolcontext(0, NULL, 1, 0))) {
+ if (!(cmd = create_toolcontext(0, NULL, 1, 0,
+ set_connections, set_filters))) {
udev_fin_library_context();
return_NULL;
}
@@ -1847,12 +2055,12 @@ int lvm2_main(int argc, char **argv)
return -1;
if (is_static() && strcmp(base, "lvm.static") &&
- path_exists(LVM_SHARED_PATH) &&
+ path_exists(LVM_PATH) &&
!getenv("LVM_DID_EXEC")) {
if (setenv("LVM_DID_EXEC", base, 1))
log_sys_error("setenv", "LVM_DID_EXEC");
- if (execvp(LVM_SHARED_PATH, argv) == -1)
- log_sys_error("execvp", "LVM_SHARED_PATH");
+ if (execvp(LVM_PATH, argv) == -1)
+ log_sys_error("execvp", LVM_PATH);
if (unsetenv("LVM_DID_EXEC"))
log_sys_error("unsetenv", "LVM_DID_EXEC");
}
@@ -1861,7 +2069,7 @@ int lvm2_main(int argc, char **argv)
if (!alias && argc > 1 && !strcmp(argv[1], "version"))
return lvm_return_code(version(NULL, argc, argv));
- if (!(cmd = init_lvm()))
+ if (!(cmd = init_lvm(0, 0)))
return -1;
cmd->argv = argv;
diff --git a/tools/lvpoll.c b/tools/lvpoll.c
new file mode 100644
index 000000000..c040b6050
--- /dev/null
+++ b/tools/lvpoll.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2014-2015 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "tools.h"
+
+#include "polldaemon.h"
+#include "pvmove_poll.h"
+#include "lvconvert_poll.h"
+#include "polling_ops.h"
+
+static struct poll_functions _pvmove_fns = {
+ .poll_progress = poll_mirror_progress,
+ .update_metadata = pvmove_update_metadata,
+ .finish_copy = pvmove_finish
+};
+
+static struct poll_functions _convert_fns = {
+ .poll_progress = poll_mirror_progress,
+ .finish_copy = lvconvert_mirror_finish
+};
+
+static struct poll_functions _merge_fns = {
+ .poll_progress = poll_merge_progress,
+ .finish_copy = lvconvert_merge_finish
+};
+
+static struct poll_functions _thin_merge_fns = {
+ .poll_progress = poll_thin_merge_progress,
+ .finish_copy = lvconvert_merge_finish
+};
+
+static int _set_daemon_parms(struct cmd_context *cmd, struct daemon_parms *parms)
+{
+ const char *poll_oper = arg_str_value(cmd, polloperation_ARG, "");
+
+ parms->interval = arg_uint_value(cmd, interval_ARG, 0);
+ parms->aborting = arg_is_set(cmd, abort_ARG);
+ parms->progress_display = 1;
+ parms->wait_before_testing = (arg_sign_value(cmd, interval_ARG, SIGN_NONE) == SIGN_PLUS);
+
+ if (!strcmp(poll_oper, PVMOVE_POLL)) {
+ parms->progress_title = "Moved";
+ parms->lv_type = PVMOVE;
+ parms->poll_fns = &_pvmove_fns;
+ } else if (!strcmp(poll_oper, CONVERT_POLL)) {
+ parms->progress_title = "Converted";
+ parms->poll_fns = &_convert_fns;
+ } else if (!strcmp(poll_oper, MERGE_POLL)) {
+ parms->progress_title = "Merged";
+ parms->poll_fns = &_merge_fns;
+ } else if (!strcmp(poll_oper, MERGE_THIN_POLL)) {
+ parms->progress_title = "Merged";
+ parms->poll_fns = &_thin_merge_fns;
+ } else {
+ log_error("Unknown polling operation %s", poll_oper);
+ return 0;
+ }
+
+ cmd->handles_missing_pvs = arg_is_set(cmd, handlemissingpvs_ARG);
+
+ return 1;
+}
+
+static int poll_lv(struct cmd_context *cmd, const char *lv_name)
+{
+ struct daemon_parms parms = { 0 };
+ struct poll_operation_id id = {
+ .display_name = skip_dev_dir(cmd, lv_name, NULL)
+ };
+
+ if (!id.display_name)
+ return_EINVALID_CMD_LINE;
+
+ id.lv_name = id.display_name;
+
+ if (!validate_lvname_param(cmd, &id.vg_name, &id.lv_name))
+ return_EINVALID_CMD_LINE;
+
+ if (!_set_daemon_parms(cmd, &parms))
+ return_EINVALID_CMD_LINE;
+
+ return wait_for_single_lv(cmd, &id, &parms) ? ECMD_PROCESSED : ECMD_FAILED;
+}
+
+int lvpoll(struct cmd_context *cmd, int argc, char **argv)
+{
+ if (!arg_is_set(cmd, polloperation_ARG)) {
+ log_error("--polloperation parameter is mandatory");
+ return EINVALID_CMD_LINE;
+ }
+
+ if (arg_sign_value(cmd, interval_ARG, SIGN_NONE) == SIGN_MINUS) {
+ log_error("Argument to --interval cannot be negative");
+ return EINVALID_CMD_LINE;
+ }
+
+ if (!argc) {
+ log_error("Provide LV name");
+ return EINVALID_CMD_LINE;
+ }
+
+ return poll_lv(cmd, argv[0]);
+}
diff --git a/tools/lvremove.c b/tools/lvremove.c
index 1005218c0..ce0b65b8b 100644
--- a/tools/lvremove.c
+++ b/tools/lvremove.c
@@ -17,8 +17,9 @@
int lvremove(struct cmd_context *cmd, int argc, char **argv)
{
- if (!argc) {
- log_error("Please enter one or more logical volume paths");
+ if (!argc && !arg_is_set(cmd, select_ARG)) {
+ log_error("Please enter one or more logical volume paths "
+ "or use --select for selection.");
return EINVALID_CMD_LINE;
}
diff --git a/tools/lvrename.c b/tools/lvrename.c
index eeff76da2..7b0791a9d 100644
--- a/tools/lvrename.c
+++ b/tools/lvrename.c
@@ -27,6 +27,7 @@ int lvrename(struct cmd_context *cmd, int argc, char **argv)
char *st;
struct volume_group *vg;
struct lv_list *lvl;
+ uint32_t lockd_state = 0;
int r = ECMD_FAILED;
if (argc == 3) {
@@ -98,8 +99,11 @@ int lvrename(struct cmd_context *cmd, int argc, char **argv)
return EINVALID_CMD_LINE;
}
+ if (!lockd_vg(cmd, vg_name, "ex", 0, &lockd_state))
+ return_ECMD_FAILED;
+
log_verbose("Checking for existing volume group \"%s\"", vg_name);
- vg = vg_read_for_update(cmd, vg_name, NULL, 0);
+ vg = vg_read_for_update(cmd, vg_name, NULL, 0, lockd_state);
if (vg_read_error(vg)) {
release_vg(vg);
return_ECMD_FAILED;
diff --git a/tools/lvresize.c b/tools/lvresize.c
index 08248bbec..3188f27cb 100644
--- a/tools/lvresize.c
+++ b/tools/lvresize.c
@@ -169,13 +169,17 @@ int lvresize(struct cmd_context *cmd, int argc, char **argv)
struct volume_group *vg;
struct dm_list *pvh = NULL;
struct logical_volume *lv;
+ uint32_t lockd_state = 0;
int r = ECMD_FAILED;
if (!_lvresize_params(cmd, argc, argv, &lp))
return EINVALID_CMD_LINE;
+ if (!lockd_vg(cmd, lp.vg_name, "ex", 0, &lockd_state))
+ return_ECMD_FAILED;
+
log_verbose("Finding volume group %s", lp.vg_name);
- vg = vg_read_for_update(cmd, lp.vg_name, NULL, 0);
+ vg = vg_read_for_update(cmd, lp.vg_name, NULL, 0, lockd_state);
if (vg_read_error(vg)) {
release_vg(vg);
return_ECMD_FAILED;
diff --git a/tools/lvscan.c b/tools/lvscan.c
index 2d7be074b..e98c73d00 100644
--- a/tools/lvscan.c
+++ b/tools/lvscan.c
@@ -40,7 +40,7 @@ static int _lvscan_single_lvmetad(struct cmd_context *cmd, struct logical_volume
pvid_s);
continue;
}
- if (!lvmetad_pvscan_single(cmd, pvl->pv->dev, NULL))
+ if (!lvmetad_pvscan_single(cmd, pvl->pv->dev, NULL, 0))
return ECMD_FAILED;
}
@@ -48,7 +48,7 @@ static int _lvscan_single_lvmetad(struct cmd_context *cmd, struct logical_volume
}
static int lvscan_single(struct cmd_context *cmd, struct logical_volume *lv,
- void *handle __attribute__((unused)))
+ struct processing_handle *handle __attribute__((unused)))
{
struct lvinfo info;
int inkernel, snap_active = 1;
diff --git a/tools/polldaemon.c b/tools/polldaemon.c
index 8d5c6babb..cee1983f2 100644
--- a/tools/polldaemon.c
+++ b/tools/polldaemon.c
@@ -1,6 +1,6 @@
/*
* Copyright (C) 2003-2004 Sistina Software, Inc. All rights reserved.
- * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2004-2015 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
@@ -14,8 +14,14 @@
*/
#include "tools.h"
+
#include "polldaemon.h"
#include "lvm2cmdline.h"
+#include "lvmpolld-client.h"
+
+#include <time.h>
+
+#define WAIT_AT_LEAST_NANOSECS 100000
progress_t poll_mirror_progress(struct cmd_context *cmd,
struct logical_volume *lv, const char *name,
@@ -105,51 +111,78 @@ static int _check_lv_status(struct cmd_context *cmd,
return 1;
}
+static void _nanosleep(unsigned secs, unsigned allow_zero_time)
+{
+ struct timespec wtime = {
+ .tv_sec = secs,
+ };
+
+ if (!secs && !allow_zero_time)
+ wtime.tv_nsec = WAIT_AT_LEAST_NANOSECS;
+
+ while (!nanosleep(&wtime, &wtime) && errno == EINTR) {}
+}
+
static void _sleep_and_rescan_devices(struct daemon_parms *parms)
{
- /* FIXME Use alarm for regular intervals instead */
if (parms->interval && !parms->aborting) {
- sleep(parms->interval);
+ dev_close_all();
+ _nanosleep(parms->interval, 1);
/* Devices might have changed while we slept */
init_full_scan_done(0);
}
}
-static int _wait_for_single_lv(struct cmd_context *cmd, const char *name, const char *uuid,
- struct daemon_parms *parms)
+int wait_for_single_lv(struct cmd_context *cmd, struct poll_operation_id *id,
+ struct daemon_parms *parms)
{
- struct volume_group *vg;
+ struct volume_group *vg = NULL;
struct logical_volume *lv;
int finished = 0;
+ uint32_t lockd_state = 0;
+ int ret;
/* Poll for completion */
while (!finished) {
if (parms->wait_before_testing)
_sleep_and_rescan_devices(parms);
+ /*
+ * An ex VG lock is needed because the check can call finish_copy
+ * which writes the VG.
+ */
+ if (!lockd_vg(cmd, id->vg_name, "ex", 0, &lockd_state)) {
+ log_error("ABORTING: Can't lock VG for %s.", id->display_name);
+ return 0;
+ }
+
/* Locks the (possibly renamed) VG again */
- vg = parms->poll_fns->get_copy_vg(cmd, name, uuid);
+ vg = vg_read(cmd, id->vg_name, NULL, READ_FOR_UPDATE, lockd_state);
if (vg_read_error(vg)) {
- release_vg(vg);
- log_error("ABORTING: Can't reread VG for %s", name);
/* What more could we do here? */
- return 0;
+ log_error("ABORTING: Can't reread VG for %s.", id->display_name);
+ release_vg(vg);
+ vg = NULL;
+ ret = 0;
+ goto out;
}
- lv = parms->poll_fns->get_copy_lv(cmd, vg, name, uuid, parms->lv_type);
+ lv = find_lv(vg, id->lv_name);
- if (!lv && parms->lv_type == PVMOVE) {
- log_print_unless_silent("%s: no pvmove in progress - already finished or aborted.",
- name);
- unlock_and_release_vg(cmd, vg, vg->name);
- return 1;
- }
+ if (lv && id->uuid && strcmp(id->uuid, (char *)&lv->lvid))
+ lv = NULL;
+ if (lv && parms->lv_type && !(lv->status & parms->lv_type))
+ lv = NULL;
if (!lv) {
- log_error("ABORTING: Can't find LV in %s for %s",
- vg->name, name);
- unlock_and_release_vg(cmd, vg, vg->name);
- return 0;
+ if (parms->lv_type == PVMOVE)
+ log_print_unless_silent("%s: No pvmove in progress - already finished or aborted.",
+ id->display_name);
+ else
+ log_print_unless_silent("Can't find LV in %s for %s.",
+ vg->name, id->display_name);
+ ret = 1;
+ goto out;
}
/*
@@ -157,18 +190,21 @@ static int _wait_for_single_lv(struct cmd_context *cmd, const char *name, const
* queried for its status. We must exit in this case.
*/
if (!lv_is_active_locally(lv)) {
- log_print_unless_silent("%s: Interrupted: No longer active.", name);
- unlock_and_release_vg(cmd, vg, vg->name);
- return 1;
+ log_print_unless_silent("%s: Interrupted: No longer active.", id->display_name);
+ ret = 1;
+ goto out;
}
- if (!_check_lv_status(cmd, vg, lv, name, parms, &finished)) {
- unlock_and_release_vg(cmd, vg, vg->name);
- return_0;
+ if (!_check_lv_status(cmd, vg, lv, id->display_name, parms, &finished)) {
+ ret = 0;
+ goto_out;
}
unlock_and_release_vg(cmd, vg, vg->name);
+ if (!lockd_vg(cmd, id->vg_name, "un", 0, &lockd_state))
+ stack;
+
/*
* FIXME Sleeping after testing, while preferred, also works around
* unreliable "finished" state checking in _percent_run. If the
@@ -181,113 +217,378 @@ static int _wait_for_single_lv(struct cmd_context *cmd, const char *name, const
* polldaemon(s) are polling. These other polldaemon(s) can then
* continue polling an LV that doesn't have a "status".
*/
- if (!parms->wait_before_testing)
+ if (!parms->wait_before_testing && !finished)
_sleep_and_rescan_devices(parms);
}
return 1;
+
+out:
+ if (vg)
+ unlock_and_release_vg(cmd, vg, vg->name);
+ if (!lockd_vg(cmd, id->vg_name, "un", 0, &lockd_state))
+ stack;
+
+ return ret;
+}
+
+struct poll_id_list {
+ struct dm_list list;
+ struct poll_operation_id *id;
+};
+
+static struct poll_operation_id *copy_poll_operation_id(struct dm_pool *mem,
+ const struct poll_operation_id *id)
+{
+ struct poll_operation_id *copy;
+
+ if (!id)
+ return_NULL;
+
+ copy = (struct poll_operation_id *) dm_pool_alloc(mem, sizeof(struct poll_operation_id));
+ if (!copy) {
+ log_error("Poll operation ID allocation failed.");
+ return NULL;
+ }
+
+ copy->display_name = id->display_name ? dm_pool_strdup(mem, id->display_name) : NULL;
+ copy->lv_name = id->lv_name ? dm_pool_strdup(mem, id->lv_name) : NULL;
+ copy->vg_name = id->vg_name ? dm_pool_strdup(mem, id->vg_name) : NULL;
+ copy->uuid = id->uuid ? dm_pool_strdup(mem, id->uuid) : NULL;
+
+ if (!copy->display_name || !copy->lv_name || !copy->vg_name || !copy->uuid) {
+ log_error("Failed to copy one or more poll_operation_id members.");
+ return NULL;
+ }
+
+ return copy;
+}
+
+static struct poll_id_list* poll_id_list_create(struct dm_pool *mem,
+ const struct poll_operation_id *id)
+{
+ struct poll_id_list *idl = (struct poll_id_list *) dm_pool_alloc(mem, sizeof(struct poll_id_list));
+
+ if (!idl) {
+ log_error("Poll ID list allocation failed.");
+ return NULL;
+ }
+
+ if (!(idl->id = copy_poll_operation_id(mem, id))) {
+ dm_pool_free(mem, idl);
+ return NULL;
+ }
+
+ return idl;
}
static int _poll_vg(struct cmd_context *cmd, const char *vgname,
- struct volume_group *vg, void *handle)
+ struct volume_group *vg, struct processing_handle *handle)
{
- struct daemon_parms *parms = (struct daemon_parms *) handle;
+ struct daemon_parms *parms;
struct lv_list *lvl;
+ struct dm_list idls;
+ struct poll_id_list *idl;
+ struct poll_operation_id id;
struct logical_volume *lv;
- const char *name;
int finished;
- if (!parms) {
+ if (!handle || !(parms = (struct daemon_parms *) handle->custom_handle)) {
log_error(INTERNAL_ERROR "Handle is undefined.");
return ECMD_FAILED;
}
+ dm_list_init(&idls);
+
+ /*
+ * first iterate all LVs in a VG and collect LVs suitable
+ * for polling (or an abort) which takes place below
+ */
dm_list_iterate_items(lvl, &vg->lvs) {
lv = lvl->lv;
if (!(lv->status & parms->lv_type))
continue;
- name = parms->poll_fns->get_copy_name_from_lv(lv);
- if (!name && !parms->aborting)
+ id.display_name = parms->poll_fns->get_copy_name_from_lv(lv);
+ if (!id.display_name && !parms->aborting)
continue;
+ if (!id.display_name) {
+ log_error("Device name for LV %s not found in metadata. "
+ "(unfinished pvmove mirror removal?)", display_lvname(lv));
+ goto err;
+ }
+
/* FIXME Need to do the activation from _set_up_pvmove here
* if it's not running and we're not aborting. */
if (!lv_is_active(lv)) {
- log_print_unless_silent("%s: Skipping inactive LV. Try lvchange or vgchange.", name);
+ log_print_unless_silent("%s: Skipping inactive LV. Try lvchange or vgchange.", id.display_name);
continue;
}
- if (_check_lv_status(cmd, vg, lv, name, parms, &finished) &&
- !finished)
+ id.lv_name = lv->name;
+ id.vg_name = vg->name;
+ id.uuid = lv->lvid.s;
+
+ idl = poll_id_list_create(cmd->mem, &id);
+ if (!idl) {
+ log_error("Failed to create poll_id_list.");
+ goto err;
+ }
+
+ dm_list_add(&idls, &idl->list);
+ }
+
+ /* perform the poll operation on LVs collected in the previous cycle */
+ dm_list_iterate_items(idl, &idls) {
+ if (!(lv = find_lv(vg, idl->id->lv_name)))
+ continue;
+ if (idl->id->uuid && strcmp(idl->id->uuid, (char *)&lv->lvid))
+ continue;
+ if (parms->lv_type && !(lv->status & parms->lv_type))
+ continue;
+ if (_check_lv_status(cmd, vg, lv, idl->id->display_name, parms, &finished) && !finished)
parms->outstanding_count++;
}
- return ECMD_PROCESSED;
+err:
+ if (!dm_list_empty(&idls))
+ dm_pool_free(cmd->mem, dm_list_item(dm_list_first(&idls), struct poll_id_list));
+ return ECMD_PROCESSED;
}
static void _poll_for_all_vgs(struct cmd_context *cmd,
- struct daemon_parms *parms)
+ struct processing_handle *handle)
{
+ struct daemon_parms *parms = (struct daemon_parms *) handle->custom_handle;
+
while (1) {
parms->outstanding_count = 0;
- process_each_vg(cmd, 0, NULL, READ_FOR_UPDATE, parms, _poll_vg);
+ process_each_vg(cmd, 0, NULL, READ_FOR_UPDATE, handle, _poll_vg);
if (!parms->outstanding_count)
break;
- sleep(parms->interval);
+ if (parms->interval)
+ dev_close_all();
+ _nanosleep(parms->interval, 1);
}
}
+#ifdef LVMPOLLD_SUPPORT
+typedef struct {
+ struct daemon_parms *parms;
+ struct dm_list idls;
+} lvmpolld_parms_t;
+
+static int report_progress(struct cmd_context *cmd, struct poll_operation_id *id,
+ struct daemon_parms *parms)
+{
+ struct volume_group *vg;
+ struct logical_volume *lv;
+ uint32_t lockd_state = 0;
+ int ret;
+
+ /*
+ * It's reasonable to expect a lockd_vg("sh") here, but it should
+ * not actually be needed, because we only report the progress on
+ * the same host where the pvmove/lvconvert is happening. This means
+ * that the local pvmove/lvconvert/lvpoll commands are updating the
+ * local lvmetad with the latest info they have, and we just need to
+ * read the latest info that they have put into lvmetad about their
+ * progress. No VG lock is needed to protect anything here (we're
+ * just reading the VG), and no VG lock is needed to force a VG read
+ * from disk to get changes from other hosts, because the only change
+ * to the VG we're interested in is the change done locally.
+ */
+
+ vg = vg_read(cmd, id->vg_name, NULL, 0, lockd_state);
+ if (vg_read_error(vg)) {
+ release_vg(vg);
+ log_error("Can't reread VG for %s", id->display_name);
+ ret = 0;
+ goto out_ret;
+ }
+
+ lv = find_lv(vg, id->lv_name);
+
+ if (lv && id->uuid && strcmp(id->uuid, (char *)&lv->lvid))
+ lv = NULL;
+ if (lv && parms->lv_type && !(lv->status & parms->lv_type))
+ lv = NULL;
+
+ if (!lv) {
+ if (parms->lv_type == PVMOVE)
+ log_verbose("%s: No pvmove in progress - already finished or aborted.",
+ id->display_name);
+ else
+ log_verbose("Can't find LV in %s for %s. Already finished or removed.",
+ vg->name, id->display_name);
+ ret = 1;
+ goto out;
+ }
+
+ if (!lv_is_active_locally(lv)) {
+ log_verbose("%s: Interrupted: No longer active.", id->display_name);
+ ret = 1;
+ goto out;
+ }
+
+ if (parms->poll_fns->poll_progress(cmd, lv, id->display_name, parms) == PROGRESS_CHECK_FAILED) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = 1;
+
+out:
+ unlock_and_release_vg(cmd, vg, vg->name);
+out_ret:
+ return ret;
+}
+
+static int _lvmpolld_init_poll_vg(struct cmd_context *cmd, const char *vgname,
+ struct volume_group *vg, struct processing_handle *handle)
+{
+ int r;
+ struct lv_list *lvl;
+ struct logical_volume *lv;
+ struct poll_id_list *idl;
+ struct poll_operation_id id;
+ lvmpolld_parms_t *lpdp = (lvmpolld_parms_t *) handle->custom_handle;
+
+ dm_list_iterate_items(lvl, &vg->lvs) {
+ lv = lvl->lv;
+ if (!(lv->status & lpdp->parms->lv_type))
+ continue;
+
+ id.display_name = lpdp->parms->poll_fns->get_copy_name_from_lv(lv);
+ if (!id.display_name && !lpdp->parms->aborting)
+ continue;
+
+ id.vg_name = lv->vg->name;
+ id.lv_name = lv->name;
+
+ if (!*lv->lvid.s) {
+ log_print_unless_silent("Missing LV uuid within: %s/%s", id.vg_name, id.lv_name);
+ continue;
+ }
+
+ id.uuid = lv->lvid.s;
+
+ r = lvmpolld_poll_init(cmd, &id, lpdp->parms);
+
+ if (r && !lpdp->parms->background) {
+ if (!(idl = poll_id_list_create(cmd->mem, &id)))
+ return ECMD_FAILED;
+
+ dm_list_add(&lpdp->idls, &idl->list);
+ }
+ }
+
+ return ECMD_PROCESSED;
+}
+
+static void _lvmpolld_poll_for_all_vgs(struct cmd_context *cmd,
+ struct daemon_parms *parms,
+ struct processing_handle *handle)
+{
+ int r;
+ struct dm_list *first;
+ struct poll_id_list *idl, *tlv;
+ unsigned finished;
+ lvmpolld_parms_t lpdp = {
+ .parms = parms
+ };
+
+ dm_list_init(&lpdp.idls);
+
+ handle->custom_handle = &lpdp;
+
+ process_each_vg(cmd, 0, NULL, 0, handle, _lvmpolld_init_poll_vg);
+
+ first = dm_list_first(&lpdp.idls);
+
+ while (!dm_list_empty(&lpdp.idls)) {
+ dm_list_iterate_items_safe(idl, tlv, &lpdp.idls) {
+ r = lvmpolld_request_info(idl->id, lpdp.parms,
+ &finished);
+ if (!r || finished)
+ dm_list_del(&idl->list);
+ else if (!parms->aborting)
+ report_progress(cmd, idl->id, lpdp.parms);
+ }
+
+ if (lpdp.parms->interval)
+ dev_close_all();
+
+ _nanosleep(lpdp.parms->interval, 0);
+ }
+
+ if (first)
+ dm_pool_free(cmd->mem, dm_list_item(first, struct poll_id_list));
+}
+
+static int _lvmpoll_daemon(struct cmd_context *cmd, struct poll_operation_id *id,
+ struct daemon_parms *parms)
+{
+ int r;
+ struct processing_handle *handle = NULL;
+ unsigned finished = 0;
+
+ if (parms->aborting)
+ parms->interval = 0;
+
+ if (id) {
+ r = lvmpolld_poll_init(cmd, id, parms);
+ if (r && !parms->background) {
+ while (1) {
+ if (!(r = lvmpolld_request_info(id, parms, &finished)) ||
+ finished ||
+ (!parms->aborting && !(r = report_progress(cmd, id, parms))))
+ break;
+
+ if (parms->interval)
+ dev_close_all();
+
+ _nanosleep(parms->interval, 0);
+ }
+ }
+
+ return r ? ECMD_PROCESSED : ECMD_FAILED;
+ } else {
+ /* process all in-flight operations */
+ if (!(handle = init_processing_handle(cmd))) {
+ log_error("Failed to initialize processing handle.");
+ return ECMD_FAILED;
+ } else {
+ _lvmpolld_poll_for_all_vgs(cmd, parms, handle);
+ destroy_processing_handle(cmd, handle);
+ return ECMD_PROCESSED;
+ }
+ }
+}
+#else
+# define _lvmpoll_daemon(cmd, id, parms) (ECMD_FAILED)
+#endif /* LVMPOLLD_SUPPORT */
+
/*
* Only allow *one* return from poll_daemon() (the parent).
* If there is a child it must exit (ignoring the memory leak messages).
* - 'background' is advisory so a child polldaemon may not be used even
* if it was requested.
*/
-int poll_daemon(struct cmd_context *cmd, const char *name, const char *uuid,
- unsigned background,
- uint64_t lv_type, struct poll_functions *poll_fns,
- const char *progress_title)
+static int _poll_daemon(struct cmd_context *cmd, struct poll_operation_id *id,
+ struct daemon_parms *parms)
{
- struct daemon_parms parms;
+ struct processing_handle *handle = NULL;
int daemon_mode = 0;
int ret = ECMD_PROCESSED;
- sign_t interval_sign;
- parms.aborting = arg_is_set(cmd, abort_ARG);
- parms.background = background;
- interval_sign = arg_sign_value(cmd, interval_ARG, SIGN_NONE);
- if (interval_sign == SIGN_MINUS) {
- log_error("Argument to --interval cannot be negative");
- return EINVALID_CMD_LINE;
- }
- parms.interval = arg_uint_value(cmd, interval_ARG,
- find_config_tree_int(cmd, activation_polling_interval_CFG, NULL));
- parms.wait_before_testing = (interval_sign == SIGN_PLUS);
- parms.progress_display = 1;
- parms.progress_title = progress_title;
- parms.lv_type = lv_type;
- parms.poll_fns = poll_fns;
-
- if (parms.interval && !parms.aborting)
- log_verbose("Checking progress %s waiting every %u seconds",
- (parms.wait_before_testing ? "after" : "before"),
- parms.interval);
-
- if (!parms.interval) {
- parms.progress_display = 0;
-
- /* FIXME Disabled multiple-copy wait_event */
- if (!name)
- parms.interval = find_config_tree_int(cmd, activation_polling_interval_CFG, NULL);
- }
-
- if (parms.background) {
+ if (parms->background) {
daemon_mode = become_daemon(cmd, 0);
if (daemon_mode == 0)
return ECMD_PROCESSED; /* Parent */
else if (daemon_mode == 1)
- parms.progress_display = 0; /* Child */
+ parms->progress_display = 0; /* Child */
/* FIXME Use wait_event (i.e. interval = 0) and */
/* fork one daemon per copy? */
}
@@ -295,15 +596,25 @@ int poll_daemon(struct cmd_context *cmd, const char *name, const char *uuid,
/*
* Process one specific task or all incomplete tasks?
*/
- if (name) {
- if (!_wait_for_single_lv(cmd, name, uuid, &parms)) {
+ if (id) {
+ if (!wait_for_single_lv(cmd, id, parms)) {
stack;
ret = ECMD_FAILED;
}
- } else
- _poll_for_all_vgs(cmd, &parms);
+ } else {
+ if (!parms->interval)
+ parms->interval = find_config_tree_int(cmd, activation_polling_interval_CFG, NULL);
+ if (!(handle = init_processing_handle(cmd))) {
+ log_error("Failed to initialize processing handle.");
+ ret = ECMD_FAILED;
+ } else {
+ handle->custom_handle = parms;
+ _poll_for_all_vgs(cmd, handle);
+ }
+ }
- if (parms.background && daemon_mode == 1) {
+ if (parms->background && daemon_mode == 1) {
+ destroy_processing_handle(cmd, handle);
/*
* child was successfully forked:
* background polldaemon must not return to the caller
@@ -314,5 +625,54 @@ int poll_daemon(struct cmd_context *cmd, const char *name, const char *uuid,
_exit(lvm_return_code(ret));
}
+ destroy_processing_handle(cmd, handle);
return ret;
}
+
+static int _daemon_parms_init(struct cmd_context *cmd, struct daemon_parms *parms,
+ unsigned background, struct poll_functions *poll_fns,
+ const char *progress_title, uint64_t lv_type)
+{
+ sign_t interval_sign;
+
+ parms->aborting = arg_is_set(cmd, abort_ARG);
+ parms->background = background;
+ interval_sign = arg_sign_value(cmd, interval_ARG, SIGN_NONE);
+ if (interval_sign == SIGN_MINUS) {
+ log_error("Argument to --interval cannot be negative.");
+ return 0;
+ }
+ parms->interval = arg_uint_value(cmd, interval_ARG,
+ find_config_tree_int(cmd, activation_polling_interval_CFG, NULL));
+ parms->wait_before_testing = (interval_sign == SIGN_PLUS);
+ parms->progress_title = progress_title;
+ parms->lv_type = lv_type;
+ parms->poll_fns = poll_fns;
+
+ if (parms->interval && !parms->aborting)
+ log_verbose("Checking progress %s waiting every %u seconds.",
+ (parms->wait_before_testing ? "after" : "before"),
+ parms->interval);
+
+ parms->progress_display = parms->interval ? 1 : 0;
+
+ return 1;
+}
+
+int poll_daemon(struct cmd_context *cmd, unsigned background,
+ uint64_t lv_type, struct poll_functions *poll_fns,
+ const char *progress_title, struct poll_operation_id *id)
+{
+ struct daemon_parms parms;
+
+ if (!_daemon_parms_init(cmd, &parms, background, poll_fns, progress_title, lv_type))
+ return_EINVALID_CMD_LINE;
+
+ if (lvmpolld_use())
+ return _lvmpoll_daemon(cmd, id, &parms);
+ else {
+ /* classical polling allows only PVMOVE or 0 values */
+ parms.lv_type &= PVMOVE;
+ return _poll_daemon(cmd, id, &parms);
+ }
+}
diff --git a/tools/pvchange.c b/tools/pvchange.c
index 76db91727..91e93c1e7 100644
--- a/tools/pvchange.c
+++ b/tools/pvchange.c
@@ -1,6 +1,6 @@
/*
* Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
- * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2004-2015 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
@@ -15,37 +15,43 @@
#include "tools.h"
+struct pvchange_params {
+ unsigned done;
+ unsigned total;
+};
+
static int _pvchange_single(struct cmd_context *cmd, struct volume_group *vg,
- struct physical_volume *pv,
- void *handle __attribute__((unused)))
+ struct physical_volume *pv, struct processing_handle *handle)
{
+ struct pvchange_params *params = (struct pvchange_params *) handle->custom_handle;
const char *pv_name = pv_dev_name(pv);
char uuid[64] __attribute__((aligned(8)));
+ unsigned done = 0;
int allocatable = arg_int_value(cmd, allocatable_ARG, 0);
int mda_ignore = arg_int_value(cmd, metadataignore_ARG, 0);
int tagargs = arg_count(cmd, addtag_ARG) + arg_count(cmd, deltag_ARG);
+ params->total++;
+
/* If in a VG, must change using volume group. */
if (!is_orphan(pv)) {
if (tagargs && !(vg->fid->fmt->features & FMT_TAGS)) {
log_error("Volume group containing %s does not "
"support tags", pv_name);
- return 0;
+ goto bad;
}
if (arg_count(cmd, uuid_ARG) && lvs_in_vg_activated(vg)) {
log_error("Volume group containing %s has active "
"logical volumes", pv_name);
- return 0;
+ goto bad;
}
if (!archive(vg))
- return 0;
- } else {
- if (tagargs) {
- log_error("Can't change tag on Physical Volume %s not "
- "in volume group", pv_name);
- return 0;
- }
+ goto_bad;
+ } else if (tagargs) {
+ log_error("Can't change tag on Physical Volume %s not "
+ "in volume group", pv_name);
+ goto bad;
}
if (arg_count(cmd, allocatable_ARG)) {
@@ -53,41 +59,46 @@ static int _pvchange_single(struct cmd_context *cmd, struct volume_group *vg,
!(pv->fmt->features & FMT_ORPHAN_ALLOCATABLE)) {
log_error("Allocatability not supported by orphan "
"%s format PV %s", pv->fmt->name, pv_name);
- return 0;
+ goto bad;
}
/* change allocatability for a PV */
if (allocatable && (pv_status(pv) & ALLOCATABLE_PV)) {
log_warn("Physical volume \"%s\" is already "
"allocatable.", pv_name);
- return 1;
- }
-
- if (!allocatable && !(pv_status(pv) & ALLOCATABLE_PV)) {
+ } else if (!allocatable && !(pv_status(pv) & ALLOCATABLE_PV)) {
log_warn("Physical volume \"%s\" is already "
"unallocatable.", pv_name);
- return 1;
- }
-
- if (allocatable) {
+ } else if (allocatable) {
log_verbose("Setting physical volume \"%s\" "
"allocatable", pv_name);
pv->status |= ALLOCATABLE_PV;
+ done = 1;
} else {
log_verbose("Setting physical volume \"%s\" NOT "
"allocatable", pv_name);
pv->status &= ~ALLOCATABLE_PV;
+ done = 1;
}
}
+ /*
+ * Needed to change a property on an orphan PV.
+ * i.e. the global lock is only needed for orphans.
+ * Convert sh to ex.
+ */
+ if (is_orphan(pv) && !lockd_gl(cmd, "ex", 0))
+ return_ECMD_FAILED;
+
if (tagargs) {
/* tag or deltag */
if (arg_count(cmd, addtag_ARG) && !change_tag(cmd, NULL, NULL, pv, addtag_ARG))
- return_0;
+ goto_bad;
if (arg_count(cmd, deltag_ARG) && !change_tag(cmd, NULL, NULL, pv, deltag_ARG))
- return_0;
-
+ goto_bad;
+
+ done = 1;
}
if (arg_count(cmd, metadataignore_ARG)) {
@@ -95,12 +106,12 @@ static int _pvchange_single(struct cmd_context *cmd, struct volume_group *vg,
(arg_count(cmd, force_ARG) == PROMPT) &&
yes_no_prompt("Override preferred number of copies "
"of VG %s metadata? [y/n]: ",
- pv_vg_name(pv)) == 'n') {
- log_error("Physical volume %s not changed", pv_name);
- return 0;
- }
+ pv_vg_name(pv)) == 'n')
+ goto_bad;
if (!pv_change_metadataignore(pv, mda_ignore))
- return_0;
+ goto_bad;
+
+ done = 1;
}
if (arg_count(cmd, uuid_ARG)) {
@@ -109,16 +120,23 @@ static int _pvchange_single(struct cmd_context *cmd, struct volume_group *vg,
if (!id_create(&pv->id)) {
log_error("Failed to generate new random UUID for %s.",
pv_name);
- return 0;
+ goto bad;
}
if (!id_write_format(&pv->id, uuid, sizeof(uuid)))
- return 0;
+ goto_bad;
log_verbose("Changing uuid of %s to %s.", pv_name, uuid);
if (!is_orphan(pv) && (!pv_write(cmd, pv, 1))) {
log_error("pv_write with new uuid failed "
"for %s.", pv_name);
- return 0;
+ goto bad;
}
+
+ done = 1;
+ }
+
+ if (!done) {
+ log_print_unless_silent("Physical volume %s not changed", pv_name);
+ return ECMD_PROCESSED;
}
log_verbose("Updating physical volume \"%s\"", pv_name);
@@ -126,85 +144,62 @@ static int _pvchange_single(struct cmd_context *cmd, struct volume_group *vg,
if (!vg_write(vg) || !vg_commit(vg)) {
log_error("Failed to store physical volume \"%s\" in "
"volume group \"%s\"", pv_name, vg->name);
- return 0;
+ goto bad;
}
backup(vg);
} else if (!(pv_write(cmd, pv, 0))) {
log_error("Failed to store physical volume \"%s\"",
pv_name);
- return 0;
+ goto bad;
}
log_print_unless_silent("Physical volume \"%s\" changed", pv_name);
- return 1;
+ params->done++;
+ return ECMD_PROCESSED;
+
+bad:
+ log_error("Physical volume %s not changed", pv_name);
+
+ return ECMD_FAILED;
}
int pvchange(struct cmd_context *cmd, int argc, char **argv)
{
- int opt = 0;
- int done = 0;
- int total = 0;
-
- struct volume_group *vg;
- const char *vg_name;
- char *pv_name;
-
- struct pv_list *pvl;
- struct dm_list *vgnames;
- struct dm_str_list *sll;
+ struct pvchange_params params = { 0 };
+ struct processing_handle *handle = NULL;
+ int ret;
if (!(arg_count(cmd, allocatable_ARG) + arg_is_set(cmd, addtag_ARG) +
arg_is_set(cmd, deltag_ARG) + arg_count(cmd, uuid_ARG) +
arg_count(cmd, metadataignore_ARG))) {
log_error("Please give one or more of -x, -uuid, "
"--addtag, --deltag or --metadataignore");
- return EINVALID_CMD_LINE;
+ ret = EINVALID_CMD_LINE;
+ goto out;
+ }
+
+ if (!(handle = init_processing_handle(cmd))) {
+ log_error("Failed to initialize processing handle.");
+ ret = ECMD_FAILED;
+ goto out;
}
- if (!(arg_count(cmd, all_ARG)) && !argc) {
- log_error("Please give a physical volume path");
- return EINVALID_CMD_LINE;
+ handle->custom_handle = &params;
+
+ if (!(arg_count(cmd, all_ARG)) && !argc && !handle->internal_report_for_select) {
+ log_error("Please give a physical volume path or use --select for selection.");
+ ret = EINVALID_CMD_LINE;
+ goto out;
}
if (arg_count(cmd, all_ARG) && argc) {
log_error("Option --all and PhysicalVolumePath are exclusive.");
- return EINVALID_CMD_LINE;
+ ret = EINVALID_CMD_LINE;
+ goto out;
}
- if (argc) {
- log_verbose("Using physical volume(s) on command line");
- for (; opt < argc; opt++) {
- total++;
- pv_name = argv[opt];
- dm_unescape_colons_and_at_signs(pv_name, NULL, NULL);
- vg_name = find_vgname_from_pvname(cmd, pv_name);
- if (!vg_name) {
- log_error("Failed to read physical volume %s",
- pv_name);
- continue;
- }
- vg = vg_read_for_update(cmd, vg_name, NULL, 0);
- if (vg_read_error(vg)) {
- release_vg(vg);
- stack;
- continue;
- }
- pvl = find_pv_in_vg(vg, pv_name);
- if (!pvl || !pvl->pv) {
- unlock_and_release_vg(cmd, vg, vg_name);
- log_error("Unable to find %s in %s",
- pv_name, vg_name);
- continue;
- }
-
- done += _pvchange_single(cmd, vg,
- pvl->pv, NULL);
- unlock_and_release_vg(cmd, vg, vg_name);
- }
- } else {
- log_verbose("Scanning for physical volume names");
- /* FIXME: share code with toollib */
+ if (!argc) {
/*
* Take the global lock here so the lvmcache remains
* consistent across orphan/non-orphan vg locks. If we don't
@@ -213,38 +208,21 @@ int pvchange(struct cmd_context *cmd, int argc, char **argv)
*/
if (!lock_vol(cmd, VG_GLOBAL, LCK_VG_WRITE, NULL)) {
log_error("Unable to obtain global lock.");
- return ECMD_FAILED;
+ ret = ECMD_FAILED;
+ goto out;
}
+ }
- /* populate lvmcache */
- if (!lvmetad_vg_list_to_lvmcache(cmd))
- stack;
-
- if ((vgnames = get_vgnames(cmd, 1)) &&
- !dm_list_empty(vgnames)) {
- dm_list_iterate_items(sll, vgnames) {
- vg = vg_read_for_update(cmd, sll->str, NULL, 0);
- if (vg_read_error(vg)) {
- release_vg(vg);
- stack;
- continue;
- }
- dm_list_iterate_items(pvl, &vg->pvs) {
- total++;
- done += _pvchange_single(cmd, vg,
- pvl->pv,
- NULL);
- }
- unlock_and_release_vg(cmd, vg, sll->str);
- }
- }
+ ret = process_each_pv(cmd, argc, argv, NULL, READ_FOR_UPDATE, handle, _pvchange_single);
+
+ if (!argc)
unlock_vg(cmd, VG_GLOBAL);
- }
- log_print_unless_silent("%d physical volume%s changed / %d physical volume%s "
- "not changed",
- done, done == 1 ? "" : "s",
- total - done, (total - done) == 1 ? "" : "s");
+ log_print_unless_silent("%d physical volume%s changed / %d physical volume%s not changed",
+ params.done, params.done == 1 ? "" : "s",
+ params.total - params.done, (params.total - params.done) == 1 ? "" : "s");
- return (total == done) ? ECMD_PROCESSED : ECMD_FAILED;
+out:
+ destroy_processing_handle(cmd, handle);
+ return ret;
}
diff --git a/tools/pvcreate.c b/tools/pvcreate.c
index 958e353b5..1f45ad91d 100644
--- a/tools/pvcreate.c
+++ b/tools/pvcreate.c
@@ -96,12 +96,16 @@ int pvcreate(struct cmd_context *cmd, int argc, char **argv)
int ret = ECMD_PROCESSED;
struct pvcreate_params pp;
+ /* Needed to change the set of orphan PVs. */
+ if (!lockd_gl(cmd, "ex", 0))
+ return_ECMD_FAILED;
+
pvcreate_params_set_defaults(&pp);
if (!pvcreate_restore_params_validate(cmd, argc, argv, &pp)) {
return EINVALID_CMD_LINE;
}
- if (!pvcreate_params_validate(cmd, argc, argv, &pp)) {
+ if (!pvcreate_params_validate(cmd, argc, &pp)) {
return EINVALID_CMD_LINE;
}
diff --git a/tools/pvdisplay.c b/tools/pvdisplay.c
index 50522d9c2..2763889a9 100644
--- a/tools/pvdisplay.c
+++ b/tools/pvdisplay.c
@@ -17,7 +17,8 @@
static int _pvdisplay_single(struct cmd_context *cmd,
struct volume_group *vg,
- struct physical_volume *pv, void *handle)
+ struct physical_volume *pv,
+ struct processing_handle *handle __attribute__((unused)))
{
const char *pv_name = pv_dev_name(pv);
int ret = ECMD_PROCESSED;
@@ -48,7 +49,7 @@ static int _pvdisplay_single(struct cmd_context *cmd,
goto out;
}
- pvdisplay_full(cmd, pv, handle);
+ pvdisplay_full(cmd, pv, NULL);
if (arg_count(cmd, maps_ARG))
pvdisplay_segments(pv);
@@ -76,7 +77,6 @@ int pvdisplay(struct cmd_context *cmd, int argc, char **argv)
arg_count(cmd, binary_ARG) ||
arg_count(cmd, noheadings_ARG) ||
arg_count(cmd, options_ARG) ||
- arg_count(cmd, select_ARG) ||
arg_count(cmd, separator_ARG) ||
arg_count(cmd, sort_ARG) ||
arg_count(cmd, unbuffered_ARG)) {
diff --git a/tools/pvmove.c b/tools/pvmove.c
index 30725844c..038056370 100644
--- a/tools/pvmove.c
+++ b/tools/pvmove.c
@@ -1,6 +1,6 @@
/*
* Copyright (C) 2003-2004 Sistina Software, Inc. All rights reserved.
- * Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2004-2015 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
@@ -14,11 +14,13 @@
*/
#include "tools.h"
+
#include "polldaemon.h"
#include "display.h"
+#include "pvmove_poll.h"
+#include "lvmpolld-client.h"
#define PVMOVE_FIRST_TIME 0x00000001 /* Called for first time */
-#define PVMOVE_EXCLUSIVE 0x00000002 /* Require exclusive LV */
static int _pvmove_target_present(struct cmd_context *cmd, int clustered)
{
@@ -30,7 +32,7 @@ static int _pvmove_target_present(struct cmd_context *cmd, int clustered)
if (clustered && _clustered_found >= 0)
return _clustered_found;
- if (!(segtype = get_segtype_from_string(cmd, "mirror")))
+ if (!(segtype = get_segtype_from_string(cmd, SEG_TYPE_NAME_MIRROR)))
return_0;
if (activation() && segtype->ops->target_present &&
@@ -88,13 +90,6 @@ static const char *_extract_lvname(struct cmd_context *cmd, const char *vgname,
return lvname;
}
-static struct volume_group *_get_vg(struct cmd_context *cmd, const char *vgname)
-{
- dev_close_all();
-
- return vg_read_for_update(cmd, vgname, NULL, 0);
-}
-
/* Create list of PVs for allocation of replacement extents */
static struct dm_list *_get_allocatable_pvs(struct cmd_context *cmd, int argc,
char **argv, struct volume_group *vg,
@@ -513,170 +508,62 @@ static int _activate_lv(struct cmd_context *cmd, struct logical_volume *lv_mirr,
return r;
}
-static int _is_pvmove_image_removable(struct logical_volume *mimage_lv,
- void *baton)
-{
- uint32_t mimage_to_remove = *((uint32_t *)baton);
- struct lv_segment *mirror_seg;
-
- if (!(mirror_seg = get_only_segment_using_this_lv(mimage_lv))) {
- log_error(INTERNAL_ERROR "%s is not a proper mirror image",
- mimage_lv->name);
- return 0;
- }
-
- if (seg_type(mirror_seg, 0) != AREA_LV) {
- log_error(INTERNAL_ERROR "%s is not a pvmove mirror of LV-type",
- mirror_seg->lv->name);
- return 0;
- }
-
- if (mimage_to_remove > mirror_seg->area_count) {
- log_error(INTERNAL_ERROR "Mirror image %" PRIu32 " not found in segment",
- mimage_to_remove);
- return 0;
- }
-
- if (seg_lv(mirror_seg, mimage_to_remove) == mimage_lv)
- return 1;
-
- return 0;
-}
-
-static int _detach_pvmove_mirror(struct cmd_context *cmd,
- struct logical_volume *lv_mirr)
-{
- uint32_t mimage_to_remove = 0;
- struct dm_list lvs_completed;
- struct lv_list *lvl;
-
- /* Update metadata to remove mirror segments and break dependencies */
- dm_list_init(&lvs_completed);
-
- if (arg_is_set(cmd, abort_ARG) &&
- (seg_type(first_seg(lv_mirr), 0) == AREA_LV))
- mimage_to_remove = 1; /* remove the second mirror leg */
-
- if (!lv_remove_mirrors(cmd, lv_mirr, 1, 0, _is_pvmove_image_removable, &mimage_to_remove, PVMOVE) ||
- !remove_layers_for_segments_all(cmd, lv_mirr, PVMOVE,
- &lvs_completed)) {
- return 0;
- }
-
- dm_list_iterate_items(lvl, &lvs_completed)
- /* FIXME Assumes only one pvmove at a time! */
- lvl->lv->status &= ~LOCKED;
-
- return 1;
-}
-
-static int _suspend_lvs(struct cmd_context *cmd, unsigned first_time,
- struct logical_volume *lv_mirr,
- struct dm_list *lvs_changed,
- struct volume_group *vg_to_revert)
-{
- /*
- * Suspend lvs_changed the first time.
- * Suspend mirrors on subsequent calls.
- */
- if (first_time) {
- if (!suspend_lvs(cmd, lvs_changed, vg_to_revert))
- return_0;
- } else if (!suspend_lv(cmd, lv_mirr)) {
- if (vg_to_revert)
- vg_revert(vg_to_revert);
- return_0;
- }
-
- return 1;
-}
-
-static int _resume_lvs(struct cmd_context *cmd, unsigned first_time,
- struct logical_volume *lv_mirr,
- struct dm_list *lvs_changed)
-{
- /*
- * Suspend lvs_changed the first time.
- * Suspend mirrors on subsequent calls.
- */
-
- if (first_time) {
- if (!resume_lvs(cmd, lvs_changed)) {
- log_error("Unable to resume logical volumes");
- return 0;
- }
- } else if (!resume_lv(cmd, lv_mirr)) {
- log_error("Unable to reactivate logical volume \"%s\"",
- lv_mirr->name);
- return 0;
- }
-
- return 1;
-}
-
/*
- * Called to set up initial pvmove LV and to advance the mirror
- * to successive sections of it.
- * (Not called after the last section completes.)
+ * Called to set up initial pvmove LV only.
+ * (Not called after first or any other section completes.)
*/
static int _update_metadata(struct cmd_context *cmd, struct volume_group *vg,
struct logical_volume *lv_mirr,
- struct dm_list *lvs_changed, unsigned flags)
+ struct dm_list *lvs_changed, unsigned exclusive)
{
- unsigned exclusive = (flags & PVMOVE_EXCLUSIVE) ? 1 : 0;
- unsigned first_time = (flags & PVMOVE_FIRST_TIME) ? 1 : 0;
int r = 0;
- log_verbose("Updating volume group metadata");
+ log_verbose("Setting up pvmove in on-disk volume group metadata.");
if (!vg_write(vg)) {
log_error("ABORTING: Volume group metadata update failed.");
return 0;
}
- if (!_suspend_lvs(cmd, first_time, lv_mirr, lvs_changed, vg)) {
- log_error("ABORTING: Temporary pvmove mirror %s failed.", first_time ? "activation" : "reload");
+ if (!suspend_lvs(cmd, lvs_changed, vg)) {
+ log_error("ABORTING: Temporary pvmove mirror activation failed.");
/* FIXME Add a recovery path for first time too. */
- if (!first_time && !revert_lv(cmd, lv_mirr))
- stack;
return 0;
}
/* Commit on-disk metadata */
if (!vg_commit(vg)) {
log_error("ABORTING: Volume group metadata update failed.");
- if (!_resume_lvs(cmd, first_time, lv_mirr, lvs_changed))
- stack;
- if (!first_time && !revert_lv(cmd, lv_mirr))
- stack;
+ if (!resume_lvs(cmd, lvs_changed))
+ log_error("Unable to resume logical volumes.");
return 0;
}
/* Activate the temporary mirror LV */
/* Only the first mirror segment gets activated as a mirror */
/* FIXME: Add option to use a log */
- if (first_time) {
- if (!exclusive && _pvmove_is_exclusive(cmd, vg))
- exclusive = 1;
+ if (!exclusive && _pvmove_is_exclusive(cmd, vg))
+ exclusive = 1;
- if (!_activate_lv(cmd, lv_mirr, exclusive)) {
- if (test_mode()) {
- r = 1;
- goto out;
- }
-
- /*
- * FIXME Run --abort internally here.
- */
- log_error("ABORTING: Temporary pvmove mirror activation failed. Run pvmove --abort.");
+ if (!_activate_lv(cmd, lv_mirr, exclusive)) {
+ if (test_mode()) {
+ r = 1;
goto out;
}
+
+ /*
+ * FIXME Run --abort internally here.
+ */
+ log_error("ABORTING: Temporary pvmove mirror activation failed. Run pvmove --abort.");
+ goto out;
}
r = 1;
out:
- if (!_resume_lvs(cmd, first_time, lv_mirr, lvs_changed))
+ if (!resume_lvs(cmd, lvs_changed)) {
+ log_error("Unable to resume logical volumes.");
r = 0;
+ }
if (r)
backup(vg);
@@ -684,10 +571,27 @@ out:
return r;
}
+static int _copy_id_components(struct cmd_context *cmd,
+ const struct logical_volume *lv, char **vg_name,
+ char **lv_name, union lvid *lvid)
+{
+ if (!(*vg_name = dm_pool_strdup(cmd->mem, lv->vg->name)) ||
+ !(*lv_name = dm_pool_strdup(cmd->mem, lv->name))) {
+ log_error("Failed to clone VG or LV name.");
+ return 0;
+ }
+
+ *lvid = lv->lvid;
+
+ return 1;
+}
+
static int _set_up_pvmove(struct cmd_context *cmd, const char *pv_name,
- int argc, char **argv)
+ int argc, char **argv, union lvid *lvid, char **vg_name_copy,
+ char **lv_mirr_name)
{
const char *lv_name = NULL;
+ const char *vg_name;
char *pv_name_arg;
struct volume_group *vg;
struct dm_list *source_pvl;
@@ -696,6 +600,7 @@ static int _set_up_pvmove(struct cmd_context *cmd, const char *pv_name,
struct dm_list *lvs_changed;
struct physical_volume *pv;
struct logical_volume *lv_mirr;
+ uint32_t lockd_state = 0;
unsigned flags = PVMOVE_FIRST_TIME;
unsigned exclusive;
int r = ECMD_FAILED;
@@ -710,9 +615,10 @@ static int _set_up_pvmove(struct cmd_context *cmd, const char *pv_name,
return EINVALID_CMD_LINE;
}
+ vg_name = pv_vg_name(pv);
+
if (arg_count(cmd, name_ARG)) {
- if (!(lv_name = _extract_lvname(cmd, pv_vg_name(pv),
- arg_value(cmd, name_ARG)))) {
+ if (!(lv_name = _extract_lvname(cmd, vg_name, arg_value(cmd, name_ARG)))) {
stack;
free_pv_fid(pv);
return EINVALID_CMD_LINE;
@@ -726,12 +632,15 @@ static int _set_up_pvmove(struct cmd_context *cmd, const char *pv_name,
}
/* Read VG */
- log_verbose("Finding volume group \"%s\"", pv_vg_name(pv));
+ log_verbose("Finding volume group \"%s\"", vg_name);
+
+ if (!lockd_vg(cmd, vg_name, "ex", 0, &lockd_state))
+ return_ECMD_FAILED;
- vg = _get_vg(cmd, pv_vg_name(pv));
+ vg = vg_read(cmd, vg_name, NULL, READ_FOR_UPDATE, lockd_state);
if (vg_read_error(vg)) {
release_vg(vg);
- return_ECMD_FAILED;
+ goto out_ret;
}
exclusive = _pvmove_is_exclusive(cmd, vg);
@@ -785,143 +694,154 @@ static int _set_up_pvmove(struct cmd_context *cmd, const char *pv_name,
/* init_pvmove(1); */
/* vg->status |= PVMOVE; */
- if (flags & PVMOVE_FIRST_TIME) {
- if (exclusive)
- flags |= PVMOVE_EXCLUSIVE;
- if (!_update_metadata
- (cmd, vg, lv_mirr, lvs_changed, flags))
+ if (!_copy_id_components(cmd, lv_mirr, vg_name_copy, lv_mirr_name, lvid))
+ goto out;
+
+ if (flags & PVMOVE_FIRST_TIME)
+ if (!_update_metadata(cmd, vg, lv_mirr, lvs_changed, exclusive))
goto_out;
- }
/* LVs are all in status LOCKED */
r = ECMD_PROCESSED;
out:
free_pv_fid(pv);
- unlock_and_release_vg(cmd, vg, pv_vg_name(pv));
+ unlock_and_release_vg(cmd, vg, vg_name);
+out_ret:
+ /*
+ * Release explicitly because the command may continue running
+	 * for some time monitoring the progress, and we do not want
+ * or need the lockd lock held over that.
+ */
+ if (!lockd_vg(cmd, vg_name, "un", 0, &lockd_state))
+ stack;
+
return r;
}
-static int _finish_pvmove(struct cmd_context *cmd, struct volume_group *vg,
- struct logical_volume *lv_mirr,
- struct dm_list *lvs_changed)
+static int _read_poll_id_from_pvname(struct cmd_context *cmd, const char *pv_name,
+ union lvid *lvid, char **vg_name_copy,
+ char **lv_name_copy, unsigned *in_progress)
{
- int r = 1;
+ int ret = 0;
+ const char *vg_name;
+ struct logical_volume *lv;
+ struct physical_volume *pv;
+ struct volume_group *vg;
+ uint32_t lockd_state = 0;
- if (!dm_list_empty(lvs_changed) &&
- (!_detach_pvmove_mirror(cmd, lv_mirr) ||
- !replace_lv_with_error_segment(lv_mirr))) {
- log_error("ABORTING: Removal of temporary mirror failed");
+ if (!pv_name) {
+ log_error(INTERNAL_ERROR "Invalid PV name parameter.");
return 0;
}
- /* Store metadata without dependencies on mirror segments */
- if (!vg_write(vg)) {
- log_error("ABORTING: Failed to write new data locations "
- "to disk.");
- return 0;
- }
+ if (!(pv = find_pv_by_name(cmd, pv_name, 0, 0)))
+ return_0;
- /* Suspend LVs changed (implicitly suspends lv_mirr) */
- if (!suspend_lvs(cmd, lvs_changed, vg)) {
- log_error("ABORTING: Locking LVs to remove temporary mirror failed");
- if (!revert_lv(cmd, lv_mirr))
- stack;
- return 0;
- }
+ vg_name = pv_vg_name(pv);
- /* Store metadata without dependencies on mirror segments */
- if (!vg_commit(vg)) {
- log_error("ABORTING: Failed to write new data locations "
- "to disk.");
- if (!revert_lv(cmd, lv_mirr))
- stack;
- if (!revert_lvs(cmd, lvs_changed))
- stack;
- return 0;
- }
+ if (!lockd_vg(cmd, vg_name, "sh", 0, &lockd_state))
+ return_0;
- /* Release mirror LV. (No pending I/O because it's been suspended.) */
- if (!resume_lv(cmd, lv_mirr)) {
- log_error("Unable to reactivate logical volume \"%s\"",
- lv_mirr->name);
- r = 0;
+ /* need read-only access */
+ vg = vg_read(cmd, vg_name, NULL, 0, lockd_state);
+ if (vg_read_error(vg)) {
+ log_error("ABORTING: Can't read VG for %s.", pv_name);
+ release_vg(vg);
+ ret = 0;
+ goto out;
}
- /* Unsuspend LVs */
- if (!resume_lvs(cmd, lvs_changed))
- stack;
-
- /* Deactivate mirror LV */
- if (!deactivate_lv(cmd, lv_mirr)) {
- log_error("ABORTING: Unable to deactivate temporary logical "
- "volume \"%s\"", lv_mirr->name);
- r = 0;
+ if (!(lv = find_pvmove_lv(vg, pv_dev(pv), PVMOVE))) {
+ log_print_unless_silent("%s: No pvmove in progress - already finished or aborted.",
+ pv_name);
+ ret = 1;
+ *in_progress = 0;
+ } else if (_copy_id_components(cmd, lv, vg_name_copy, lv_name_copy, lvid)) {
+ ret = 1;
+ *in_progress = 1;
}
- log_verbose("Removing temporary pvmove LV");
- if (!lv_remove(lv_mirr)) {
- log_error("ABORTING: Removal of temporary pvmove LV failed");
- return 0;
- }
+ unlock_and_release_vg(cmd, vg, vg_name);
+out:
+ if (!lockd_vg(cmd, vg_name, "un", 0, &lockd_state))
+ stack;
+ free_pv_fid(pv);
+ return ret;
+}
- /* Store it on disks */
- log_verbose("Writing out final volume group after pvmove");
- if (!vg_write(vg) || !vg_commit(vg)) {
- log_error("ABORTING: Failed to write new data locations "
- "to disk.");
- return 0;
- }
+static struct poll_functions _pvmove_fns = {
+ .get_copy_name_from_lv = get_pvmove_pvname_from_lv_mirr,
+ .poll_progress = poll_mirror_progress,
+ .update_metadata = pvmove_update_metadata,
+ .finish_copy = pvmove_finish,
+};
- /* FIXME backup positioning */
- backup(vg);
+static void _destroy_id(struct cmd_context *cmd, struct poll_operation_id *id)
+{
+ if (!id)
+ return;
- return r;
+ dm_pool_free(cmd->mem, id);
}
-static struct volume_group *_get_move_vg(struct cmd_context *cmd,
- const char *name,
- const char *uuid __attribute__((unused)))
+static struct poll_operation_id *_create_id(struct cmd_context *cmd,
+ const char *pv_name,
+ const char *vg_name,
+ const char *lv_name,
+ const char *uuid)
{
- struct physical_volume *pv;
- struct volume_group *vg;
-
- /* Reread all metadata in case it got changed */
- if (!(pv = find_pv_by_name(cmd, name, 0, 0))) {
- log_error("ABORTING: Can't reread PV %s", name);
- /* What more could we do here? */
+ struct poll_operation_id *id = dm_pool_alloc(cmd->mem, sizeof(struct poll_operation_id));
+ if (!id) {
+ log_error("Poll operation ID allocation failed.");
return NULL;
}
- vg = _get_vg(cmd, pv_vg_name(pv));
- free_pv_fid(pv);
+ id->vg_name = vg_name ? dm_pool_strdup(cmd->mem, vg_name) : NULL;
+ id->lv_name = lv_name ? dm_pool_strdup(cmd->mem, lv_name) : NULL;
+ id->display_name = pv_name ? dm_pool_strdup(cmd->mem, pv_name) : NULL;
+ id->uuid = uuid ? dm_pool_strdup(cmd->mem, uuid) : NULL;
- return vg;
-}
+ if (!id->vg_name || !id->lv_name || !id->display_name || !id->uuid) {
+ log_error("Failed to copy one or more poll operation ID members.");
+ _destroy_id(cmd, id);
+ id = NULL;
+ }
-static struct poll_functions _pvmove_fns = {
- .get_copy_name_from_lv = get_pvmove_pvname_from_lv_mirr,
- .get_copy_vg = _get_move_vg,
- .get_copy_lv = find_pvmove_lv_from_pvname,
- .poll_progress = poll_mirror_progress,
- .update_metadata = _update_metadata,
- .finish_copy = _finish_pvmove,
-};
+ return id;
+}
int pvmove_poll(struct cmd_context *cmd, const char *pv_name,
- unsigned background)
+ const char *uuid, const char *vg_name,
+ const char *lv_name, unsigned background)
{
+ int r;
+ struct poll_operation_id *id = NULL;
+
if (test_mode())
return ECMD_PROCESSED;
- return poll_daemon(cmd, pv_name, NULL, background, PVMOVE, &_pvmove_fns,
- "Moved");
+ if (uuid) {
+ id = _create_id(cmd, pv_name, vg_name, lv_name, uuid);
+ if (!id) {
+ log_error("Failed to allocate poll identifier for pvmove.");
+ return ECMD_FAILED;
+ }
+ }
+
+ r = poll_daemon(cmd, background, PVMOVE, &_pvmove_fns, "Moved", id);
+
+ _destroy_id(cmd, id);
+
+ return r;
}
int pvmove(struct cmd_context *cmd, int argc, char **argv)
{
- char *pv_name = NULL;
char *colon;
int ret;
+ unsigned in_progress = 1;
+ union lvid *lvid = NULL;
+ char *pv_name = NULL, *vg_name = NULL, *lv_name = NULL;
/* dm raid1 target must be present in every case */
if (!_pvmove_target_present(cmd, 0)) {
@@ -930,9 +850,32 @@ int pvmove(struct cmd_context *cmd, int argc, char **argv)
return ECMD_FAILED;
}
+ if (lvmlockd_use() && !lvmpolld_use()) {
+ /*
+ * Don't want to spend the time making lvmlockd
+ * work without lvmpolld.
+ */
+ log_error("Enable lvmpolld when using lvmlockd.");
+ return ECMD_FAILED;
+ }
+
+ if (lvmlockd_use() && !argc) {
+ /*
+ * FIXME: move process_each_vg from polldaemon up to here,
+ * then we can remove this limitation.
+ */
+ log_error("Specify pvmove args when using lvmlockd.");
+ return ECMD_FAILED;
+ }
+
if (argc) {
+ if (!(lvid = dm_pool_alloc(cmd->mem, sizeof(*lvid)))) {
+ log_error("Failed to allocate lvid.");
+ return ECMD_FAILED;
+ }
+
if (!(pv_name = dm_pool_strdup(cmd->mem, argv[0]))) {
- log_error("Failed to clone PV name");
+ log_error("Failed to clone PV name.");
return ECMD_FAILED;
}
@@ -942,13 +885,36 @@ int pvmove(struct cmd_context *cmd, int argc, char **argv)
if (colon)
*colon = '\0';
- if (!arg_count(cmd, abort_ARG) &&
- (ret = _set_up_pvmove(cmd, pv_name, argc, argv)) !=
- ECMD_PROCESSED) {
+ /*
+ * To do a reverse mapping from PV name to VG name, we need the
+ * correct global mapping of PVs to VGs.
+ */
+ if (!lockd_gl(cmd, "sh", 0)) {
stack;
- return ret;
+ return ECMD_FAILED;
+ }
+
+ if (!arg_count(cmd, abort_ARG)) {
+ if ((ret = _set_up_pvmove(cmd, pv_name, argc, argv, lvid, &vg_name, &lv_name)) != ECMD_PROCESSED) {
+ stack;
+ return ret;
+ }
+ } else {
+ if (!_read_poll_id_from_pvname(cmd, pv_name, lvid, &vg_name, &lv_name, &in_progress))
+ return_ECMD_FAILED;
+
+ if (!in_progress)
+ return ECMD_PROCESSED;
}
+
+ /*
+ * The command may sit and report progress for some time,
+ * and we do not want or need the lockd locks held during
+ * that time.
+ */
+ lockd_gl(cmd, "un", 0);
}
- return pvmove_poll(cmd, pv_name, arg_is_set(cmd, background_ARG));
+ return pvmove_poll(cmd, pv_name, lvid ? lvid->s : NULL, vg_name, lv_name,
+ arg_is_set(cmd, background_ARG));
}
diff --git a/tools/pvmove_poll.c b/tools/pvmove_poll.c
new file mode 100644
index 000000000..0f071dc93
--- /dev/null
+++ b/tools/pvmove_poll.c
@@ -0,0 +1,196 @@
+/*
+ * Copyright (C) 2003-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2015 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "tools.h"
+
+#include "pvmove_poll.h"
+
+static int _is_pvmove_image_removable(struct logical_volume *mimage_lv,
+ void *baton)
+{
+ uint32_t mimage_to_remove = *((uint32_t *)baton);
+ struct lv_segment *mirror_seg;
+
+ if (!(mirror_seg = get_only_segment_using_this_lv(mimage_lv))) {
+ log_error(INTERNAL_ERROR "%s is not a proper mirror image",
+ mimage_lv->name);
+ return 0;
+ }
+
+ if (seg_type(mirror_seg, 0) != AREA_LV) {
+ log_error(INTERNAL_ERROR "%s is not a pvmove mirror of LV-type",
+ mirror_seg->lv->name);
+ return 0;
+ }
+
+ if (mimage_to_remove > mirror_seg->area_count) {
+ log_error(INTERNAL_ERROR "Mirror image %" PRIu32 " not found in segment",
+ mimage_to_remove);
+ return 0;
+ }
+
+ if (seg_lv(mirror_seg, mimage_to_remove) == mimage_lv)
+ return 1;
+
+ return 0;
+}
+
+static int _detach_pvmove_mirror(struct cmd_context *cmd,
+ struct logical_volume *lv_mirr)
+{
+ uint32_t mimage_to_remove = 0;
+ struct dm_list lvs_completed;
+ struct lv_list *lvl;
+
+ /* Update metadata to remove mirror segments and break dependencies */
+ dm_list_init(&lvs_completed);
+
+ if (arg_is_set(cmd, abort_ARG) &&
+ (seg_type(first_seg(lv_mirr), 0) == AREA_LV))
+ mimage_to_remove = 1; /* remove the second mirror leg */
+
+ if (!lv_remove_mirrors(cmd, lv_mirr, 1, 0, _is_pvmove_image_removable, &mimage_to_remove, PVMOVE) ||
+ !remove_layers_for_segments_all(cmd, lv_mirr, PVMOVE,
+ &lvs_completed)) {
+ return 0;
+ }
+
+ dm_list_iterate_items(lvl, &lvs_completed)
+ /* FIXME Assumes only one pvmove at a time! */
+ lvl->lv->status &= ~LOCKED;
+
+ return 1;
+}
+
+/*
+ * Called to advance the mirror to successive sections of it.
+ * (Not called first time or after the last section completes.)
+ */
+int pvmove_update_metadata(struct cmd_context *cmd, struct volume_group *vg,
+ struct logical_volume *lv_mirr,
+ struct dm_list *lvs_changed __attribute__((unused)),
+ unsigned flags __attribute__((unused)))
+{
+ log_verbose("Updating volume group metadata.");
+ if (!vg_write(vg)) {
+ log_error("ABORTING: Volume group metadata update failed.");
+ return 0;
+ }
+
+ if (!suspend_lv(cmd, lv_mirr)) {
+ vg_revert(vg);
+ log_error("ABORTING: Temporary pvmove mirror reload failed.");
+ if (!revert_lv(cmd, lv_mirr))
+ stack;
+ return 0;
+ }
+
+ /* Commit on-disk metadata */
+ if (!vg_commit(vg)) {
+ log_error("ABORTING: Volume group metadata update failed.");
+ if (!resume_lv(cmd, lv_mirr))
+ log_error("Unable to reactivate logical volume \"%s\".",
+ lv_mirr->name);
+ if (!revert_lv(cmd, lv_mirr))
+ stack;
+ return 0;
+ }
+
+ if (!resume_lv(cmd, lv_mirr)) {
+ log_error("Unable to reactivate logical volume \"%s\".",
+ lv_mirr->name);
+ return 0;
+ }
+
+ backup(vg);
+
+ return 1;
+}
+
+int pvmove_finish(struct cmd_context *cmd, struct volume_group *vg,
+ struct logical_volume *lv_mirr, struct dm_list *lvs_changed)
+{
+ int r = 1;
+
+ if (!dm_list_empty(lvs_changed) &&
+ (!_detach_pvmove_mirror(cmd, lv_mirr) ||
+ !replace_lv_with_error_segment(lv_mirr))) {
+ log_error("ABORTING: Removal of temporary mirror failed");
+ return 0;
+ }
+
+ /* Store metadata without dependencies on mirror segments */
+ if (!vg_write(vg)) {
+ log_error("ABORTING: Failed to write new data locations "
+ "to disk.");
+ return 0;
+ }
+
+ /* Suspend LVs changed (implicitly suspends lv_mirr) */
+ if (!suspend_lvs(cmd, lvs_changed, vg)) {
+ log_error("ABORTING: Locking LVs to remove temporary mirror failed");
+ if (!revert_lv(cmd, lv_mirr))
+ stack;
+ return 0;
+ }
+
+ /* Store metadata without dependencies on mirror segments */
+ if (!vg_commit(vg)) {
+ log_error("ABORTING: Failed to write new data locations "
+ "to disk.");
+ if (!revert_lv(cmd, lv_mirr))
+ stack;
+ if (!revert_lvs(cmd, lvs_changed))
+ stack;
+ return 0;
+ }
+
+ /* Release mirror LV. (No pending I/O because it's been suspended.) */
+ if (!resume_lv(cmd, lv_mirr)) {
+ log_error("Unable to reactivate logical volume \"%s\"",
+ lv_mirr->name);
+ r = 0;
+ }
+
+ /* Unsuspend LVs */
+ if (!resume_lvs(cmd, lvs_changed))
+ stack;
+
+ /* Deactivate mirror LV */
+ if (!deactivate_lv(cmd, lv_mirr)) {
+ log_error("ABORTING: Unable to deactivate temporary logical "
+ "volume \"%s\"", lv_mirr->name);
+ r = 0;
+ }
+
+ log_verbose("Removing temporary pvmove LV");
+ if (!lv_remove(lv_mirr)) {
+ log_error("ABORTING: Removal of temporary pvmove LV failed");
+ return 0;
+ }
+
+ /* Store it on disks */
+ log_verbose("Writing out final volume group after pvmove");
+ if (!vg_write(vg) || !vg_commit(vg)) {
+ log_error("ABORTING: Failed to write new data locations "
+ "to disk.");
+ return 0;
+ }
+
+ /* FIXME backup positioning */
+ backup(vg);
+
+ return r;
+}
diff --git a/tools/pvmove_poll.h b/tools/pvmove_poll.h
new file mode 100644
index 000000000..b12458f64
--- /dev/null
+++ b/tools/pvmove_poll.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2015 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _LVM_PVMOVE_H
+#define _LVM_PVMOVE_H
+
+struct cmd_context;
+struct dm_list;
+struct logical_volume;
+struct volume_group;
+
+int pvmove_update_metadata(struct cmd_context *cmd, struct volume_group *vg,
+ struct logical_volume *lv_mirr,
+ struct dm_list *lvs_changed, unsigned flags);
+
+int pvmove_finish(struct cmd_context *cmd, struct volume_group *vg,
+ struct logical_volume *lv_mirr, struct dm_list *lvs_changed);
+
+#endif /* _LVM_PVMOVE_H */
diff --git a/tools/pvremove.c b/tools/pvremove.c
index 035d4fd4e..e6ae86641 100644
--- a/tools/pvremove.c
+++ b/tools/pvremove.c
@@ -18,9 +18,9 @@
int pvremove(struct cmd_context *cmd, int argc, char **argv)
{
int i;
- int ret = ECMD_PROCESSED;
unsigned force_count;
unsigned prompt;
+ struct dm_list pv_names;
if (!argc) {
log_error("Please enter a physical volume path");
@@ -30,15 +30,20 @@ int pvremove(struct cmd_context *cmd, int argc, char **argv)
force_count = arg_count(cmd, force_ARG);
prompt = arg_count(cmd, yes_ARG);
+ dm_list_init(&pv_names);
+
+ /* Needed to change the set of orphan PVs. */
+ if (!lockd_gl(cmd, "ex", 0))
+ return_ECMD_FAILED;
+
for (i = 0; i < argc; i++) {
dm_unescape_colons_and_at_signs(argv[i], NULL, NULL);
- if (!pvremove_single(cmd, argv[i], NULL, force_count, prompt)) {
- stack;
- ret = ECMD_FAILED;
- }
- if (sigint_caught())
+ if (!str_list_add(cmd->mem, &pv_names, argv[i]))
return_ECMD_FAILED;
}
- return ret;
+ if (!pvremove_many(cmd, &pv_names, force_count, prompt))
+ return_ECMD_FAILED;
+
+ return ECMD_PROCESSED;
}
diff --git a/tools/pvresize.c b/tools/pvresize.c
index 631e63e0b..0b055e6ef 100644
--- a/tools/pvresize.c
+++ b/tools/pvresize.c
@@ -26,9 +26,9 @@ struct pvresize_params {
static int _pvresize_single(struct cmd_context *cmd,
struct volume_group *vg,
struct physical_volume *pv,
- void *handle)
+ struct processing_handle *handle)
{
- struct pvresize_params *params = (struct pvresize_params *) handle;
+ struct pvresize_params *params = (struct pvresize_params *) handle->custom_handle;
if (!params) {
log_error(INTERNAL_ERROR "Invalid resize params.");
@@ -36,6 +36,14 @@ static int _pvresize_single(struct cmd_context *cmd,
}
params->total++;
+ /*
+ * Needed to change a property on an orphan PV.
+ * i.e. the global lock is only needed for orphans.
+ * Convert sh to ex.
+ */
+ if (is_orphan(pv) && !lockd_gl(cmd, "ex", 0))
+ return_ECMD_FAILED;
+
if (!pv_resize_single(cmd, vg, pv, params->new_size))
return_ECMD_FAILED;
@@ -47,16 +55,19 @@ static int _pvresize_single(struct cmd_context *cmd,
int pvresize(struct cmd_context *cmd, int argc, char **argv)
{
struct pvresize_params params;
+ struct processing_handle *handle = NULL;
int ret;
if (!argc) {
log_error("Please supply physical volume(s)");
- return EINVALID_CMD_LINE;
+ ret = EINVALID_CMD_LINE;
+ goto out;
}
if (arg_sign_value(cmd, physicalvolumesize_ARG, SIGN_NONE) == SIGN_MINUS) {
log_error("Physical volume size may not be negative");
- return EINVALID_CMD_LINE;
+ ret = EINVALID_CMD_LINE;
+ goto out;
}
params.new_size = arg_uint64_value(cmd, physicalvolumesize_ARG,
@@ -65,11 +76,20 @@ int pvresize(struct cmd_context *cmd, int argc, char **argv)
params.done = 0;
params.total = 0;
- ret = process_each_pv(cmd, argc, argv, NULL, READ_FOR_UPDATE, &params,
+ if (!(handle = init_processing_handle(cmd))) {
+ log_error("Failed to initialize processing handle.");
+ ret = ECMD_FAILED;
+ goto out;
+ }
+
+ handle->custom_handle = &params;
+
+ ret = process_each_pv(cmd, argc, argv, NULL, READ_FOR_UPDATE, handle,
_pvresize_single);
log_print_unless_silent("%d physical volume(s) resized / %d physical volume(s) "
"not resized", params.done, params.total - params.done);
-
+out:
+ destroy_processing_handle(cmd, handle);
return ret;
}
diff --git a/tools/pvscan.c b/tools/pvscan.c
index 3eaf0bbcb..3adb91c63 100644
--- a/tools/pvscan.c
+++ b/tools/pvscan.c
@@ -43,7 +43,7 @@ static void _pvscan_display_single(struct cmd_context *cmd,
/* pv_show(pv); */
/* FIXME - Moved to Volume Group structure */
- /* log_print("System Id %s", pv->vg->system_id); */
+ /* log_print("system ID %s", pv->vg->system_id); */
/* log_print(" "); */
/* return; */
@@ -106,7 +106,7 @@ static int _auto_activation_handler(struct cmd_context *cmd,
return_0;
/* NB. This is safe because we know lvmetad is running and we won't hit disk. */
- vg = vg_read(cmd, vgname, (const char *)&vgid_raw, 0);
+ vg = vg_read(cmd, vgname, (const char *)&vgid_raw, 0, 0);
if (vg_read_error(vg)) {
log_error("Failed to read Volume Group \"%s\" (%s) during autoactivation.", vgname, vgid);
release_vg(vg);
@@ -166,7 +166,7 @@ static int _clear_dev_from_lvmetad_cache(dev_t devno, int32_t major, int32_t min
{
char buf[24];
- (void) dm_snprintf(buf, sizeof(buf), "%" PRIi32 ":%" PRIi32, major, minor);
+ (void) dm_snprintf(buf, sizeof(buf), FMTi32 ":" FMTi32, major, minor);
if (!lvmetad_pv_gone(devno, buf, handler))
return_0;
@@ -189,6 +189,8 @@ static int _pvscan_lvmetad(struct cmd_context *cmd, int argc, char **argv)
dev_t devno;
activation_handler handler = NULL;
+ cmd->include_foreign_vgs = 1;
+
/*
* Return here immediately if lvmetad is not used.
* Also return if locking_type=3 (clustered) as we
@@ -239,8 +241,17 @@ static int _pvscan_lvmetad(struct cmd_context *cmd, int argc, char **argv)
if (pv_name[0] == '/') {
/* device path */
if (!(dev = dev_cache_get(pv_name, cmd->lvmetad_filter))) {
- log_error("Physical Volume %s not found.", pv_name);
- ret = ECMD_FAILED;
+ if ((dev = dev_cache_get(pv_name, NULL))) {
+ if (!_clear_dev_from_lvmetad_cache(dev->dev, MAJOR(dev->dev), MINOR(dev->dev), handler)) {
+ stack;
+ ret = ECMD_FAILED;
+ break;
+ }
+ } else {
+ log_error("Physical Volume %s not found.", pv_name);
+ ret = ECMD_FAILED;
+ break;
+ }
continue;
}
}
@@ -251,7 +262,7 @@ static int _pvscan_lvmetad(struct cmd_context *cmd, int argc, char **argv)
ret = ECMD_FAILED;
continue;
}
- devno = MKDEV((dev_t)major, minor);
+ devno = MKDEV((dev_t)major, (dev_t)minor);
if (!(dev = dev_cache_get_by_devt(devno, cmd->lvmetad_filter))) {
if (!(_clear_dev_from_lvmetad_cache(devno, major, minor, handler))) {
stack;
@@ -266,7 +277,7 @@ static int _pvscan_lvmetad(struct cmd_context *cmd, int argc, char **argv)
stack;
break;
}
- if (!lvmetad_pvscan_single(cmd, dev, handler)) {
+ if (!lvmetad_pvscan_single(cmd, dev, handler, 0)) {
ret = ECMD_FAILED;
stack;
break;
@@ -284,7 +295,7 @@ static int _pvscan_lvmetad(struct cmd_context *cmd, int argc, char **argv)
if (major < 0 || minor < 0)
continue;
- devno = MKDEV((dev_t)major, minor);
+ devno = MKDEV((dev_t)major, (dev_t)minor);
if (!(dev = dev_cache_get_by_devt(devno, cmd->lvmetad_filter))) {
if (!(_clear_dev_from_lvmetad_cache(devno, major, minor, handler))) {
@@ -299,7 +310,7 @@ static int _pvscan_lvmetad(struct cmd_context *cmd, int argc, char **argv)
stack;
break;
}
- if (!lvmetad_pvscan_single(cmd, dev, handler)) {
+ if (!lvmetad_pvscan_single(cmd, dev, handler, 0)) {
ret = ECMD_FAILED;
stack;
break;
@@ -308,9 +319,9 @@ static int _pvscan_lvmetad(struct cmd_context *cmd, int argc, char **argv)
}
out:
- sync_local_dev_names(cmd);
+ if (!sync_local_dev_names(cmd))
+ stack;
unlock_vg(cmd, VG_GLOBAL);
-
return ret;
}
@@ -360,6 +371,10 @@ int pvscan(struct cmd_context *cmd, int argc, char **argv)
return ECMD_FAILED;
}
+ /* Needed for a current listing of the global VG namespace. */
+ if (!lockd_gl(cmd, "sh", 0))
+ return_ECMD_FAILED;
+
if (cmd->full_filter->wipe)
cmd->full_filter->wipe(cmd->full_filter);
lvmcache_destroy(cmd, 1, 0);
diff --git a/tools/reporter.c b/tools/reporter.c
index b3125680d..8f7a2723a 100644
--- a/tools/reporter.c
+++ b/tools/reporter.c
@@ -14,14 +14,16 @@
*/
#include "tools.h"
+
#include "report.h"
-static int _process_each_devtype(struct cmd_context *cmd, int argc, void *handle)
+static int _process_each_devtype(struct cmd_context *cmd, int argc,
+ struct processing_handle *handle)
{
if (argc)
log_warn("WARNING: devtypes currently ignores command line arguments.");
- if (!report_devtypes(handle))
+ if (!report_devtypes(handle->custom_handle))
return_ECMD_FAILED;
return ECMD_PROCESSED;
@@ -29,9 +31,12 @@ static int _process_each_devtype(struct cmd_context *cmd, int argc, void *handle
static int _vgs_single(struct cmd_context *cmd __attribute__((unused)),
const char *vg_name, struct volume_group *vg,
- void *handle)
+ struct processing_handle *handle)
{
- if (!report_object(handle, vg, NULL, NULL, NULL, NULL, NULL, NULL, NULL))
+ struct selection_handle *sh = handle->selection_handle;
+
+ if (!report_object(sh ? : handle->custom_handle, sh != NULL,
+ vg, NULL, NULL, NULL, NULL, NULL, NULL))
return_ECMD_FAILED;
check_current_backup(vg);
@@ -39,7 +44,7 @@ static int _vgs_single(struct cmd_context *cmd __attribute__((unused)),
return ECMD_PROCESSED;
}
-static void _choose_lv_segment_for_status_report(struct logical_volume *lv, struct lv_segment **lv_seg)
+static void _choose_lv_segment_for_status_report(const struct logical_volume *lv, const struct lv_segment **lv_seg)
{
/*
* By default, take the first LV segment to report status for.
@@ -51,134 +56,139 @@ static void _choose_lv_segment_for_status_report(struct logical_volume *lv, stru
*lv_seg = first_seg(lv);
}
-static void _do_info_and_status(struct cmd_context *cmd,
- struct logical_volume *lv,
- struct lvinfo *lvinfo,
- struct lv_segment *lv_seg,
- struct lv_seg_status *lv_seg_status,
- int do_info, int do_status)
+static int _do_info_and_status(struct cmd_context *cmd,
+ const struct logical_volume *lv,
+ const struct lv_segment *lv_seg,
+ struct lv_with_info_and_seg_status *status,
+ int do_info, int do_status)
{
- if (lv_seg_status) {
- lv_seg_status->mem = lv->vg->vgmem;
- lv_seg_status->type = SEG_STATUS_NONE;
- lv_seg_status->status = NULL;
- }
+ unsigned use_layer = lv_is_thin_pool(lv) ? 1 : 0;
- if (do_info && !do_status) {
- /* info only */
- if (!lv_info(cmd, lv, 0, lvinfo, 1, 1))
- lvinfo->exists = 0;
- } else if (!do_info && do_status) {
- /* status only */
+ status->lv = lv;
+ if (do_status) {
+ if (!(status->seg_status.mem = dm_pool_create("reporter_pool", 1024)))
+ return_0;
if (!lv_seg)
_choose_lv_segment_for_status_report(lv, &lv_seg);
- if (!lv_status(cmd, lv_seg, lv_seg_status))
- lvinfo->exists = 0;
- } else if (do_info && do_status) {
- /* both info and status */
- if (!lv_seg)
- _choose_lv_segment_for_status_report(lv, &lv_seg);
- if (!lv_info_with_seg_status(cmd, lv, lv_seg, 0, lvinfo, lv_seg_status, 1, 1))
- lvinfo->exists = 0;
- }
+ if (do_info) {
+ /* both info and status */
+ status->info_ok = lv_info_with_seg_status(cmd, lv, lv_seg, use_layer, status, 1, 1);
+ /* for inactive thin-pools reset lv info struct */
+ if (use_layer && status->info_ok &&
+ !lv_info(cmd, lv, 0, NULL, 0, 0))
+ memset(&status->info, 0, sizeof(status->info));
+ } else
+ /* status only */
+ status->info_ok = lv_status(cmd, lv_seg, use_layer, &status->seg_status);
+ } else if (do_info)
+ /* info only */
+ status->info_ok = lv_info(cmd, lv, use_layer, &status->info, 1, 1);
+
+ return 1;
}
static int _do_lvs_with_info_and_status_single(struct cmd_context *cmd,
- struct logical_volume *lv,
+ const struct logical_volume *lv,
int do_info, int do_status,
- void *handle)
+ struct processing_handle *handle)
{
- struct lvinfo lvinfo;
- struct lv_seg_status lv_seg_status;
+ struct selection_handle *sh = handle->selection_handle;
+ struct lv_with_info_and_seg_status status = {
+ .seg_status.type = SEG_STATUS_NONE
+ };
int r = ECMD_FAILED;
- _do_info_and_status(cmd, lv, &lvinfo, NULL, &lv_seg_status, do_info, do_status);
- if (!report_object(handle, lv->vg, lv, NULL, NULL, NULL,
- do_info ? &lvinfo : NULL,
- do_status ? &lv_seg_status : NULL,
- NULL))
+ if (!_do_info_and_status(cmd, lv, NULL, &status, do_info, do_status))
+ goto_out;
+
+ if (!report_object(sh ? : handle->custom_handle, sh != NULL,
+ lv->vg, lv, NULL, NULL, NULL, &status, NULL))
goto out;
r = ECMD_PROCESSED;
out:
- if (lv_seg_status.status)
- dm_pool_free(lv_seg_status.mem, lv_seg_status.status);
+ if (status.seg_status.mem)
+ dm_pool_destroy(status.seg_status.mem);
+
return r;
}
static int _lvs_single(struct cmd_context *cmd, struct logical_volume *lv,
- void *handle)
+ struct processing_handle *handle)
{
return _do_lvs_with_info_and_status_single(cmd, lv, 0, 0, handle);
}
static int _lvs_with_info_single(struct cmd_context *cmd, struct logical_volume *lv,
- void *handle)
+ struct processing_handle *handle)
{
return _do_lvs_with_info_and_status_single(cmd, lv, 1, 0, handle);
}
static int _lvs_with_status_single(struct cmd_context *cmd, struct logical_volume *lv,
- void *handle)
+ struct processing_handle *handle)
{
return _do_lvs_with_info_and_status_single(cmd, lv, 0, 1, handle);
}
static int _lvs_with_info_and_status_single(struct cmd_context *cmd, struct logical_volume *lv,
- void *handle)
+ struct processing_handle *handle)
{
return _do_lvs_with_info_and_status_single(cmd, lv, 1, 1, handle);
}
static int _do_segs_with_info_and_status_single(struct cmd_context *cmd,
- struct lv_segment *seg,
+ const struct lv_segment *seg,
int do_info, int do_status,
- void *handle)
+ struct processing_handle *handle)
{
- struct lvinfo lvinfo;
- struct lv_seg_status lv_seg_status;
+ struct selection_handle *sh = handle->selection_handle;
+ struct lv_with_info_and_seg_status status = {
+ .seg_status.type = SEG_STATUS_NONE
+ };
int r = ECMD_FAILED;
- _do_info_and_status(cmd, seg->lv, &lvinfo, seg, &lv_seg_status, do_info, do_status);
- if (!report_object(handle, seg->lv->vg, seg->lv, NULL, seg, NULL,
- do_info ? &lvinfo : NULL,
- do_status ? &lv_seg_status : NULL,
- NULL))
- goto out;
+ if (!_do_info_and_status(cmd, seg->lv, seg, &status, do_info, do_status))
+ goto_out;
+
+ if (!report_object(sh ? : handle->custom_handle, sh != NULL,
+ seg->lv->vg, seg->lv, NULL, seg, NULL, &status, NULL))
+ goto_out;
r = ECMD_PROCESSED;
out:
- if (lv_seg_status.status)
- dm_pool_free(lv_seg_status.mem, lv_seg_status.status);
+ if (status.seg_status.mem)
+ dm_pool_destroy(status.seg_status.mem);
+
return r;
}
static int _segs_single(struct cmd_context *cmd, struct lv_segment *seg,
- void *handle)
+ struct processing_handle *handle)
{
return _do_segs_with_info_and_status_single(cmd, seg, 0, 0, handle);
}
static int _segs_with_info_single(struct cmd_context *cmd, struct lv_segment *seg,
- void *handle)
+ struct processing_handle *handle)
{
return _do_segs_with_info_and_status_single(cmd, seg, 1, 0, handle);
}
static int _segs_with_status_single(struct cmd_context *cmd, struct lv_segment *seg,
- void *handle)
+ struct processing_handle *handle)
{
return _do_segs_with_info_and_status_single(cmd, seg, 0, 1, handle);
}
static int _segs_with_info_and_status_single(struct cmd_context *cmd, struct lv_segment *seg,
- void *handle)
+ struct processing_handle *handle)
{
return _do_segs_with_info_and_status_single(cmd, seg, 1, 1, handle);
}
static int _lvsegs_single(struct cmd_context *cmd, struct logical_volume *lv,
- void *handle)
+ struct processing_handle *handle)
{
if (!arg_count(cmd, all_ARG) && !lv_is_visible(lv))
return ECMD_PROCESSED;
@@ -187,7 +197,7 @@ static int _lvsegs_single(struct cmd_context *cmd, struct logical_volume *lv,
}
static int _lvsegs_with_info_single(struct cmd_context *cmd, struct logical_volume *lv,
- void *handle)
+ struct processing_handle *handle)
{
if (!arg_count(cmd, all_ARG) && !lv_is_visible(lv))
return ECMD_PROCESSED;
@@ -196,7 +206,7 @@ static int _lvsegs_with_info_single(struct cmd_context *cmd, struct logical_volu
}
static int _lvsegs_with_status_single(struct cmd_context *cmd, struct logical_volume *lv,
- void *handle)
+ struct processing_handle *handle)
{
if (!arg_count(cmd, all_ARG) && !lv_is_visible(lv))
return ECMD_PROCESSED;
@@ -205,7 +215,7 @@ static int _lvsegs_with_status_single(struct cmd_context *cmd, struct logical_vo
}
static int _lvsegs_with_info_and_status_single(struct cmd_context *cmd, struct logical_volume *lv,
- void *handle)
+ struct processing_handle *handle)
{
if (!arg_count(cmd, all_ARG) && !lv_is_visible(lv))
return ECMD_PROCESSED;
@@ -218,13 +228,11 @@ static int _do_pvsegs_sub_single(struct cmd_context *cmd,
struct pv_segment *pvseg,
int do_info,
int do_status,
- void *handle)
+ struct processing_handle *handle)
{
+ struct selection_handle *sh = handle->selection_handle;
int ret = ECMD_PROCESSED;
struct lv_segment *seg = pvseg->lvseg;
- struct lvinfo lvinfo = { .exists = 0 };
- struct lv_seg_status lv_seg_status = { .type = SEG_STATUS_NONE,
- .status = NULL };
struct segment_type _freeseg_type = {
.name = "free",
@@ -259,26 +267,33 @@ static int _do_pvsegs_sub_single(struct cmd_context *cmd,
.origin_list = DM_LIST_HEAD_INIT(_free_lv_segment.origin_list),
};
- if (seg)
- _do_info_and_status(cmd, seg->lv, &lvinfo, seg, &lv_seg_status, do_info, do_status);
+ struct lv_with_info_and_seg_status status = {
+ .seg_status.type = SEG_STATUS_NONE,
+ .lv = &_free_logical_volume
+ };
- if (!report_object(handle, vg, seg ? seg->lv : &_free_logical_volume, pvseg->pv,
- seg ? : &_free_lv_segment, pvseg, &lvinfo, &lv_seg_status,
- pv_label(pvseg->pv))) {
+ if (seg && !_do_info_and_status(cmd, seg->lv, seg, &status, do_info, do_status))
+ goto_out;
+
+ if (!report_object(sh ? : handle->custom_handle, sh != NULL,
+ vg, seg ? seg->lv : &_free_logical_volume,
+ pvseg->pv, seg ? : &_free_lv_segment, pvseg,
+ &status, pv_label(pvseg->pv))) {
ret = ECMD_FAILED;
goto_out;
}
out:
- if (seg && lv_seg_status.status)
- dm_pool_free(lv_seg_status.mem, lv_seg_status.status);
+ if (status.seg_status.mem)
+ dm_pool_destroy(status.seg_status.mem);
+
return ret;
}
static int _pvsegs_sub_single(struct cmd_context *cmd,
struct volume_group *vg,
struct pv_segment *pvseg,
- void *handle)
+ struct processing_handle *handle)
{
return _do_pvsegs_sub_single(cmd, vg, pvseg, 0, 0, handle);
}
@@ -286,7 +301,7 @@ static int _pvsegs_sub_single(struct cmd_context *cmd,
static int _pvsegs_with_lv_info_sub_single(struct cmd_context *cmd,
struct volume_group *vg,
struct pv_segment *pvseg,
- void *handle)
+ struct processing_handle *handle)
{
return _do_pvsegs_sub_single(cmd, vg, pvseg, 1, 0, handle);
}
@@ -294,7 +309,7 @@ static int _pvsegs_with_lv_info_sub_single(struct cmd_context *cmd,
static int _pvsegs_with_lv_status_sub_single(struct cmd_context *cmd,
struct volume_group *vg,
struct pv_segment *pvseg,
- void *handle)
+ struct processing_handle *handle)
{
return _do_pvsegs_sub_single(cmd, vg, pvseg, 0, 1, handle);
}
@@ -302,7 +317,7 @@ static int _pvsegs_with_lv_status_sub_single(struct cmd_context *cmd,
static int _pvsegs_with_lv_info_and_status_sub_single(struct cmd_context *cmd,
struct volume_group *vg,
struct pv_segment *pvseg,
- void *handle)
+ struct processing_handle *handle)
{
return _do_pvsegs_sub_single(cmd, vg, pvseg, 1, 1, handle);
}
@@ -310,7 +325,7 @@ static int _pvsegs_with_lv_info_and_status_sub_single(struct cmd_context *cmd,
static int _pvsegs_single(struct cmd_context *cmd,
struct volume_group *vg,
struct physical_volume *pv,
- void *handle)
+ struct processing_handle *handle)
{
return process_each_segment_in_pv(cmd, vg, pv, handle, _pvsegs_sub_single);
}
@@ -318,7 +333,7 @@ static int _pvsegs_single(struct cmd_context *cmd,
static int _pvsegs_with_lv_info_single(struct cmd_context *cmd,
struct volume_group *vg,
struct physical_volume *pv,
- void *handle)
+ struct processing_handle *handle)
{
return process_each_segment_in_pv(cmd, vg, pv, handle, _pvsegs_with_lv_info_sub_single);
}
@@ -326,7 +341,7 @@ static int _pvsegs_with_lv_info_single(struct cmd_context *cmd,
static int _pvsegs_with_lv_status_single(struct cmd_context *cmd,
struct volume_group *vg,
struct physical_volume *pv,
- void *handle)
+ struct processing_handle *handle)
{
return process_each_segment_in_pv(cmd, vg, pv, handle, _pvsegs_with_lv_status_sub_single);
}
@@ -334,24 +349,31 @@ static int _pvsegs_with_lv_status_single(struct cmd_context *cmd,
static int _pvsegs_with_lv_info_and_status_single(struct cmd_context *cmd,
struct volume_group *vg,
struct physical_volume *pv,
- void *handle)
+ struct processing_handle *handle)
{
return process_each_segment_in_pv(cmd, vg, pv, handle, _pvsegs_with_lv_info_and_status_sub_single);
}
static int _pvs_single(struct cmd_context *cmd, struct volume_group *vg,
- struct physical_volume *pv, void *handle)
+ struct physical_volume *pv,
+ struct processing_handle *handle)
{
- if (!report_object(handle, vg, NULL, pv, NULL, NULL, NULL, NULL, NULL))
+ struct selection_handle *sh = handle->selection_handle;
+
+ if (!report_object(sh ? : handle->custom_handle, sh != NULL,
+ vg, NULL, pv, NULL, NULL, NULL, NULL))
return_ECMD_FAILED;
return ECMD_PROCESSED;
}
static int _label_single(struct cmd_context *cmd, struct label *label,
- void *handle)
+ struct processing_handle *handle)
{
- if (!report_object(handle, NULL, NULL, NULL, NULL, NULL, NULL, NULL, label))
+ struct selection_handle *sh = handle->selection_handle;
+
+ if (!report_object(sh ? : handle->custom_handle, sh != NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, label))
return_ECMD_FAILED;
return ECMD_PROCESSED;
@@ -359,45 +381,242 @@ static int _label_single(struct cmd_context *cmd, struct label *label,
static int _pvs_in_vg(struct cmd_context *cmd, const char *vg_name,
struct volume_group *vg,
- void *handle)
+ struct processing_handle *handle)
{
- int skip;
+ return process_each_pv_in_vg(cmd, vg, handle, &_pvs_single);
+}
- if (ignore_vg(vg, vg_name, 0, &skip))
- return_ECMD_FAILED;
+static int _pvsegs_in_vg(struct cmd_context *cmd, const char *vg_name,
+ struct volume_group *vg,
+ struct processing_handle *handle)
+{
+ return process_each_pv_in_vg(cmd, vg, handle, &_pvsegs_single);
+}
- if (skip)
- return ECMD_PROCESSED;
+static int _get_final_report_type(int args_are_pvs,
+ report_type_t report_type,
+ int *lv_info_needed,
+ int *lv_segment_status_needed,
+ report_type_t *final_report_type)
+{
+ /* Do we need to acquire LV device info in addition? */
+ *lv_info_needed = (report_type & (LVSINFO | LVSINFOSTATUS)) ? 1 : 0;
- return process_each_pv_in_vg(cmd, vg, handle, &_pvs_single);
+ /* Do we need to acquire LV device status in addition? */
+ *lv_segment_status_needed = (report_type & (SEGSSTATUS | LVSSTATUS | LVSINFOSTATUS)) ? 1 : 0;
+
+ /* Ensure options selected are compatible */
+ if (report_type & (SEGS | SEGSSTATUS))
+ report_type |= LVS;
+ if (report_type & PVSEGS)
+ report_type |= PVS;
+ if ((report_type & (LVS | LVSINFO | LVSSTATUS | LVSINFOSTATUS)) &&
+ (report_type & (PVS | LABEL)) && !args_are_pvs) {
+ log_error("Can't report LV and PV fields at the same time");
+ return 0;
+ }
+
+ /* Change report type if fields specified makes this necessary */
+ if ((report_type & PVSEGS) ||
+ ((report_type & (PVS | LABEL)) && (report_type & (LVS | LVSINFO | LVSSTATUS | LVSINFOSTATUS))))
+ report_type = PVSEGS;
+ else if ((report_type & PVS) ||
+ ((report_type & LABEL) && (report_type & VGS)))
+ report_type = PVS;
+ else if (report_type & (SEGS | SEGSSTATUS))
+ report_type = SEGS;
+ else if (report_type & (LVS | LVSINFO | LVSSTATUS | LVSINFOSTATUS))
+ report_type = LVS;
+
+ *final_report_type = report_type;
+ return 1;
}
-static int _pvsegs_in_vg(struct cmd_context *cmd, const char *vg_name,
+int report_for_selection(struct cmd_context *cmd,
+ struct selection_handle *sh,
+ struct physical_volume *pv,
struct volume_group *vg,
- void *handle)
+ struct logical_volume *lv)
{
- int skip;
+ static const char *incorrect_report_type_msg = "report_for_selection: incorrect report type";
+ int args_are_pvs = sh->orig_report_type == PVS;
+ int do_lv_info, do_lv_seg_status;
+ struct processing_handle *handle;
+ int r = 0;
- if (ignore_vg(vg, vg_name, 0, &skip))
- return_ECMD_FAILED;
+ if (!_get_final_report_type(args_are_pvs,
+ sh->orig_report_type | sh->report_type,
+ &do_lv_info,
+ &do_lv_seg_status,
+ &sh->report_type))
+ return_0;
- if (skip)
- return ECMD_PROCESSED;
+ if (!(handle = init_processing_handle(cmd)))
+ return_0;
- return process_each_pv_in_vg(cmd, vg, handle, &_pvsegs_single);
+ /*
+ * We're already reporting for select so override
+ * internal_report_for_select to 0 as we can call
+ * process_each_* functions again and we could
+ * end up in an infinite loop if we didn't stop
+ * internal reporting for select right here.
+ *
+ * So the overall call trace from top to bottom looks like this:
+ *
+ * process_each_* (top-level one, using processing_handle with internal reporting enabled and selection_handle) ->
+ * select_match_*(processing_handle with selection_handle) ->
+ * report for selection ->
+ * (creating new processing_handle here with internal reporting disabled!!!)
+ * reporting_fn OR process_each_* (using *new* processing_handle with original selection_handle)
+ *
+ * The selection_handle is still reused so we can track
+ * whether any of the items the top-level one is composed
+ * of are still selected or not unerneath. Do not destroy
+ * this selection handle - it needs to be passed to upper
+ * layers to check the overall selection status.
+ */
+ handle->internal_report_for_select = 0;
+ handle->selection_handle = sh;
+
+ /*
+ * Remember:
+ * sh->orig_report_type is the original report type requested (what are we selecting? PV/VG/LV?)
+ * sh->report_type is the report type actually used (it counts with all types of fields used in selection criteria)
+ */
+ switch (sh->orig_report_type) {
+ case LVS:
+ switch (sh->report_type) {
+ case LVS:
+ r = _do_lvs_with_info_and_status_single(vg->cmd, lv, do_lv_info, do_lv_seg_status, handle);
+ break;
+ case SEGS:
+ r = process_each_segment_in_lv(vg->cmd, lv, handle,
+ do_lv_info && !do_lv_seg_status ? &_segs_with_info_single :
+ !do_lv_info && do_lv_seg_status ? &_segs_with_status_single :
+ do_lv_info && do_lv_seg_status ? &_segs_with_info_and_status_single :
+ &_segs_single);
+ break;
+ default:
+ log_error(INTERNAL_ERROR "%s for LVS", incorrect_report_type_msg);
+ break;
+ }
+ break;
+ case VGS:
+ switch (sh->report_type) {
+ case VGS:
+ r = _vgs_single(vg->cmd, vg->name, vg, handle);
+ break;
+ case LVS:
+ r = process_each_lv_in_vg(vg->cmd, vg, NULL, NULL, 0, handle,
+ do_lv_info && !do_lv_seg_status ? &_lvs_with_info_single :
+ !do_lv_info && do_lv_seg_status ? &_lvs_with_status_single :
+ do_lv_info && do_lv_seg_status ? &_lvs_with_info_and_status_single :
+ &_lvs_single);
+ break;
+ case SEGS:
+ r = process_each_lv_in_vg(vg->cmd, vg, NULL, NULL, 0, handle,
+ do_lv_info && !do_lv_seg_status ? &_lvsegs_with_info_single :
+ !do_lv_info && do_lv_seg_status ? &_lvsegs_with_status_single :
+ do_lv_info && do_lv_seg_status ? &_lvsegs_with_info_and_status_single :
+ &_lvsegs_single);
+ break;
+ case PVS:
+ r = process_each_pv_in_vg(vg->cmd, vg, handle, &_pvs_single);
+ break;
+ case PVSEGS:
+ r = process_each_pv_in_vg(vg->cmd, vg, handle,
+ do_lv_info && !do_lv_seg_status ? &_pvsegs_with_lv_info_single :
+ !do_lv_info && do_lv_seg_status ? &_pvsegs_with_lv_status_single :
+ do_lv_info && do_lv_seg_status ? &_pvsegs_with_lv_info_and_status_single :
+ &_pvsegs_single);
+ break;
+ default:
+ log_error(INTERNAL_ERROR "%s for VGS", incorrect_report_type_msg);
+ break;
+ }
+ break;
+ case PVS:
+ switch (sh->report_type) {
+ case PVS:
+ r = _pvs_single(vg->cmd, vg, pv, handle);
+ break;
+ case PVSEGS:
+ r = process_each_segment_in_pv(vg->cmd, vg, pv, handle,
+ do_lv_info && !do_lv_seg_status ? &_pvsegs_with_lv_info_sub_single :
+ !do_lv_info && do_lv_seg_status ? &_pvsegs_with_lv_status_sub_single :
+ do_lv_info && do_lv_seg_status ? &_pvsegs_with_lv_info_and_status_sub_single :
+ &_pvsegs_sub_single);
+ break;
+ default:
+ log_error(INTERNAL_ERROR "%s for PVS", incorrect_report_type_msg);
+ break;
+ }
+ break;
+ default:
+ log_error(INTERNAL_ERROR "%s", incorrect_report_type_msg);
+ break;
+ }
+
+ /*
+ * Keep the selection handle provided from the caller -
+ * do not destroy it - the caller will still use it to
+ * pass the result through it to layers above.
+ */
+ handle->selection_handle = NULL;
+ destroy_processing_handle(cmd, handle);
+ return r;
+}
+
+static void _check_pv_list(struct cmd_context *cmd, int argc, char **argv,
+ report_type_t *report_type, unsigned *args_are_pvs)
+{
+ unsigned i;
+ int rescan_done = 0;
+
+ *args_are_pvs = (*report_type == PVS ||
+ *report_type == LABEL ||
+ *report_type == PVSEGS) ? 1 : 0;
+
+ if (*args_are_pvs && argc) {
+ for (i = 0; i < argc; i++) {
+ if (!rescan_done && !dev_cache_get(argv[i], cmd->full_filter)) {
+ cmd->filter->wipe(cmd->filter);
+ /* FIXME scan only one device */
+ lvmcache_label_scan(cmd, 0);
+ rescan_done = 1;
+ }
+ if (*argv[i] == '@') {
+ /*
+ * Tags are metadata related, not label
+ * related, change report type accordingly!
+ */
+ if (*report_type == LABEL)
+ *report_type = PVS;
+ /*
+ * If we changed the report_type and we did rescan,
+ * no need to iterate over dev list further - nothing
+ * else would change.
+ */
+ if (rescan_done)
+ break;
+ }
+ }
+ }
}
static int _report(struct cmd_context *cmd, int argc, char **argv,
report_type_t report_type)
{
void *report_handle;
+ struct processing_handle handle = {0};
const char *opts;
char *str;
const char *keys = NULL, *options = NULL, *selection = NULL, *separator;
int r = ECMD_PROCESSED;
int aligned, buffered, headings, field_prefixes, quoted;
int columns_as_rows;
- unsigned args_are_pvs, lv_info_needed, lv_segment_status_needed;
+ unsigned args_are_pvs;
+ int lv_info_needed, lv_segment_status_needed;
int lock_global = 0;
aligned = find_config_tree_bool(cmd, report_aligned_CFG, NULL);
@@ -408,16 +627,16 @@ static int _report(struct cmd_context *cmd, int argc, char **argv,
quoted = find_config_tree_bool(cmd, report_quoted_CFG, NULL);
columns_as_rows = find_config_tree_bool(cmd, report_colums_as_rows_CFG, NULL);
- args_are_pvs = (report_type == PVS ||
- report_type == LABEL ||
-
- report_type == PVSEGS) ? 1 : 0;
-
/*
- * FIXME Trigger scans based on unrecognised listed devices instead.
+ * Include foreign VGs that contain active LVs.
+ * That shouldn't happen in general, but if it does by some
+ * mistake, then we want to display those VGs and allow the
+ * LVs to be deactivated.
*/
- if (args_are_pvs && argc)
- cmd->filter->wipe(cmd->filter);
+ cmd->include_active_foreign_vgs = 1;
+
+ /* Check PV specifics and do extra changes/actions if needed. */
+ _check_pv_list(cmd, argc, argv, &report_type, &args_are_pvs);
switch (report_type) {
case DEVTYPES:
@@ -517,36 +736,14 @@ static int _report(struct cmd_context *cmd, int argc, char **argv,
columns_as_rows, selection)))
return_ECMD_FAILED;
- /* Do we need to acquire LV device info in addition? */
- lv_info_needed = (report_type & LVSINFO) ? 1 : 0;
-
- /* Do we need to acquire LV device status in addition? */
- lv_segment_status_needed = (report_type & (SEGSSTATUS | LVSSTATUS)) ? 1 : 0;
-
- /* Ensure options selected are compatible */
- if (report_type & (SEGS | SEGSSTATUS))
- report_type |= LVS;
- if (report_type & PVSEGS)
- report_type |= PVS;
- if ((report_type & (LVS | LVSINFO | LVSSTATUS)) && (report_type & (PVS | LABEL)) && !args_are_pvs) {
- log_error("Can't report LV and PV fields at the same time");
+ if (!_get_final_report_type(args_are_pvs,
+ report_type, &lv_info_needed,
+ &lv_segment_status_needed,
+ &report_type)) {
dm_report_free(report_handle);
return ECMD_FAILED;
}
- /* Change report type if fields specified makes this necessary */
- if ((report_type & PVSEGS) ||
- ((report_type & (PVS | LABEL)) && (report_type & (LVS | LVSINFO | LVSSTATUS))))
- report_type = PVSEGS;
- else if ((report_type & LABEL) && (report_type & VGS))
- report_type = PVS;
- else if (report_type & PVS)
- report_type = PVS;
- else if (report_type & (SEGS | SEGSSTATUS))
- report_type = SEGS;
- else if (report_type & (LVS | LVSINFO | LVSSTATUS))
- report_type = LVS;
-
/*
* We lock VG_GLOBAL to enable use of metadata cache.
* This can pause alongide pvscan or vgscan process for a while.
@@ -561,16 +758,21 @@ static int _report(struct cmd_context *cmd, int argc, char **argv,
}
}
+ handle.internal_report_for_select = 0;
+ handle.custom_handle = report_handle;
+
switch (report_type) {
case DEVTYPES:
- r = _process_each_devtype(cmd, argc, report_handle);
+ r = _process_each_devtype(cmd, argc, &handle);
break;
case LVSINFO:
/* fall through */
case LVSSTATUS:
/* fall through */
+ case LVSINFOSTATUS:
+ /* fall through */
case LVS:
- r = process_each_lv(cmd, argc, argv, 0, report_handle,
+ r = process_each_lv(cmd, argc, argv, 0, &handle,
lv_info_needed && !lv_segment_status_needed ? &_lvs_with_info_single :
!lv_info_needed && lv_segment_status_needed ? &_lvs_with_status_single :
lv_info_needed && lv_segment_status_needed ? &_lvs_with_info_and_status_single :
@@ -578,24 +780,24 @@ static int _report(struct cmd_context *cmd, int argc, char **argv,
break;
case VGS:
r = process_each_vg(cmd, argc, argv, 0,
- report_handle, &_vgs_single);
+ &handle, &_vgs_single);
break;
case LABEL:
r = process_each_label(cmd, argc, argv,
- report_handle, &_label_single);
+ &handle, &_label_single);
break;
case PVS:
if (args_are_pvs)
r = process_each_pv(cmd, argc, argv, NULL, 0,
- report_handle, &_pvs_single);
+ &handle, &_pvs_single);
else
r = process_each_vg(cmd, argc, argv, 0,
- report_handle, &_pvs_in_vg);
+ &handle, &_pvs_in_vg);
break;
case SEGSSTATUS:
/* fall through */
case SEGS:
- r = process_each_lv(cmd, argc, argv, 0, report_handle,
+ r = process_each_lv(cmd, argc, argv, 0, &handle,
lv_info_needed && !lv_segment_status_needed ? &_lvsegs_with_info_single :
!lv_info_needed && lv_segment_status_needed ? &_lvsegs_with_status_single :
lv_info_needed && lv_segment_status_needed ? &_lvsegs_with_info_and_status_single :
@@ -604,14 +806,14 @@ static int _report(struct cmd_context *cmd, int argc, char **argv,
case PVSEGS:
if (args_are_pvs)
r = process_each_pv(cmd, argc, argv, NULL, 0,
- report_handle,
+ &handle,
lv_info_needed && !lv_segment_status_needed ? &_pvsegs_with_lv_info_single :
!lv_info_needed && lv_segment_status_needed ? &_pvsegs_with_lv_status_single :
lv_info_needed && lv_segment_status_needed ? &_pvsegs_with_lv_info_and_status_single :
&_pvsegs_single);
else
r = process_each_vg(cmd, argc, argv, 0,
- report_handle, &_pvsegs_in_vg);
+ &handle, &_pvsegs_in_vg);
break;
}
diff --git a/tools/tool.h b/tools/tool.h
new file mode 100644
index 000000000..04c51eeaa
--- /dev/null
+++ b/tools/tool.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2015 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/* Most source files should include this file or lib.h or dmlib.h */
+
+#ifndef _LVM_TOOL_H
+#define _LVM_TOOL_H
+
+#define _GNU_SOURCE
+#define _FILE_OFFSET_BITS 64
+
+#include "configure.h"
+
+#include <unistd.h>
+
+#include "libdevmapper.h"
+#include "util.h"
+
+#endif /* _LVM_TOOL_H */
diff --git a/tools/toollib.c b/tools/toollib.c
index a6e9a14fb..b4c9da0c5 100644
--- a/tools/toollib.c
+++ b/tools/toollib.c
@@ -1,6 +1,6 @@
/*
* Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
- * Copyright (C) 2004-2014 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2004-2015 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
@@ -14,9 +14,17 @@
*/
#include "tools.h"
+
#include <sys/stat.h>
#include <signal.h>
#include <sys/wait.h>
+#include <sys/utsname.h>
+
+struct device_id_list {
+ struct dm_list list;
+ struct device *dev;
+ char pvid[ID_LEN + 1];
+};
const char *command_name(struct cmd_context *cmd)
{
@@ -49,7 +57,10 @@ int become_daemon(struct cmd_context *cmd, int skip_lvm)
sigaction(SIGCHLD, &act, NULL);
if (!skip_lvm)
- sync_local_dev_names(cmd); /* Flush ops and reset dm cookie */
+ if (!sync_local_dev_names(cmd)) { /* Flush ops and reset dm cookie */
+ log_error("Failed to sync local devices before forking.");
+ return -1;
+ }
if ((pid = fork()) == -1) {
log_error("fork failed: %s", strerror(errno));
@@ -90,6 +101,8 @@ int become_daemon(struct cmd_context *cmd, int skip_lvm)
strncpy(*cmd->argv, "(lvm2)", strlen(*cmd->argv));
+ lvmetad_disconnect();
+
if (!skip_lvm) {
reset_locking();
lvmcache_destroy(cmd, 1, 1);
@@ -168,8 +181,10 @@ const char *skip_dev_dir(struct cmd_context *cmd, const char *vg_name,
* and the command option has been used to ignore clustered vgs.
*
* Case c covers the other errors returned when reading the VG.
+ * If *skip is 1, it's OK for the caller to read the list of PVs in the VG.
*/
-int ignore_vg(struct volume_group *vg, const char *vg_name, int allow_inconsistent, int *skip)
+static int _ignore_vg(struct volume_group *vg, const char *vg_name,
+ struct dm_list *arg_vgnames, int allow_inconsistent, int *skip)
{
uint32_t read_error = vg_read_error(vg);
*skip = 0;
@@ -183,7 +198,58 @@ int ignore_vg(struct volume_group *vg, const char *vg_name, int allow_inconsiste
*skip = 1;
}
+ /*
+ * Commands that operate on "all vgs" shouldn't be bothered by
+ * skipping a foreign VG, and the command shouldn't fail when
+ * one is skipped. But, if the command explicitly asked to
+ * operate on a foreign VG and it's skipped, then the command
+ * would expect to fail.
+ */
+ if (read_error & FAILED_SYSTEMID) {
+ if (arg_vgnames && str_list_match_item(arg_vgnames, vg->name)) {
+ log_error("Cannot access VG %s with system ID %s with %slocal system ID%s%s.",
+ vg->name, vg->system_id, vg->cmd->system_id ? "" : "unknown ",
+ vg->cmd->system_id ? " " : "", vg->cmd->system_id ? vg->cmd->system_id : "");
+ return 1;
+ } else {
+ read_error &= ~FAILED_SYSTEMID; /* Check for other errors */
+ log_verbose("Skipping foreign volume group %s", vg_name);
+ *skip = 1;
+ }
+ }
+
+ /*
+ * Accessing a lockd VG when lvmlockd is not used is similar
+ * to accessing a foreign VG.
+ * This is also the point where a command fails if it failed
+ * to acquire the necessary lock from lvmlockd.
+ * The two cases are distinguished by FAILED_LOCK_TYPE (the
+ * VG lock_type requires lvmlockd), and FAILED_LOCK_MODE (the
+ * command failed to acquire the necessary lock.)
+ */
+ if (read_error & (FAILED_LOCK_TYPE | FAILED_LOCK_MODE)) {
+ if (arg_vgnames && str_list_match_item(arg_vgnames, vg->name)) {
+ if (read_error & FAILED_LOCK_TYPE)
+ log_error("Cannot access VG %s with lock type %s that requires lvmlockd.",
+ vg->name, vg->lock_type);
+ /* For FAILED_LOCK_MODE, the error is printed in vg_read. */
+ return 1;
+ } else {
+ read_error &= ~FAILED_LOCK_TYPE; /* Check for other errors */
+ read_error &= ~FAILED_LOCK_MODE;
+ log_verbose("Skipping volume group %s", vg_name);
+ *skip = 1;
+ }
+ }
+
+ if (read_error == FAILED_CLUSTERED) {
+ *skip = 1;
+ stack; /* Error already logged */
+ return 1;
+ }
+
if (read_error != SUCCESS) {
+ *skip = 0;
log_error("Cannot process volume group %s", vg_name);
return 1;
}
@@ -192,15 +258,38 @@ int ignore_vg(struct volume_group *vg, const char *vg_name, int allow_inconsiste
}
/*
+ * This functiona updates the "selected" arg only if last item processed
+ * is selected so this implements the "whole structure is selected if
+ * at least one of its items is selected".
+ */
+static void _update_selection_result(struct processing_handle *handle, int *selected)
+{
+ if (!handle || !handle->selection_handle)
+ return;
+
+ if (handle->selection_handle->selected)
+ *selected = 1;
+}
+
+static void _set_final_selection_result(struct processing_handle *handle, int selected)
+{
+ if (!handle || !handle->selection_handle)
+ return;
+
+ handle->selection_handle->selected = selected;
+}
+
+/*
* Metadata iteration functions
*/
int process_each_segment_in_pv(struct cmd_context *cmd,
struct volume_group *vg,
struct physical_volume *pv,
- void *handle,
+ struct processing_handle *handle,
process_single_pvseg_fn_t process_single_pvseg)
{
struct pv_segment *pvseg;
+ int whole_selected = 0;
int ret_max = ECMD_PROCESSED;
int ret;
struct pv_segment _free_pv_segment = { .pv = pv };
@@ -217,6 +306,7 @@ int process_each_segment_in_pv(struct cmd_context *cmd,
return_ECMD_FAILED;
ret = process_single_pvseg(cmd, vg, pvseg, handle);
+ _update_selection_result(handle, &whole_selected);
if (ret != ECMD_PROCESSED)
stack;
if (ret > ret_max)
@@ -224,15 +314,18 @@ int process_each_segment_in_pv(struct cmd_context *cmd,
}
}
+ /* the PV is selected if at least one PV segment is selected */
+ _set_final_selection_result(handle, whole_selected);
return ret_max;
}
int process_each_segment_in_lv(struct cmd_context *cmd,
struct logical_volume *lv,
- void *handle,
+ struct processing_handle *handle,
process_single_seg_fn_t process_single_seg)
{
struct lv_segment *seg;
+ int whole_selected = 0;
int ret_max = ECMD_PROCESSED;
int ret;
@@ -241,12 +334,15 @@ int process_each_segment_in_lv(struct cmd_context *cmd,
return_ECMD_FAILED;
ret = process_single_seg(cmd, seg, handle);
+ _update_selection_result(handle, &whole_selected);
if (ret != ECMD_PROCESSED)
stack;
if (ret > ret_max)
ret_max = ret;
}
+ /* the LV is selected if at least one LV segment is selected */
+ _set_final_selection_result(handle, whole_selected);
return ret_max;
}
@@ -609,6 +705,7 @@ int vgcreate_params_set_defaults(struct cmd_context *cmd,
{
int64_t extent_size;
+ /* Only vgsplit sets vg */
if (vg) {
vp_def->vg_name = NULL;
vp_def->extent_size = vg->extent_size;
@@ -617,6 +714,7 @@ int vgcreate_params_set_defaults(struct cmd_context *cmd,
vp_def->alloc = vg->alloc;
vp_def->clustered = vg_is_clustered(vg);
vp_def->vgmetadatacopies = vg->mda_copies;
+ vp_def->system_id = vg->system_id; /* No need to clone this */
} else {
vp_def->vg_name = NULL;
extent_size = find_config_tree_int64(cmd,
@@ -631,6 +729,7 @@ int vgcreate_params_set_defaults(struct cmd_context *cmd,
vp_def->alloc = DEFAULT_ALLOC_POLICY;
vp_def->clustered = DEFAULT_CLUSTERED;
vp_def->vgmetadatacopies = DEFAULT_VGMETADATACOPIES;
+ vp_def->system_id = cmd->system_id;
}
return 1;
@@ -646,6 +745,13 @@ int vgcreate_params_set_from_args(struct cmd_context *cmd,
struct vgcreate_params *vp_new,
struct vgcreate_params *vp_def)
{
+ const char *system_id_arg_str;
+ const char *lock_type = NULL;
+ int locking_type;
+ int use_lvmlockd;
+ int use_clvmd;
+ lock_type_t lock_type_num;
+
vp_new->vg_name = skip_dev_dir(cmd, vp_def->vg_name, NULL);
vp_new->max_lv = arg_uint_value(cmd, maxlogicalvolumes_ARG,
vp_def->max_lv);
@@ -657,12 +763,6 @@ int vgcreate_params_set_from_args(struct cmd_context *cmd,
vp_new->extent_size =
arg_uint_value(cmd, physicalextentsize_ARG, vp_def->extent_size);
- if (arg_count(cmd, clustered_ARG))
- vp_new->clustered = arg_int_value(cmd, clustered_ARG, vp_def->clustered);
- else
- /* Default depends on current locking type */
- vp_new->clustered = locking_is_clustered();
-
if (arg_sign_value(cmd, physicalextentsize_ARG, SIGN_NONE) == SIGN_MINUS) {
log_error(_pe_size_may_not_be_negative_msg);
return 0;
@@ -684,16 +784,212 @@ int vgcreate_params_set_from_args(struct cmd_context *cmd,
return 0;
}
- if (arg_count(cmd, metadatacopies_ARG)) {
+ if (arg_count(cmd, metadatacopies_ARG))
vp_new->vgmetadatacopies = arg_int_value(cmd, metadatacopies_ARG,
DEFAULT_VGMETADATACOPIES);
- } else if (arg_count(cmd, vgmetadatacopies_ARG)) {
+ else if (arg_count(cmd, vgmetadatacopies_ARG))
vp_new->vgmetadatacopies = arg_int_value(cmd, vgmetadatacopies_ARG,
DEFAULT_VGMETADATACOPIES);
- } else {
+ else
vp_new->vgmetadatacopies = find_config_tree_int(cmd, metadata_vgmetadatacopies_CFG, NULL);
+
+ if (!(system_id_arg_str = arg_str_value(cmd, systemid_ARG, NULL))) {
+ vp_new->system_id = vp_def->system_id;
+ } else {
+ if (!(vp_new->system_id = system_id_from_string(cmd, system_id_arg_str)))
+ return_0;
+
+ /* FIXME Take local/extra_system_ids into account */
+ if (vp_new->system_id && cmd->system_id &&
+ strcmp(vp_new->system_id, cmd->system_id)) {
+ if (*vp_new->system_id)
+ log_warn("VG with system ID %s might become inaccessible as local system ID is %s",
+ vp_new->system_id, cmd->system_id);
+ else
+ log_warn("WARNING: A VG without a system ID allows unsafe access from other hosts.");
+ }
}
+ if ((system_id_arg_str = arg_str_value(cmd, systemid_ARG, NULL))) {
+ vp_new->system_id = system_id_from_string(cmd, system_id_arg_str);
+ } else {
+ vp_new->system_id = vp_def->system_id;
+ }
+
+ if (system_id_arg_str) {
+ if (!vp_new->system_id || !vp_new->system_id[0])
+ log_warn("WARNING: A VG without a system ID allows unsafe access from other hosts.");
+
+ if (vp_new->system_id && cmd->system_id &&
+ strcmp(vp_new->system_id, cmd->system_id)) {
+ log_warn("VG with system ID %s might become inaccessible as local system ID is %s",
+ vp_new->system_id, cmd->system_id);
+ }
+ }
+
+ /*
+ * Locking: what kind of locking should be used for the
+ * new VG, and is it compatible with current lvm.conf settings.
+ *
+ * The end result is to set vp_new->lock_type to:
+ * none | clvm | dlm | sanlock.
+ *
+ * If 'vgcreate --lock-type <arg>' is set, the answer is given
+ * directly by <arg> which is one of none|clvm|dlm|sanlock.
+ *
+ * 'vgcreate --clustered y' is the way to create clvm VGs.
+ *
+ * 'vgcreate --shared' is the way to create lockd VGs.
+ * lock_type of sanlock or dlm is selected based on
+ * which lock manager is running.
+ *
+ *
+ * 1. Using neither clvmd nor lvmlockd.
+ * ------------------------------------------------
+ * lvm.conf:
+ * global/use_lvmlockd = 0
+ * global/locking_type = 1
+ *
+ * - no locking is enabled
+ * - clvmd is not used
+ * - lvmlockd is not used
+ * - VGs with CLUSTERED set are ignored (requires clvmd)
+ * - VGs with lockd type are ignored (requires lvmlockd)
+ * - vgcreate can create new VGs with lock_type none
+ * - 'vgcreate --clustered y' fails
+ * - 'vgcreate --shared' fails
+ * - 'vgcreate' (neither option) creates a local VG
+ *
+ * 2. Using clvmd.
+ * ------------------------------------------------
+ * lvm.conf:
+ * global/use_lvmlockd = 0
+ * global/locking_type = 3
+ *
+ * - locking through clvmd is enabled (traditional clvm config)
+ * - clvmd is used
+ * - lvmlockd is not used
+ * - VGs with CLUSTERED set can be used
+ * - VGs with lockd type are ignored (requires lvmlockd)
+ * - vgcreate can create new VGs with CLUSTERED status flag
+ * - 'vgcreate --clustered y' works
+ * - 'vgcreate --shared' fails
+ * - 'vgcreate' (neither option) creates a clvm VG
+ *
+ * 3. Using lvmlockd.
+ * ------------------------------------------------
+ * lvm.conf:
+ * global/use_lvmlockd = 1
+ * global/locking_type = 1
+ *
+ * - locking through lvmlockd is enabled
+ * - clvmd is not used
+ * - lvmlockd is used
+ * - VGs with CLUSTERED set are ignored (requires clvmd)
+ * - VGs with lockd type can be used
+ * - vgcreate can create new VGs with lock_type sanlock or dlm
+ * - 'vgcreate --clustered y' fails
+ * - 'vgcreate --shared' works
+ * - 'vgcreate' (neither option) creates a local VG
+ */
+
+ locking_type = find_config_tree_int(cmd, global_locking_type_CFG, NULL);
+ use_lvmlockd = find_config_tree_bool(cmd, global_use_lvmlockd_CFG, NULL);
+ use_clvmd = (locking_type == 3);
+
+ if (arg_is_set(cmd, locktype_ARG)) {
+ if (arg_is_set(cmd, clustered_ARG) || arg_is_set(cmd, shared_ARG)) {
+ log_error("A lock type cannot be specified with --shared or --clustered.");
+ return 0;
+ }
+ lock_type = arg_str_value(cmd, locktype_ARG, "");
+
+ } else if (arg_is_set(cmd, clustered_ARG)) {
+ const char *arg_str = arg_str_value(cmd, clustered_ARG, "");
+ int clustery = strcmp(arg_str, "y") ? 0 : 1;
+
+ if (use_clvmd) {
+ lock_type = clustery ? "clvm" : "none";
+
+ } else if (use_lvmlockd) {
+ log_error("lvmlockd is configured, use --shared with lvmlockd, and --clustered with clvmd.");
+ return 0;
+
+ } else {
+ if (clustery) {
+ log_error("The --clustered option requires clvmd (locking_type=3).");
+ return 0;
+ } else {
+ lock_type = "none";
+ }
+ }
+
+ } else if (arg_is_set(cmd, shared_ARG)) {
+ if (use_lvmlockd) {
+ if (!(lock_type = lockd_running_lock_type(cmd))) {
+ log_error("Failed to detect a running lock manager to select lock type.");
+ return 0;
+ }
+
+ } else if (use_clvmd) {
+ log_error("Use --shared with lvmlockd, and --clustered with clvmd.");
+ return 0;
+
+ } else {
+ log_error("Using a shared lock type requires lvmlockd.");
+ return 0;
+ }
+
+ } else {
+ if (use_clvmd)
+ lock_type = locking_is_clustered() ? "clvm" : "none";
+ else
+ lock_type = "none";
+ }
+
+ /*
+ * Check that the lock_type is recognized, and is being
+ * used with the correct lvm.conf settings.
+ */
+ lock_type_num = get_lock_type_from_string(lock_type);
+
+ switch (lock_type_num) {
+ case LOCK_TYPE_INVALID:
+ log_error("lock_type %s is invalid", lock_type);
+ return 0;
+
+ case LOCK_TYPE_SANLOCK:
+ case LOCK_TYPE_DLM:
+ if (!use_lvmlockd) {
+ log_error("Using a shared lock type requires lvmlockd.");
+ return 0;
+ }
+ break;
+ case LOCK_TYPE_CLVM:
+ if (!use_clvmd) {
+ log_error("Using clvm requires locking_type 3.");
+ return 0;
+ }
+ break;
+ case LOCK_TYPE_NONE:
+ break;
+ };
+
+ /*
+ * The vg is not owned by one host/system_id.
+ * Locking coordinates access from multiple hosts.
+ */
+ if (lock_type_num == LOCK_TYPE_DLM || lock_type_num == LOCK_TYPE_SANLOCK || lock_type_num == LOCK_TYPE_CLVM)
+ vp_new->system_id = NULL;
+
+ vp_new->lock_type = lock_type;
+
+ if (lock_type_num == LOCK_TYPE_CLVM)
+ vp_new->clustered = 1;
+ else
+ vp_new->clustered = 0;
+
+ log_debug("Setting lock_type to %s", vp_new->lock_type);
return 1;
}
@@ -703,6 +999,7 @@ int lv_change_activate(struct cmd_context *cmd, struct logical_volume *lv,
{
int r = 1;
+PFLA("activate=%x", activate);
if (lv_is_cache_pool(lv)) {
if (is_change_activating(activate)) {
log_verbose("Skipping activation of cache pool %s.",
@@ -749,32 +1046,13 @@ int lv_change_activate(struct cmd_context *cmd, struct logical_volume *lv,
if (!lv_active_change(cmd, lv, activate, 0))
return_0;
- if (background_polling() &&
- is_change_activating(activate) &&
- (lv_is_pvmove(lv) || lv_is_converting(lv) || lv_is_merging(lv)))
- lv_spawn_background_polling(cmd, lv);
-
return r;
}
int lv_refresh(struct cmd_context *cmd, struct logical_volume *lv)
{
- if (!cmd->partial_activation && (lv->status & PARTIAL_LV)) {
- log_error("Refusing refresh of partial LV %s."
- " Use '--activationmode partial' to override.",
- lv->name);
- return 0;
- }
-
- if (!suspend_lv(cmd, lv)) {
- log_error("Failed to suspend %s.", lv->name);
- return 0;
- }
-
- if (!resume_lv(cmd, lv)) {
- log_error("Failed to reactivate %s.", lv->name);
- return 0;
- }
+ if (!lv_refresh_suspend_resume(cmd, lv))
+ return_0;
/*
* check if snapshot merge should be polled
@@ -819,22 +1097,23 @@ void lv_spawn_background_polling(struct cmd_context *cmd,
struct logical_volume *lv)
{
const char *pvname;
+ const struct logical_volume *lv_mirr = NULL;
- if (lv_is_pvmove(lv) &&
- (pvname = get_pvmove_pvname_from_lv_mirr(lv))) {
- log_verbose("Spawning background pvmove process for %s.",
- pvname);
- pvmove_poll(cmd, pvname, 1);
- } else if (lv_is_locked(lv) &&
- (pvname = get_pvmove_pvname_from_lv(lv))) {
+ if (lv_is_pvmove(lv))
+ lv_mirr = lv;
+ else if (lv_is_locked(lv))
+ lv_mirr = find_pvmove_lv_in_lv(lv);
+
+ if (lv_mirr &&
+ (pvname = get_pvmove_pvname_from_lv_mirr(lv_mirr))) {
log_verbose("Spawning background pvmove process for %s.",
pvname);
- pvmove_poll(cmd, pvname, 1);
+ pvmove_poll(cmd, pvname, lv_mirr->lvid.s, lv_mirr->vg->name, lv_mirr->name, 1);
}
if (lv_is_converting(lv) || lv_is_merging(lv)) {
log_verbose("Spawning background lvconvert process for %s.",
- lv->name);
+ lv->name);
lvconvert_poll(cmd, lv, 1);
}
}
@@ -845,8 +1124,7 @@ void lv_spawn_background_polling(struct cmd_context *cmd,
* Output arguments:
* pp: structure allocated by caller, fields written / validated here
*/
-int pvcreate_params_validate(struct cmd_context *cmd,
- int argc, char **argv,
+int pvcreate_params_validate(struct cmd_context *cmd, int argc,
struct pvcreate_params *pp)
{
if (!argc) {
@@ -1072,18 +1350,26 @@ static int _validate_stripe_params(struct cmd_context *cmd, uint32_t *stripes,
display_size(cmd, (uint64_t) *stripe_size));
}
- if (*stripes < 1 || *stripes > MAX_STRIPES) {
+ if (*stripes < 1 || *stripes > MAX_AREAS) {
log_error("Number of stripes (%d) must be between %d and %d.",
- *stripes, 1, MAX_STRIPES);
+ *stripes, 1, MAX_AREAS);
return 0;
}
+#if 0
+ if (*stripes > 1 && *stripe_size < STRIPE_SIZE_MIN) {
+ log_error("Invalid stripe size %s.",
+ display_size(cmd, (uint64_t) *stripe_size));
+ return 0;
+ }
+#else
if (*stripes > 1 && (*stripe_size < STRIPE_SIZE_MIN ||
*stripe_size & (*stripe_size - 1))) {
log_error("Invalid stripe size %s.",
display_size(cmd, (uint64_t) *stripe_size));
return 0;
}
+#endif
return 1;
}
@@ -1124,12 +1410,16 @@ int get_stripe_params(struct cmd_context *cmd, uint32_t *stripes, uint32_t *stri
return r;
}
-static int _validate_cachepool_params(struct dm_config_tree *tree)
+static int _validate_cachepool_params(const char *name,
+ const struct dm_config_tree *settings)
{
return 1;
}
-struct dm_config_tree *get_cachepolicy_params(struct cmd_context *cmd)
+int get_cache_params(struct cmd_context *cmd,
+ const char **mode,
+ const char **name,
+ struct dm_config_tree **settings)
{
const char *str;
struct arg_value_group_list *group;
@@ -1137,12 +1427,20 @@ struct dm_config_tree *get_cachepolicy_params(struct cmd_context *cmd)
struct dm_config_node *cn;
int ok = 0;
+ if (mode)
+ *mode = arg_str_value(cmd, cachemode_ARG, NULL);
+
+ if (name)
+ *name = arg_str_value(cmd, cachepolicy_ARG, NULL);
+
+ if (!settings)
+ return 1;
+
dm_list_iterate_items(group, &cmd->arg_value_groups) {
if (!grouped_arg_is_set(group->arg_values, cachesettings_ARG))
continue;
- current = dm_config_create();
- if (!current)
+ if (!(current = dm_config_create()))
goto_out;
if (prev)
current->cascade = prev;
@@ -1157,32 +1455,24 @@ struct dm_config_tree *get_cachepolicy_params(struct cmd_context *cmd)
goto_out;
}
- if (!(result = dm_config_flatten(current)))
- goto_out;
+ if (!current)
+ return 1;
- if (!(cn = dm_config_create_node(result, "policy_settings")))
+ if (!(result = dm_config_flatten(current)))
goto_out;
- cn->child = result->root;
- result->root = cn;
-
- if (arg_count(cmd, cachepolicy_ARG)) {
- if (!(cn = dm_config_create_node(result, "policy")))
- goto_out;
-
- result->root->sib = cn;
- if (!(cn->v = dm_config_create_value(result)))
+ if (result->root) {
+ if (!(cn = dm_config_create_node(result, "policy_settings")))
goto_out;
- cn->v->type = DM_CFG_STRING;
- cn->v->v.str = arg_str_value(cmd, cachepolicy_ARG, NULL);
+ cn->child = result->root;
+ result->root = cn;
}
- if (!_validate_cachepool_params(result))
+ if (!_validate_cachepool_params(*name, result))
goto_out;
ok = 1;
-
out:
if (!ok && result) {
dm_config_destroy(result);
@@ -1193,7 +1483,10 @@ out:
dm_config_destroy(prev);
prev = current;
}
- return result;
+
+ *settings = result;
+
+ return ok;
}
/* FIXME move to lib */
@@ -1238,7 +1531,8 @@ int change_tag(struct cmd_context *cmd, struct volume_group *vg,
return 1;
}
-int process_each_label(struct cmd_context *cmd, int argc, char **argv, void *handle,
+int process_each_label(struct cmd_context *cmd, int argc, char **argv,
+ struct processing_handle *handle,
process_single_label_fn_t process_single_label)
{
struct label *label;
@@ -1428,17 +1722,12 @@ int validate_restricted_lvname_param(struct cmd_context *cmd, const char **vg_na
return -1;
}
-struct vgnameid_list {
- struct dm_list list;
- const char *vg_name;
- const char *vgid;
-};
-
/*
* Extract list of VG names and list of tags from command line arguments.
*/
static int _get_arg_vgnames(struct cmd_context *cmd,
int argc, char **argv,
+ unsigned one_vgname_arg,
struct dm_list *arg_vgnames,
struct dm_list *arg_tags)
{
@@ -1450,18 +1739,26 @@ static int _get_arg_vgnames(struct cmd_context *cmd,
for (; opt < argc; opt++) {
vg_name = argv[opt];
+
if (*vg_name == '@') {
+ if (one_vgname_arg) {
+ log_error("This command does not yet support a tag to identify a Volume Group.");
+ return EINVALID_CMD_LINE;
+ }
+
if (!validate_tag(vg_name + 1)) {
log_error("Skipping invalid tag: %s", vg_name);
if (ret_max < EINVALID_CMD_LINE)
ret_max = EINVALID_CMD_LINE;
continue;
}
+
if (!str_list_add(cmd->mem, arg_tags,
dm_pool_strdup(cmd->mem, vg_name + 1))) {
log_error("strlist allocation failed.");
return ECMD_FAILED;
}
+
continue;
}
@@ -1470,89 +1767,160 @@ static int _get_arg_vgnames(struct cmd_context *cmd,
log_error("Invalid volume group name %s.", vg_name);
if (ret_max < EINVALID_CMD_LINE)
ret_max = EINVALID_CMD_LINE;
+ if (one_vgname_arg)
+ break;
continue;
}
+
if (!str_list_add(cmd->mem, arg_vgnames,
dm_pool_strdup(cmd->mem, vg_name))) {
log_error("strlist allocation failed.");
return ECMD_FAILED;
}
+
+ if (one_vgname_arg)
+ break;
}
return ret_max;
}
-/*
- * FIXME Add arg to include (or not) entries with duplicate vg names?
- *
- * Obtain complete list of VG name/vgid pairs known on the system.
- */
-static int _get_vgnameids_on_system(struct cmd_context *cmd,
- struct dm_list *vgnameids_on_system,
- const char *only_this_vgname, int include_internal)
+struct processing_handle *init_processing_handle(struct cmd_context *cmd)
{
- struct vgnameid_list *vgnl;
- struct dm_list *vgids;
- struct dm_str_list *sl;
- const char *vgid;
+ struct processing_handle *handle;
- if (only_this_vgname) {
- vgnl = dm_pool_alloc(cmd->mem, sizeof(*vgnl));
- if (!vgnl) {
- log_error("name_id_list allocation failed.");
- return ECMD_FAILED;
- }
+ if (!(handle = dm_pool_zalloc(cmd->mem, sizeof(struct processing_handle)))) {
+ log_error("_init_processing_handle: failed to allocate memory for processing handle");
+ return NULL;
+ }
- vgnl->vg_name = dm_pool_strdup(cmd->mem, only_this_vgname);
- vgnl->vgid = NULL;
+ /*
+ * For any reporting tool, the internal_report_for_select is reset to 0
+ * automatically because the internal reporting/selection is simply not
+ * needed - the reporting/selection is already a part of the code path
+ * used there.
+ *
+ * *The internal report for select is only needed for non-reporting tools!*
+ */
+ handle->internal_report_for_select = arg_is_set(cmd, select_ARG);
- dm_list_add(vgnameids_on_system, &vgnl->list);
- return ECMD_PROCESSED;
+ return handle;
+}
+
+int init_selection_handle(struct cmd_context *cmd, struct processing_handle *handle,
+ report_type_t initial_report_type)
+{
+ struct selection_handle *sh;
+
+ if (!(sh = dm_pool_zalloc(cmd->mem, sizeof(struct selection_handle)))) {
+ log_error("_init_selection_handle: failed to allocate memory for selection handle");
+ return 0;
}
- log_verbose("Finding all volume groups.");
+ sh->report_type = initial_report_type;
+ if (!(sh->selection_rh = report_init_for_selection(cmd, &sh->report_type,
+ arg_str_value(cmd, select_ARG, NULL)))) {
+ dm_pool_free(cmd->mem, sh);
+ return_0;
+ }
- if (!lvmetad_vg_list_to_lvmcache(cmd))
- stack;
+ handle->selection_handle = sh;
+ return 1;
+}
- /*
- * Start with complete vgid list because multiple VGs might have same name.
- */
- vgids = get_vgids(cmd, include_internal);
- if (!vgids || dm_list_empty(vgids)) {
- stack;
- return ECMD_PROCESSED;
+void destroy_processing_handle(struct cmd_context *cmd, struct processing_handle *handle)
+{
+ if (handle) {
+ if (handle->selection_handle && handle->selection_handle->selection_rh)
+ dm_report_free(handle->selection_handle->selection_rh);
+ dm_pool_free(cmd->mem, handle);
}
+}
- /* FIXME get_vgids() should provide these pairings directly */
- dm_list_iterate_items(sl, vgids) {
- if (!(vgid = sl->str))
- continue;
- if (!(vgnl = dm_pool_alloc(cmd->mem, sizeof(*vgnl)))) {
- log_error("vgnameid_list allocation failed.");
- return ECMD_FAILED;
- }
+int select_match_vg(struct cmd_context *cmd, struct processing_handle *handle,
+ struct volume_group *vg, int *selected)
+{
+ struct selection_handle *sh = handle->selection_handle;
- vgnl->vgid = dm_pool_strdup(cmd->mem, vgid);
- vgnl->vg_name = lvmcache_vgname_from_vgid(cmd->mem, vgid);
+ if (!handle->internal_report_for_select) {
+ *selected = 1;
+ return 1;
+ }
+
+ sh->orig_report_type = VGS;
- dm_list_add(vgnameids_on_system, &vgnl->list);
+ if (!report_for_selection(cmd, sh, NULL, vg, NULL)) {
+ log_error("Selection failed for VG %s.", vg->name);
+ return 0;
}
- return ECMD_PROCESSED;
+ sh->orig_report_type = 0;
+ *selected = sh->selected;
+
+ return 1;
+}
+
+int select_match_lv(struct cmd_context *cmd, struct processing_handle *handle,
+ struct volume_group *vg, struct logical_volume *lv, int *selected)
+{
+ struct selection_handle *sh = handle->selection_handle;
+
+ if (!handle->internal_report_for_select) {
+ *selected = 1;
+ return 1;
+ }
+
+ sh->orig_report_type = LVS;
+
+ if (!report_for_selection(cmd, sh, NULL, vg, lv)) {
+ log_error("Selection failed for LV %s.", lv->name);
+ return 0;
+ }
+
+ sh->orig_report_type = 0;
+ *selected = sh->selected;
+
+ return 1;
+}
+
+int select_match_pv(struct cmd_context *cmd, struct processing_handle *handle,
+ struct volume_group *vg, struct physical_volume *pv, int *selected)
+{
+ struct selection_handle *sh = handle->selection_handle;
+
+ if (!handle->internal_report_for_select) {
+ *selected = 1;
+ return 1;
+ }
+
+ sh->orig_report_type = PVS;
+
+ if (!report_for_selection(cmd, sh, pv, vg, NULL)) {
+ log_error("Selection failed for PV %s.", dev_name(pv->dev));
+ return 0;
+ }
+
+ sh->orig_report_type = 0;
+ *selected = sh->selected;
+
+ return 1;
}
static int _process_vgnameid_list(struct cmd_context *cmd, uint32_t flags,
struct dm_list *vgnameids_to_process,
struct dm_list *arg_vgnames,
- struct dm_list *arg_tags, void *handle,
+ struct dm_list *arg_tags,
+ struct processing_handle *handle,
process_single_vg_fn_t process_single_vg)
{
struct volume_group *vg;
struct vgnameid_list *vgnl;
const char *vg_name;
const char *vg_uuid;
+ uint32_t lockd_state = 0;
+ int selected;
+ int whole_selected = 0;
int ret_max = ECMD_PROCESSED;
int ret;
int skip;
@@ -1564,6 +1932,9 @@ static int _process_vgnameid_list(struct cmd_context *cmd, uint32_t flags,
if (dm_list_empty(arg_vgnames) && dm_list_empty(arg_tags))
process_all = 1;
+ /*
+ * FIXME If one_vgname_arg, only proceed if exactly one VG matches tags or selection.
+ */
dm_list_iterate_items(vgnl, vgnameids_to_process) {
if (sigint_caught())
return_ECMD_FAILED;
@@ -1572,35 +1943,43 @@ static int _process_vgnameid_list(struct cmd_context *cmd, uint32_t flags,
vg_uuid = vgnl->vgid;
skip = 0;
- vg = vg_read(cmd, vg_name, vg_uuid, flags);
- if (ignore_vg(vg, vg_name, flags & READ_ALLOW_INCONSISTENT, &skip)) {
- stack;
+ if (!lockd_vg(cmd, vg_name, NULL, 0, &lockd_state)) {
ret_max = ECMD_FAILED;
- release_vg(vg);
continue;
}
- if (skip) {
- release_vg(vg);
- continue;
+
+ vg = vg_read(cmd, vg_name, vg_uuid, flags, lockd_state);
+ if (_ignore_vg(vg, vg_name, arg_vgnames, flags & READ_ALLOW_INCONSISTENT, &skip)) {
+ stack;
+ ret_max = ECMD_FAILED;
+ goto endvg;
}
+ if (skip)
+ goto endvg;
/* Process this VG? */
- if (process_all ||
+ if ((process_all ||
(!dm_list_empty(arg_vgnames) && str_list_match_item(arg_vgnames, vg_name)) ||
- (!dm_list_empty(arg_tags) && str_list_match_list(arg_tags, &vg->tags, NULL))) {
+ (!dm_list_empty(arg_tags) && str_list_match_list(arg_tags, &vg->tags, NULL))) &&
+ select_match_vg(cmd, handle, vg, &selected) && selected) {
ret = process_single_vg(cmd, vg_name, vg, handle);
+ _update_selection_result(handle, &whole_selected);
if (ret != ECMD_PROCESSED)
stack;
if (ret > ret_max)
ret_max = ret;
}
- if (vg_read_error(vg))
- release_vg(vg);
- else
- unlock_and_release_vg(cmd, vg, vg_name);
+ if (!vg_read_error(vg))
+ unlock_vg(cmd, vg_name);
+endvg:
+ release_vg(vg);
+ if (!lockd_vg(cmd, vg_name, "un", 0, &lockd_state))
+ stack;
}
+ /* the VG is selected if at least one LV is selected */
+ _set_final_selection_result(handle, whole_selected);
return ret_max;
}
@@ -1637,17 +2016,22 @@ static int _copy_str_to_vgnameid_list(struct cmd_context *cmd, struct dm_list *s
* Call process_single_vg() for each VG selected by the command line arguments.
*/
int process_each_vg(struct cmd_context *cmd, int argc, char **argv,
- uint32_t flags, void *handle,
+ uint32_t flags, struct processing_handle *handle,
process_single_vg_fn_t process_single_vg)
{
+ int handle_supplied = handle != NULL;
struct dm_list arg_tags; /* str_list */
struct dm_list arg_vgnames; /* str_list */
struct dm_list vgnameids_on_system; /* vgnameid_list */
struct dm_list vgnameids_to_process; /* vgnameid_list */
int enable_all_vgs = (cmd->command->flags & ALL_VGS_IS_DEFAULT);
+ unsigned one_vgname_arg = (flags & ONE_VGNAME_ARG);
int ret;
+ /* Disable error in vg_read so we can print it from ignore_vg. */
+ cmd->vg_read_print_access_error = 0;
+
dm_list_init(&arg_tags);
dm_list_init(&arg_vgnames);
dm_list_init(&vgnameids_on_system);
@@ -1656,26 +2040,30 @@ int process_each_vg(struct cmd_context *cmd, int argc, char **argv,
/*
* Find any VGs or tags explicitly provided on the command line.
*/
- if ((ret = _get_arg_vgnames(cmd, argc, argv, &arg_vgnames, &arg_tags)) != ECMD_PROCESSED) {
- stack;
- return ret;
- }
+ if ((ret = _get_arg_vgnames(cmd, argc, argv, one_vgname_arg, &arg_vgnames, &arg_tags)) != ECMD_PROCESSED)
+ goto_out;
/*
* Obtain the complete list of VGs present on the system if it is needed because:
* any tags were supplied and need resolving; or
* no VG names were given and the command defaults to processing all VGs.
*/
- if (((dm_list_empty(&arg_vgnames) && enable_all_vgs) || !dm_list_empty(&arg_tags)) &&
- ((ret = _get_vgnameids_on_system(cmd, &vgnameids_on_system, NULL, 0)) != ECMD_PROCESSED)) {
- stack;
- return ret;
+ if ((dm_list_empty(&arg_vgnames) && enable_all_vgs) || !dm_list_empty(&arg_tags)) {
+ /* Needed for a current listing of the global VG namespace. */
+ if (!lockd_gl(cmd, "sh", 0)) {
+ ret = ECMD_FAILED;
+ goto_out;
+ }
+
+ if (!get_vgnameids(cmd, &vgnameids_on_system, NULL, 0))
+ goto_out;
}
if (dm_list_empty(&arg_vgnames) && dm_list_empty(&vgnameids_on_system)) {
/* FIXME Should be log_print, but suppressed for reporting cmds */
log_verbose("No volume groups found.");
- return ECMD_PROCESSED;
+ ret = ECMD_PROCESSED;
+ goto out;
}
/*
@@ -1684,30 +2072,51 @@ int process_each_vg(struct cmd_context *cmd, int argc, char **argv,
*/
if (!dm_list_empty(&vgnameids_on_system))
dm_list_splice(&vgnameids_to_process, &vgnameids_on_system);
- else if ((ret = _copy_str_to_vgnameid_list(cmd, &arg_vgnames, &vgnameids_to_process)) != ECMD_PROCESSED) {
- stack;
- return ret;
- }
+ else if ((ret = _copy_str_to_vgnameid_list(cmd, &arg_vgnames, &vgnameids_to_process)) != ECMD_PROCESSED)
+ goto_out;
+
+ if (!handle && !(handle = init_processing_handle(cmd)))
+ goto_out;
+
+ if (handle->internal_report_for_select && !handle->selection_handle &&
+ !init_selection_handle(cmd, handle, VGS))
+ goto_out;
- return _process_vgnameid_list(cmd, flags, &vgnameids_to_process,
- &arg_vgnames, &arg_tags, handle, process_single_vg);
+ ret = _process_vgnameid_list(cmd, flags, &vgnameids_to_process,
+ &arg_vgnames, &arg_tags, handle, process_single_vg);
+out:
+ if (!handle_supplied)
+ destroy_processing_handle(cmd, handle);
+
+ return ret;
}
int process_each_lv_in_vg(struct cmd_context *cmd, struct volume_group *vg,
struct dm_list *arg_lvnames, const struct dm_list *tags_in,
int stop_on_error,
- void *handle, process_single_lv_fn_t process_single_lv)
+ struct processing_handle *handle,
+ process_single_lv_fn_t process_single_lv)
{
int ret_max = ECMD_PROCESSED;
int ret = 0;
+ int selected;
+ int whole_selected = 0;
+ int handle_supplied = handle != NULL;
+ unsigned process_lv;
unsigned process_all = 0;
unsigned tags_supplied = 0;
unsigned lvargs_supplied = 0;
struct lv_list *lvl;
struct dm_str_list *sl;
+ struct dm_list final_lvs;
+ struct lv_list *final_lvl;
- if (!vg_check_status(vg, EXPORTED_VG))
- return_ECMD_FAILED;
+ dm_list_init(&final_lvs);
+
+ if (!vg_check_status(vg, EXPORTED_VG)) {
+ ret_max = ECMD_FAILED;
+ goto_out;
+ }
if (tags_in && !dm_list_empty(tags_in))
tags_supplied = 1;
@@ -1715,19 +2124,28 @@ int process_each_lv_in_vg(struct cmd_context *cmd, struct volume_group *vg,
if (arg_lvnames && !dm_list_empty(arg_lvnames))
lvargs_supplied = 1;
+ if (!handle && !(handle = init_processing_handle(cmd))) {
+ ret_max = ECMD_FAILED;
+ goto_out;
+ }
+
+ if (handle->internal_report_for_select && !handle->selection_handle &&
+ !init_selection_handle(cmd, handle, LVS)) {
+ ret_max = ECMD_FAILED;
+ goto_out;
+ }
+
/* Process all LVs in this VG if no restrictions given
* or if VG tags match. */
if ((!tags_supplied && !lvargs_supplied) ||
(tags_supplied && str_list_match_list(tags_in, &vg->tags, NULL)))
process_all = 1;
- /*
- * FIXME: In case of remove it goes through deleted entries,
- * but it works since entries are allocated from vg mem pool.
- */
dm_list_iterate_items(lvl, &vg->lvs) {
- if (sigint_caught())
- return_ECMD_FAILED;
+ if (sigint_caught()) {
+ ret_max = ECMD_FAILED;
+ goto_out;
+ }
if (lvl->lv->status & SNAPSHOT)
continue;
@@ -1747,30 +2165,86 @@ int process_each_lv_in_vg(struct cmd_context *cmd, struct volume_group *vg,
}
/*
- * Only let hidden LVs through it --all was used or the LVs
+ * Only let hidden LVs through if --all was used or the LVs
* were specifically named on the command line.
*/
if (!lvargs_supplied && !lv_is_visible(lvl->lv) && !arg_count(cmd, all_ARG))
continue;
- /* Only process the LV if the name matches or process_all is set or if an LV tag matches */
- if (lvargs_supplied && str_list_match_item(arg_lvnames, lvl->lv->name))
+ /*
+ * Only let sanlock LV through if --all was used or if
+ * it is named on the command line.
+ */
+ if (lv_is_lockd_sanlock_lv(lvl->lv)) {
+ if (arg_count(cmd, all_ARG) ||
+ (lvargs_supplied && str_list_match_item(arg_lvnames, lvl->lv->name))) {
+ log_very_verbose("Processing lockd_sanlock_lv %s/%s.", vg->name, lvl->lv->name);
+ } else {
+ continue;
+ }
+ }
+
+ /*
+ * process the LV if one of the following:
+ * - process_all is set
+ * - LV name matches a supplied LV name
+ * - LV tag matches a supplied LV tag
+ * - LV matches the selection
+ */
+
+ process_lv = process_all;
+
+ if (lvargs_supplied && str_list_match_item(arg_lvnames, lvl->lv->name)) {
/* Remove LV from list of unprocessed LV names */
str_list_del(arg_lvnames, lvl->lv->name);
- else if (!process_all &&
- (!tags_supplied || !str_list_match_list(tags_in, &lvl->lv->tags, NULL)))
+ process_lv = 1;
+ }
+
+ if (!process_lv && tags_supplied && str_list_match_list(tags_in, &lvl->lv->tags, NULL))
+ process_lv = 1;
+
+ process_lv = process_lv && select_match_lv(cmd, handle, vg, lvl->lv, &selected) && selected;
+
+ if (sigint_caught()) {
+ ret_max = ECMD_FAILED;
+ goto_out;
+ }
+
+ if (!process_lv)
+ continue;
+
+ log_very_verbose("Adding %s/%s to the list of LVs to be processed.", vg->name, lvl->lv->name);
+
+ if (!(final_lvl = dm_pool_zalloc(vg->vgmem, sizeof(struct lv_list)))) {
+ log_error("Failed to allocate final LV list item.");
+ ret_max = ECMD_FAILED;
+ goto_out;
+ }
+ final_lvl->lv = lvl->lv;
+ dm_list_add(&final_lvs, &final_lvl->list);
+ }
+
+ dm_list_iterate_items(lvl, &final_lvs) {
+ /*
+ * FIXME: Once we have index over vg->removed_lvs, check directly
+ * LV presence there and remove LV_REMOVE flag/lv_is_removed fn
+ * as they won't be needed anymore.
+ */
+ if (lv_is_removed(lvl->lv))
continue;
log_very_verbose("Processing LV %s in VG %s.", lvl->lv->name, vg->name);
ret = process_single_lv(cmd, lvl->lv, handle);
+ if (handle_supplied)
+ _update_selection_result(handle, &whole_selected);
if (ret != ECMD_PROCESSED)
stack;
if (ret > ret_max)
ret_max = ret;
if (stop_on_error && ret != ECMD_PROCESSED)
- return ret_max;
+ goto_out;
}
if (lvargs_supplied) {
@@ -1785,7 +2259,11 @@ int process_each_lv_in_vg(struct cmd_context *cmd, struct volume_group *vg,
ret_max = ECMD_FAILED;
}
}
-
+out:
+ if (!handle_supplied)
+ destroy_processing_handle(cmd, handle);
+ else
+ _set_final_selection_result(handle, whole_selected);
return ret_max;
}
@@ -1898,7 +2376,7 @@ static int _process_lv_vgnameid_list(struct cmd_context *cmd, uint32_t flags,
struct dm_list *arg_vgnames,
struct dm_list *arg_lvnames,
struct dm_list *arg_tags,
- void *handle,
+ struct processing_handle *handle,
process_single_lv_fn_t process_single_lv)
{
struct volume_group *vg;
@@ -1906,6 +2384,7 @@ static int _process_lv_vgnameid_list(struct cmd_context *cmd, uint32_t flags,
struct dm_str_list *sl;
struct dm_list *tags_arg;
struct dm_list lvnames;
+ uint32_t lockd_state = 0;
const char *vg_name;
const char *vg_uuid;
const char *vgn;
@@ -1952,18 +2431,20 @@ static int _process_lv_vgnameid_list(struct cmd_context *cmd, uint32_t flags,
}
}
- vg = vg_read(cmd, vg_name, vg_uuid, flags);
- if (ignore_vg(vg, vg_name, flags & READ_ALLOW_INCONSISTENT, &skip)) {
- stack;
+ if (!lockd_vg(cmd, vg_name, NULL, 0, &lockd_state)) {
ret_max = ECMD_FAILED;
- release_vg(vg);
continue;
-
}
- if (skip) {
- release_vg(vg);
- continue;
+
+ vg = vg_read(cmd, vg_name, vg_uuid, flags, lockd_state);
+ if (_ignore_vg(vg, vg_name, arg_vgnames, flags & READ_ALLOW_INCONSISTENT, &skip)) {
+ stack;
+ ret_max = ECMD_FAILED;
+ goto endvg;
+
}
+ if (skip)
+ goto endvg;
ret = process_each_lv_in_vg(cmd, vg, &lvnames, tags_arg, 0,
handle, process_single_lv);
@@ -1972,7 +2453,11 @@ static int _process_lv_vgnameid_list(struct cmd_context *cmd, uint32_t flags,
if (ret > ret_max)
ret_max = ret;
- unlock_and_release_vg(cmd, vg, vg_name);
+ unlock_vg(cmd, vg_name);
+endvg:
+ release_vg(vg);
+ if (!lockd_vg(cmd, vg_name, "un", 0, &lockd_state))
+ stack;
}
return ret_max;
@@ -1982,8 +2467,9 @@ static int _process_lv_vgnameid_list(struct cmd_context *cmd, uint32_t flags,
* Call process_single_lv() for each LV selected by the command line arguments.
*/
int process_each_lv(struct cmd_context *cmd, int argc, char **argv, uint32_t flags,
- void *handle, process_single_lv_fn_t process_single_lv)
+ struct processing_handle *handle, process_single_lv_fn_t process_single_lv)
{
+ int handle_supplied = handle != NULL;
struct dm_list arg_tags; /* str_list */
struct dm_list arg_vgnames; /* str_list */
struct dm_list arg_lvnames; /* str_list */
@@ -1991,8 +2477,12 @@ int process_each_lv(struct cmd_context *cmd, int argc, char **argv, uint32_t fla
struct dm_list vgnameids_to_process; /* vgnameid_list */
int enable_all_vgs = (cmd->command->flags & ALL_VGS_IS_DEFAULT);
+ int need_vgnameids = 0;
int ret;
+ /* Disable error in vg_read so we can print it from ignore_vg. */
+ cmd->vg_read_print_access_error = 0;
+
dm_list_init(&arg_tags);
dm_list_init(&arg_vgnames);
dm_list_init(&arg_lvnames);
@@ -2002,26 +2492,45 @@ int process_each_lv(struct cmd_context *cmd, int argc, char **argv, uint32_t fla
/*
* Find any LVs, VGs or tags explicitly provided on the command line.
*/
- if ((ret = _get_arg_lvnames(cmd, argc, argv, &arg_vgnames, &arg_lvnames, &arg_tags) != ECMD_PROCESSED)) {
- stack;
- return ret;
- }
+ if ((ret = _get_arg_lvnames(cmd, argc, argv, &arg_vgnames, &arg_lvnames, &arg_tags) != ECMD_PROCESSED))
+ goto_out;
+
+ if (!handle && !(handle = init_processing_handle(cmd)))
+ goto_out;
+
+ if (handle->internal_report_for_select && !handle->selection_handle &&
+ !init_selection_handle(cmd, handle, LVS))
+ goto_out;
/*
* Obtain the complete list of VGs present on the system if it is needed because:
* any tags were supplied and need resolving; or
+ * no VG names were given and the select option needs resolving; or
* no VG names were given and the command defaults to processing all VGs.
*/
- if (((dm_list_empty(&arg_vgnames) && enable_all_vgs) || !dm_list_empty(&arg_tags)) &&
- (ret = _get_vgnameids_on_system(cmd, &vgnameids_on_system, NULL, 0) != ECMD_PROCESSED)) {
- stack;
- return ret;
+ if (!dm_list_empty(&arg_tags))
+ need_vgnameids = 1;
+ else if (dm_list_empty(&arg_vgnames) && enable_all_vgs)
+ need_vgnameids = 1;
+ else if (dm_list_empty(&arg_vgnames) && handle->internal_report_for_select)
+ need_vgnameids = 1;
+
+ if (need_vgnameids) {
+ /* Needed for a current listing of the global VG namespace. */
+ if (!lockd_gl(cmd, "sh", 0)) {
+ ret = ECMD_FAILED;
+ goto_out;
+ }
+
+ if (!get_vgnameids(cmd, &vgnameids_on_system, NULL, 0))
+ goto_out;
}
if (dm_list_empty(&arg_vgnames) && dm_list_empty(&vgnameids_on_system)) {
/* FIXME Should be log_print, but suppressed for reporting cmds */
log_verbose("No volume groups found.");
- return ECMD_PROCESSED;
+ ret = ECMD_PROCESSED;
+ goto out;
}
/*
@@ -2030,13 +2539,15 @@ int process_each_lv(struct cmd_context *cmd, int argc, char **argv, uint32_t fla
*/
if (!dm_list_empty(&vgnameids_on_system))
dm_list_splice(&vgnameids_to_process, &vgnameids_on_system);
- else if ((ret = _copy_str_to_vgnameid_list(cmd, &arg_vgnames, &vgnameids_to_process)) != ECMD_PROCESSED) {
- stack;
- return ret;
- }
+ else if ((ret = _copy_str_to_vgnameid_list(cmd, &arg_vgnames, &vgnameids_to_process)) != ECMD_PROCESSED)
+ goto_out;
- return _process_lv_vgnameid_list(cmd, flags, &vgnameids_to_process, &arg_vgnames, &arg_lvnames,
- &arg_tags, handle, process_single_lv);
+ ret = _process_lv_vgnameid_list(cmd, flags, &vgnameids_to_process, &arg_vgnames, &arg_lvnames,
+ &arg_tags, handle, process_single_lv);
+out:
+ if (!handle_supplied)
+ destroy_processing_handle(cmd, handle);
+ return ret;
}
static int _get_arg_pvnames(struct cmd_context *cmd,
@@ -2082,28 +2593,55 @@ static int _get_arg_pvnames(struct cmd_context *cmd,
return ret_max;
}
+static int _get_arg_devices(struct cmd_context *cmd,
+ struct dm_list *arg_pvnames,
+ struct dm_list *arg_devices)
+{
+ struct dm_str_list *sl;
+ struct device_id_list *dil;
+ int ret_max = ECMD_PROCESSED;
+
+ dm_list_iterate_items(sl, arg_pvnames) {
+ if (!(dil = dm_pool_alloc(cmd->mem, sizeof(*dil)))) {
+ log_error("device_id_list alloc failed.");
+ return ECMD_FAILED;
+ }
+
+ if (!(dil->dev = dev_cache_get(sl->str, cmd->filter))) {
+ log_error("Failed to find device for physical volume \"%s\".", sl->str);
+ ret_max = ECMD_FAILED;
+ } else {
+ strncpy(dil->pvid, dil->dev->pvid, ID_LEN);
+ dm_list_add(arg_devices, &dil->list);
+ }
+ }
+
+ return ret_max;
+}
+
static int _get_all_devices(struct cmd_context *cmd, struct dm_list *all_devices)
{
struct dev_iter *iter;
struct device *dev;
- struct device_list *devl;
+ struct device_id_list *dil;
int r = ECMD_FAILED;
lvmcache_seed_infos_from_lvmetad(cmd);
- if (!(iter = dev_iter_create(cmd->filter, 1))) {
+ if (!(iter = dev_iter_create(cmd->full_filter, 1))) {
log_error("dev_iter creation failed.");
return ECMD_FAILED;
}
while ((dev = dev_iter_get(iter))) {
- if (!(devl = dm_pool_alloc(cmd->mem, sizeof(*devl)))) {
- log_error("device_list alloc failed.");
+ if (!(dil = dm_pool_alloc(cmd->mem, sizeof(*dil)))) {
+ log_error("device_id_list alloc failed.");
goto out;
}
- devl->dev = dev;
- dm_list_add(all_devices, &devl->list);
+ strncpy(dil->pvid, dev->pvid, ID_LEN);
+ dil->dev = dev;
+ dm_list_add(all_devices, &dil->list);
}
r = ECMD_PROCESSED;
@@ -2112,13 +2650,13 @@ out:
return r;
}
-static int _device_list_remove(struct dm_list *all_devices, struct device *dev)
+static int _device_list_remove(struct dm_list *devices, struct device *dev)
{
- struct device_list *devl;
+ struct device_id_list *dil;
- dm_list_iterate_items(devl, all_devices) {
- if (devl->dev == dev) {
- dm_list_del(&devl->list);
+ dm_list_iterate_items(dil, devices) {
+ if (dil->dev == dev) {
+ dm_list_del(&dil->list);
return 1;
}
}
@@ -2126,12 +2664,37 @@ static int _device_list_remove(struct dm_list *all_devices, struct device *dev)
return 0;
}
+static struct device_id_list *_device_list_find_dev(struct dm_list *devices, struct device *dev)
+{
+ struct device_id_list *dil;
+
+ dm_list_iterate_items(dil, devices) {
+ if (dil->dev == dev)
+ return dil;
+ }
+
+ return NULL;
+}
+
+static struct device_id_list *_device_list_find_pvid(struct dm_list *devices, struct physical_volume *pv)
+{
+ struct device_id_list *dil;
+
+ dm_list_iterate_items(dil, devices) {
+ if (id_equal((struct id *) dil->pvid, &pv->id))
+ return dil;
+ }
+
+ return NULL;
+}
+
static int _process_device_list(struct cmd_context *cmd, struct dm_list *all_devices,
- void *handle, process_single_pv_fn_t process_single_pv)
+ struct processing_handle *handle,
+ process_single_pv_fn_t process_single_pv)
{
struct physical_volume pv_dummy;
struct physical_volume *pv;
- struct device_list *devl;
+ struct device_id_list *dil;
int ret_max = ECMD_PROCESSED;
int ret = 0;
@@ -2139,17 +2702,17 @@ static int _process_device_list(struct cmd_context *cmd, struct dm_list *all_dev
* Pretend that each device is a PV with dummy values.
* FIXME Formalise this extension or find an alternative.
*/
- dm_list_iterate_items(devl, all_devices) {
+ dm_list_iterate_items(dil, all_devices) {
if (sigint_caught())
return_ECMD_FAILED;
memset(&pv_dummy, 0, sizeof(pv_dummy));
dm_list_init(&pv_dummy.tags);
dm_list_init(&pv_dummy.segments);
- pv_dummy.dev = devl->dev;
+ pv_dummy.dev = dil->dev;
pv = &pv_dummy;
- log_very_verbose("Processing device %s.", dev_name(devl->dev));
+ log_very_verbose("Processing device %s.", dev_name(dil->dev));
ret = process_single_pv(cmd, NULL, pv, handle);
@@ -2163,41 +2726,70 @@ static int _process_device_list(struct cmd_context *cmd, struct dm_list *all_dev
static int _process_pvs_in_vg(struct cmd_context *cmd,
struct volume_group *vg,
struct dm_list *all_devices,
- struct dm_list *arg_pvnames,
+ struct dm_list *arg_devices,
struct dm_list *arg_tags,
- int process_all,
+ int process_all_pvs,
+ int process_all_devices,
int skip,
- void *handle,
+ struct processing_handle *handle,
process_single_pv_fn_t process_single_pv)
{
+ int handle_supplied = handle != NULL;
struct physical_volume *pv;
struct pv_list *pvl;
+ struct device_id_list *dil;
+ struct device *dev_orig;
const char *pv_name;
+ int selected;
int process_pv;
int dev_found;
int ret_max = ECMD_PROCESSED;
int ret = 0;
+ if (!handle && (!(handle = init_processing_handle(cmd)))) {
+ ret_max = ECMD_FAILED;
+ goto_out;
+ }
+
+ if (handle->internal_report_for_select && !handle->selection_handle &&
+ !init_selection_handle(cmd, handle, PVS)) {
+ ret_max = ECMD_FAILED;
+ goto_out;
+ }
+
dm_list_iterate_items(pvl, &vg->pvs) {
- if (sigint_caught())
- return_ECMD_FAILED;
+ if (sigint_caught()) {
+ ret_max = ECMD_FAILED;
+ goto_out;
+ }
pv = pvl->pv;
pv_name = pv_dev_name(pv);
- process_pv = process_all;
+ process_pv = process_all_pvs;
- /* Remove each pvname as it is processed. */
- if (!process_pv && !dm_list_empty(arg_pvnames) &&
- str_list_match_item(arg_pvnames, pv_name)) {
+ /* Remove each arg_devices entry as it is processed. */
+
+ if (!process_pv && !dm_list_empty(arg_devices) &&
+ (dil = _device_list_find_dev(arg_devices, pv->dev))) {
process_pv = 1;
- str_list_del(arg_pvnames, pv_name);
+ _device_list_remove(arg_devices, dil->dev);
+ }
+
+ /* Select the PV if the device arg has the same pvid. */
+
+ if (!process_pv && !dm_list_empty(arg_devices) &&
+ (dil = _device_list_find_pvid(arg_devices, pv))) {
+ process_pv = 1;
+ _device_list_remove(arg_devices, dil->dev);
}
if (!process_pv && !dm_list_empty(arg_tags) &&
str_list_match_list(arg_tags, &pv->tags, NULL))
process_pv = 1;
+ process_pv = process_pv && select_match_pv(cmd, handle, vg, pv, &selected) && selected;
+
if (process_pv) {
if (skip)
log_verbose("Skipping PV %s in VG %s.", pv_name, vg->name);
@@ -2227,44 +2819,105 @@ static int _process_pvs_in_vg(struct cmd_context *cmd,
if (ret > ret_max)
ret_max = ret;
}
+
+ /*
+ * This is a very rare and obscure case where multiple
+ * duplicate devices are specified on the command line
+ * referring to this PV. In this case we want to
+ * process this PV once for each specified device.
+ */
+
+ if (!skip && !dm_list_empty(arg_devices)) {
+ while ((dil = _device_list_find_pvid(arg_devices, pv))) {
+ _device_list_remove(arg_devices, dil->dev);
+
+ /*
+ * Replace pv->dev with this dil->dev
+ * in lvmcache so the duplicate dev
+ * info will be reported. FIXME: it
+ * would be nicer to override pv->dev
+ * without munging lvmcache content.
+ */
+ dev_orig = pv->dev;
+ lvmcache_replace_dev(cmd, pv, dil->dev);
+
+ log_very_verbose("Processing PV %s device %s in VG %s.",
+ pv_name, dev_name(dil->dev), vg->name);
+
+ ret = process_single_pv(cmd, vg, pv, handle);
+ if (ret != ECMD_PROCESSED)
+ stack;
+ if (ret > ret_max)
+ ret_max = ret;
+
+ /* Put the cache state back as it was. */
+ lvmcache_replace_dev(cmd, pv, dev_orig);
+ }
+ }
+
+ /*
+ * This is another rare and obscure case where multiple
+ * duplicate devices are being displayed by pvs -a, and
+ * we want each of them to be displayed in the context
+ * of this VG, so that this VG name appears next to it.
+ */
+
+ if (process_all_devices && lvmcache_found_duplicate_pvs()) {
+ while ((dil = _device_list_find_pvid(all_devices, pv))) {
+ _device_list_remove(all_devices, dil->dev);
+
+ dev_orig = pv->dev;
+ lvmcache_replace_dev(cmd, pv, dil->dev);
+
+ ret = process_single_pv(cmd, vg, pv, handle);
+ if (ret != ECMD_PROCESSED)
+ stack;
+ if (ret > ret_max)
+ ret_max = ret;
+
+ lvmcache_replace_dev(cmd, pv, dev_orig);
+ }
+ }
}
/*
- * When processing only specific PV names, we can quit
- * once they've all been found.
+ * When processing only specific PVs, we can quit once they've all been found.
*/
- if (!process_all && dm_list_empty(arg_tags) && dm_list_empty(arg_pvnames))
+ if (!process_all_pvs && dm_list_empty(arg_tags) && dm_list_empty(arg_devices))
break;
}
-
+out:
+ if (!handle_supplied)
+ destroy_processing_handle(cmd, handle);
return ret_max;
}
/*
* Iterate through all PVs in each listed VG. Process a PV if
- * the name or tag matches arg_pvnames or arg_tags. If both
- * arg_pvnames and arg_tags are empty, then process all PVs.
+ * its dev or tag matches arg_devices or arg_tags. If both
+ * arg_devices and arg_tags are empty, then process all PVs.
* No PV should be processed more than once.
*
- * Each PV is removed from arg_pvnames and all_devices when it is
- * processed. Any names remaining in arg_pvnames were not found, and
+ * Each PV is removed from arg_devices and all_devices when it is
+ * processed. Any devices remaining in arg_devices were not found, and
* should produce an error. Any devices remaining in all_devices were
- * not found and should be processed by process_all_devices().
+ * not found and should be processed by process_device_list().
*/
static int _process_pvs_in_vgs(struct cmd_context *cmd, uint32_t flags,
struct dm_list *all_vgnameids,
struct dm_list *all_devices,
- struct dm_list *arg_pvnames,
+ struct dm_list *arg_devices,
struct dm_list *arg_tags,
- int process_all,
- void *handle,
+ int process_all_pvs,
+ int process_all_devices,
+ struct processing_handle *handle,
process_single_pv_fn_t process_single_pv)
{
struct volume_group *vg;
struct vgnameid_list *vgnl;
- struct dm_str_list *sl;
const char *vg_name;
const char *vg_uuid;
+ uint32_t lockd_state = 0;
int ret_max = ECMD_PROCESSED;
int ret;
int skip;
@@ -2277,42 +2930,45 @@ static int _process_pvs_in_vgs(struct cmd_context *cmd, uint32_t flags,
vg_uuid = vgnl->vgid;
skip = 0;
- vg = vg_read(cmd, vg_name, vg_uuid, flags | READ_WARN_INCONSISTENT);
- if (ignore_vg(vg, vg_name, flags & READ_ALLOW_INCONSISTENT, &skip)) {
- stack;
+ if (!lockd_vg(cmd, vg_name, NULL, 0, &lockd_state)) {
ret_max = ECMD_FAILED;
- release_vg(vg);
continue;
}
+
+ vg = vg_read(cmd, vg_name, vg_uuid, flags | READ_WARN_INCONSISTENT, lockd_state);
+ if (_ignore_vg(vg, vg_name, NULL, flags & READ_ALLOW_INCONSISTENT, &skip)) {
+ stack;
+ ret_max = ECMD_FAILED;
+ if (!skip)
+ goto endvg;
+ /* Drop through to eliminate a clustered VG's PVs from the devices list */
+ }
/*
* Don't continue when skip is set, because we need to remove
* vg->pvs entries from devices list.
*/
- ret = _process_pvs_in_vg(cmd, vg, all_devices, arg_pvnames, arg_tags,
- process_all, skip, handle, process_single_pv);
+ ret = _process_pvs_in_vg(cmd, vg, all_devices, arg_devices, arg_tags,
+ process_all_pvs, process_all_devices, skip,
+ handle, process_single_pv);
if (ret != ECMD_PROCESSED)
stack;
if (ret > ret_max)
ret_max = ret;
- if (skip)
- release_vg(vg);
- else
- unlock_and_release_vg(cmd, vg, vg->name);
+ if (!skip)
+ unlock_vg(cmd, vg->name);
+endvg:
+ release_vg(vg);
+ if (!lockd_vg(cmd, vg_name, "un", 0, &lockd_state))
+ stack;
/* Quit early when possible. */
- if (!process_all && dm_list_empty(arg_tags) && dm_list_empty(arg_pvnames))
+ if (!process_all_pvs && dm_list_empty(arg_tags) && dm_list_empty(arg_devices))
return ret_max;
}
- /* Return an error if a pvname arg was not found. */
- dm_list_iterate_items(sl, arg_pvnames) {
- log_error("Failed to find physical volume \"%s\".", sl->str);
- ret_max = ECMD_FAILED;
- }
-
return ret_max;
}
@@ -2320,20 +2976,26 @@ int process_each_pv(struct cmd_context *cmd,
int argc, char **argv,
const char *only_this_vgname,
uint32_t flags,
- void *handle,
+ struct processing_handle *handle,
process_single_pv_fn_t process_single_pv)
{
struct dm_list arg_tags; /* str_list */
struct dm_list arg_pvnames; /* str_list */
+ struct dm_list arg_devices; /* device_id_list */
struct dm_list all_vgnameids; /* vgnameid_list */
- struct dm_list all_devices; /* device_list */
+ struct dm_list all_devices; /* device_id_list */
+ struct device_id_list *dil;
int process_all_pvs;
int process_all_devices;
int ret_max = ECMD_PROCESSED;
int ret;
+ /* Disable error in vg_read so we can print it from ignore_vg. */
+ cmd->vg_read_print_access_error = 0;
+
dm_list_init(&arg_tags);
dm_list_init(&arg_pvnames);
+ dm_list_init(&arg_devices);
dm_list_init(&all_vgnameids);
dm_list_init(&all_devices);
@@ -2341,15 +3003,34 @@ int process_each_pv(struct cmd_context *cmd,
* Create two lists from argv:
* arg_pvnames: pvs explicitly named in argv
* arg_tags: tags explicitly named in argv
+ *
+ * Then convert arg_pvnames, which are free-form, user-specified,
+ * names/paths into arg_devices which can be used to match below.
*/
- if ((ret = _get_arg_pvnames(cmd, argc, argv, &arg_pvnames, &arg_tags)) != ECMD_PROCESSED)
+ if ((ret = _get_arg_pvnames(cmd, argc, argv, &arg_pvnames, &arg_tags)) != ECMD_PROCESSED) {
+ stack;
return ret;
+ }
process_all_pvs = dm_list_empty(&arg_pvnames) && dm_list_empty(&arg_tags);
- process_all_devices = process_all_pvs &&
- (cmd->command->flags & ENABLE_ALL_DEVS) &&
- arg_count(cmd, all_ARG);
+ process_all_devices = process_all_pvs && (cmd->command->flags & ENABLE_ALL_DEVS) &&
+ arg_count(cmd, all_ARG);
+
+ /* Needed for a current listing of the global VG namespace. */
+ if (!only_this_vgname && !lockd_gl(cmd, "sh", 0))
+ return_ECMD_FAILED;
+
+ /*
+ * Need pvid's set on all PVs before processing so that pvid's
+ * can be compared to find duplicates while processing.
+ */
+ lvmcache_seed_infos_from_lvmetad(cmd);
+
+ if (!get_vgnameids(cmd, &all_vgnameids, only_this_vgname, 1)) {
+ stack;
+ return ECMD_FAILED;
+ }
/*
* If the caller wants to process all devices (not just PVs), then all PVs
@@ -2361,19 +3042,24 @@ int process_each_pv(struct cmd_context *cmd,
return ret;
}
- if ((ret = _get_vgnameids_on_system(cmd, &all_vgnameids, only_this_vgname, 1) != ECMD_PROCESSED)) {
- stack;
- return ret;
- }
+ if ((ret = _get_arg_devices(cmd, &arg_pvnames, &arg_devices)) != ECMD_PROCESSED)
+ /* get_arg_devices reports the error for any PV names not found. */
+ ret_max = ECMD_FAILED;
ret = _process_pvs_in_vgs(cmd, flags, &all_vgnameids, &all_devices,
- &arg_pvnames, &arg_tags, process_all_pvs,
+ &arg_devices, &arg_tags,
+ process_all_pvs, process_all_devices,
handle, process_single_pv);
if (ret != ECMD_PROCESSED)
stack;
if (ret > ret_max)
ret_max = ret;
+ dm_list_iterate_items(dil, &arg_devices) {
+ log_error("Failed to find physical volume \"%s\".", dev_name(dil->dev));
+ ret_max = ECMD_FAILED;
+ }
+
if (!process_all_devices)
goto out;
@@ -2387,8 +3073,10 @@ out:
}
int process_each_pv_in_vg(struct cmd_context *cmd, struct volume_group *vg,
- void *handle, process_single_pv_fn_t process_single_pv)
+ struct processing_handle *handle,
+ process_single_pv_fn_t process_single_pv)
{
+ int whole_selected = 0;
int ret_max = ECMD_PROCESSED;
int ret;
struct pv_list *pvl;
@@ -2398,17 +3086,19 @@ int process_each_pv_in_vg(struct cmd_context *cmd, struct volume_group *vg,
return_ECMD_FAILED;
ret = process_single_pv(cmd, vg, pvl->pv, handle);
+ _update_selection_result(handle, &whole_selected);
if (ret != ECMD_PROCESSED)
stack;
if (ret > ret_max)
ret_max = ret;
}
+ _set_final_selection_result(handle, whole_selected);
return ret_max;
}
int lvremove_single(struct cmd_context *cmd, struct logical_volume *lv,
- void *handle __attribute__((unused)))
+ struct processing_handle *handle __attribute__((unused)))
{
/*
* Single force is equivalent to single --yes
diff --git a/tools/toollib.h b/tools/toollib.h
index 715f65eda..2dc5ad73f 100644
--- a/tools/toollib.h
+++ b/tools/toollib.h
@@ -17,68 +17,136 @@
#define _LVM_TOOLLIB_H
#include "metadata-exported.h"
+#include "report.h"
int become_daemon(struct cmd_context *cmd, int skip_lvm);
-int ignore_vg(struct volume_group *vg, const char *vg_name, int allow_inconsistent, int *skip);
+/*
+ * The "struct processing_handle" is used as a handle for processing
+ * functions (process_each_* and related).
+ *
+ * The "custom_handle" is any handle used to pass custom data into
+ * process_each_* and related functions.
+ *
+ * Setting "internal_report_for_select=0" makes the processing function
+ * skip checking the report/selection criteria (if given on the cmd line)
+ * before executing the action on the item.
+ *
+ * The "selection_handle" is only used if "internal_report_for_select=1".
+ *
+ * Some important notes about selection:
+ * =====================================
+ * In case we're processing for display, the selection is directly
+ * a part of reporting for the display on output so we don't need to
+ * report the item in memory to get the selection result, then dropping
+ * the report and then reporting the same thing again for it to be
+ * displayed on output.
+ * For example, compare these code paths:
+ *
+ * - when reporting for display on output:
+ * _report -> process_each_* -> ... -> dm_report_object
+ * (Here the dm_report_object does both selection and
+ * reporting for display on output.)
+ *
+ * - for any other processing and reporting for selection:
+ * process_each_* -> _select_match_* -> ... -> dm_report_object_is_selected
+ * |
+ * --> (selection result) --> ...
+ * (Here the dm_report_object_is_selected just gets
+ * the selection result and it drops reporting buffer
+ * immediately. Then based on the selection result,
+ * the process_each_* action on the item is executed
+ * or not...)
+ *
+ * Simply, we want to avoid this double reporting when reporting
+ * for display on output:
+ * _report -> process_each_* -> _select_match_* -> ... -> dm_report_object_is_selected
+ * |
+ * --> (selection result) -> dm_report_object
+ *
+ * So whenever the processing action is "to display item on output", use
+ * "internal_report_for_select=0" as report/selection is already
+ * a part of that reporting for display (dm_report_object).
+ */
+struct processing_handle {
+ int internal_report_for_select;
+ struct selection_handle *selection_handle;
+ void *custom_handle;
+};
typedef int (*process_single_vg_fn_t) (struct cmd_context * cmd,
const char *vg_name,
struct volume_group * vg,
- void *handle);
+ struct processing_handle *handle);
typedef int (*process_single_pv_fn_t) (struct cmd_context *cmd,
struct volume_group *vg,
struct physical_volume *pv,
- void *handle);
+ struct processing_handle *handle);
typedef int (*process_single_label_fn_t) (struct cmd_context *cmd,
struct label *label,
- void *handle);
+ struct processing_handle *handle);
typedef int (*process_single_lv_fn_t) (struct cmd_context *cmd,
struct logical_volume *lv,
- void *handle);
+ struct processing_handle *handle);
typedef int (*process_single_seg_fn_t) (struct cmd_context * cmd,
struct lv_segment * seg,
- void *handle);
+ struct processing_handle *handle);
typedef int (*process_single_pvseg_fn_t) (struct cmd_context * cmd,
struct volume_group * vg,
struct pv_segment * pvseg,
- void *handle);
+ struct processing_handle *handle);
int process_each_vg(struct cmd_context *cmd, int argc, char **argv,
- uint32_t flags, void *handle,
+ uint32_t flags, struct processing_handle *handle,
process_single_vg_fn_t process_single_vg);
int process_each_pv(struct cmd_context *cmd, int argc, char **argv,
const char *vg_name, uint32_t lock_type,
- void *handle, process_single_pv_fn_t process_single_pv);
+ struct processing_handle *handle,
+ process_single_pv_fn_t process_single_pv);
int process_each_label(struct cmd_context *cmd, int argc, char **argv,
- void *handle, process_single_label_fn_t process_single_label);
+ struct processing_handle *handle,
+ process_single_label_fn_t process_single_label);
int process_each_segment_in_pv(struct cmd_context *cmd,
struct volume_group *vg,
struct physical_volume *pv,
- void *handle,
+ struct processing_handle *handle,
process_single_pvseg_fn_t process_single_pvseg);
int process_each_lv(struct cmd_context *cmd, int argc, char **argv,
- uint32_t flags, void *handle,
+ uint32_t flags, struct processing_handle *handle,
process_single_lv_fn_t process_single_lv);
int process_each_segment_in_lv(struct cmd_context *cmd,
- struct logical_volume *lv, void *handle,
+ struct logical_volume *lv,
+ struct processing_handle *handle,
process_single_seg_fn_t process_single_seg);
int process_each_pv_in_vg(struct cmd_context *cmd, struct volume_group *vg,
- void *handle, process_single_pv_fn_t process_single_pv);
+ struct processing_handle *handle,
+ process_single_pv_fn_t process_single_pv);
int process_each_lv_in_vg(struct cmd_context *cmd, struct volume_group *vg,
struct dm_list *arg_lvnames, const struct dm_list *tagsl,
- int stop_on_error, void *handle,
+ int stop_on_error, struct processing_handle *handle,
process_single_lv_fn_t process_single_lv);
+struct processing_handle *init_processing_handle(struct cmd_context *cmd);
+int init_selection_handle(struct cmd_context *cmd, struct processing_handle *handle,
+ report_type_t initial_report_type);
+void destroy_processing_handle(struct cmd_context *cmd, struct processing_handle *handle);
+
+int select_match_vg(struct cmd_context *cmd, struct processing_handle *handle,
+ struct volume_group *vg, int *selected);
+int select_match_lv(struct cmd_context *cmd, struct processing_handle *handle,
+ struct volume_group *vg, struct logical_volume *lv, int *selected);
+int select_match_pv(struct cmd_context *cmd, struct processing_handle *handle,
+ struct volume_group *vg, struct physical_volume *pv, int *selected);
+
const char *extract_vgname(struct cmd_context *cmd, const char *lv_name);
const char *skip_dev_dir(struct cmd_context *cmd, const char *vg_name,
unsigned *dev_dir_found);
@@ -104,8 +172,7 @@ int lv_refresh(struct cmd_context *cmd, struct logical_volume *lv);
int vg_refresh_visible(struct cmd_context *cmd, struct volume_group *vg);
void lv_spawn_background_polling(struct cmd_context *cmd,
struct logical_volume *lv);
-int pvcreate_params_validate(struct cmd_context *cmd,
- int argc, char **argv,
+int pvcreate_params_validate(struct cmd_context *cmd, int argc,
struct pvcreate_params *pp);
int get_activation_monitoring_mode(struct cmd_context *cmd,
@@ -123,7 +190,10 @@ int get_pool_params(struct cmd_context *cmd,
int get_stripe_params(struct cmd_context *cmd, uint32_t *stripes,
uint32_t *stripe_size);
-struct dm_config_tree *get_cachepolicy_params(struct cmd_context *cmd);
+int get_cache_params(struct cmd_context *cmd,
+ const char **mode,
+ const char **name,
+ struct dm_config_tree **settings);
int change_tag(struct cmd_context *cmd, struct volume_group *vg,
struct logical_volume *lv, struct physical_volume *pv, int arg);
@@ -138,6 +208,6 @@ int validate_restricted_lvname_param(struct cmd_context *cmd, const char **vg_na
const char **lv_name);
int lvremove_single(struct cmd_context *cmd, struct logical_volume *lv,
- void *handle __attribute__((unused)));
+ struct processing_handle *handle __attribute__((unused)));
#endif
diff --git a/tools/tools.h b/tools/tools.h
index 2196dbaa4..4ed893fc4 100644
--- a/tools/tools.h
+++ b/tools/tools.h
@@ -1,6 +1,6 @@
/*
* Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
- * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2004-2015 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
@@ -16,18 +16,15 @@
#ifndef _LVM_TOOLS_H
#define _LVM_TOOLS_H
-#define _GNU_SOURCE
-#define _FILE_OFFSET_BITS 64
-
-#include "configure.h"
-#include <assert.h>
-#include "libdevmapper.h"
+#include "tool.h"
#include "lvm-logging.h"
+
#include "activate.h"
#include "archiver.h"
#include "lvmcache.h"
#include "lvmetad.h"
+#include "lvmlockd.h"
#include "lvm-version.h"
#include "config.h"
#include "defaults.h"
@@ -46,11 +43,7 @@
#include "toolcontext.h"
#include "toollib.h"
-#include <stdlib.h>
-#include <unistd.h>
#include <ctype.h>
-#include <limits.h>
-#include <stdarg.h>
#include <sys/types.h>
#define CMD_LEN 256
@@ -102,10 +95,16 @@ struct arg_value_group_list {
#define CACHE_VGMETADATA 0x00000001
#define PERMITTED_READ_ONLY 0x00000002
-/* Process all vgs if none specified on the command line. */
+/* Process all VGs if none specified on the command line. */
#define ALL_VGS_IS_DEFAULT 0x00000004
/* Process all devices with --all if none are specified on the command line. */
#define ENABLE_ALL_DEVS 0x00000008
+/* Exactly one VG name argument required. */
+#define ONE_VGNAME_ARG 0x00000010
+/* Command needs a shared lock on a VG; it only reads the VG. */
+#define LOCKD_VG_SH 0x00000020
+/* Command does not process any metadata. */
+#define NO_METADATA_PROCESSING 0x00000040
/* a register of the lvm commands */
struct command {
@@ -142,6 +141,7 @@ int metadatatype_arg(struct cmd_context *cmd, struct arg_values *av);
int units_arg(struct cmd_context *cmd, struct arg_values *av);
int segtype_arg(struct cmd_context *cmd, struct arg_values *av);
int alloc_arg(struct cmd_context *cmd, struct arg_values *av);
+int locktype_arg(struct cmd_context *cmd, struct arg_values *av);
int readahead_arg(struct cmd_context *cmd, struct arg_values *av);
int metadatacopies_arg(struct cmd_context *cmd __attribute__((unused)), struct arg_values *av);
@@ -172,7 +172,8 @@ int32_t grouped_arg_int_value(const struct arg_values *av, int a, const int32_t
const char *command_name(struct cmd_context *cmd);
-int pvmove_poll(struct cmd_context *cmd, const char *pv, unsigned background);
+int pvmove_poll(struct cmd_context *cmd, const char *pv_name, const char *uuid,
+ const char *vg_name, const char *lv_name, unsigned background);
int lvconvert_poll(struct cmd_context *cmd, struct logical_volume *lv, unsigned background);
int mirror_remove_missing(struct cmd_context *cmd,
diff --git a/tools/vgcfgbackup.c b/tools/vgcfgbackup.c
index 7cf19bc28..5e80c806a 100644
--- a/tools/vgcfgbackup.c
+++ b/tools/vgcfgbackup.c
@@ -49,9 +49,9 @@ static char *_expand_filename(const char *template, const char *vg_name,
static int vg_backup_single(struct cmd_context *cmd, const char *vg_name,
struct volume_group *vg,
- void *handle)
+ struct processing_handle *handle)
{
- char **last_filename = (char **)handle;
+ char **last_filename = (char **)handle->custom_handle;
char *filename;
if (arg_count(cmd, file_ARG)) {
@@ -83,15 +83,24 @@ int vgcfgbackup(struct cmd_context *cmd, int argc, char **argv)
{
int ret;
char *last_filename = NULL;
+ struct processing_handle *handle = NULL;
+
+ if (!(handle = init_processing_handle(cmd))) {
+ log_error("Failed to initialize processing handle.");
+ return ECMD_FAILED;
+ }
+
+ handle->custom_handle = &last_filename;
init_pvmove(1);
ret = process_each_vg(cmd, argc, argv, READ_ALLOW_INCONSISTENT,
- &last_filename, &vg_backup_single);
+ handle, &vg_backup_single);
dm_free(last_filename);
init_pvmove(0);
+ destroy_processing_handle(cmd, handle);
return ret;
}
diff --git a/tools/vgchange.c b/tools/vgchange.c
index e5d700de8..cbdc29a3e 100644
--- a/tools/vgchange.c
+++ b/tools/vgchange.c
@@ -85,7 +85,7 @@ static int _activate_lvs_in_vg(struct cmd_context *cmd, struct volume_group *vg,
{
struct lv_list *lvl;
struct logical_volume *lv;
- int count = 0, expected_count = 0;
+ int count = 0, expected_count = 0, r = 1;
sigint_allow();
dm_list_iterate_items(lvl, &vg->lvs) {
@@ -114,11 +114,6 @@ static int _activate_lvs_in_vg(struct cmd_context *cmd, struct volume_group *vg,
if (lv_is_replicator_dev(lv) && (lv != first_replicator_dev(lv)))
continue;
- /* Can't deactivate a pvmove LV */
- /* FIXME There needs to be a controlled way of doing this */
- if (lv_is_pvmove(lv) && !is_change_activating(activate))
- continue;
-
if (lv_activation_skip(lv, activate, arg_count(cmd, ignoreactivationskip_ARG)))
continue;
@@ -147,14 +142,19 @@ static int _activate_lvs_in_vg(struct cmd_context *cmd, struct volume_group *vg,
}
sigint_restore();
- sync_local_dev_names(vg->cmd); /* Wait until devices are available */
+
+ /* Wait until devices are available */
+ if (!sync_local_dev_names(vg->cmd)) {
+ log_error("Failed to sync local devices for VG %s.", vg->name);
+ r = 0;
+ }
if (expected_count)
log_verbose("%s %d logical volumes in volume group %s",
is_change_activating(activate) ?
"Activated" : "Deactivated", count, vg->name);
- return (expected_count != count) ? 0 : 1;
+ return (expected_count != count) ? 0 : r;
}
static int _vgchange_monitoring(struct cmd_context *cmd, struct volume_group *vg)
@@ -197,6 +197,19 @@ int vgchange_activate(struct cmd_context *cmd, struct volume_group *vg,
int do_activate = is_change_activating(activate);
/*
+ * We can get here in the odd case where an LV is already active in
+ * a foreign VG, which allows the VG to be accessed by vgchange -a
+ * so the LV can be deactivated.
+ */
+ if (vg->system_id && vg->system_id[0] &&
+ cmd->system_id && cmd->system_id[0] &&
+ strcmp(vg->system_id, cmd->system_id) &&
+ do_activate) {
+ log_error("Cannot activate LVs in a foreign VG.");
+ return ECMD_FAILED;
+ }
+
+ /*
* Safe, since we never write out new metadata here. Required for
* partial activation to work.
*/
@@ -300,38 +313,74 @@ static int _vgchange_clustered(struct cmd_context *cmd,
struct volume_group *vg)
{
int clustered = arg_int_value(cmd, clustered_ARG, 0);
+ const char *lock_type = arg_str_value(cmd, locktype_ARG, NULL);
+ struct lv_list *lvl;
+ struct lv_segment *mirror_seg;
- if (clustered && (vg_is_clustered(vg))) {
- log_error("Volume group \"%s\" is already clustered",
- vg->name);
+ if (find_config_tree_bool(cmd, global_use_lvmlockd_CFG, NULL)) {
+ log_error("lvmlockd requires using the vgchange --lock-type option.");
return 0;
}
- if (!clustered && !(vg_is_clustered(vg))) {
- log_error("Volume group \"%s\" is already not clustered",
- vg->name);
- return 0;
+ if (lock_type && !strcmp(lock_type, "clvm"))
+ clustered = 1;
+
+ if (clustered && vg_is_clustered(vg)) {
+ if (vg->system_id && *vg->system_id)
+ log_warn("WARNING: Clearing invalid system ID %s from volume group %s.",
+ vg->system_id, vg->name);
+ else {
+ log_error("Volume group \"%s\" is already clustered", vg->name);
+ return 0;
+ }
+ }
+
+ if (!clustered && !vg_is_clustered(vg)) {
+ if ((!vg->system_id || !*vg->system_id) && cmd->system_id && *cmd->system_id)
+ log_warn("Setting missing system ID on Volume Group %s to %s.",
+ vg->name, cmd->system_id);
+ else {
+ log_error("Volume group \"%s\" is already not clustered",
+ vg->name);
+ return 0;
+ }
}
if (clustered && !arg_count(cmd, yes_ARG)) {
if (!clvmd_is_running()) {
- if (yes_no_prompt("LVM cluster daemon (clvmd) is not"
- " running.\n"
- "Make volume group \"%s\" clustered"
- " anyway? [y/n]: ", vg->name) == 'n') {
+ if (yes_no_prompt("LVM cluster daemon (clvmd) is not running. "
+ "Make volume group \"%s\" clustered "
+ "anyway? [y/n]: ", vg->name) == 'n') {
log_error("No volume groups changed.");
return 0;
}
} else if (!locking_is_clustered() &&
- (yes_no_prompt("LVM locking type is not clustered.\n"
- "Make volume group \"%s\" clustered"
- " anyway? [y/n]: ", vg->name) == 'n')) {
+ (yes_no_prompt("LVM locking type is not clustered. "
+ "Make volume group \"%s\" clustered "
+ "anyway? [y/n]: ", vg->name) == 'n')) {
log_error("No volume groups changed.");
return 0;
}
+#ifdef CMIRROR_REGION_COUNT_LIMIT
+ dm_list_iterate_items(lvl, &vg->lvs) {
+ if (!lv_is_mirror(lvl->lv))
+ continue;
+ mirror_seg = first_seg(lvl->lv);
+ if ((lvl->lv->size / mirror_seg->region_size) >
+ CMIRROR_REGION_COUNT_LIMIT) {
+ log_error("Unable to convert %s to clustered mode:"
+ " Mirror region size of %s is too small.",
+ vg->name, lvl->lv->name);
+ return 0;
+ }
+ }
+#endif
}
+ if (!vg_set_system_id(vg, clustered ? NULL : cmd->system_id))
+ return_0;
+
if (!vg_set_clustered(vg, clustered))
return_0;
@@ -471,9 +520,362 @@ static int _vgchange_profile(struct cmd_context *cmd,
return 1;
}
+static int _vgchange_locktype(struct cmd_context *cmd,
+ struct volume_group *vg)
+{
+ const char *lock_type = arg_str_value(cmd, locktype_ARG, NULL);
+ struct lv_list *lvl;
+ struct logical_volume *lv;
+ int lv_lock_count = 0;
+
+ /*
+ * This is a special/forced exception to change the lock type to none.
+ * It's needed for recovery cases and skips the normal steps of undoing
+ * the current lock type. It's a way to forcibly get access to a VG
+ * when the normal locking mechanisms are not working.
+ *
+ * It ignores: the current lvm locking config, lvmlockd, the state of
+ * the vg on other hosts, etc. It is meant to just remove any locking
+ * related metadata from the VG (cluster/lock_type flags, lock_type,
+ * lock_args).
+ *
+ * This can be necessary when manually recovering from certain failures.
+ * e.g. when a pv is lost containing the lvmlock lv (holding sanlock
+ * leases), the vg lock_type needs to be changed to none, and then
+ * back to sanlock, which recreates the lvmlock lv and leases.
+ */
+ if (!strcmp(lock_type, "none") && arg_is_set(cmd, force_ARG)) {
+ if (yes_no_prompt("Forcibly change VG %s lock type to none? [y/n]: ", vg->name) == 'n') {
+ log_error("VG lock type not changed.");
+ return 0;
+ }
+
+ vg->status &= ~CLUSTERED;
+ vg->lock_type = "none";
+ vg->lock_args = NULL;
+
+ dm_list_iterate_items(lvl, &vg->lvs)
+ lvl->lv->lock_args = NULL;
+
+ return 1;
+ }
+
+ if (!vg->lock_type) {
+ if (vg_is_clustered(vg))
+ vg->lock_type = "clvm";
+ else
+ vg->lock_type = "none";
+ }
+
+ if (!strcmp(vg->lock_type, lock_type)) {
+ log_warn("New lock_type %s matches the current lock_type %s.",
+ lock_type, vg->lock_type);
+ return 1;
+ }
+
+ /*
+ * When lvm is currently using clvm, this function is just an alternative
+ * to vgchange -c{y,n}, and can:
+ * - change none to clvm
+ * - change clvm to none
+ * - it CANNOT change to or from a lockd type
+ */
+ if (locking_is_clustered()) {
+ if (is_lockd_type(lock_type)) {
+ log_error("Changing to lock type %s requires lvmlockd.", lock_type);
+ return 0;
+ }
+
+ return _vgchange_clustered(cmd, vg);
+ }
+
+ /*
+ * When lvm is currently using lvmlockd, this function can:
+ * - change none to lockd type
+ * - change none to clvm (with warning about not being able to use it)
+ * - change lockd type to none
+ * - change lockd type to clvm (with warning about not being able to use it)
+ * - change clvm to none
+ * - change clvm to lockd type
+ */
+
+ if (lvs_in_vg_activated(vg)) {
+ log_error("Changing VG %s lock type not allowed with active LVs",
+ vg->name);
+ return 0;
+ }
+
+ /* none to clvm */
+ if (!strcmp(vg->lock_type, "none") && !strcmp(lock_type, "clvm")) {
+ log_warn("New clvm lock type will not be usable with lvmlockd.");
+ vg->status |= CLUSTERED;
+ vg->lock_type = "clvm"; /* this is optional */
+ return 1;
+ }
+
+ /* clvm to none */
+ if (!strcmp(vg->lock_type, "clvm") && !strcmp(lock_type, "none")) {
+ vg->status &= ~CLUSTERED;
+ vg->lock_type = "none";
+ return 1;
+ }
+
+ /* clvm to ..., first undo clvm */
+ if (!strcmp(vg->lock_type, "clvm")) {
+ vg->status &= ~CLUSTERED;
+ }
+
+ /*
+ * lockd type to ..., first undo lockd type
+ *
+ * To allow this, we need to do:
+ * lockd_stop_vg();
+ * lockd_free_vg_before();
+ * lockd_free_vg_after();
+ */
+ if (is_lockd_type(vg->lock_type)) {
+ /* FIXME: implement full undoing of the lock_type */
+ log_error("Changing VG %s from lock type %s not yet allowed.",
+ vg->name, vg->lock_type);
+ return 0;
+ }
+
+ /* ... to clvm */
+ if (!strcmp(lock_type, "clvm")) {
+ log_warn("New clvm lock type will not be usable with lvmlockd.");
+ vg->status |= CLUSTERED;
+ vg->lock_type = "clvm"; /* this is optional */
+ vg->system_id = NULL;
+ return 1;
+ }
+
+ /* ... to lockd type */
+ if (is_lockd_type(lock_type)) {
+ /*
+ * For lock_type dlm, lockd_init_vg() will do a single
+ * vg_write() that sets lock_type, sets lock_args, clears
+ * system_id, and sets all LV lock_args to dlm.
+ * For lock_type sanlock, lockd_init_vg() needs to know
+ * how many LV locks are needed so that it can make the
+ * sanlock lv large enough.
+ */
+ dm_list_iterate_items(lvl, &vg->lvs) {
+ lv = lvl->lv;
+
+ if (lockd_lv_uses_lock(lv)) {
+ lv_lock_count++;
+
+ if (!strcmp(lock_type, "dlm"))
+ lv->lock_args = "dlm";
+ }
+ }
+
+ /*
+ * See below. We cannot set valid LV lock_args until stage 1
+ * of the change is done, so we need to skip the validation of
+ * the lock_args during stage 1.
+ */
+ if (!strcmp(lock_type, "sanlock"))
+ vg->skip_validate_lock_args = 1;
+
+ vg->system_id = NULL;
+
+ if (!lockd_init_vg(cmd, vg, lock_type, lv_lock_count)) {
+ log_error("Failed to initialize lock args for lock type %s", lock_type);
+ return 0;
+ }
+
+ /*
+ * For lock_type sanlock, there must be multiple steps
+ * because the VG needs an active lvmlock LV before
+ * LV lock areas can be allocated, which must be done
+ * before LV lock_args are written. So, the LV lock_args
+ * remain unset during the first stage of the conversion.
+ *
+ * Stage 1:
+ * lockd_init_vg() creates and activates the lvmlock LV,
+ * then sets lock_type, sets lock_args, and clears system_id.
+ *
+ * Stage 2:
+ * We get here, and can now set LV lock_args. This uses
+ * the standard code path for allocating LV locks in
+ * vg_write() by setting LV lock_args to "pending",
+ * which tells vg_write() to call lockd_init_lv()
+ * and sets the lv->lock_args value before writing the VG.
+ */
+ if (!strcmp(lock_type, "sanlock")) {
+ dm_list_iterate_items(lvl, &vg->lvs) {
+ lv = lvl->lv;
+ if (lockd_lv_uses_lock(lv))
+ lv->lock_args = "pending";
+ }
+
+ vg->skip_validate_lock_args = 0;
+ }
+
+ return 1;
+ }
+
+ log_error("Unknown lock type");
+ return 0;
+}
+
+/*
+ * This function will not be called unless the local host is allowed to use the
+ * VG. Either the VG has no system_id, or the VG and host have matching
+ * system_ids, or the host has the VG's current system_id in its
+ * extra_system_ids list. This function is not allowed to change the system_id
+ * of a foreign VG (VG owned by another host).
+ */
+static int _vgchange_system_id(struct cmd_context *cmd, struct volume_group *vg)
+{
+ const char *system_id;
+ const char *system_id_arg_str = arg_str_value(cmd, systemid_ARG, NULL);
+
+ /* FIXME Merge with vg_set_system_id() */
+ if (systemid_on_pvs(vg)) {
+ log_error("Metadata format %s does not support this type of system ID.",
+ vg->fid->fmt->name);
+ return 0;
+ }
+
+ if (!(system_id = system_id_from_string(cmd, system_id_arg_str))) {
+ log_error("Unable to set system ID.");
+ return 0;
+ }
+
+ if (!strcmp(vg->system_id, system_id)) {
+ log_error("Volume Group system ID is already \"%s\".", vg->system_id);
+ return 0;
+ }
+
+ if (!*system_id && cmd->system_id && strcmp(system_id, cmd->system_id)) {
+ log_warn("WARNING: Removing the system ID allows unsafe access from other hosts.");
+
+ if (!arg_count(cmd, yes_ARG) &&
+ yes_no_prompt("Remove system ID %s from volume group %s? [y/n]: ",
+ vg->system_id, vg->name) == 'n') {
+ log_error("System ID of volume group %s not changed.", vg->name);
+ return 0;
+ }
+ }
+
+ if (*system_id && (!cmd->system_id || strcmp(system_id, cmd->system_id))) {
+ if (lvs_in_vg_activated(vg)) {
+ log_error("Logical Volumes in VG %s must be deactivated before system ID can be changed.",
+ vg->name);
+ return 0;
+ }
+
+ if (cmd->system_id)
+ log_warn("WARNING: Requested system ID %s does not match local system ID %s.",
+ system_id, cmd->system_id ? : "");
+ else
+ log_warn("WARNING: No local system ID is set.");
+ log_warn("WARNING: Volume group %s might become inaccessible from this machine.",
+ vg->name);
+
+ if (!arg_count(cmd, yes_ARG) &&
+ yes_no_prompt("Set foreign system ID %s on volume group %s? [y/n]: ",
+ system_id, vg->name) == 'n') {
+ log_error("Volume group %s system ID not changed.", vg->name);
+ return 0;
+ }
+ }
+
+ log_verbose("Changing system ID for VG %s from \"%s\" to \"%s\".",
+ vg->name, vg->system_id, system_id);
+
+ vg->system_id = system_id;
+
+ if (vg->lvm1_system_id)
+ *vg->lvm1_system_id = '\0';
+
+ /* update system_id in lvmlockd's record for this vg */
+ if (!lockd_start_vg(cmd, vg))
+ log_debug("Failed to update lvmlockd.");
+
+ return 1;
+}
+
+static int _passes_lock_start_filter(struct cmd_context *cmd,
+ struct volume_group *vg,
+ const int cfg_id)
+{
+ const struct dm_config_node *cn;
+ const struct dm_config_value *cv;
+ const char *str;
+
+ /* undefined list means no restrictions, all vg names pass */
+
+ cn = find_config_tree_array(cmd, cfg_id, NULL);
+ if (!cn)
+ return 1;
+
+ /* with a defined list, the vg name must be included to pass */
+
+ for (cv = cn->v; cv; cv = cv->next) {
+ if (cv->type == DM_CFG_EMPTY_ARRAY)
+ break;
+ if (cv->type != DM_CFG_STRING) {
+ log_error("Ignoring invalid string in lock_start list");
+ continue;
+ }
+ str = cv->v.str;
+ if (!*str) {
+ log_error("Ignoring empty string in config file");
+ continue;
+ }
+
+ /* ignoring tags for now */
+
+ if (!strcmp(str, vg->name))
+ return 1;
+ }
+
+ return 0;
+}
+
+static int _vgchange_lock_start(struct cmd_context *cmd, struct volume_group *vg)
+{
+ const char *start_opt = arg_str_value(cmd, lockopt_ARG, NULL);
+ int auto_opt = 0;
+
+ if (!is_lockd_type(vg->lock_type))
+ return 1;
+
+ if (arg_is_set(cmd, force_ARG))
+ goto do_start;
+
+ /*
+ * Recognize both "auto" and "autonowait" options.
+ * Any waiting is done at the end of vgchange.
+ */
+ if (start_opt && !strncmp(start_opt, "auto", 4))
+ auto_opt = 1;
+
+ if (!_passes_lock_start_filter(cmd, vg, activation_lock_start_list_CFG)) {
+ log_verbose("Not starting %s since it does not pass lock_start_list", vg->name);
+ return 1;
+ }
+
+ if (auto_opt && !_passes_lock_start_filter(cmd, vg, activation_auto_lock_start_list_CFG)) {
+ log_verbose("Not starting %s since it does not pass auto_lock_start_list", vg->name);
+ return 1;
+ }
+
+do_start:
+ return lockd_start_vg(cmd, vg);
+}
+
+static int _vgchange_lock_stop(struct cmd_context *cmd, struct volume_group *vg)
+{
+ return lockd_stop_vg(cmd, vg);
+}
+
static int vgchange_single(struct cmd_context *cmd, const char *vg_name,
struct volume_group *vg,
- void *handle __attribute__((unused)))
+ struct processing_handle *handle __attribute__((unused)))
{
int ret = ECMD_PROCESSED;
unsigned i;
@@ -494,11 +896,14 @@ static int vgchange_single(struct cmd_context *cmd, const char *vg_name,
{ clustered_ARG, &_vgchange_clustered },
{ vgmetadatacopies_ARG, &_vgchange_metadata_copies },
{ metadataprofile_ARG, &_vgchange_profile },
- { profile_ARG, &_vgchange_profile},
- { detachprofile_ARG, &_vgchange_profile},
+ { profile_ARG, &_vgchange_profile },
+ { detachprofile_ARG, &_vgchange_profile },
+ { locktype_ARG, &_vgchange_locktype },
+ { systemid_ARG, &_vgchange_system_id },
};
- if (vg_is_exported(vg)) {
+ if (vg_is_exported(vg) &&
+ !(arg_is_set(cmd, lockstop_ARG) || arg_is_set(cmd, lockstart_ARG))) {
log_error("Volume group \"%s\" is exported", vg_name);
return ECMD_FAILED;
}
@@ -584,18 +989,116 @@ static int vgchange_single(struct cmd_context *cmd, const char *vg_name,
if (!_vgchange_background_polling(cmd, vg))
return_ECMD_FAILED;
+ if (arg_is_set(cmd, lockstart_ARG)) {
+ if (!_vgchange_lock_start(cmd, vg))
+ return_ECMD_FAILED;
+ } else if (arg_is_set(cmd, lockstop_ARG)) {
+ if (!_vgchange_lock_stop(cmd, vg))
+ return_ECMD_FAILED;
+ }
+
return ret;
}
+/*
+ * vgchange can do different things that require different
+ * locking, so look at each of those things here.
+ *
+ * Set up overrides for the default VG locking for various special cases.
+ * The VG lock will be acquired in process_each_vg.
+ *
+ * Acquire the gl lock according to which kind of vgchange command this is.
+ */
+
+static int _lockd_vgchange(struct cmd_context *cmd, int argc, char **argv)
+{
+ /* The default vg lock mode is ex, but these options only need sh. */
+
+ if (!lvmlockd_use() && arg_is_set(cmd, locktype_ARG)) {
+ log_error("Using lock type requires lvmlockd.");
+ return 0;
+ }
+
+ if (!lvmlockd_use() && (arg_is_set(cmd, lockstart_ARG) || arg_is_set(cmd, lockstop_ARG))) {
+ log_error("Using lock start and lock stop requires lvmlockd.");
+ return 0;
+ }
+
+ if (arg_is_set(cmd, activate_ARG) || arg_is_set(cmd, refresh_ARG)) {
+ cmd->lockd_vg_default_sh = 1;
+ /* Allow deactivating if locks fail. */
+ if (is_change_activating((activation_change_t)arg_uint_value(cmd, activate_ARG, CHANGE_AY)))
+ cmd->lockd_vg_enforce_sh = 1;
+ }
+
+ /* Starting a vg lockspace means there are no locks available yet. */
+
+ if (arg_is_set(cmd, lockstart_ARG))
+ cmd->lockd_vg_disable = 1;
+
+ /*
+ * In most cases, lockd_vg does not apply when changing lock type.
+ * (We don't generally allow changing *from* lockd type yet.)
+ * lockd_vg could be called within _vgchange_locktype as needed.
+ */
+
+ if (arg_is_set(cmd, locktype_ARG))
+ cmd->lockd_vg_disable = 1;
+
+ /*
+ * Changing system_id or lock_type must only be done on explicitly
+ * named vgs.
+ */
+
+ if (arg_is_set(cmd, systemid_ARG) || arg_is_set(cmd, locktype_ARG))
+ cmd->command->flags &= ~ALL_VGS_IS_DEFAULT;
+
+ if (arg_is_set(cmd, lockstart_ARG)) {
+ /*
+ * The lockstart condition takes the global lock to serialize
+ * with any other host that tries to remove the VG while this
+	 * tries to start it. (Zero argc means all VGs, in which case
+ * process_each_vg will acquire the global lock.)
+ */
+ if (argc && !lockd_gl(cmd, "sh", 0))
+ return_ECMD_FAILED;
+
+ } else if (arg_is_set(cmd, systemid_ARG) || arg_is_set(cmd, locktype_ARG)) {
+ /*
+ * This is a special case where taking the global lock is
+ * not needed to protect global state, because the change is
+ * only to an existing VG. But, taking the global lock ex is
+ * helpful in this case to trigger a global cache validation
+ * on other hosts, to cause them to see the new system_id or
+ * lock_type.
+ */
+ if (!lockd_gl(cmd, "ex", LDGL_UPDATE_NAMES))
+ return_ECMD_FAILED;
+ }
+
+ return 1;
+}
+
int vgchange(struct cmd_context *cmd, int argc, char **argv)
{
- /* Update commands that can be combined */
+ uint32_t flags = 0;
+ int ret;
+
+ int noupdate =
+ arg_count(cmd, activate_ARG) ||
+ arg_count(cmd, lockstart_ARG) ||
+ arg_count(cmd, lockstop_ARG) ||
+ arg_count(cmd, monitor_ARG) ||
+ arg_count(cmd, poll_ARG) ||
+ arg_count(cmd, refresh_ARG);
+
int update_partial_safe =
arg_count(cmd, deltag_ARG) ||
arg_count(cmd, addtag_ARG) ||
arg_count(cmd, metadataprofile_ARG) ||
arg_count(cmd, profile_ARG) ||
arg_count(cmd, detachprofile_ARG);
+
int update_partial_unsafe =
arg_count(cmd, logicalvolume_ARG) ||
arg_count(cmd, maxphysicalvolumes_ARG) ||
@@ -604,18 +1107,14 @@ int vgchange(struct cmd_context *cmd, int argc, char **argv)
arg_count(cmd, physicalextentsize_ARG) ||
arg_count(cmd, clustered_ARG) ||
arg_count(cmd, alloc_ARG) ||
- arg_count(cmd, vgmetadatacopies_ARG);
+ arg_count(cmd, vgmetadatacopies_ARG) ||
+ arg_count(cmd, locktype_ARG) ||
+ arg_count(cmd, systemid_ARG);
+
int update = update_partial_safe || update_partial_unsafe;
- if (!update &&
- !arg_count(cmd, activate_ARG) &&
- !arg_count(cmd, monitor_ARG) &&
- !arg_count(cmd, poll_ARG) &&
- !arg_count(cmd, refresh_ARG)) {
- log_error("Need 1 or more of -a, -c, -l, -p, -s, -x, "
- "--refresh, --uuid, --alloc, --addtag, --deltag, "
- "--monitor, --poll, --vgmetadatacopies or "
- "--metadatacopies");
+ if (!update && !noupdate) {
+ log_error("Need one or more command options.");
return EINVALID_CMD_LINE;
}
@@ -705,6 +1204,40 @@ int vgchange(struct cmd_context *cmd, int argc, char **argv)
if (!update || !update_partial_unsafe)
cmd->handles_missing_pvs = 1;
- return process_each_vg(cmd, argc, argv, update ? READ_FOR_UPDATE : 0,
- NULL, &vgchange_single);
+ /*
+ * Include foreign VGs that contain active LVs.
+ * That shouldn't happen in general, but if it does by some
+ * mistake, then we want to allow those LVs to be deactivated.
+ */
+ if (arg_is_set(cmd, activate_ARG))
+ cmd->include_active_foreign_vgs = 1;
+
+ if (!_lockd_vgchange(cmd, argc, argv))
+ return_ECMD_FAILED;
+
+ if (update)
+ flags |= READ_FOR_UPDATE;
+ if (arg_is_set(cmd, lockstart_ARG) || arg_is_set(cmd, lockstop_ARG))
+ flags |= READ_ALLOW_EXPORTED;
+
+ ret = process_each_vg(cmd, argc, argv, flags, NULL, &vgchange_single);
+
+	/* Wait for lock-start ops that were initiated in _vgchange_lock_start. */
+
+ if (arg_is_set(cmd, lockstart_ARG)) {
+ const char *start_opt = arg_str_value(cmd, lockopt_ARG, NULL);
+
+ if (!lockd_gl(cmd, "un", 0))
+ stack;
+
+ if (!start_opt || !strcmp(start_opt, "auto")) {
+ log_print_unless_silent("Starting locking. Waiting until locks are ready...");
+ lockd_start_wait(cmd);
+
+ } else if (!strcmp(start_opt, "nowait") || !strcmp(start_opt, "autonowait")) {
+ log_print_unless_silent("Starting locking. VG can only be read until locks are ready.");
+ }
+ }
+
+ return ret;
}
diff --git a/tools/vgck.c b/tools/vgck.c
index b9d05ae1c..16312891b 100644
--- a/tools/vgck.c
+++ b/tools/vgck.c
@@ -18,7 +18,7 @@
static int vgck_single(struct cmd_context *cmd __attribute__((unused)),
const char *vg_name,
struct volume_group *vg,
- void *handle __attribute__((unused)))
+ struct processing_handle *handle __attribute__((unused)))
{
if (!vg_check_status(vg, EXPORTED_VG))
return_ECMD_FAILED;
diff --git a/tools/vgconvert.c b/tools/vgconvert.c
index 10893441c..c4bb37a23 100644
--- a/tools/vgconvert.c
+++ b/tools/vgconvert.c
@@ -17,7 +17,7 @@
static int vgconvert_single(struct cmd_context *cmd, const char *vg_name,
struct volume_group *vg,
- void *handle __attribute__((unused)))
+ struct processing_handle *handle __attribute__((unused)))
{
struct physical_volume *pv, *existing_pv;
struct pvcreate_restorable_params rp;
@@ -94,6 +94,13 @@ static int vgconvert_single(struct cmd_context *cmd, const char *vg_name,
return ECMD_FAILED;
}
+ /* New-style system ID supported? */
+ if (vg->system_id && *vg->system_id && (cmd->fmt->features & FMT_SYSTEMID_ON_PVS)) {
+ log_error("Unable to convert VG %s while it has a system ID set (%s).", vg->name,
+ vg->system_id);
+ return ECMD_FAILED;
+ }
+
/* Attempt to change any LVIDs that are too big */
if (cmd->fmt->features & FMT_RESTRICTED_LVIDS) {
dm_list_iterate_items(lvl, &vg->lvs) {
@@ -207,6 +214,11 @@ int vgconvert(struct cmd_context *cmd, int argc, char **argv)
return EINVALID_CMD_LINE;
}
+ if (arg_is_set(cmd, metadatatype_ARG) && lvmetad_used()) {
+ log_error("lvmetad must be disabled to change metadata types.");
+ return EINVALID_CMD_LINE;
+ }
+
if (arg_int_value(cmd, labelsector_ARG, 0) >= LABEL_SCAN_SECTORS) {
log_error("labelsector must be less than %lu",
LABEL_SCAN_SECTORS);
diff --git a/tools/vgcreate.c b/tools/vgcreate.c
index 01bf421f6..67b593dcc 100644
--- a/tools/vgcreate.c
+++ b/tools/vgcreate.c
@@ -37,7 +37,7 @@ int vgcreate(struct cmd_context *cmd, int argc, char **argv)
argv++;
pvcreate_params_set_defaults(&pp);
- if (!pvcreate_params_validate(cmd, argc, argv, &pp)) {
+ if (!pvcreate_params_validate(cmd, argc, &pp)) {
return EINVALID_CMD_LINE;
}
@@ -50,6 +50,13 @@ int vgcreate(struct cmd_context *cmd, int argc, char **argv)
if (!vgcreate_params_validate(cmd, &vp_new))
return EINVALID_CMD_LINE;
+ /*
+ * Needed to change the global VG namespace,
+ * and to change the set of orphan PVs.
+ */
+ if (!lockd_gl_create(cmd, "ex", vp_new.lock_type))
+ return ECMD_FAILED;
+
lvmcache_seed_infos_from_lvmetad(cmd);
/* Create the new VG */
@@ -71,6 +78,7 @@ int vgcreate(struct cmd_context *cmd, int argc, char **argv)
!vg_set_max_pv(vg, vp_new.max_pv) ||
!vg_set_alloc_policy(vg, vp_new.alloc) ||
!vg_set_clustered(vg, vp_new.clustered) ||
+ !vg_set_system_id(vg, vp_new.system_id) ||
!vg_set_mda_copies(vg, vp_new.vgmetadatacopies))
goto bad_orphan;
@@ -118,14 +126,55 @@ int vgcreate(struct cmd_context *cmd, int argc, char **argv)
if (!vg_write(vg) || !vg_commit(vg))
goto_bad;
+ /*
+ * The VG is initially written without lock_type set, i.e. it starts as
+ * a local VG. lockd_init_vg() then writes the VG a second time with
+ * both lock_type and lock_args set.
+ */
+ if (!lockd_init_vg(cmd, vg, vp_new.lock_type, 0)) {
+ log_error("Failed to initialize lock args for lock type %s",
+ vp_new.lock_type);
+ vg_remove_pvs(vg);
+ vg_remove_direct(vg);
+ goto_bad;
+ }
+
unlock_vg(cmd, VG_ORPHANS);
unlock_vg(cmd, vp_new.vg_name);
backup(vg);
- log_print_unless_silent("%s%colume group \"%s\" successfully created",
- clustered_message, *clustered_message ? 'v' : 'V', vg->name);
+ log_print_unless_silent("%s%colume group \"%s\" successfully created%s%s",
+ clustered_message, *clustered_message ? 'v' : 'V', vg->name,
+ vg->system_id ? " with system ID " : "", vg->system_id ? : "");
+
+ /*
+ * Start the VG lockspace because it will likely be used right away.
+ * Optionally wait for the start to complete so the VG can be fully
+ * used after this command completes (otherwise, the VG can only be
+ * read without locks until the lockspace is done starting.)
+ */
+ if (is_lockd_type(vg->lock_type)) {
+ const char *start_opt = arg_str_value(cmd, lockopt_ARG, NULL);
+
+ if (!lockd_start_vg(cmd, vg)) {
+ log_error("Failed to start locking");
+ goto out;
+ }
+ lockd_gl(cmd, "un", 0);
+
+ if (!start_opt || !strcmp(start_opt, "wait")) {
+ /* It is OK if the user does Ctrl-C to cancel the wait. */
+ log_print_unless_silent("Starting locking. Waiting until locks are ready...");
+ lockd_start_wait(cmd);
+
+ } else if (!strcmp(start_opt, "nowait")) {
+ log_print_unless_silent("Starting locking. VG is read-only until locks are ready.");
+ }
+
+ }
+out:
release_vg(vg);
return ECMD_PROCESSED;
diff --git a/tools/vgdisplay.c b/tools/vgdisplay.c
index 59d741ad8..0127e7cff 100644
--- a/tools/vgdisplay.c
+++ b/tools/vgdisplay.c
@@ -17,12 +17,13 @@
static int vgdisplay_single(struct cmd_context *cmd, const char *vg_name,
struct volume_group *vg,
- void *handle __attribute__((unused)))
+ struct processing_handle *handle __attribute__((unused)))
{
if (arg_count(cmd, activevolumegroups_ARG) && !lvs_in_vg_activated(vg))
return ECMD_PROCESSED;
- vg_check_status(vg, EXPORTED_VG);
+ if (!vg_check_status(vg, EXPORTED_VG))
+ stack;
if (arg_count(cmd, colon_ARG)) {
vgdisplay_colons(vg);
@@ -66,7 +67,6 @@ int vgdisplay(struct cmd_context *cmd, int argc, char **argv)
arg_count(cmd, binary_ARG) ||
arg_count(cmd, noheadings_ARG) ||
arg_count(cmd, options_ARG) ||
- arg_count(cmd, select_ARG) ||
arg_count(cmd, separator_ARG) ||
arg_count(cmd, sort_ARG) || arg_count(cmd, unbuffered_ARG)) {
log_error("Incompatible options selected");
diff --git a/tools/vgexport.c b/tools/vgexport.c
index d9f8efadc..566cc96e4 100644
--- a/tools/vgexport.c
+++ b/tools/vgexport.c
@@ -18,7 +18,7 @@
static int vgexport_single(struct cmd_context *cmd __attribute__((unused)),
const char *vg_name,
struct volume_group *vg,
- void *handle __attribute__((unused)))
+ struct processing_handle *handle __attribute__((unused)))
{
struct pv_list *pvl;
@@ -28,10 +28,29 @@ static int vgexport_single(struct cmd_context *cmd __attribute__((unused)),
goto bad;
}
+ if (is_lockd_type(vg->lock_type)) {
+ struct lv_list *lvl;
+ dm_list_iterate_items(lvl, &vg->lvs) {
+ if (!lockd_lv_uses_lock(lvl->lv))
+ continue;
+
+ if (!lockd_lv(cmd, lvl->lv, "ex", 0)) {
+ log_error("LV %s/%s must be inactive on all hosts before vgexport.",
+ vg->name, display_lvname(lvl->lv));
+ goto bad;
+ }
+
+ if (!lockd_lv(cmd, lvl->lv, "un", 0))
+ goto bad;
+ }
+ }
+
+
if (!archive(vg))
goto_bad;
vg->status |= EXPORTED_VG;
+ vg->system_id = NULL;
dm_list_iterate_items(pvl, &vg->pvs)
pvl->pv->status |= EXPORTED_VG;
@@ -51,12 +70,12 @@ bad:
int vgexport(struct cmd_context *cmd, int argc, char **argv)
{
- if (!argc && !arg_count(cmd, all_ARG)) {
- log_error("Please supply volume groups or use -a for all.");
+ if (!argc && !arg_count(cmd, all_ARG) && !arg_is_set(cmd, select_ARG)) {
+ log_error("Please supply volume groups or use --select for selection or use -a for all.");
return EINVALID_CMD_LINE;
}
- if (argc && arg_count(cmd, all_ARG)) {
+ if (arg_count(cmd, all_ARG) && (argc || arg_is_set(cmd, select_ARG))) {
log_error("No arguments permitted when using -a for all.");
return EINVALID_CMD_LINE;
}
diff --git a/tools/vgextend.c b/tools/vgextend.c
index 2dc169980..581c21127 100644
--- a/tools/vgextend.c
+++ b/tools/vgextend.c
@@ -15,7 +15,13 @@
#include "tools.h"
-static int _restore_pv(struct volume_group *vg, char *pv_name)
+struct vgextend_params {
+ struct pvcreate_params pp;
+ int pv_count;
+ const char *const *pv_names;
+};
+
+static int _restore_pv(struct volume_group *vg, const char *pv_name)
{
struct pv_list *pvl = NULL;
pvl = find_pv_in_vg(vg, pv_name);
@@ -38,13 +44,92 @@ static int _restore_pv(struct volume_group *vg, char *pv_name)
return 1;
}
+static int _vgextend_restoremissing(struct cmd_context *cmd __attribute__((unused)),
+ const char *vg_name, struct volume_group *vg,
+ struct processing_handle *handle)
+{
+ struct vgextend_params *vp = (struct vgextend_params *) handle->custom_handle;
+ int fixed = 0;
+ int i;
+
+ for (i = 0; i < vp->pv_count; i++)
+ if (_restore_pv(vg, vp->pv_names[i]))
+ fixed++;
+
+ if (!fixed) {
+ log_error("No PV has been restored.");
+ return ECMD_FAILED;
+ }
+
+ if (!vg_write(vg) || !vg_commit(vg))
+ return_ECMD_FAILED;
+
+ backup(vg);
+
+ log_print_unless_silent("Volume group \"%s\" successfully extended", vg_name);
+
+ return ECMD_PROCESSED;
+}
+
+static int _vgextend_single(struct cmd_context *cmd, const char *vg_name,
+ struct volume_group *vg, struct processing_handle *handle)
+{
+ struct vgextend_params *vp = (struct vgextend_params *) handle->custom_handle;
+ struct pvcreate_params *pp = &vp->pp;
+ uint32_t mda_copies;
+ uint32_t mda_used;
+ int ret = ECMD_FAILED;
+
+ if (arg_count(cmd, metadataignore_ARG) &&
+ (pp->force == PROMPT) && !pp->yes &&
+ (vg_mda_copies(vg) != VGMETADATACOPIES_UNMANAGED) &&
+ (yes_no_prompt("Override preferred number of copies of VG %s metadata? [y/n]: ", vg_name) == 'n')) {
+ log_error("Volume group %s not changed", vg_name);
+ return ECMD_FAILED;
+ }
+
+ if (!lock_vol(cmd, VG_ORPHANS, LCK_VG_WRITE, NULL)) {
+ log_error("Can't get lock for orphan PVs");
+ return ECMD_FAILED;
+ }
+
+ if (!vg_extend(vg, vp->pv_count, vp->pv_names, pp))
+ goto_out;
+
+ if (arg_count(cmd, metadataignore_ARG)) {
+ mda_copies = vg_mda_copies(vg);
+ mda_used = vg_mda_used_count(vg);
+
+ if ((mda_copies != VGMETADATACOPIES_UNMANAGED) &&
+ (mda_copies != mda_used)) {
+ log_warn("WARNING: Changing preferred number of copies of VG %s metadata from %"PRIu32" to %"PRIu32,
+ vg_name, mda_copies, mda_used);
+ vg_set_mda_copies(vg, mda_used);
+ }
+ }
+
+ log_verbose("Volume group \"%s\" will be extended by %d new physical volumes", vg_name, vp->pv_count);
+
+ if (!vg_write(vg) || !vg_commit(vg))
+ goto_out;
+
+ backup(vg);
+
+ log_print_unless_silent("Volume group \"%s\" successfully extended", vg_name);
+ ret = ECMD_PROCESSED;
+
+out:
+ unlock_vg(cmd, VG_ORPHANS);
+
+ return ret;
+}
+
int vgextend(struct cmd_context *cmd, int argc, char **argv)
{
- const char *vg_name;
- struct volume_group *vg = NULL;
- int r = ECMD_FAILED;
- struct pvcreate_params pp;
- int fixed = 0, i = 0;
+ struct vgextend_params vp;
+ unsigned restoremissing = arg_is_set(cmd, restoremissing_ARG);
+ struct processing_handle *handle;
+ int ret;
if (!argc) {
log_error("Please enter volume group name and "
@@ -52,19 +137,25 @@ int vgextend(struct cmd_context *cmd, int argc, char **argv)
return EINVALID_CMD_LINE;
}
- vg_name = skip_dev_dir(cmd, argv[0], NULL);
- argc--;
- argv++;
-
if (arg_count(cmd, metadatacopies_ARG)) {
log_error("Invalid option --metadatacopies, "
"use --pvmetadatacopies instead.");
return EINVALID_CMD_LINE;
}
- pvcreate_params_set_defaults(&pp);
- if (!pvcreate_params_validate(cmd, argc, argv, &pp)) {
- return EINVALID_CMD_LINE;
- }
+
+ pvcreate_params_set_defaults(&vp.pp);
+ vp.pv_count = argc - 1;
+ vp.pv_names = (const char* const*)(argv + 1);
+
+ if (!pvcreate_params_validate(cmd, vp.pv_count, &vp.pp))
+ return_EINVALID_CMD_LINE;
+
+ if (!(handle = init_processing_handle(cmd))) {
+ log_error("Failed to initialize processing handle.");
+ return ECMD_FAILED;
+ }
+
+ handle->custom_handle = &vp;
/*
* It is always ok to add new PVs to a VG - even if there are
@@ -74,71 +165,15 @@ int vgextend(struct cmd_context *cmd, int argc, char **argv)
*/
cmd->handles_missing_pvs = 1;
- log_verbose("Checking for volume group \"%s\"", vg_name);
- vg = vg_read_for_update(cmd, vg_name, NULL, 0);
- if (vg_read_error(vg)) {
- release_vg(vg);
+ /* Needed to change the set of orphan PVs. */
+ if (!lockd_gl(cmd, "ex", 0))
return_ECMD_FAILED;
- }
- if (!archive(vg))
- goto_bad;
+ ret = process_each_vg(cmd, argc, argv,
+ READ_FOR_UPDATE | ONE_VGNAME_ARG, handle,
+ restoremissing ? &_vgextend_restoremissing : &_vgextend_single);
- if (arg_count(cmd, restoremissing_ARG)) {
- for (i = 0; i < argc; ++i) {
- if (_restore_pv(vg, argv[i]))
- ++ fixed;
- }
- if (!fixed) {
- log_error("No PV has been restored.");
- goto bad;
- }
- } else { /* no --restore, normal vgextend */
- if (!lock_vol(cmd, VG_ORPHANS, LCK_VG_WRITE, NULL)) {
- log_error("Can't get lock for orphan PVs");
- unlock_and_release_vg(cmd, vg, vg_name);
- return ECMD_FAILED;
- }
-
- if (arg_count(cmd, metadataignore_ARG) &&
- (vg_mda_copies(vg) != VGMETADATACOPIES_UNMANAGED) &&
- (pp.force == PROMPT) && !pp.yes &&
- yes_no_prompt("Override preferred number of copies "
- "of VG %s metadata? [y/n]: ",
- vg_name) == 'n') {
- log_error("Volume group %s not changed", vg_name);
- goto bad;
- }
-
- /* extend vg */
- if (!vg_extend(vg, argc, (const char* const*)argv, &pp))
- goto_bad;
-
- if (arg_count(cmd, metadataignore_ARG) &&
- (vg_mda_copies(vg) != VGMETADATACOPIES_UNMANAGED) &&
- (vg_mda_copies(vg) != vg_mda_used_count(vg))) {
- log_warn("WARNING: Changing preferred number of copies of VG %s "
- "metadata from %"PRIu32" to %"PRIu32, vg_name,
- vg_mda_copies(vg), vg_mda_used_count(vg));
- vg_set_mda_copies(vg, vg_mda_used_count(vg));
- }
-
- /* ret > 0 */
- log_verbose("Volume group \"%s\" will be extended by %d new "
- "physical volumes", vg_name, argc);
- }
-
- /* store vg on disk(s) */
- if (!vg_write(vg) || !vg_commit(vg))
- goto_bad;
-
- backup(vg);
- log_print_unless_silent("Volume group \"%s\" successfully extended", vg_name);
- r = ECMD_PROCESSED;
+ destroy_processing_handle(cmd, handle);
-bad:
- if (!arg_count(cmd, restoremissing_ARG))
- unlock_vg(cmd, VG_ORPHANS);
- unlock_and_release_vg(cmd, vg, vg_name);
- return r;
+ return ret;
}
diff --git a/tools/vgimport.c b/tools/vgimport.c
index 7cb93378f..04a59dce5 100644
--- a/tools/vgimport.c
+++ b/tools/vgimport.c
@@ -15,10 +15,10 @@
#include "tools.h"
-static int vgimport_single(struct cmd_context *cmd __attribute__((unused)),
+static int vgimport_single(struct cmd_context *cmd,
const char *vg_name,
struct volume_group *vg,
- void *handle __attribute__((unused)))
+ struct processing_handle *handle __attribute__((unused)))
{
struct pv_list *pvl;
struct physical_volume *pv;
@@ -38,6 +38,9 @@ static int vgimport_single(struct cmd_context *cmd __attribute__((unused)),
vg->status &= ~EXPORTED_VG;
+ if (!is_lockd_type(vg->lock_type))
+ vg->system_id = cmd->system_id ? dm_pool_strdup(vg->vgmem, cmd->system_id) : NULL;
+
dm_list_iterate_items(pvl, &vg->pvs) {
pv = pvl->pv;
pv->status &= ~EXPORTED_VG;
@@ -58,12 +61,12 @@ bad:
int vgimport(struct cmd_context *cmd, int argc, char **argv)
{
- if (!argc && !arg_count(cmd, all_ARG)) {
- log_error("Please supply volume groups or use -a for all.");
+ if (!argc && !arg_count(cmd, all_ARG) && !arg_is_set(cmd, select_ARG)) {
+ log_error("Please supply volume groups or -S for selection or use -a for all.");
return EINVALID_CMD_LINE;
}
- if (argc && arg_count(cmd, all_ARG)) {
+ if (arg_count(cmd, all_ARG) && (argc || arg_is_set(cmd, select_ARG))) {
log_error("No arguments permitted when using -a for all.");
return EINVALID_CMD_LINE;
}
@@ -84,6 +87,17 @@ int vgimport(struct cmd_context *cmd, int argc, char **argv)
cmd->handles_missing_pvs = 1;
}
+ /*
+ * Rescan devices and update lvmetad. lvmetad may hold a copy of the
+ * VG from before it was exported, if it was exported by another host.
+ * We need to reread it to see that it's been exported before we can
+ * import it.
+ */
+ if (lvmetad_active() && !lvmetad_pvscan_all_devs(cmd, NULL)) {
+ log_error("Failed to scan devices.");
+ return ECMD_FAILED;
+ }
+
return process_each_vg(cmd, argc, argv,
READ_FOR_UPDATE | READ_ALLOW_EXPORTED,
NULL,
diff --git a/tools/vgmerge.c b/tools/vgmerge.c
index a17a636c5..c5ac33299 100644
--- a/tools/vgmerge.c
+++ b/tools/vgmerge.c
@@ -20,11 +20,18 @@ static struct volume_group *_vgmerge_vg_read(struct cmd_context *cmd,
{
struct volume_group *vg;
log_verbose("Checking for volume group \"%s\"", vg_name);
- vg = vg_read_for_update(cmd, vg_name, NULL, 0);
+ vg = vg_read_for_update(cmd, vg_name, NULL, 0, 0);
if (vg_read_error(vg)) {
release_vg(vg);
return NULL;
}
+
+ if (is_lockd_type(vg->lock_type)) {
+ log_error("vgmerge not allowed for lock_type %s", vg->lock_type);
+ unlock_and_release_vg(cmd, vg, vg_name);
+ return NULL;
+ }
+
return vg;
}
@@ -194,6 +201,10 @@ int vgmerge(struct cmd_context *cmd, int argc, char **argv)
return EINVALID_CMD_LINE;
}
+ /* Needed to change the global VG namespace. */
+ if (!lockd_gl(cmd, "ex", LDGL_UPDATE_NAMES))
+ return ECMD_FAILED;
+
vg_name_to = skip_dev_dir(cmd, argv[0], NULL);
argc--;
argv++;
diff --git a/tools/vgmknodes.c b/tools/vgmknodes.c
index 9ba06778f..92eb2d0d1 100644
--- a/tools/vgmknodes.c
+++ b/tools/vgmknodes.c
@@ -16,7 +16,7 @@
#include "tools.h"
static int _vgmknodes_single(struct cmd_context *cmd, struct logical_volume *lv,
- void *handle __attribute__((unused)))
+ struct processing_handle *handle __attribute__((unused)))
{
if (arg_count(cmd, refresh_ARG) && lv_is_visible(lv))
if (!lv_refresh(cmd, lv))
diff --git a/tools/vgreduce.c b/tools/vgreduce.c
index 7af5a7683..acd27a5e1 100644
--- a/tools/vgreduce.c
+++ b/tools/vgreduce.c
@@ -83,10 +83,12 @@ static int _make_vg_consistent(struct cmd_context *cmd, struct volume_group *vg)
restart:
vg_mark_partial_lvs(vg, 1);
+PFL();
dm_list_iterate_items(lvl, &vg->lvs) {
lv = lvl->lv;
+PFLA("lv=%s", display_lvname(lv));
/* Are any segments of this LV on missing PVs? */
if (lv->status & PARTIAL_LV) {
if (seg_is_raid(first_seg(lv))) {
@@ -123,7 +125,7 @@ static int _make_vg_consistent(struct cmd_context *cmd, struct volume_group *vg)
/* Or take pv_name instead? */
static int _vgreduce_single(struct cmd_context *cmd, struct volume_group *vg,
struct physical_volume *pv,
- void *handle __attribute__((unused)))
+ struct processing_handle *handle __attribute__((unused)))
{
int r;
@@ -141,6 +143,7 @@ int vgreduce(struct cmd_context *cmd, int argc, char **argv)
{
struct volume_group *vg;
const char *vg_name;
+ uint32_t lockd_state = 0;
int ret = ECMD_FAILED;
int fixed = 1;
int repairing = arg_count(cmd, removemissing_ARG);
@@ -195,7 +198,14 @@ int vgreduce(struct cmd_context *cmd, int argc, char **argv)
init_ignore_suspended_devices(1);
cmd->handles_missing_pvs = 1;
- vg = vg_read_for_update(cmd, vg_name, NULL, READ_ALLOW_EXPORTED);
+ /* Needed to change the set of orphan PVs. */
+ if (!lockd_gl(cmd, "ex", 0))
+ return_ECMD_FAILED;
+
+ if (!lockd_vg(cmd, vg_name, "ex", 0, &lockd_state))
+ return_ECMD_FAILED;
+
+ vg = vg_read_for_update(cmd, vg_name, NULL, READ_ALLOW_EXPORTED, lockd_state);
if (vg_read_error(vg) == FAILED_ALLOCATION ||
vg_read_error(vg) == FAILED_NOTFOUND)
goto_out;
@@ -218,7 +228,7 @@ int vgreduce(struct cmd_context *cmd, int argc, char **argv)
log_verbose("Trying to open VG %s for recovery...", vg_name);
vg = vg_read_for_update(cmd, vg_name, NULL,
- READ_ALLOW_INCONSISTENT | READ_ALLOW_EXPORTED);
+ READ_ALLOW_INCONSISTENT | READ_ALLOW_EXPORTED, lockd_state);
locked |= !vg_read_error(vg);
diff --git a/tools/vgremove.c b/tools/vgremove.c
index 1dce41f24..692d11461 100644
--- a/tools/vgremove.c
+++ b/tools/vgremove.c
@@ -17,9 +17,22 @@
static int vgremove_single(struct cmd_context *cmd, const char *vg_name,
struct volume_group *vg,
- void *handle __attribute__((unused)))
+ struct processing_handle *handle __attribute__((unused)))
{
/*
+ * Though vgremove operates per VG by definition, internally, it
+ * actually means iterating over each LV it contains to do the remove.
+ *
+ * Use processing handle with void_handle.internal_report_for_select=0
+ * for the process_each_lv_in_vg that is called later in this fn.
+ * We need to disable internal selection for process_each_lv_in_vg
+ * here as selection is already done by process_each_vg which calls
+ * vgremove_single. Otherwise selection would be done per-LV and
+ * not per-VG as we intend!
+ */
+ struct processing_handle void_handle = {0};
+
+ /*
* Single force is equivalent to sinle --yes
* Even multiple --yes are equivalent to single --force
* When we require -ff it cannot be replaces with -f -y
@@ -47,13 +60,17 @@ static int vgremove_single(struct cmd_context *cmd, const char *vg_name,
return ECMD_FAILED;
}
}
- if ((ret = process_each_lv_in_vg(cmd, vg, NULL, NULL, 1, NULL,
+
+ if ((ret = process_each_lv_in_vg(cmd, vg, NULL, NULL, 1, &void_handle,
(process_single_lv_fn_t)lvremove_single)) != ECMD_PROCESSED) {
stack;
return ret;
}
}
+ if (!lockd_free_vg_before(cmd, vg))
+ return_ECMD_FAILED;
+
if (!force && !vg_remove_check(vg))
return_ECMD_FAILED;
@@ -62,6 +79,8 @@ static int vgremove_single(struct cmd_context *cmd, const char *vg_name,
if (!vg_remove(vg))
return_ECMD_FAILED;
+ lockd_free_vg_final(cmd, vg);
+
return ECMD_PROCESSED;
}
@@ -69,11 +88,26 @@ int vgremove(struct cmd_context *cmd, int argc, char **argv)
{
int ret;
- if (!argc) {
- log_error("Please enter one or more volume group paths");
+ if (!argc && !arg_is_set(cmd, select_ARG)) {
+ log_error("Please enter one or more volume group paths "
+ "or use --select for selection.");
return EINVALID_CMD_LINE;
}
+ /*
+ * Needed to change the global VG namespace,
+ * and to change the set of orphan PVs.
+ */
+ if (!lockd_gl(cmd, "ex", LDGL_UPDATE_NAMES))
+ return ECMD_FAILED;
+
+ /*
+ * This is a special case: if vgremove is given a tag, it causes
+ * process_each_vg to do lockd_gl(sh) when getting a list of all
+ * VG names. We don't want the gl converted to sh, so disable it.
+ */
+ cmd->lockd_gl_disable = 1;
+
cmd->handles_missing_pvs = 1;
ret = process_each_vg(cmd, argc, argv,
READ_FOR_UPDATE,
diff --git a/tools/vgrename.c b/tools/vgrename.c
index 860ccf196..7e77b5726 100644
--- a/tools/vgrename.c
+++ b/tools/vgrename.c
@@ -17,13 +17,14 @@
static struct volume_group *_get_old_vg_for_rename(struct cmd_context *cmd,
const char *vg_name_old,
- const char *vgid)
+ const char *vgid,
+ uint32_t lockd_state)
{
struct volume_group *vg;
/* FIXME we used to print an error about EXPORTED, but proceeded
nevertheless. */
- vg = vg_read_for_update(cmd, vg_name_old, vgid, READ_ALLOW_EXPORTED);
+ vg = vg_read_for_update(cmd, vg_name_old, vgid, READ_ALLOW_EXPORTED, lockd_state);
if (vg_read_error(vg)) {
release_vg(vg);
return_NULL;
@@ -67,6 +68,7 @@ static int vg_rename_path(struct cmd_context *cmd, const char *old_vg_path,
const char *vgid = NULL, *vg_name, *vg_name_old;
char old_path[NAME_LEN], new_path[NAME_LEN];
struct volume_group *vg = NULL;
+ uint32_t lockd_state = 0;
int lock_vg_old_first = 1;
vg_name_old = skip_dev_dir(cmd, old_vg_path, NULL);
@@ -114,11 +116,14 @@ static int vg_rename_path(struct cmd_context *cmd, const char *old_vg_path,
} else
vgid = NULL;
+ if (!lockd_vg(cmd, vg_name_old, "ex", 0, &lockd_state))
+ return_0;
+
if (strcmp(vg_name_new, vg_name_old) < 0)
lock_vg_old_first = 0;
if (lock_vg_old_first) {
- vg = _get_old_vg_for_rename(cmd, vg_name_old, vgid);
+ vg = _get_old_vg_for_rename(cmd, vg_name_old, vgid, lockd_state);
if (!vg)
return_0;
@@ -130,7 +135,7 @@ static int vg_rename_path(struct cmd_context *cmd, const char *old_vg_path,
if (!_lock_new_vg_for_rename(cmd, vg_name_new))
return_0;
- vg = _get_old_vg_for_rename(cmd, vg_name_old, vgid);
+ vg = _get_old_vg_for_rename(cmd, vg_name_old, vgid, lockd_state);
if (!vg) {
unlock_vg(cmd, vg_name_new);
return_0;
@@ -144,6 +149,9 @@ static int vg_rename_path(struct cmd_context *cmd, const char *old_vg_path,
if (!drop_cached_metadata(vg))
stack;
+ if (!lockd_rename_vg_before(cmd, vg))
+ return_0;
+
/* Change the volume group name */
vg_rename(cmd, vg, vg_name_new);
@@ -171,6 +179,8 @@ static int vg_rename_path(struct cmd_context *cmd, const char *old_vg_path,
}
}
+ lockd_rename_vg_final(cmd, vg, 1);
+
if (!backup(vg))
stack;
if (!backup_remove(cmd, vg_name_old))
@@ -190,6 +200,8 @@ static int vg_rename_path(struct cmd_context *cmd, const char *old_vg_path,
return 1;
error:
+ lockd_rename_vg_final(cmd, vg, 0);
+
if (lock_vg_old_first) {
unlock_vg(cmd, vg_name_new);
unlock_and_release_vg(cmd, vg, vg_name_old);
@@ -207,6 +219,10 @@ int vgrename(struct cmd_context *cmd, int argc, char **argv)
return EINVALID_CMD_LINE;
}
+ /* Needed to change the global VG namespace. */
+ if (!lockd_gl(cmd, "ex", LDGL_UPDATE_NAMES))
+ return_ECMD_FAILED;
+
if (!vg_rename_path(cmd, argv[0], argv[1]))
return_ECMD_FAILED;
diff --git a/tools/vgscan.c b/tools/vgscan.c
index bca98cc6e..7328a096d 100644
--- a/tools/vgscan.c
+++ b/tools/vgscan.c
@@ -17,7 +17,7 @@
static int vgscan_single(struct cmd_context *cmd, const char *vg_name,
struct volume_group *vg,
- void *handle __attribute__((unused)))
+ struct processing_handle *handle __attribute__((unused)))
{
log_print_unless_silent("Found %svolume group \"%s\" using metadata type %s",
vg_is_exported(vg) ? "exported " : "", vg_name,
@@ -47,6 +47,8 @@ int vgscan(struct cmd_context *cmd, int argc, char **argv)
lvmcache_destroy(cmd, 1, 0);
if (arg_count(cmd, cache_long_ARG)) {
+ cmd->include_foreign_vgs = 1;
+
if (lvmetad_active()) {
if (!lvmetad_pvscan_all_devs(cmd, NULL))
return ECMD_FAILED;
diff --git a/tools/vgsplit.c b/tools/vgsplit.c
index 362f85410..7605bc4b0 100644
--- a/tools/vgsplit.c
+++ b/tools/vgsplit.c
@@ -350,7 +350,7 @@ static int _move_cache(struct volume_group *vg_from,
data = seg_lv(first_seg(seg->pool_lv), 0);
meta = first_seg(seg->pool_lv)->metadata_lv;
/* Ensure all components are coming along */
- is_moving = !!_lv_is_in_vg(vg_to, orig);
+ is_moving = _lv_is_in_vg(vg_to, orig);
} else {
if (!dm_list_empty(&seg->lv->segs_using_this_lv) &&
!(cache_seg = get_only_segment_using_this_lv(seg->lv)))
@@ -364,20 +364,20 @@ static int _move_cache(struct volume_group *vg_from,
is_moving = 1;
}
- if (orig && (!!_lv_is_in_vg(vg_to, orig) != is_moving)) {
+ if (orig && (_lv_is_in_vg(vg_to, orig) != is_moving)) {
log_error("Can't split %s and its origin (%s)"
" into separate VGs", lv->name, orig->name);
return 0;
}
- if (data && (!!_lv_is_in_vg(vg_to, data) != is_moving)) {
+ if (data && (_lv_is_in_vg(vg_to, data) != is_moving)) {
log_error("Can't split %s and its cache pool"
" data LV (%s) into separate VGs",
lv->name, data->name);
return 0;
}
- if (meta && (!!_lv_is_in_vg(vg_to, meta) != is_moving)) {
+ if (meta && (_lv_is_in_vg(vg_to, meta) != is_moving)) {
log_error("Can't split %s and its cache pool"
" metadata LV (%s) into separate VGs",
lv->name, meta->name);
@@ -422,7 +422,7 @@ static struct volume_group *_vgsplit_to(struct cmd_context *cmd,
if (vg_read_error(vg_to) == FAILED_EXIST) {
*existing_vg = 1;
release_vg(vg_to);
- vg_to = vg_read_for_update(cmd, vg_name_to, NULL, 0);
+ vg_to = vg_read_for_update(cmd, vg_name_to, NULL, 0, 0);
if (vg_read_error(vg_to)) {
release_vg(vg_to);
@@ -448,11 +448,18 @@ static struct volume_group *_vgsplit_from(struct cmd_context *cmd,
log_verbose("Checking for volume group \"%s\"", vg_name_from);
- vg_from = vg_read_for_update(cmd, vg_name_from, NULL, 0);
+ vg_from = vg_read_for_update(cmd, vg_name_from, NULL, 0, 0);
if (vg_read_error(vg_from)) {
release_vg(vg_from);
return NULL;
}
+
+ if (is_lockd_type(vg_from->lock_type)) {
+ log_error("vgsplit not allowed for lock_type %s", vg_from->lock_type);
+ unlock_and_release_vg(cmd, vg_from, vg_name_from);
+ return NULL;
+ }
+
return vg_from;
}
@@ -492,6 +499,10 @@ int vgsplit(struct cmd_context *cmd, int argc, char **argv)
return ECMD_FAILED;
}
+ /* Needed to change the global VG namespace. */
+ if (!lockd_gl(cmd, "ex", LDGL_UPDATE_NAMES))
+ return_ECMD_FAILED;
+
if (arg_count(cmd, name_ARG))
lv_name = arg_value(cmd, name_ARG);
else
@@ -570,6 +581,7 @@ int vgsplit(struct cmd_context *cmd, int argc, char **argv)
!vg_set_max_pv(vg_to, vp_new.max_pv) ||
!vg_set_alloc_policy(vg_to, vp_new.alloc) ||
!vg_set_clustered(vg_to, vp_new.clustered) ||
+ !vg_set_system_id(vg_to, vp_new.system_id) ||
!vg_set_mda_copies(vg_to, vp_new.vgmetadatacopies))
goto_bad;
}
@@ -661,7 +673,7 @@ int vgsplit(struct cmd_context *cmd, int argc, char **argv)
if (!test_mode()) {
release_vg(vg_to);
vg_to = vg_read_for_update(cmd, vg_name_to, NULL,
- READ_ALLOW_EXPORTED);
+ READ_ALLOW_EXPORTED, 0);
if (vg_read_error(vg_to)) {
log_error("Volume group \"%s\" became inconsistent: "
"please fix manually", vg_name_to);
diff --git a/udev/.gitignore b/udev/.gitignore
new file mode 100644
index 000000000..303d78761
--- /dev/null
+++ b/udev/.gitignore
@@ -0,0 +1,5 @@
+10-dm.rules
+11-dm-lvm.rules
+13-dm-disk.rules
+69-dm-lvm-metad.rules
+95-dm-notify.rules
diff --git a/udev/Makefile.in b/udev/Makefile.in
index 390069b43..cd031aefa 100644
--- a/udev/Makefile.in
+++ b/udev/Makefile.in
@@ -47,7 +47,7 @@ BLKID_RULE=IMPORT{program}=\"${SBIN}\/blkid -o udev -p \$$tempnode\"
endif
ifeq ("@UDEV_SYSTEMD_BACKGROUND_JOBS@", "yes")
-PVSCAN_RULE=ACTION\!=\"remove\", ENV{LVM_PV_GONE}==\"1\", RUN\+=\"@bindir@/systemd-run $(LVM_EXEC)\/lvm pvscan --cache \$$major\:\$$minor\", GOTO=\"lvm_end\"\nENV{SYSTEMD_ALIAS}=\"\/dev\/block\/\$$major:\$$minor\"\nENV{ID_MODEL}=\"LVM PV \$$env{ID_FS_UUID_ENC} on \/dev\/\$$name\"\nENV{SYSTEMD_WANTS}=\"lvm2-pvscan@\$$major:\$$minor.service\"
+PVSCAN_RULE=ACTION\!=\"remove\", ENV{LVM_PV_GONE}==\"1\", RUN\+=\"@bindir@/systemd-run $(LVM_EXEC)\/lvm pvscan --cache \$$major\:\$$minor\", GOTO=\"lvm_end\"\nENV{SYSTEMD_ALIAS}=\"\/dev\/block\/\$$major:\$$minor\"\nENV{ID_MODEL}=\"LVM PV \$$env{ID_FS_UUID_ENC} on \/dev\/\$$name\"\nENV{SYSTEMD_WANTS}\+=\"lvm2-pvscan@\$$major:\$$minor.service\"
else
PVSCAN_RULE=RUN\+\=\"$(LVM_EXEC)/lvm pvscan --background --cache --activate ay --major \$$major --minor \$$minor\", ENV{LVM_SCANNED}=\"1\"
endif