summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Teigland <teigland@redhat.com>2014-12-02 14:05:49 -0600
committerDavid Teigland <teigland@redhat.com>2014-12-03 16:10:18 -0600
commit03c5d164d3885f1475ee7c09c61563285ee9eb11 (patch)
tree3a84fae31cb8d10c64134c3a4e32bcafdd1372ab
parent24e7081b2742f3731e97dda8c79972ed328e5020 (diff)
downloadlvm2-dev-dct-lvmlockd-F.tar.gz
lvmlockd: add daemondev-dct-lvmlockd-F
-rw-r--r--configure.in1
-rw-r--r--daemons/Makefile.in8
-rw-r--r--daemons/lvmlockd/Makefile.in52
-rw-r--r--daemons/lvmlockd/lvmlock.c742
-rw-r--r--daemons/lvmlockd/lvmlockd-client.h4
-rw-r--r--daemons/lvmlockd/lvmlockd-core.c5300
-rw-r--r--daemons/lvmlockd/lvmlockd-dlm.c529
-rw-r--r--daemons/lvmlockd/lvmlockd-internal.h317
-rw-r--r--daemons/lvmlockd/lvmlockd-sanlock.c1453
-rw-r--r--man/lvmlockd.8.in829
10 files changed, 9232 insertions, 3 deletions
diff --git a/configure.in b/configure.in
index 02b477566..c87901cf0 100644
--- a/configure.in
+++ b/configure.in
@@ -1837,6 +1837,7 @@ daemons/dmeventd/plugins/mirror/Makefile
daemons/dmeventd/plugins/snapshot/Makefile
daemons/dmeventd/plugins/thin/Makefile
daemons/lvmetad/Makefile
+daemons/lvmlockd/Makefile
conf/Makefile
conf/example.conf
conf/command_profile_template.profile
diff --git a/daemons/Makefile.in b/daemons/Makefile.in
index 9a7351681..ba9c489a2 100644
--- a/daemons/Makefile.in
+++ b/daemons/Makefile.in
@@ -15,7 +15,7 @@ srcdir = @srcdir@
top_srcdir = @top_srcdir@
top_builddir = @top_builddir@
-.PHONY: dmeventd clvmd cmirrord lvmetad
+.PHONY: dmeventd clvmd cmirrord lvmetad lvmlockd
ifneq ("@CLVMD@", "none")
SUBDIRS += clvmd
@@ -36,8 +36,12 @@ ifeq ("@BUILD_LVMETAD@", "yes")
SUBDIRS += lvmetad
endif
+ifeq ("@BUILD_LVMLOCKD@", "yes")
+ SUBDIRS += lvmlockd
+endif
+
ifeq ($(MAKECMDGOALS),distclean)
- SUBDIRS = clvmd cmirrord dmeventd lvmetad
+ SUBDIRS = clvmd cmirrord dmeventd lvmetad lvmlockd
endif
include $(top_builddir)/make.tmpl
diff --git a/daemons/lvmlockd/Makefile.in b/daemons/lvmlockd/Makefile.in
new file mode 100644
index 000000000..12a85db3a
--- /dev/null
+++ b/daemons/lvmlockd/Makefile.in
@@ -0,0 +1,52 @@
+#
+# Copyright (C) 2011-2012 Red Hat, Inc.
+#
+# This file is part of LVM2.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU Lesser General Public License v.2.1.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+top_builddir = @top_builddir@
+
+SOURCES = \
+ lvmlockd-core.c \
+ lvmlockd-sanlock.c \
+ lvmlockd-dlm.c
+
+TARGETS = lvmlockd lvmlock
+
+.PHONY: install_lvmlockd
+
+include $(top_builddir)/make.tmpl
+
+INCLUDES += -I$(top_srcdir)/libdaemon/server
+LVMLIBS = -ldaemonserver $(LVMINTERNAL_LIBS) -ldevmapper
+
+LIBS += $(PTHREAD_LIBS) -ldlm_lt -lsanlock
+
+LDFLAGS += -L$(top_builddir)/libdaemon/server
+CLDFLAGS += -L$(top_builddir)/libdaemon/server -D_GNU_SOURCE
+
+lvmlockd: $(OBJECTS) $(top_builddir)/libdaemon/client/libdaemonclient.a \
+ $(top_builddir)/libdaemon/server/libdaemonserver.a
+ $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJECTS) \
+ $(DL_LIBS) $(LVMLIBS) $(LIBS) -rdynamic
+
+lvmlock: lvmlock.o $(top_builddir)/libdaemon/client/libdaemonclient.a \
+ $(top_builddir)/libdaemon/server/libdaemonserver.a
+ $(CC) $(CFLAGS) $(LDFLAGS) -o $@ lvmlock.o \
+ $(DL_LIBS) $(LVMLIBS) -rdynamic
+
+install_lvmlockd: lvmlockd
+ $(INSTALL_PROGRAM) -D $< $(sbindir)/$(<F)
+
+install_lvm2: install_lvmlockd
+
+install: install_lvm2
diff --git a/daemons/lvmlockd/lvmlock.c b/daemons/lvmlockd/lvmlock.c
new file mode 100644
index 000000000..d9472b685
--- /dev/null
+++ b/daemons/lvmlockd/lvmlock.c
@@ -0,0 +1,742 @@
+#define _GNU_SOURCE
+#include "configure.h"
+#include "lvmlockd-client.h"
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <string.h>
+#include <signal.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+static int quit;
+static int info;
+static int dump;
+static int wait_opt;
+static int force_opt;
+static int gl_enable;
+static int gl_disable;
+static int stop_lockspaces;
+static char *able_vg_name;
+
+static int cmd_pipe[2];
+static int cmd_argc;
+static char *cmd_name;
+static char **cmd_argv;
+
+#define DUMP_SOCKET_NAME "lvmlockd-dump.sock"
+#define DUMP_BUF_SIZE (1024 * 1024)
+static char dump_buf[DUMP_BUF_SIZE];
+static int dump_len;
+static struct sockaddr_un dump_addr;
+static socklen_t dump_addrlen;
+
+daemon_handle _lvmlockd;
+
+#define log_debug(fmt, args...) \
+do { \
+ printf(fmt "\n", ##args); \
+} while (0)
+
+#define log_error(fmt, args...) \
+do { \
+ printf(fmt "\n", ##args); \
+} while (0)
+
+#define MAX_LINE 512
+
+/* copied from lvmlockd-internal.h */
+#define MAX_NAME 64
+#define MAX_ARGS 64
+
+/*
+ * lvmlockd dumps the client info before the lockspaces,
+ * so we can look up client info when printing lockspace info.
+ */
+
+#define MAX_CLIENTS 100
+
+struct client_info {
+ uint32_t client_id;
+ int pid;
+ char name[MAX_NAME+1];
+};
+
+static struct client_info clients[MAX_CLIENTS];
+static int num_clients;
+
+/*
+ * Parse an "info=client" dump line and record the client's id, pid and
+ * name so later ls/lk lines can be annotated with the owning client.
+ *
+ * Fixes: bound the table at MAX_CLIENTS (overflowing clients[] on a
+ * busy daemon), and bound the name conversion at MAX_NAME (64) chars
+ * so an oversized dump field cannot overflow name[] or the copy below.
+ */
+static void save_client_info(char *line)
+{
+	uint32_t pid = 0;
+	int fd = 0;
+	int pi = 0;
+	uint32_t client_id = 0;
+	char name[MAX_NAME+1] = { 0 };
+
+	if (num_clients >= MAX_CLIENTS)
+		return;
+
+	sscanf(line, "info=client pid=%u fd=%d pi=%d id=%u name=%64s",
+	       &pid, &fd, &pi, &client_id, name);
+
+	clients[num_clients].client_id = client_id;
+	clients[num_clients].pid = pid;
+	strncpy(clients[num_clients].name, name, MAX_NAME);
+	num_clients++;
+}
+
+/*
+ * Look up a client recorded by save_client_info() and copy out its pid
+ * and name.  Outputs are left untouched when client_id is unknown, so
+ * callers must pre-initialize them (they do: 0 / zeroed buffer).
+ */
+static void find_client_info(uint32_t client_id, uint32_t *pid, char *cl_name)
+{
+ int i;
+
+ for (i = 0; i < num_clients; i++) {
+ if (clients[i].client_id == client_id) {
+ *pid = clients[i].pid;
+ strcpy(cl_name, clients[i].name);
+ return;
+ }
+ }
+}
+
+/*
+ * Print one "info=local_vg" dump line as "VG <name> system_id=<id> <uuid>".
+ * lvmlockd emits "." for an empty system id; translate it to "none".
+ *
+ * Fix: %s conversions are bounded at 64 (MAX_NAME) chars; the source
+ * line may be up to MAX_LINE (512) bytes, so unbounded %s could
+ * overflow the 65-byte buffers.
+ */
+static void format_info_local_vg(char *line)
+{
+	char vg_name[MAX_NAME+1] = { 0 };
+	char vg_uuid[MAX_NAME+1] = { 0 };
+	char vg_sysid[MAX_NAME+1] = { 0 };
+
+	sscanf(line, "info=local_vg vg_name=%64s vg_uuid=%64s vg_sysid=%64s",
+	       vg_name, vg_uuid, vg_sysid);
+
+	if (strlen(vg_sysid) == 1 && vg_sysid[0] == '.')
+		strcpy(vg_sysid, "none");
+
+	printf("VG %s system_id=%s %s\n", vg_name, vg_sysid, vg_uuid);
+}
+
+/*
+ * Print one "info=ls" (lockspace) dump line as a VG line plus an LS line.
+ *
+ * Fix: bound every %s conversion (buffers are MAX_NAME+1 / MAX_ARGS+1
+ * = 65 bytes; the input line may be up to 512 bytes).
+ */
+static void format_info_ls(char *line)
+{
+	char ls_name[MAX_NAME+1] = { 0 };
+	char vg_name[MAX_NAME+1] = { 0 };
+	char vg_uuid[MAX_NAME+1] = { 0 };
+	char vg_sysid[MAX_NAME+1] = { 0 };
+	char lock_args[MAX_ARGS+1] = { 0 };
+	char lock_type[MAX_NAME+1] = { 0 };
+
+	sscanf(line, "info=ls ls_name=%64s vg_name=%64s vg_uuid=%64s vg_sysid=%64s vg_args=%64s lm_type=%64s",
+	       ls_name, vg_name, vg_uuid, vg_sysid, lock_args, lock_type);
+
+	printf("\n");
+
+	printf("VG %s lock_type=%s %s\n", vg_name, lock_type, vg_uuid);
+
+	printf("LS %s %s\n", lock_type, ls_name);
+}
+
+/*
+ * Print one "info=ls_action" dump line as "OP <op> pid <pid> (<name>)".
+ * NOTE(review): unlike the other formatters this printf has no trailing
+ * newline (same as format_info_r_action) -- presumably intentional;
+ * confirm before changing.
+ *
+ * Fix: bound the %s conversions at 64 chars (65-byte buffers).
+ */
+static void format_info_ls_action(char *line)
+{
+	uint32_t client_id = 0;
+	char flags[MAX_NAME+1] = { 0 };
+	char version[MAX_NAME+1] = { 0 };
+	char op[MAX_NAME+1] = { 0 };
+	uint32_t pid = 0;
+	char cl_name[MAX_NAME+1] = { 0 };
+
+	sscanf(line, "info=ls_action client_id=%u %64s %64s op=%64s",
+	       &client_id, flags, version, op);
+
+	find_client_info(client_id, &pid, cl_name);
+
+	printf("OP %s pid %u (%s)", op, pid, cl_name);
+}
+
+/*
+ * Print one "info=r" (resource) dump line.  When the resource mode is
+ * not "un" there will be following lk lines, so the name/type are
+ * handed back to the caller instead of printing here.
+ *
+ * Fix: bound the %s conversions -- r_type and mode are only 4 bytes
+ * (%3s), the others 65 bytes (%64s); the input line may be 512 bytes.
+ */
+static void format_info_r(char *line, char *r_name_out, char *r_type_out)
+{
+	char r_name[MAX_NAME+1] = { 0 };
+	char r_type[4] = { 0 };
+	char mode[4] = { 0 };
+	char sh_count[MAX_NAME+1] = { 0 };
+	uint32_t ver = 0;
+
+	sscanf(line, "info=r name=%64s type=%3s mode=%3s %64s version=%u",
+	       r_name, r_type, mode, sh_count, &ver);
+
+	/* when mode is not un, wait and print each lk line */
+
+	if (strcmp(mode, "un")) {
+		strcpy(r_name_out, r_name);
+		strcpy(r_type_out, r_type);
+		return;
+	}
+
+	/* when mode is un, there will be no lk lines, so print now */
+
+	if (!strcmp(r_type, "gl")) {
+		printf("LK GL un ver %4u\n", ver);
+
+	} else if (!strcmp(r_type, "vg")) {
+		printf("LK VG un ver %4u\n", ver);
+
+	} else if (!strcmp(r_type, "lv")) {
+		printf("LK LV un %s\n", r_name);
+	}
+}
+
+/*
+ * Print one "info=lk" (lock) dump line, using the resource name/type
+ * saved from the preceding "info=r" line by the caller.
+ *
+ * Fix: bound the %s conversions (mode[] is 4 bytes -> %3s, flags[] is
+ * 65 bytes -> %64s) against oversized dump fields.
+ */
+static void format_info_lk(char *line, char *r_name, char *r_type)
+{
+	char mode[4] = { 0 };
+	uint32_t ver = 0;
+	char flags[MAX_NAME+1] = { 0 };
+	uint32_t client_id = 0;
+	uint32_t pid = 0;
+	char cl_name[MAX_NAME+1] = { 0 };
+
+	if (!r_name[0] || !r_type[0]) {
+		printf("format_info_lk error r_name %s r_type %s\n", r_name, r_type);
+		printf("%s\n", line);
+		return;
+	}
+
+	sscanf(line, "info=lk mode=%3s version=%u %64s client_id=%u",
+	       mode, &ver, flags, &client_id);
+
+	find_client_info(client_id, &pid, cl_name);
+
+	if (!strcmp(r_type, "gl")) {
+		printf("LK GL %s ver %4u pid %u (%s)\n", mode, ver, pid, cl_name);
+
+	} else if (!strcmp(r_type, "vg")) {
+		printf("LK VG %s ver %4u pid %u (%s)\n", mode, ver, pid, cl_name);
+
+	} else if (!strcmp(r_type, "lv")) {
+		printf("LK LV %s %s\n", mode, r_name);
+	}
+}
+
+/*
+ * Print one "info=r_action" dump line: a pending op, or for "lock" a
+ * lock-waiter ("LW") line.  Uses the resource name/type saved from the
+ * preceding "info=r" line by the caller.
+ *
+ * Fix: bound every %s conversion (rt[]/mode[] are 4 bytes -> %3s, the
+ * rest 65 bytes -> %64s) against oversized dump fields.
+ */
+static void format_info_r_action(char *line, char *r_name, char *r_type)
+{
+	uint32_t client_id = 0;
+	char flags[MAX_NAME+1] = { 0 };
+	char version[MAX_NAME+1] = { 0 };
+	char op[MAX_NAME+1] = { 0 };
+	char rt[4] = { 0 };
+	char mode[4] = { 0 };
+	char lm[MAX_NAME+1] = { 0 };
+	char result[MAX_NAME+1] = { 0 };
+	char lm_rv[MAX_NAME+1] = { 0 };
+	uint32_t pid = 0;
+	char cl_name[MAX_NAME+1] = { 0 };
+
+	if (!r_name[0] || !r_type[0]) {
+		printf("format_info_r_action error r_name %s r_type %s\n", r_name, r_type);
+		printf("%s\n", line);
+		return;
+	}
+
+	sscanf(line, "info=r_action client_id=%u %64s %64s op=%64s rt=%3s mode=%3s %64s %64s %64s",
+	       &client_id, flags, version, op, rt, mode, lm, result, lm_rv);
+
+	find_client_info(client_id, &pid, cl_name);
+
+	if (strcmp(op, "lock")) {
+		printf("OP %s pid %u (%s)", op, pid, cl_name);
+		return;
+	}
+
+	if (!strcmp(r_type, "gl")) {
+		printf("LW GL %s ver %4u pid %u (%s)\n", mode, 0, pid, cl_name);
+
+	} else if (!strcmp(r_type, "vg")) {
+		printf("LW VG %s ver %4u pid %u (%s)\n", mode, 0, pid, cl_name);
+
+	} else if (!strcmp(r_type, "lv")) {
+		printf("LW LV %s %s\n", mode, r_name);
+	}
+}
+
+/*
+ * Dispatch one dump line to its formatter.
+ *
+ * Fix: r_name/r_type must persist across calls -- an "info=r" line
+ * records the current resource, and the following "info=lk" /
+ * "info=r_action" lines refer back to it ("will use info from previous
+ * r").  They were automatic locals, so each call started with
+ * uninitialized garbage and the lk/r_action branches never saw the
+ * saved resource; make them static (zero-initialized, retained).
+ */
+static void format_info_line(char *line)
+{
+	static char r_name[MAX_NAME+1];
+	static char r_type[MAX_NAME+1];
+
+	if (!strncmp(line, "info=client ", strlen("info=client "))) {
+		save_client_info(line);
+
+	} else if (!strncmp(line, "info=local_vg ", strlen("info=local_vg "))) {
+		format_info_local_vg(line);
+
+	} else if (!strncmp(line, "info=ls ", strlen("info=ls "))) {
+		format_info_ls(line);
+
+	} else if (!strncmp(line, "info=ls_action ", strlen("info=ls_action "))) {
+		format_info_ls_action(line);
+
+	} else if (!strncmp(line, "info=r ", strlen("info=r "))) {
+		memset(r_name, 0, sizeof(r_name));
+		memset(r_type, 0, sizeof(r_type));
+		format_info_r(line, r_name, r_type);
+
+	} else if (!strncmp(line, "info=lk ", strlen("info=lk "))) {
+		/* will use info from previous r */
+		format_info_lk(line, r_name, r_type);
+
+	} else if (!strncmp(line, "info=r_action ", strlen("info=r_action "))) {
+		/* will use info from previous r */
+		format_info_r_action(line, r_name, r_type);
+	} else {
+		printf("UN %s\n", line);
+	}
+}
+
+/*
+ * Split dump_buf into newline/NUL-terminated lines and format each one.
+ *
+ * Fix: a dump line longer than MAX_LINE previously overflowed line[];
+ * flush early when the buffer is full (the oversized line is split,
+ * but memory is no longer corrupted).
+ */
+static void format_info(void)
+{
+	char line[MAX_LINE];
+	int i, j;
+
+	j = 0;
+	memset(line, 0, sizeof(line));
+
+	for (i = 0; i < dump_len; i++) {
+		line[j++] = dump_buf[i];
+
+		if ((line[j-1] == '\n') || (line[j-1] == '\0') ||
+		    (j == MAX_LINE - 1)) {
+			format_info_line(line);
+			j = 0;
+			memset(line, 0, sizeof(line));
+		}
+	}
+}
+
+
+/*
+ * Build a daemon_request named req_name, extend it with the
+ * NULL-terminated "name = fmt", value va_args, send it to lvmlockd and
+ * return the reply.  The caller must daemon_reply_destroy() the reply.
+ * NOTE(review): uses va_list; <stdarg.h> is presumably pulled in via
+ * the daemon client headers -- confirm.
+ */
+static daemon_reply _lvmlockd_send(const char *req_name, ...)
+{
+ va_list ap;
+ daemon_reply repl;
+ daemon_request req;
+
+ req = daemon_request_make(req_name);
+
+ va_start(ap, req_name);
+ daemon_request_extend_v(req, ap);
+ va_end(ap);
+
+ repl = daemon_send(_lvmlockd, req);
+
+ daemon_request_destroy(req);
+
+ return repl;
+}
+
+/*
+ * Extract the operation result from an lvmlockd reply.
+ * Returns 1 and sets *result on success; returns 0 (leaving *result
+ * unchanged) when the reply carries an error or is malformed, so
+ * callers must not rely on *result in that case.
+ *
+ * Fix: default result_flags to "none" instead of NULL -- passing NULL
+ * to printf %s is undefined behavior.
+ */
+static int _lvmlockd_result(daemon_reply reply, int *result)
+{
+	int reply_result;
+	const char *reply_flags;
+	const char *lock_type;
+
+	if (reply.error) {
+		log_error("lvmlockd_result reply error %d", reply.error);
+		return 0;
+	}
+
+	if (strcmp(daemon_reply_str(reply, "response", ""), "OK")) {
+		log_error("lvmlockd_result bad response");
+		return 0;
+	}
+
+	/* FIXME: using -1000 is dumb */
+
+	reply_result = daemon_reply_int(reply, "op_result", -1000);
+	if (reply_result == -1000) {
+		log_error("lvmlockd_result no op_result");
+		return 0;
+	}
+
+	/* The lock_type that lvmlockd used for locking. */
+	lock_type = daemon_reply_str(reply, "lock_type", "none");
+
+	*result = reply_result;
+
+	reply_flags = daemon_reply_str(reply, "result_flags", "none");
+
+	log_debug("lvmlockd_result %d %s lm %s", reply_result, reply_flags, lock_type);
+	return 1;
+}
+
+/*
+ * Ask lvmlockd to exit.  Returns 0, or the daemon_send error code.
+ */
+static int do_quit(void)
+{
+ daemon_reply reply;
+ int rv = 0;
+
+ reply = daemon_send_simple(_lvmlockd, "quit", NULL);
+
+ if (reply.error) {
+ log_error("reply error %d", reply.error);
+ rv = reply.error;
+ }
+
+ daemon_reply_destroy(reply);
+ return rv;
+}
+
+/*
+ * Create and bind the abstract-namespace datagram socket (leading NUL
+ * in sun_path) on which lvmlockd sends its dump/info data.
+ * Returns the socket fd, or a negative value on error.
+ *
+ * Fix: close the socket when bind fails (the fd was leaked), and
+ * preserve errno across close() so a meaningful -errno is returned.
+ */
+static int setup_dump_socket(void)
+{
+	int s, rv;
+
+	s = socket(AF_LOCAL, SOCK_DGRAM, 0);
+	if (s < 0)
+		return s;
+
+	memset(&dump_addr, 0, sizeof(dump_addr));
+	dump_addr.sun_family = AF_LOCAL;
+	strcpy(&dump_addr.sun_path[1], DUMP_SOCKET_NAME);
+	dump_addrlen = sizeof(sa_family_t) + strlen(dump_addr.sun_path+1) + 1;
+
+	rv = bind(s, (struct sockaddr *) &dump_addr, dump_addrlen);
+	if (rv < 0) {
+		rv = -errno;
+		close(s);
+		return rv;
+	}
+
+	return s;
+}
+
+/*
+ * Request "info" or "dump" data from lvmlockd, receive it over the dump
+ * socket into dump_buf, and either print it raw (dump) or formatted
+ * (info).  Returns 0 on success, negative on error.
+ *
+ * Fixes: rv is now initialized -- the "!dump_len" path previously
+ * returned an uninitialized value; the reply is also destroyed on the
+ * error path instead of being leaked.
+ */
+static int do_dump(const char *req_name)
+{
+	daemon_reply reply;
+	int result;
+	int fd, rv = 0;
+
+	fd = setup_dump_socket();
+	if (fd < 0) {
+		log_error("socket error %d", fd);
+		return fd;
+	}
+
+	reply = daemon_send_simple(_lvmlockd, req_name, NULL);
+
+	if (reply.error) {
+		log_error("reply error %d", reply.error);
+		rv = reply.error;
+		daemon_reply_destroy(reply);
+		goto out;
+	}
+
+	result = daemon_reply_int(reply, "result", 0);
+	dump_len = daemon_reply_int(reply, "dump_len", 0);
+
+	daemon_reply_destroy(reply);
+
+	if (result < 0)
+		log_error("result %d", result);
+
+	if (!dump_len)
+		goto out;
+
+	memset(dump_buf, 0, sizeof(dump_buf));
+
+	rv = recvfrom(fd, dump_buf, dump_len, MSG_WAITALL,
+		      (struct sockaddr *)&dump_addr, &dump_addrlen);
+	if (rv < 0) {
+		log_error("recvfrom error %d %d", rv, errno);
+		rv = -errno;
+		goto out;
+	}
+
+	rv = 0;
+	if ((info && dump) || !strcmp(req_name, "dump"))
+		printf("%s\n", dump_buf);
+	else
+		format_info();
+out:
+	close(fd);
+	return rv;
+}
+
+/*
+ * Send an enable_gl/disable_gl request for able_vg_name to lvmlockd.
+ * Returns 0 on success, negative on error.
+ *
+ * Fix: initialize result -- _lvmlockd_result() returns 0 without
+ * setting *result on early errors, so result (logged and returned as
+ * rv) was read uninitialized.
+ */
+static int do_able(const char *req_name)
+{
+	daemon_reply reply;
+	int result = -1;
+	int rv;
+
+	reply = _lvmlockd_send(req_name,
+				"cmd = %s", "lvmlock",
+				"pid = %d", getpid(),
+				"vg_name = %s", able_vg_name,
+				NULL);
+
+	if (!_lvmlockd_result(reply, &result)) {
+		log_error("lvmlockd result %d", result);
+		rv = result;
+	} else {
+		rv = 0;
+	}
+
+	daemon_reply_destroy(reply);
+	return rv;
+}
+
+/*
+ * Ask lvmlockd to stop all lockspaces, passing the wait/force options
+ * collected from the command line.  Returns 0 on success.
+ *
+ * Fix: initialize result -- _lvmlockd_result() returns 0 without
+ * setting *result on early errors, so result was read uninitialized
+ * (same defect as do_able).
+ */
+static int do_stop_lockspaces(void)
+{
+	daemon_reply reply;
+	char opts[32];
+	int result = -1;
+	int rv;
+
+	memset(opts, 0, sizeof(opts));
+
+	if (wait_opt)
+		strcat(opts, "wait ");
+	if (force_opt)
+		strcat(opts, "force ");
+
+	reply = _lvmlockd_send("stop_all",
+				"cmd = %s", "lvmlock",
+				"pid = %d", getpid(),
+				"opts = %s", opts[0] ? opts : "none",
+				NULL);
+
+	if (!_lvmlockd_result(reply, &result)) {
+		log_error("lvmlockd result %d", result);
+		rv = result;
+	} else {
+		rv = 0;
+	}
+
+	daemon_reply_destroy(reply);
+	return rv;
+}
+
+/*
+ * Print command-line usage.
+ * Fix: stray '>' in the --force line ("0|1>" -> "0|1"), matching --wait.
+ */
+static void print_usage(void)
+{
+	printf("lvmlock options\n");
+	printf("Options:\n");
+	printf("--help | -h\n");
+	printf("  Show this help information.\n");
+	printf("--quit | -q\n");
+	printf("  Tell lvmlockd to quit.\n");
+	printf("--info | -i\n");
+	printf("  Print lock state information from lvmlockd.\n");
+	printf("--dump | -d\n");
+	printf("  Print log buffer from lvmlockd.\n");
+	printf("--wait | -w 0|1\n");
+	printf("  Wait option for other commands.\n");
+	printf("--force | -f 0|1\n");
+	printf("  Force option for other commands.\n");
+	printf("--stop-lockspaces | -S\n");
+	printf("  Stop all lockspaces.\n");
+	printf("--gl-enable <vg_name>\n");
+	printf("  Tell lvmlockd to enable the global lock in a sanlock vg.\n");
+	printf("--gl-disable <vg_name>\n");
+	printf("  Tell lvmlockd to disable the global lock in a sanlock vg.\n");
+}
+
+/*
+ * Parse command-line options into the file-scope flags, and when
+ * --command/-c is given, build cmd_argv = { cmd_name, remaining argv,
+ * NULL } for the later execv in run_command().
+ * Returns 0 on success, -ENOMEM on allocation failure.
+ *
+ * NOTE(review): --sleep/-s is declared in long_options and the
+ * optstring but has no case below, so it currently falls through to
+ * default and exits -- confirm whether it should be handled or removed.
+ */
+static int read_options(int argc, char *argv[])
+{
+ int option_index = 0;
+ int i, j, c, len;
+
+ static struct option long_options[] = {
+ {"help", no_argument, 0, 'h' },
+ {"quit", no_argument, 0, 'q' },
+ {"info", no_argument, 0, 'i' },
+ {"dump", no_argument, 0, 'd' },
+ {"wait", required_argument, 0, 'w' },
+ {"force", required_argument, 0, 'f' },
+ {"gl-enable", required_argument, 0, 'E' },
+ {"gl-disable", required_argument, 0, 'D' },
+ {"stop-lockspaces", no_argument, 0, 'S' },
+ {"sleep", required_argument, 0, 's' },
+ {"command", required_argument, 0, 'c' },
+ {0, 0, 0, 0 }
+ };
+
+ /*
+ if (argc == 1) {
+ print_usage();
+ exit(0);
+ }
+ */
+
+ while (1) {
+ c = getopt_long(argc, argv, "hqidE:D:s:c:w:f:S", long_options, &option_index);
+ if (c == -1)
+ break;
+
+ switch (c) {
+ case 'h':
+ /* --help */
+ print_usage();
+ exit(0);
+ case 'q':
+ /* --quit */
+ quit = 1;
+ break;
+ case 'i':
+ /* --info */
+ info = 1;
+ break;
+ case 'd':
+ /* --dump */
+ dump = 1;
+ break;
+ case 'w':
+ wait_opt = atoi(optarg);
+ break;
+ case 'E':
+ gl_enable = 1;
+ able_vg_name = strdup(optarg);
+ break;
+ case 'D':
+ gl_disable = 1;
+ able_vg_name = strdup(optarg);
+ break;
+ case 'S':
+ stop_lockspaces = 1;
+ break;
+ case 'c':
+ /* --command path args */
+ cmd_name = strdup(optarg);
+ break;
+ default:
+ print_usage();
+ exit(1);
+ }
+
+ /* stop option parsing at -c; everything after belongs to the command */
+ if (cmd_name)
+ break;
+ }
+
+ if (cmd_name) {
+ /*
+ * optind is the index in argv of the first argv element that
+ * is not an option.
+ */
+
+ cmd_argc = argc - optind + 1; /* +1 for cmd_name */
+
+ len = (cmd_argc + 1) * sizeof(char *); /* +1 for final NULL */
+ cmd_argv = malloc(len);
+ if (!cmd_argv)
+ return -ENOMEM;
+ memset(cmd_argv, 0, len);
+
+ j = 0;
+ cmd_argv[j++] = cmd_name;
+
+ for (i = optind; i < argc; i++) {
+ cmd_argv[j++] = strdup(argv[i]);
+ if (!cmd_argv[j-1])
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Child-process side of --command: block on cmd_pipe until the parent
+ * writes 'g' (locks acquired), then exec the command.  Exits on any
+ * unexpected pipe read; on execv failure it returns, and the caller in
+ * main() returns -1 from the child.
+ */
+static void run_command(void)
+{
+ char go[1];
+ int rv;
+
+ while (1) {
+ /* wait for parent to tell us to go */
+ rv = read(cmd_pipe[0], go, 1);
+ if (rv == -1 && errno == EINTR)
+ continue;
+ if (rv == 1 && go[0] == 'g')
+ break;
+ else
+ exit(-1);
+ }
+
+ execv(cmd_name, cmd_argv);
+ log_error("execv failed");
+}
+
+/*
+ * lvmlock entry point: parse options, optionally fork the --command
+ * child before connecting to lvmlockd, dispatch exactly one request
+ * (quit/info/dump/gl-enable/gl-disable/stop-lockspaces), then release
+ * the child (or kill it if the daemon connection failed).
+ * NOTE(review): the write() releasing the child is unchecked -- a
+ * failed write leaves the child blocked until the pipe closes.
+ */
+int main(int argc, char **argv)
+{
+ int status;
+ int pid = 0;
+ int rv = 0;
+
+ rv = read_options(argc, argv);
+ if (rv < 0)
+ return rv;
+
+ /*
+ * fork child for command before acquiring locks,
+ * exec command in child after acquiring locks,
+ * release locks after child exits.
+ */
+
+ if (cmd_name) {
+ if (pipe(cmd_pipe)) {
+ log_error("pipe error");
+ return -1;
+ }
+ pid = fork();
+ if (pid < 0) {
+ log_error("cannot fork");
+ return -1;
+ }
+ if (!pid) {
+ run_command();
+ return -1;
+ }
+ }
+
+ _lvmlockd = lvmlockd_open(NULL);
+
+ if (_lvmlockd.socket_fd < 0 || _lvmlockd.error) {
+ log_error("lvmlockd open error %d", _lvmlockd.error);
+ goto out_pid;
+ }
+
+ if (quit) {
+ rv = do_quit();
+ goto out;
+ }
+
+ if (info) {
+ rv = do_dump("info");
+ goto out;
+ }
+
+ if (dump) {
+ rv = do_dump("dump");
+ goto out;
+ }
+
+ if (gl_enable) {
+ rv = do_able("enable_gl");
+ goto out;
+ }
+
+ if (gl_disable) {
+ rv = do_able("disable_gl");
+ goto out;
+ }
+
+ if (stop_lockspaces) {
+ rv = do_stop_lockspaces();
+ goto out;
+ }
+
+ if (pid) {
+ /* tell child to exec */
+ write(cmd_pipe[1], "g", 1);
+ waitpid(pid, &status, 0);
+ pid = 0;
+ }
+out:
+ lvmlockd_close(_lvmlockd);
+out_pid:
+ if (pid) {
+ kill(pid, SIGKILL);
+ waitpid(pid, &status, 0);
+ }
+
+ return rv;
+}
+
diff --git a/daemons/lvmlockd/lvmlockd-client.h b/daemons/lvmlockd/lvmlockd-client.h
index dcae38468..7010078a2 100644
--- a/daemons/lvmlockd/lvmlockd-client.h
+++ b/daemons/lvmlockd/lvmlockd-client.h
@@ -13,13 +13,15 @@
#include "daemon-client.h"
+#define LVMLOCKD_SOCKET DEFAULT_RUN_DIR "/lvmlockd.socket"
+
/* Wrappers to open/close connection */
static inline daemon_handle lvmlockd_open(const char *socket)
{
daemon_info lvmlockd_info = {
.path = "lvmlockd",
- .socket = socket ?: DEFAULT_RUN_DIR "/lvmlockd.socket",
+ .socket = socket ?: LVMLOCKD_SOCKET,
.protocol = "lvmlockd",
.protocol_version = 1,
.autostart = 0
diff --git a/daemons/lvmlockd/lvmlockd-core.c b/daemons/lvmlockd/lvmlockd-core.c
new file mode 100644
index 000000000..ce3dba944
--- /dev/null
+++ b/daemons/lvmlockd/lvmlockd-core.c
@@ -0,0 +1,5300 @@
+/*
+ * Copyright (C) 2013 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ */
+
+#define _XOPEN_SOURCE 500 /* pthread */
+#define _ISOC99_SOURCE
+#define _GNU_SOURCE
+
+#include "configure.h"
+#include "daemon-io.h"
+#include "daemon-server.h"
+#include "daemon-log.h"
+#include "config-util.h"
+#include "lvm-version.h"
+#include "lvmetad-client.h"
+#include "lvmlockd-client.h"
+
+#include <assert.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <poll.h>
+#include <errno.h>
+#include <signal.h>
+#include <getopt.h>
+#include <syslog.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/utsname.h>
+#include <sys/un.h>
+
+#define EXTERN
+#include "lvmlockd-internal.h"
+
+static const char *lvmlockd_protocol = "lvmlockd";
+static const int lvmlockd_protocol_version = 1;
+static int daemon_quit;
+static char *our_system_id;
+
+static daemon_handle lvmetad_handle;
+static pthread_mutex_t lvmetad_mutex;
+static int lvmetad_connected;
+
+/*
+ * We use a separate socket for dumping daemon info.
+ * This will not interfere with normal operations, and allows
+ * free-form debug data to be dumped instead of the libdaemon
+ * protocol that wants all data in the cft format.
+ * 1MB should fit all the info we need to dump.
+ */
+#define DUMP_SOCKET_NAME "lvmlockd-dump.sock"
+#define DUMP_BUF_SIZE (1024 * 1024)
+static char dump_buf[DUMP_BUF_SIZE];
+static struct sockaddr_un dump_addr;
+static socklen_t dump_addrlen;
+
+/*
+ * Main program polls client connections, adds new clients,
+ * adds work for client thread.
+ *
+ * pollfd_mutex is used for adding vs removing entries,
+ * and for resume vs realloc.
+ */
+#define POLL_FD_UNUSED -1 /* slot if free */
+#define POLL_FD_IGNORE -2 /* slot is used but ignore in poll */
+#define ADD_POLL_SIZE 16 /* increment slots by this amount */
+
+static pthread_mutex_t pollfd_mutex;
+static struct pollfd *pollfd;
+static int pollfd_size;
+static int pollfd_maxi;
+static int listen_pi;
+static int listen_fd;
+static int restart_pi;
+static int restart_fds[2];
+
+/*
+ * Each lockspace has its own thread to do locking.
+ * The lockspace thread makes synchronous lock requests to dlm/sanlock.
+ * Every vg with a lockd type, i.e. "dlm", "sanlock", should be on this list.
+ *
+ * lockspaces_inactive holds old ls structs for vgs that have been
+ * stopped, or for vgs that failed to start. The old ls structs
+ * are removed from the inactive list and freed when a new ls with
+ * the same name is started and added to the standard lockspaces list.
+ * Keeping this bit of "history" for the ls allows us to return a
+ * more informative error message if a vg lock request is made for
+ * an ls that has been stopped or failed to start.
+ */
+static pthread_mutex_t lockspaces_mutex;
+static struct list_head lockspaces;
+static struct list_head lockspaces_inactive;
+
+/*
+ * This flag is set to 1 if we see multiple vgs with the global
+ * lock enabled. While this is set, we return a special flag
+ * with the vg lock result indicating to the lvm command that
+ * there is a duplicate gl in the vg which should be resolved.
+ * While this is set, find_lockspace_name has the side job of
+ * counting the number of lockspaces with enabled gl's so that
+ * this can be set back to zero when the duplicates are disabled.
+ */
+static int sanlock_gl_dup;
+
+/*
+ * VG's that do not have a lockd type are on the local_vgs list.
+ * Every vg on the system should be in either the lockspaces
+ * list or the local_vgs list.
+ *
+ * lvm commands send lock requests to lvmlockd for local vgs
+ * because at the point locks are acquired in the command,
+ * the vg has not been read, so the command does not know if
+ * the vg's lock_type is local and the locks can be skipped.
+ * So lvmlockd keeps track of which vg's are local so it can
+ * quickly check if a vg lock request can be skipped. (Rather
+ * than having to look up the lock_type in lvmetad for every
+ * operation on a local vg.)
+ *
+ * When local_thread_also is set, lvmlockd's local_thread is
+ * used to manage locks for local pids on vgs from local_vgs.
+ * (In addition to standard locking for lockd type vgs.)
+ *
+ * When local_thread_only is set, lvmlockd is only used to
+ * manage locks for local pids on vgs from local_vgs, and
+ * not to manage lockd type vgs.
+ *
+ * local locking:
+ *
+ * lock_gl: only do local_thread locking for gl when local_thread_only
+ * is set. local_thread_only means that no standard lockd lockspaces
+ * are being used, and lvmlockd is used only for inter-pid locking.
+ * When local_thread_only is not set (meaning both local and shared vgs
+ * are expected), then the standard gl lockspace works for both local
+ * (between local pids) and remote (between pids on different nodes).
+ *
+ * lock_vg: only do local_thread locking for local, non-lockd, vgs in
+ * the local_vgs list. When the vg is a lockd-type, then the standard
+ * lockspace thread works for locking between pids also.
+ *
+ * local_thread_only=1 local_thread_also=1
+ * Use lvmlockd for locking only between local pids, both gl and vg locks.
+ * No shared disks or lockd type vgs should exist.
+ *
+ * local_thread_only=0 local_thread_also=1
+ * Use lvmlockd for locking between local pids for local vgs,
+ * and use lvmlockd for distributed locking for lockd-type vgs.
+ * Use global lock from a lockd-type vgs. A local-only gl does
+ * not make sense here.
+ *
+ * local_thread_only=0 local_thread_also=0
+ * Do not use lvmlockd for locking between local pids.
+ * No shared disks or lockd type vgs should exist.
+ * (lvmlockd should probably not be run at all in this case.)
+ *
+ * local_thread_only=1 local_thread_also=0
+ * Not allowed.
+ */
+static pthread_t local_thread;
+static pthread_mutex_t local_thread_mutex;
+static pthread_cond_t local_thread_cond;
+static struct list_head local_thread_actions;
+static struct list_head local_vgs;
+static struct lockspace *local_thread_gls;
+static int local_thread_also;
+static int local_thread_only;
+static int local_thread_stop;
+static int local_thread_work;
+
+/*
+ * Client thread reads client requests and writes client results.
+ */
+static pthread_t client_thread;
+static pthread_mutex_t client_mutex;
+static pthread_cond_t client_cond;
+static struct list_head client_list; /* connected clients */
+static struct list_head client_results; /* actions to send back to clients */
+static uint32_t client_ids;
+static int client_stop; /* stop the thread */
+static int client_work; /* a client on client_list has work to do */
+
+/*
+ * Worker thread performs misc non-locking actions, e.g. init/free.
+ */
+static pthread_t worker_thread;
+static pthread_mutex_t worker_mutex;
+static pthread_cond_t worker_cond;
+static struct list_head worker_list; /* actions for worker_thread */
+static int worker_stop; /* stop the thread */
+static int worker_wake; /* wake the thread without adding work */
+
+static int add_lock_action(struct action *act);
+static int str_to_lm(const char *str);
+static void clear_lockspace_inactive(char *name);
+
+/*
+ * The content of every log_foo() statement is saved in the
+ * circular buffer, which can be dumped to a client and printed.
+ */
+#define LOG_LINE_SIZE 256
+#define LOG_DUMP_SIZE DUMP_BUF_SIZE
+static char log_dump[LOG_DUMP_SIZE];
+static unsigned int log_point;
+static unsigned int log_wrap;
+static pthread_mutex_t log_mutex;
+static int syslog_priority = LOG_WARNING;
+
+#define DO_STOP 1
+#define NO_STOP 0
+#define DO_FREE 1
+#define NO_FREE 0
+#define DO_FORCE 1
+#define NO_FORCE 0
+
+/*
+ * Seconds from CLOCK_MONOTONIC -- immune to wall-clock adjustments, so
+ * safe for timeout arithmetic.
+ * NOTE(review): clock_gettime needs <time.h>, not visibly included
+ * here -- presumably via a daemon header; confirm.
+ */
+static uint64_t monotime(void)
+{
+ struct timespec ts;
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ return ts.tv_sec;
+}
+
+/*
+ * Append one log line to the circular log_buf, updating the write
+ * position (*point) and wrap flag (*wrap).  Fast path: a single memcpy
+ * when the line fits before the end; otherwise copy byte-by-byte,
+ * wrapping to offset 0 when the buffer end is reached.
+ * Caller holds log_mutex.
+ */
+static void log_save_line(int len, char *line,
+ char *log_buf, unsigned int *point, unsigned int *wrap)
+{
+ unsigned int p = *point;
+ unsigned int w = *wrap;
+ int i;
+
+ if (len < LOG_DUMP_SIZE - p) {
+ memcpy(log_buf + p, line, len);
+ p += len;
+
+ if (p == LOG_DUMP_SIZE) {
+ p = 0;
+ w = 1;
+ }
+ goto out;
+ }
+
+ for (i = 0; i < len; i++) {
+ log_buf[p++] = line[i];
+
+ if (p == LOG_DUMP_SIZE) {
+ p = 0;
+ w = 1;
+ }
+ }
+ out:
+ *point = p;
+ *wrap = w;
+}
+
+/*
+ * Format a timestamped log line, save it in the circular dump buffer
+ * (always), and also send it to syslog (when level is at or above
+ * syslog_priority) and stderr (when running with daemon_debug).
+ * The line is truncated to LOG_LINE_SIZE; pos-1 excludes the trailing
+ * NUL from what is stored in the dump buffer.
+ */
+void log_level(int level, const char *fmt, ...)
+{
+ char line[LOG_LINE_SIZE];
+ va_list ap;
+ int len = LOG_LINE_SIZE - 1;
+ int ret, pos = 0;
+
+ memset(line, 0, sizeof(line));
+
+ ret = snprintf(line, len, "%llu ", (unsigned long long)time(NULL));
+ pos += ret;
+
+ va_start(ap, fmt);
+ ret = vsnprintf(line + pos, len - pos, fmt, ap);
+ va_end(ap);
+
+ /* vsnprintf return >= space left means the output was truncated */
+ if (ret >= len - pos)
+ pos = len - 1;
+ else
+ pos += ret;
+
+ line[pos++] = '\n';
+ line[pos++] = '\0';
+
+ pthread_mutex_lock(&log_mutex);
+ log_save_line(pos - 1, line, log_dump, &log_point, &log_wrap);
+ pthread_mutex_unlock(&log_mutex);
+
+ if (level <= syslog_priority)
+ syslog(level, "%s", line);
+
+ if (daemon_debug)
+ fprintf(stderr, "%s", line);
+}
+
+/*
+ * Copy the circular log into dump_buf in chronological order and set
+ * *dump_len.  When wrapped, the tail (oldest data, from log_point to
+ * the end) is copied first, then the head.  When not wrapped, the
+ * final byte (log_point-1) is dropped -- presumably the trailing
+ * newline of the last line; confirm.
+ */
+static int dump_log(int *dump_len)
+{
+ int tail_len;
+
+ pthread_mutex_lock(&log_mutex);
+
+ if (!log_wrap && !log_point) {
+ *dump_len = 0;
+ } else if (log_wrap) {
+ tail_len = LOG_DUMP_SIZE - log_point;
+ memcpy(dump_buf, log_dump+log_point, tail_len);
+ if (log_point)
+ memcpy(dump_buf+tail_len, log_dump, log_point);
+ *dump_len = LOG_DUMP_SIZE;
+ } else {
+ memcpy(dump_buf, log_dump, log_point-1);
+ *dump_len = log_point-1;
+ }
+ pthread_mutex_unlock(&log_mutex);
+
+ return 0;
+}
+
+/*
+ * List from kernel
+ */
+/* Initialize an empty list: head points to itself in both directions. */
+static inline void INIT_LIST_HEAD(struct list_head *list)
+{
+ list->next = list;
+ list->prev = list;
+}
+
+/* Splice a new entry between two known-adjacent entries. */
+static inline void __list_add(struct list_head *new,
+ struct list_head *prev,
+ struct list_head *next)
+{
+ next->prev = new;
+ new->next = next;
+ new->prev = prev;
+ prev->next = new;
+}
+
+/* Unlink whatever lies between prev and next. */
+static inline void __list_del(struct list_head *prev, struct list_head *next)
+{
+ next->prev = prev;
+ prev->next = next;
+}
+
+/* Insert at the front of the list (LIFO). */
+static inline void list_add(struct list_head *new, struct list_head *head)
+{
+ __list_add(new, head, head->next);
+}
+
+/* Insert at the back of the list (FIFO). */
+static inline void list_add_tail(struct list_head *new, struct list_head *head)
+{
+ __list_add(new, head->prev, head);
+}
+
+/* Remove an entry; its own next/prev are left dangling. */
+static inline void list_del(struct list_head *entry)
+{
+ __list_del(entry->prev, entry->next);
+}
+
+/* True when the list contains no entries. */
+static inline int list_empty(const struct list_head *head)
+{
+ return head->next == head;
+}
+
+#define list_entry(ptr, type, member) \
+ container_of(ptr, type, member)
+
+#define list_first_entry(ptr, type, member) \
+ list_entry((ptr)->next, type, member)
+
+#define list_for_each_entry(pos, head, member) \
+ for (pos = list_entry((head)->next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = list_entry(pos->member.next, typeof(*pos), member))
+
+#define list_for_each_entry_safe(pos, n, head, member) \
+ for (pos = list_entry((head)->next, typeof(*pos), member), \
+ n = list_entry(pos->member.next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
/*
 * Freelists for the frequently allocated structs, to avoid malloc/free
 * churn in the main processing paths.  Up to MAX_UNUSED_* entries of
 * each type are cached by the free_* functions; beyond that they are
 * returned to the heap.  All four lists and counters are protected by
 * the single unused_struct_mutex.
 */
#define MAX_UNUSED_ACTION 32
#define MAX_UNUSED_CLIENT 32
#define MAX_UNUSED_RESOURCE 32
#define MAX_UNUSED_LOCK 32
static pthread_mutex_t unused_struct_mutex;
static struct list_head unused_action;
static struct list_head unused_client;
static struct list_head unused_resource;
static struct list_head unused_lock;
static int unused_action_count;
static int unused_client_count;
static int unused_resource_count;
static int unused_lock_count;
+
+static struct action *alloc_action(void)
+{
+ struct action *act;
+
+ pthread_mutex_lock(&unused_struct_mutex);
+ if (!unused_action_count) {
+ act = malloc(sizeof(struct action));
+ } else {
+ act = list_first_entry(&unused_action, struct action, list);
+ list_del(&act->list);
+ unused_action_count--;
+ }
+ pthread_mutex_unlock(&unused_struct_mutex);
+ if (act)
+ memset(act, 0, sizeof(struct action));
+ return act;
+}
+
+static struct client *alloc_client(void)
+{
+ struct client *cl;
+
+ pthread_mutex_lock(&unused_struct_mutex);
+ if (!unused_client_count) {
+ cl = malloc(sizeof(struct client));
+ } else {
+ cl = list_first_entry(&unused_client, struct client, list);
+ list_del(&cl->list);
+ unused_client_count--;
+ }
+ pthread_mutex_unlock(&unused_struct_mutex);
+ if (cl)
+ memset(cl, 0, sizeof(struct client));
+ return cl;
+}
+
+static struct resource *alloc_resource(void)
+{
+ struct resource *r;
+
+ pthread_mutex_lock(&unused_struct_mutex);
+ if (!unused_resource_count) {
+ r = malloc(sizeof(struct resource));
+ } else {
+ r = list_first_entry(&unused_resource, struct resource, list);
+ list_del(&r->list);
+ unused_resource_count--;
+ }
+ pthread_mutex_unlock(&unused_struct_mutex);
+ if (r)
+ memset(r, 0, sizeof(struct resource));
+ return r;
+}
+
+static struct lock *alloc_lock(void)
+{
+ struct lock *lk;
+
+ pthread_mutex_lock(&unused_struct_mutex);
+ if (!unused_lock_count) {
+ lk = malloc(sizeof(struct lock));
+ } else {
+ lk = list_first_entry(&unused_lock, struct lock, list);
+ list_del(&lk->list);
+ unused_lock_count--;
+ }
+ pthread_mutex_unlock(&unused_struct_mutex);
+ if (lk)
+ memset(lk, 0, sizeof(struct lock));
+ return lk;
+}
+
+static void free_action(struct action *act)
+{
+ pthread_mutex_lock(&unused_struct_mutex);
+ if (unused_action_count >= MAX_UNUSED_ACTION) {
+ free(act);
+ } else {
+ list_add_tail(&act->list, &unused_action);
+ unused_action_count++;
+ }
+ pthread_mutex_unlock(&unused_struct_mutex);
+}
+
+static void free_client(struct client *cl)
+{
+ pthread_mutex_lock(&unused_struct_mutex);
+ if (unused_client_count >= MAX_UNUSED_CLIENT) {
+ free(cl);
+ } else {
+ list_add_tail(&cl->list, &unused_client);
+ unused_client_count++;
+ }
+ pthread_mutex_unlock(&unused_struct_mutex);
+}
+
+static void free_resource(struct resource *r)
+{
+ pthread_mutex_lock(&unused_struct_mutex);
+ if (unused_resource_count >= MAX_UNUSED_RESOURCE) {
+ free(r);
+ } else {
+ list_add_tail(&r->list, &unused_resource);
+ unused_resource_count++;
+ }
+ pthread_mutex_unlock(&unused_struct_mutex);
+}
+
+static void free_lock(struct lock *lk)
+{
+ pthread_mutex_lock(&unused_struct_mutex);
+ if (unused_lock_count >= MAX_UNUSED_LOCK) {
+ free(lk);
+ } else {
+ list_add_tail(&lk->list, &unused_lock);
+ unused_lock_count++;
+ }
+ pthread_mutex_unlock(&unused_struct_mutex);
+}
+
+static void setup_structs(void)
+{
+ struct action *act;
+ struct client *cl;
+ struct resource *r;
+ struct lock *lk;
+ int i;
+
+ pthread_mutex_init(&unused_struct_mutex, NULL);
+ INIT_LIST_HEAD(&unused_action);
+ INIT_LIST_HEAD(&unused_client);
+ INIT_LIST_HEAD(&unused_resource);
+ INIT_LIST_HEAD(&unused_lock);
+
+ for (i = 0; i < MAX_UNUSED_ACTION; i++) {
+ act = alloc_action();
+ free_action(act);
+ }
+
+ for (i = 0; i < MAX_UNUSED_CLIENT; i++) {
+ cl = alloc_client();
+ free_client(cl);
+ }
+
+ for (i = 0; i < MAX_UNUSED_RESOURCE; i++) {
+ r = alloc_resource();
+ free_resource(r);
+ }
+
+ for (i = 0; i < MAX_UNUSED_LOCK; i++) {
+ lk = alloc_lock();
+ free_lock(lk);
+ }
+}
+
+static int add_pollfd(int fd)
+{
+ int i, new_size;
+
+ pthread_mutex_lock(&pollfd_mutex);
+ for (i = 0; i < pollfd_size; i++) {
+ if (pollfd[i].fd != POLL_FD_UNUSED)
+ continue;
+
+ pollfd[i].fd = fd;
+ pollfd[i].events = POLLIN;
+ pollfd[i].revents = 0;
+
+ if (i > pollfd_maxi)
+ pollfd_maxi = i;
+
+ pthread_mutex_unlock(&pollfd_mutex);
+ return i;
+ }
+
+ new_size = pollfd_size + ADD_POLL_SIZE;
+
+ pollfd = realloc(pollfd, new_size * sizeof(struct pollfd));
+ if (!pollfd) {
+ log_error("can't alloc new size %d for pollfd", new_size);
+ return -ENOMEM;
+ }
+
+ for (i = pollfd_size; i < new_size; i++) {
+ pollfd[i].fd = POLL_FD_UNUSED;
+ pollfd[i].events = 0;
+ pollfd[i].revents = 0;
+ }
+
+ i = pollfd_size;
+ pollfd[i].fd = fd;
+ pollfd[i].events = POLLIN;
+ pollfd[i].revents = 0;
+ pollfd_maxi = i;
+
+ pollfd_size = new_size;
+
+ pthread_mutex_unlock(&pollfd_mutex);
+ return i;
+}
+
+static void rem_pollfd(int pi)
+{
+ if (pi < 0) {
+ log_error("rem_pollfd %d", pi);
+ return;
+ }
+ pthread_mutex_lock(&pollfd_mutex);
+ pollfd[pi].fd = POLL_FD_UNUSED;
+ pollfd[pi].events = 0;
+ pollfd[pi].revents = 0;
+ pthread_mutex_unlock(&pollfd_mutex);
+}
+
+static const char *lm_str(int x)
+{
+ switch (x) {
+ case LD_LM_NONE:
+ return "none";
+ case LD_LM_DLM:
+ return "dlm";
+ case LD_LM_SANLOCK:
+ return "sanlock";
+ default:
+ return "lm_unknown";
+ }
+}
+
+static const char *rt_str(int x)
+{
+ switch (x) {
+ case LD_RT_GL:
+ return "gl";
+ case LD_RT_VG:
+ return "vg";
+ case LD_RT_LV:
+ return "lv";
+ default:
+ return ".";
+ };
+}
+
+static const char *op_str(int x)
+{
+ switch (x) {
+ case LD_OP_INIT:
+ return "init";
+ case LD_OP_FREE:
+ return "free";
+ case LD_OP_START:
+ return "start";
+ case LD_OP_STOP:
+ return "stop";
+ case LD_OP_LOCK:
+ return "lock";
+ case LD_OP_UPDATE:
+ return "update";
+ case LD_OP_CLOSE:
+ return "close";
+ case LD_OP_ENABLE:
+ return "enable";
+ case LD_OP_DISABLE:
+ return "disable";
+ case LD_OP_ADD_LOCAL:
+ return "add_local";
+ case LD_OP_REM_LOCAL:
+ return "rem_local";
+ case LD_OP_UPDATE_LOCAL:
+ return "update_local";
+ case LD_OP_START_WAIT:
+ return "start_wait";
+ case LD_OP_STOP_ALL:
+ return "stop_all";
+ default:
+ return "op_unknown";
+ };
+}
+
+static const char *mode_str(int x)
+{
+ switch (x) {
+ case LD_LK_IV:
+ return "iv";
+ case LD_LK_UN:
+ return "un";
+ case LD_LK_NL:
+ return "nl";
+ case LD_LK_SH:
+ return "sh";
+ case LD_LK_EX:
+ return "ex";
+ default:
+ return ".";
+ };
+}
+
+int last_string_from_args(char *args_in, char *last)
+{
+ const char *args = args_in;
+ const char *colon, *str = NULL;
+
+ while (1) {
+ if (!args || (*args == '\0'))
+ break;
+ colon = strstr(args, ":");
+ if (!colon)
+ break;
+ str = colon;
+ args = colon + 1;
+ }
+
+ if (str) {
+ snprintf(last, MAX_ARGS, "%s", str + 1);
+ return 0;
+ }
+ return -1;
+}
+
+int version_from_args(char *args, unsigned int *major, unsigned int *minor, unsigned int *patch)
+{
+ char version[MAX_ARGS];
+ char *major_str, *minor_str, *patch_str;
+ char *n, *d1, *d2;
+
+ strncpy(version, args, MAX_ARGS);
+
+ n = strstr(version, ":");
+ if (n)
+ *n = '\0';
+
+ d1 = strstr(version, ".");
+ if (!d1)
+ return -1;
+
+ d2 = strstr(d1 + 1, ".");
+ if (!d2)
+ return -1;
+
+ major_str = version;
+ minor_str = d1 + 1;
+ patch_str = d2 + 1;
+
+ *d1 = '\0';
+ *d2 = '\0';
+
+ if (major)
+ *major = atoi(major_str);
+ if (minor)
+ *minor = atoi(minor_str);
+ if (patch)
+ *patch = atoi(patch_str);
+
+ return 0;
+}
+
+/*
+ * These are few enough that arrays of function pointers can
+ * be avoided.
+ */
+
+static int lm_add_lockspace(struct lockspace *ls, struct action *act)
+{
+ int rv;
+
+ if (ls->lm_type == LD_LM_DLM)
+ rv = lm_add_lockspace_dlm(ls);
+ else if (ls->lm_type == LD_LM_SANLOCK)
+ rv = lm_add_lockspace_sanlock(ls);
+ else
+ return -1;
+
+ if (act)
+ act->lm_rv = rv;
+ return rv;
+}
+
+static int lm_rem_lockspace(struct lockspace *ls, struct action *act, int free_vg)
+{
+ int rv;
+
+ if (ls->lm_type == LD_LM_DLM)
+ rv = lm_rem_lockspace_dlm(ls, free_vg);
+ else if (ls->lm_type == LD_LM_SANLOCK)
+ rv = lm_rem_lockspace_sanlock(ls, free_vg);
+ else
+ return -1;
+
+ if (act)
+ act->lm_rv = rv;
+ return rv;
+}
+
+static int lm_lock(struct lockspace *ls, struct resource *r, int mode, struct action *act,
+ uint32_t *r_version, uint32_t *n_version, int *retry)
+{
+ int rv;
+
+ if (ls->lm_type == LD_LM_DLM)
+ rv = lm_lock_dlm(ls, r, mode, r_version, n_version);
+ else if (ls->lm_type == LD_LM_SANLOCK)
+ rv = lm_lock_sanlock(ls, r, mode, act->lv_args, r_version, n_version, retry);
+ else
+ return -1;
+
+ if (act)
+ act->lm_rv = rv;
+ return rv;
+}
+
+static int lm_convert(struct lockspace *ls, struct resource *r,
+ int mode, struct action *act, uint32_t r_version)
+{
+ int rv;
+
+ if (ls->lm_type == LD_LM_DLM)
+ rv = lm_convert_dlm(ls, r, mode, r_version);
+ else if (ls->lm_type == LD_LM_SANLOCK)
+ rv = lm_convert_sanlock(ls, r, mode, r_version);
+ else
+ return -1;
+
+ if (act)
+ act->lm_rv = rv;
+ return rv;
+}
+
+static int lm_unlock(struct lockspace *ls, struct resource *r, struct action *act,
+ uint32_t r_version, uint32_t n_version, uint32_t lmu_flags)
+{
+ int rv;
+
+ if (ls->lm_type == LD_LM_DLM)
+ return lm_unlock_dlm(ls, r, r_version, n_version, lmu_flags);
+ else if (ls->lm_type == LD_LM_SANLOCK)
+ return lm_unlock_sanlock(ls, r, r_version, n_version, lmu_flags);
+ else
+ return -1;
+
+ if (act)
+ act->lm_rv = rv;
+ return rv;
+}
+
+static int lm_hosts(struct lockspace *ls, int notify)
+{
+ if (ls->lm_type == LD_LM_DLM)
+ return 0;
+ else if (ls->lm_type == LD_LM_SANLOCK)
+ return lm_hosts_sanlock(ls, notify);
+ return -1;
+}
+
+static void lm_rem_resource(struct lockspace *ls, struct resource *r)
+{
+ if (ls->lm_type == LD_LM_DLM)
+ lm_rem_resource_dlm(ls, r);
+ else if (ls->lm_type == LD_LM_SANLOCK)
+ lm_rem_resource_sanlock(ls, r);
+}
+
/*
 * Queue a completed action on client_results and wake the thread
 * waiting on client_cond.  The action's list node is moved onto
 * client_results here, so the caller must not touch act afterward.
 */
static void add_client_result(struct action *act)
{
	pthread_mutex_lock(&client_mutex);
	list_add_tail(&act->list, &client_results);
	pthread_cond_signal(&client_cond);
	pthread_mutex_unlock(&client_mutex);
}
+
+static struct lock *find_lock_client(struct resource *r, uint32_t client_id)
+{
+ struct lock *lk;
+
+ list_for_each_entry(lk, &r->locks, list) {
+ if (lk->client_id == client_id)
+ return lk;
+ }
+ return NULL;
+}
+
+static struct lock *find_lock_persistent(struct resource *r)
+{
+ struct lock *lk;
+
+ list_for_each_entry(lk, &r->locks, list) {
+ if (lk->flags & LD_LF_PERSISTENT)
+ return lk;
+ }
+ return NULL;
+}
+
+static struct action *find_action_client(struct resource *r, uint32_t client_id)
+{
+ struct action *act;
+
+ list_for_each_entry(act, &r->actions, list) {
+ if (act->client_id != client_id)
+ continue;
+ return act;
+ }
+ return NULL;
+}
+
/*
 * Queue an action for the worker thread and wake it.  If the worker
 * has been told to stop, the action is not queued.
 * NOTE(review): in the worker_stop case act is neither queued nor
 * freed here — confirm the caller retains ownership, otherwise the
 * action leaks.
 */
static void add_work_action(struct action *act)
{
	pthread_mutex_lock(&worker_mutex);
	if (!worker_stop) {
		list_add_tail(&act->list, &worker_list);
		pthread_cond_signal(&worker_cond);
	}
	pthread_mutex_unlock(&worker_mutex);
}
+
+static void create_work_action(int op)
+{
+ struct action *act;
+
+ act = alloc_action();
+ if (!act)
+ return;
+ act->op = op;
+ add_work_action(act);
+}
+
/*
 * Acquire resource r for act (or add another holder to an existing
 * shared lock).  On success a struct lock recording the holder is
 * appended to r->locks.  Returns 0, -EAGAIN on a lock-manager
 * conflict (caller may retry), or another negative errno.
 */
static int res_lock(struct lockspace *ls, struct resource *r, struct action *act, int *retry)
{
	struct lock *lk;
	uint32_t r_version = 0;
	uint32_t n_version = 0;
	int rv;

	log_debug("S %s R %s res_lock mode %d", ls->name, r->name, act->mode);

	/* already held sh and another sh is wanted: no lm call needed */
	if (r->mode == LD_LK_SH && act->mode == LD_LK_SH)
		goto add_lk;

	rv = lm_lock(ls, r, act->mode, act, &r_version, &n_version, retry);
	if (rv == -EAGAIN)
		return rv;
	if (rv < 0) {
		log_error("S %s R %s res_lock lm error %d", ls->name, r->name, rv);
		return rv;
	}

	log_debug("S %s R %s res_lock lm done r_version %u n_version %u",
		  ls->name, r->name, r_version, n_version);

	/* lm_lock() reads new r_version and n_version */

	if (r_version > r->version) {
		/*
		 * New r_version of the lock: means that another
		 * host has changed data protected by this lock
		 * since the last time we acquired it. We
		 * should invalidate any local cache of the data
		 * protected by this lock and reread it from disk.
		 */
		r->version = r_version;

		/*
		 * r is vglk: tell lvmetad to set the vg invalid
		 * flag, and provide the new r_version. If lvmetad finds
		 * that its cached vg has seqno less than the value
		 * we send here, it will set the vg invalid flag.
		 * lvm commands that read the vg from lvmetad, will
		 * see the invalid flag returned, will reread the
		 * vg from disk, update the lvmetad copy, and go on.
		 *
		 * r is global: tell lvmetad to set the global invalid
		 * flag. When commands see this flag returned from lvmetad,
		 * they will reread metadata from disk, update the lvmetad
		 * caches, and tell lvmetad to set global invalid to 0.
		 */

		if ((r->type == LD_RT_VG) && lvmetad_connected) {
			daemon_reply reply;
			char *uuid;

			log_debug("S %s R %s res_lock set lvmetad vg version %u",
				  ls->name, r->name, r_version);

			/* a lockspace created before the vg uuid was known
			 * falls back to the vg name as the lvmetad key */
			if (!ls->vg_uuid[0] || !strcmp(ls->vg_uuid, "none"))
				uuid = ls->name;
			else
				uuid = ls->vg_uuid;

			pthread_mutex_lock(&lvmetad_mutex);
			reply = daemon_send_simple(lvmetad_handle, "set_vg_info",
						   "token = %s", "skip",
						   "uuid = %s", uuid,
						   "version = %d", (int)r_version,
						   NULL);
			pthread_mutex_unlock(&lvmetad_mutex);
			/* TODO: check reply? */
			daemon_reply_destroy(reply);
		}

		if ((r->type == LD_RT_GL) && lvmetad_connected) {
			daemon_reply reply;

			log_debug("S %s R %s res_lock set lvmetad global invalid",
				  ls->name, r->name);

			pthread_mutex_lock(&lvmetad_mutex);
			reply = daemon_send_simple(lvmetad_handle, "set_global_info",
						   "token = %s", "skip",
						   "global_invalid = %d", 1,
						   NULL);
			pthread_mutex_unlock(&lvmetad_mutex);
			/* TODO: check reply? */
			daemon_reply_destroy(reply);
		}
	}

	if ((r->type == LD_RT_GL) && (n_version > ls->names_version)) {
		/*
		 * Set a flag that will cause update_local_vgs to be run
		 * when the gl is unlocked (by queueing an UPDATE_LOCK action).
		 * It needs to happen on unlock because lvmetad needs to be updated
		 * by the command before there is an updated vg list to be read.
		 */
		log_debug("S %s gl res_lock set update_local_vgs", ls->name);
		ls->update_local_vgs = 1;
		ls->names_version = n_version;
	}

	if ((r->type == LD_RT_GL) && (act->flags & LD_AF_UPDATE_NAMES_VERSION)) {
		/*
		 * Set a flag that will cause the ls->names_version to be
		 * incremented and written to the gl lvb n_version when
		 * the gl is unlocked.
		 * Other hosts will eventually take the gl lock, see the new
		 * n_version and run update_local_vgs.
		 */
		log_debug("S %s gl res_lock set update_names_version", ls->name);
		ls->update_names_version = 1;
	}

	r->mode = act->mode;

add_lk:
	/* count shared holders so unlock knows when the last one is gone */
	if (r->mode == LD_LK_SH)
		r->sh_count++;

	lk = alloc_lock();
	if (!lk) {
		/* TODO */
		log_error("res_lock ENOMEM");
		return -ENOMEM;
	}

	lk->client_id = act->client_id;
	lk->mode = act->mode;

	/* persistent locks are not owned by any client (see res_unlock) */
	if (act->flags & LD_AF_PERSISTENT) {
		lk->flags |= LD_LF_PERSISTENT;
		lk->client_id = 0;
	}

	list_add_tail(&lk->list, &r->locks);

	return 0;
}
+
/*
 * Convert existing lock lk on r to act->mode (sh<->ex only).
 * An sh->ex conversion is refused with -EAGAIN while other shared
 * holders exist.  Converting away from ex writes the pending version
 * to the lock manager, just as unlock would.
 */
static int res_convert(struct lockspace *ls, struct resource *r,
		       struct lock *lk, struct action *act)
{
	uint32_t r_version;
	int rv;

	log_debug("S %s R %s res_convert mode %d", ls->name, r->name, act->mode);

	/* cannot go sh->ex while other holders share the lock */
	if (act->mode == LD_LK_EX && lk->mode == LD_LK_SH && r->sh_count > 1)
		return -EAGAIN;

	/*
	 * lm_convert() writes new version (from ex)
	 * Same as lm_unlock()
	 */

	if ((r->type == LD_RT_GL) && (r->mode == LD_LK_EX)) {
		/* gl version is bumped automatically on every ex release */
		r->version++;
		lk->version = r->version;
		r_version = r->version;
		log_debug("S %s R %s res_convert r_version inc %u",
			  ls->name, r->name, r_version);

	} else if ((r->type == LD_RT_VG) && (r->mode == LD_LK_EX) && (lk->version > r->version)) {
		/* vg version was updated via res_update; write it out now */
		r->version = lk->version;
		r_version = r->version;
		log_debug("S %s R %s res_convert r_version new %u", ls->name, r->name, r_version);
	} else {
		r_version = 0;
	}

	rv = lm_convert(ls, r, act->mode, act, r_version);
	if (rv < 0) {
		log_error("S %s R %s res_convert lm error %d", ls->name, r->name, rv);
		return rv;
	}

	log_debug("S %s R %s res_convert lm done", ls->name, r->name);

	if (lk->mode == LD_LK_EX && act->mode == LD_LK_SH) {
		r->sh_count = 1;
	} else if (lk->mode == LD_LK_SH && act->mode == LD_LK_EX) {
		r->sh_count = 0;
	} else {
		/* should not be possible */
		log_error("S %s R %s res_convert invalid modes %d %d",
			  ls->name, r->name, lk->mode, act->mode);
		return -1;
	}

	r->mode = act->mode;
	lk->mode = act->mode;

	return 0;
}
+
/*
 * Cancel a queued (not yet granted) action on r, reporting -ECANCELED
 * to its owner via the client results queue.  A persistent request
 * cancels the first queued persistent action; otherwise only the
 * canceling client's own action is targeted.  Returns -ECANCELED when
 * an action was canceled, -ENOENT when none matched.
 */
static int res_cancel(struct lockspace *ls, struct resource *r,
		      struct action *act)
{
	struct action *cact;

	/*
	 * a client can cancel its own non-persistent lock requests,
	 * when could this happen?
	 *
	 * a client can cancel other client's persistent lock requests,
	 * when could this happen?
	 */

	if (act->flags & LD_AF_PERSISTENT) {
		list_for_each_entry(cact, &r->actions, list) {
			if (!(cact->flags & LD_AF_PERSISTENT))
				continue;
			goto do_cancel;
		}
	} else {
		cact = find_action_client(r, act->client_id);
		if (cact)
			goto do_cancel;
	}

	return -ENOENT;

do_cancel:
	log_debug("S %s R %s res_cancel client %d", ls->name, r->name, cact->client_id);
	cact->result = -ECANCELED;
	list_del(&cact->list);
	add_client_result(cact);

	return -ECANCELED;
}
+
+/*
 * lm_unlock() writes a new r_version (from ex)
+ *
+ * The r_version of the vg resource is incremented if
+ * an "update" was received for the vg lock. The update
+ * contains the new vg seqno from the vg metadata which is
+ * used as the r_version.
+ *
+ * The r_version of the global resource is automatically
+ * incremented when it is unlocked from ex mode.
+ *
+ * For the global resource, n_version is used in addition
+ * to r_version:
+ *
+ * r_version is incremented every time a command releases
+ * the global lock from ex.
+ *
+ * n_version is incremented every time a command that
+ * changes the list of vg names releases the global lock from ex.
+ *
+ * Changes to n_version are used by hosts to detect that other
+ * hosts have added/removed/renamed local (non-lockd) vgs which
+ * can be seen by multiple hosts, so the local_vgs list probably
+ * needs to be updated. lvmlockd knows about changes to lockd-type
+ * vgs through their locks, but local vgs do not have locks,
+ * so the n_version change is the only way to know that the
+ * local_vgs list should be updated.
+ */
+
+/*
+ * persistent locks will not be unlocked for OP_CLOSE/act_close
+ * because act_close->flags does not have the PERSISTENT flag
+ * set, and a persistent lk->client_id is zero, which will not
+ * match the client in act_close->client_id.
+ */
+
/*
 * Release act's lock on r (or the persistent lock, for a persistent
 * request).  Only the last shared holder, or the ex holder, triggers
 * the actual lm unlock, which also writes out any pending r_version /
 * n_version changes (see the comment block above).  Returns 0 on
 * success, -ENOENT when no matching lock exists.
 */
static int res_unlock(struct lockspace *ls, struct resource *r,
		      struct action *act)
{
	struct lock *lk;
	uint32_t r_version;
	uint32_t n_version = 0;
	int rv;

	if (act->flags & LD_AF_PERSISTENT) {
		lk = find_lock_persistent(r);
		if (lk)
			goto do_unlock;
	} else {
		lk = find_lock_client(r, act->client_id);
		if (lk)
			goto do_unlock;
	}

	/* a close-triggered unlock with no lock held is normal, not an error */
	if (act->op != LD_OP_CLOSE)
		log_error("S %s R %s res_unlock no locks", ls->name, r->name);
	return -ENOENT;

do_unlock:
	log_debug("S %s R %s res_unlock %s", ls->name, r->name,
		  (act->op == LD_OP_CLOSE) ? "from close" : "");

	/* send unlock to lm when last sh lock is unlocked */
	if (lk->mode == LD_LK_SH) {
		r->sh_count--;
		if (r->sh_count > 0)
			goto rem_lk;
	}

	if ((r->type == LD_RT_GL) && (r->mode == LD_LK_EX)) {
		/* gl version is bumped automatically on every ex release */
		r->version++;
		lk->version = r->version;
		r_version = r->version;

		log_debug("S %s R %s res_unlock r_version inc %u", ls->name, r->name, r_version);

		if (ls->update_names_version) {
			ls->names_version++;
			n_version = ls->names_version;
			log_debug("S %s gl res_unlock got update_names_version %u",
				  ls->name, n_version);
		}

	} else if ((r->type == LD_RT_VG) && (r->mode == LD_LK_EX) && (lk->version > r->version)) {
		/* vg version was updated via res_update; write it out now */
		r->version = lk->version;
		r_version = r->version;

		log_debug("S %s R %s res_unlock r_version new %u",
			  ls->name, r->name, r_version);
	} else {
		r_version = 0;
	}

	rv = lm_unlock(ls, r, act, r_version, n_version, 0);
	if (rv < 0) {
		/* should never happen, retry? */
		log_error("S %s R %s res_unlock lm error %d", ls->name, r->name, rv);
		return rv;
	}

	log_debug("S %s R %s res_unlock lm done", ls->name, r->name);

	if ((r->type == LD_RT_GL) && (ls->update_local_vgs || ls->update_names_version)) {
		log_debug("S %s gl res_unlock got update_local_vgs %d update_names_version %d",
			  ls->name, ls->update_local_vgs, ls->update_names_version);
		ls->update_local_vgs = 0;
		ls->update_names_version = 0;
		create_work_action(LD_OP_UPDATE_LOCAL);
	}

rem_lk:
	list_del(&lk->list);
	free_lock(lk);

	/*
	 * TODO: if unlock isn't synchronous, and next lock runs into
	 * it, what will the effect be?
	 */

	if (list_empty(&r->locks))
		r->mode = LD_LK_UN;

	return 0;
}
+
+static int res_update(struct lockspace *ls, struct resource *r,
+ struct action *act)
+{
+ struct lock *lk;
+
+ lk = find_lock_client(r, act->client_id);
+ if (!lk) {
+ log_error("S %s R %s res_update client %u lock not found",
+ ls->name, r->name, act->client_id);
+ return -ENOENT;
+ }
+
+ if (r->mode != LD_LK_EX) {
+ log_error("S %s R %s res_update version on non-ex lock",
+ ls->name, r->name);
+ return -EINVAL;
+ }
+
+ /* lk version will be written to lm by unlock */
+
+ /* TODO: try to write it to lm here in some cases?
+ * when a SYNC flag is set for update? */
+
+ if (act->flags & LD_AF_NEXT_VERSION)
+ lk->version = r->version + 1;
+ else
+ lk->version = act->version;
+
+ log_debug("S %s R %s res_update lk version to %u", ls->name, r->name, lk->version);
+
+ return 0;
+}
+
+static int free_lv(struct lockspace *ls, struct resource *r)
+{
+ if (ls->lm_type == LD_LM_SANLOCK)
+ return lm_free_lv_sanlock(ls, r);
+ else if (ls->lm_type == LD_LM_DLM)
+ return 0;
+ else
+ return -EINVAL;
+}
+
+/*
+ * NB. we can't do this if sanlock is holding any locks on
+ * the resource; we'd be rewriting the resource from under
+ * sanlock and would confuse or break it badly. We don't
+ * know what another host is doing, so these must be used
+ * very carefully.
+ */
+
+static int res_able(struct lockspace *ls, struct resource *r,
+ struct action *act)
+{
+ int rv;
+
+ if (ls->lm_type != LD_LM_SANLOCK) {
+ log_error("enable/disable only applies to sanlock");
+ return -EINVAL;
+ }
+
+ if (r->type != LD_RT_GL) {
+ log_error("enable/disable only applies to global lock");
+ return -EINVAL;
+ }
+
+ if (r->mode != LD_LK_UN) {
+ log_error("enable/disable only allowed on unlocked resource");
+ return -EINVAL;
+ }
+
+ if (act->op == LD_OP_ENABLE && gl_lsname_sanlock[0]) {
+ log_error("disable global lock in %s before enable in %s",
+ gl_lsname_sanlock, ls->name);
+ return -EINVAL;
+ }
+
+ if ((act->op == LD_OP_DISABLE) && (act->flags & LD_AF_EX_DISABLE)) {
+ rv = lm_ex_disable_gl_sanlock(ls);
+ goto out;
+ }
+
+ rv = lm_able_gl_sanlock(ls, act->op == LD_OP_ENABLE);
+out:
+ return rv;
+}
+
+/*
+ * Go through queued actions, and make lock/unlock calls on the resource
+ * based on the actions and the existing lock state.
+ *
+ * All lock operations sent to the lock manager are non-blocking.
+ * This is because sanlock does not support lock queueing.
+ * Eventually we could enhance this to take advantage of lock
+ * queueing when available (i.e. for the dlm).
+ *
+ * act_close_list: list of CLOSE actions, identifying clients that have
+ * closed/terminated their lvmlockd connection, and whose locks should
+ * be released. Do not remove these actions from act_close_list.
+ *
+ * retry_out: set to 1 if the lock manager said we should retry,
+ * meaning we should call res_process() again in a short while to retry.
+ */
+
static void res_process(struct lockspace *ls, struct resource *r,
			struct list_head *act_close_list, int *retry_out)
{
	struct action *act, *safe, *act_close;
	struct lock *lk;
	int lm_retry;
	int rv;

	/*
	 * handle version updates for ex locks
	 * (new version will be written by unlock)
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		if (act->op == LD_OP_UPDATE) {
			rv = res_update(ls, r, act);
			act->result = rv;
			list_del(&act->list);
			add_client_result(act);
		}
	}

	/*
	 * handle explicit unlock actions
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		/* lock requests for iv/nl modes are invalid */
		if ((act->op == LD_OP_LOCK) &&
		    (act->mode == LD_LK_IV || act->mode == LD_LK_NL)) {
			act->result = -EINVAL;
			list_del(&act->list);
			add_client_result(act);
		}

		if (act->op == LD_OP_LOCK && act->mode == LD_LK_UN) {
			rv = res_unlock(ls, r, act);

			if (rv == -ENOENT && (act->flags & LD_AF_UNLOCK_CANCEL))
				rv = res_cancel(ls, r, act);

			/*
			 * possible unlock results:
			 * 0: unlock succeeded
			 * -ECANCELED: cancel succeeded
			 * -ENOENT: nothing to unlock or cancel
			 */

			act->result = rv;
			list_del(&act->list);
			add_client_result(act);
		}
	}

	/*
	 * handle implicit unlocks due to client exit,
	 * also clear any outstanding actions for the client
	 */

	list_for_each_entry(act_close, act_close_list, list) {
		/* results ignored: the closing client is gone */
		res_unlock(ls, r, act_close);
		res_cancel(ls, r, act_close);
	}

	/*
	 * handle freeing a lock for an lv that has been removed
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		if (act->op == LD_OP_FREE && act->rt == LD_RT_LV) {
			log_debug("S %s R %s free_lv", ls->name, r->name);
			rv = free_lv(ls, r);
			act->result = rv;
			list_del(&act->list);
			add_client_result(act);
			goto r_free;

		}
	}

	/*
	 * handle enable/disable
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		if (act->op == LD_OP_ENABLE || act->op == LD_OP_DISABLE) {
			rv = res_able(ls, r, act);
			act->result = rv;
			list_del(&act->list);
			add_client_result(act);
		}

		/*
		 * NOTE(review): two issues here.  If act->op is neither
		 * ENABLE nor DISABLE, rv is read uninitialized (or stale
		 * from an earlier loop) because !rv is evaluated before
		 * the op test.  And when the branch above ran, act has
		 * already been handed to add_client_result(), so reading
		 * act->op here may race with the consumer of
		 * client_results.  Checking the op and saving it before
		 * the handoff looks necessary — confirm and fix.
		 */
		if (!rv && act->op == LD_OP_DISABLE) {
			log_debug("S %s R %s free disabled", ls->name, r->name);
			goto r_free;
		}
	}

	/*
	 * transient requests on existing transient locks
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		if (act->flags & LD_AF_PERSISTENT)
			continue;

		lk = find_lock_client(r, act->client_id);
		if (!lk)
			continue;

		if (lk->mode != act->mode) {
			/* convert below */
			/*
			act->result = -EEXIST;
			list_del(&act->list);
			add_client_result(act);
			*/
			continue;
		} else {
			/* success */
			act->result = -EALREADY;
			list_del(&act->list);
			add_client_result(act);
		}
	}

	/*
	 * persistent requests on existing persistent locks
	 *
	 * persistent locks are not owned by a client, so any
	 * existing with matching mode satisfies a request.
	 * only one persistent lock is kept on a resource.
	 * a single "unowned" persistent lock satisfies
	 * any/multiple client requests for a persistent lock.
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		if (!(act->flags & LD_AF_PERSISTENT))
			continue;

		lk = find_lock_persistent(r);
		if (!lk)
			continue;

		if (lk->mode != act->mode) {
			/* convert below */
			/*
			act->result = -EEXIST;
			list_del(&act->list);
			add_client_result(act);
			*/
			continue;
		} else {
			/* success */
			act->result = -EALREADY;
			list_del(&act->list);
			add_client_result(act);
		}
	}

	/*
	 * transient requests with existing persistent locks
	 *
	 * Just grant the transient request and do not
	 * keep a record of it. Assume that the persistent
	 * lock will not go away while the transient lock
	 * is needed.
	 *
	 * TODO: define exactly when this can be used,
	 * because there are a number of cases where it
	 * will not work: updating version number (lv
	 * locks have none), ex locks from multiple
	 * clients will not conflict, explicit un of the
	 * transient lock will fail.
	 *
	 * This would be used when an ex, persistent lv lock
	 * exists from activation, and then something like
	 * lvextend asks for ex lock to change the lv. The
	 * lv could not be unlocked by deactivation while
	 * the lvextend was running.
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		if (act->flags & LD_AF_PERSISTENT)
			continue;

		lk = find_lock_persistent(r);
		if (!lk)
			continue;

		if ((lk->mode == LD_LK_EX) ||
		    (lk->mode == LD_LK_SH && act->mode == LD_LK_SH)) {
			act->result = 0;
			list_del(&act->list);
			add_client_result(act);
		} else {
			/* persistent lock is sh, transient request is ex */
			/* is this case needed? do a convert here? */
			log_debug("res_process %s existing persistent lock new transient", r->name);
			act->result = -EEXIST;
			list_del(&act->list);
			add_client_result(act);
		}
	}

	/*
	 * persistent requests with existing transient locks
	 *
	 * If a client requests a P lock for a T lock it already
	 * holds, we can just change T to P. Fail if the same
	 * happens for locks from different clients. Changing
	 * another client's lock from T to P may cause problems
	 * if that client tries to unlock or update version.
	 *
	 * This would be used in a case like vgchange --lock-vg ex vgname
	 * where a transient vg lock was acquired to read the vg,
	 * then the command wants to acquire a persistent lock.
	 * The command could instead unlock, then relock in the mode
	 * it wants, so this case may not be necessary. Or, lvmlockd
	 * could itself attempt a lock conversion by unlock+relock.
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		if (!(act->flags & LD_AF_PERSISTENT))
			continue;

		lk = find_lock_client(r, act->client_id);
		if (!lk)
			continue;

		if (lk->mode != act->mode) {
			/* TODO: convert and change to persistent? */
			log_debug("res_process %s existing transient lock new persistent", r->name);
			act->result = -EEXIST;
			list_del(&act->list);
			add_client_result(act);
		} else {
			/* promote the client's own transient lock in place */
			lk->flags |= LD_LF_PERSISTENT;
			lk->client_id = 0;
			act->result = 0;
			list_del(&act->list);
			add_client_result(act);
		}
	}

	/*
	 * convert mode of existing locks
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		if (act->flags & LD_AF_PERSISTENT)
			lk = find_lock_persistent(r);
		else
			lk = find_lock_client(r, act->client_id);
		if (!lk)
			continue;

		if (lk->mode == act->mode) {
			/* should never happen, should be found above */
			log_error("convert same mode");
			continue;
		}

		/* convert fails immediately, no EAGAIN retry */
		rv = res_convert(ls, r, lk, act);
		act->result = rv;
		list_del(&act->list);
		add_client_result(act);
	}

	/*
	 * Cases above are all requests addressed by existing locks.
	 * Below handles the rest. Transient and persistent are
	 * handled the same, except
	 * - if mode of existing lock is incompat with requested,
	 *   leave the act on r->actions
	 * - if r mode is EX, any lock action is blocked, just quit
	 */

	if (r->mode == LD_LK_EX)
		return;

	/*
	 * Retry a lock request that fails due to a lock conflict (-EAGAIN):
	 * if we have not exceeded max retries and lm sets lm_retry (sanlock
	 * transient conflicts from shared lock implementation), or r type
	 * is gl or vg (transient real conflicts we want to hide from command).
	 * lv lock conflicts won't be transient so don't retry them.
	 */

	/*
	 * r mode is SH or UN, pass lock-sh actions to lm
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		/* grant in order, so break here */
		if (act->op == LD_OP_LOCK && act->mode == LD_LK_EX)
			break;

		if (act->op == LD_OP_LOCK && act->mode == LD_LK_SH) {
			lm_retry = 0;

			rv = res_lock(ls, r, act, &lm_retry);
			if ((rv == -EAGAIN) &&
			    (act->retries <= act->max_retries) &&
			    (lm_retry || (r->type != LD_RT_LV))) {
				/* leave act on list */
				log_debug("S %s R %s res_lock EAGAIN retry", ls->name, r->name);
				act->retries++;
				*retry_out = 1;
			} else {
				act->result = rv;
				list_del(&act->list);
				add_client_result(act);
			}
			/* EUNATCH: the lm lost the resource; drop it entirely */
			if (rv == -EUNATCH)
				goto r_free;
		}
	}

	/*
	 * r mode is SH, any ex lock action is blocked, just quit
	 */

	if (r->mode == LD_LK_SH)
		return;

	/*
	 * r mode is UN, pass lock-ex action to lm
	 */

	list_for_each_entry_safe(act, safe, &r->actions, list) {
		if (act->op == LD_OP_LOCK && act->mode == LD_LK_EX) {
			lm_retry = 0;

			rv = res_lock(ls, r, act, &lm_retry);
			if ((rv == -EAGAIN) &&
			    (act->retries <= act->max_retries) &&
			    (lm_retry || (r->type != LD_RT_LV))) {
				/* leave act on list */
				log_debug("S %s R %s res_lock EAGAIN retry", ls->name, r->name);
				act->retries++;
				*retry_out = 1;
			} else {
				act->result = rv;
				list_del(&act->list);
				add_client_result(act);
			}
			/* EUNATCH: the lm lost the resource; drop it entirely */
			if (rv == -EUNATCH)
				goto r_free;
			/* only one ex lock can be granted; stop here */
			break;
		}
	}

	return;

r_free:
	/* For the EUNATCH case it may be possible there are queued actions? */
	list_for_each_entry_safe(act, safe, &r->actions, list) {
		log_error("S %s R %s res_process r_free cancel %s client %d",
			  ls->name, r->name, op_str(act->op), act->client_id);
		act->result = -ECANCELED;
		list_del(&act->list);
		add_client_result(act);
	}
	log_debug("S %s R %s res_process free", ls->name, r->name);
	lm_rem_resource(ls, r);
	list_del(&r->list);
	free_resource(r);
}
+
+#define LOCKS_EXIST_ANY 1
+#define LOCKS_EXIST_GL 2
+#define LOCKS_EXIST_VG 3
+#define LOCKS_EXIST_LV 4
+
+/*
+ * Return 1 if the lockspace holds any lock matching the locks_do
+ * filter (any lock at all, or only locks on a gl/vg/lv resource),
+ * 0 otherwise.
+ */
+static int for_each_lock(struct lockspace *ls, int locks_do)
+{
+	struct resource *r;
+	struct lock *lk;
+
+	list_for_each_entry(r, &ls->resources, list) {
+		list_for_each_entry(lk, &r->locks, list) {
+			switch (locks_do) {
+			case LOCKS_EXIST_ANY:
+				return 1;
+			case LOCKS_EXIST_GL:
+				if (r->type == LD_RT_GL)
+					return 1;
+				break;
+			case LOCKS_EXIST_VG:
+				if (r->type == LD_RT_VG)
+					return 1;
+				break;
+			case LOCKS_EXIST_LV:
+				if (r->type == LD_RT_LV)
+					return 1;
+				break;
+			default:
+				break;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Forcibly drop all locks in the lockspace and cancel any queued
+ * actions.  Only called on the stop/teardown path (see the caller's
+ * warning about blindly dropping locks).
+ *
+ * free_vg: when set, lm_unlock is given LMUF_FREE_VG (vgremove case).
+ * Returns the number of locks that were cleared.
+ */
+static int clear_locks(struct lockspace *ls, int free_vg)
+{
+	struct resource *r, *r_safe;
+	struct lock *lk, *lk_safe;
+	struct action *act, *act_safe;
+	uint32_t lk_version;
+	uint32_t r_version;
+	int lk_count = 0;
+	int rv;
+
+	list_for_each_entry_safe(r, r_safe, &ls->resources, list) {
+		lk_version = 0;
+
+		/* drop every local lock on this resource, remembering the
+		   highest client-written version for the vg case below */
+		list_for_each_entry_safe(lk, lk_safe, &r->locks, list) {
+			lk_count++;
+
+			if (lk->flags & LD_LF_PERSISTENT)
+				log_error("S %s R %s clear lock persistent", ls->name, r->name);
+			else
+				log_error("S %s R %s clear lock client %d", ls->name, r->name, lk->client_id);
+
+			if (lk->version > lk_version)
+				lk_version = lk->version;
+
+			list_del(&lk->list);
+			free_lock(lk);
+		}
+
+		/* nothing held in the lock manager, no lm_unlock needed */
+		if (r->mode == LD_LK_UN)
+			goto r_free;
+
+		if ((r->type == LD_RT_GL) && (r->mode == LD_LK_EX)) {
+			/* gl held ex: bump the version written back via lm_unlock */
+			r->version++;
+			r_version = r->version;
+			log_debug("S %s R %s clear_locks r_version inc %u",
+				  ls->name, r->name, r_version);
+
+		} else if ((r->type == LD_RT_VG) && (r->mode == LD_LK_EX) && (lk_version > r->version)) {
+			/* vg held ex: propagate the newest client-supplied version */
+			r->version = lk_version;
+			r_version = r->version;
+			log_debug("S %s R %s clear_locks r_version new %u",
+				  ls->name, r->name, r_version);
+
+		} else {
+			/* r_version 0 means no version update in the lm */
+			r_version = 0;
+		}
+
+		rv = lm_unlock(ls, r, NULL, r_version, 0, free_vg ? LMUF_FREE_VG : 0);
+		if (rv < 0) {
+			/* should never happen */
+			log_error("S %s R %s clear_locks free %d lm unlock error %d",
+				  ls->name, r->name, free_vg, rv);
+		}
+
+		/* fail any actions still queued on this resource */
+		list_for_each_entry_safe(act, act_safe, &r->actions, list) {
+			log_error("S %s R %s clear_locks cancel %s client %d",
+				  ls->name, r->name, op_str(act->op), act->client_id);
+			act->result = -ECANCELED;
+			list_del(&act->list);
+			add_client_result(act);
+		}
+ r_free:
+		log_debug("S %s R %s free", ls->name, r->name);
+		lm_rem_resource(ls, r);
+		list_del(&r->list);
+		free_resource(r);
+	}
+
+	return lk_count;
+}
+
+/*
+ * find and return the resource that is referenced by the action
+ * - there is a single gl resource per lockspace
+ * - there is a single vg resource per lockspace
+ * - there can be many lv resources per lockspace, compare names
+ */
+
+static struct resource *find_resource_act(struct lockspace *ls,
+					  struct action *act,
+					  int nocreate)
+{
+	struct resource *r;
+
+	/* gl and vg resources are unique per lockspace, so matching the
+	   type is enough; lv resources additionally need a name match */
+	list_for_each_entry(r, &ls->resources, list) {
+		if (r->type != act->rt)
+			continue;
+
+		if (r->type == LD_RT_GL || r->type == LD_RT_VG)
+			return r;
+
+		if (r->type == LD_RT_LV && !strcmp(r->name, act->lv_name))
+			return r;
+	}
+
+	if (nocreate)
+		return NULL;
+
+	/* not found: create a new unlocked resource for this action */
+	if (!(r = alloc_resource()))
+		return NULL;
+
+	r->type = act->rt;
+	r->mode = LD_LK_UN;
+
+	switch (r->type) {
+	case LD_RT_GL:
+		strncpy(r->name, R_NAME_GL, MAX_NAME);
+		break;
+	case LD_RT_VG:
+		strncpy(r->name, R_NAME_VG, MAX_NAME);
+		break;
+	case LD_RT_LV:
+		strncpy(r->name, act->lv_name, MAX_NAME);
+		break;
+	default:
+		break;
+	}
+
+	INIT_LIST_HEAD(&r->locks);
+	INIT_LIST_HEAD(&r->actions);
+
+	list_add_tail(&r->list, &ls->resources);
+
+	return r;
+}
+
+/* Remove and free every resource belonging to the lockspace. */
+static void free_ls_resources(struct lockspace *ls)
+{
+	struct resource *r;
+
+	while (!list_empty(&ls->resources)) {
+		r = list_first_entry(&ls->resources, struct resource, list);
+		lm_rem_resource(ls, r);
+		list_del(&r->list);
+		free_resource(r);
+	}
+}
+
+/*
+ * Process actions queued for this lockspace by
+ * client_recv_action / add_lock_action.
+ *
+ * The lockspace_thread can touch its own ls struct without holding
+ * lockspaces_mutex until it sets ls->thread_done, after which it
+ * cannot touch ls without holding lockspaces_mutex.
+ */
+
+#define LOCK_RETRY_MS 1000 /* milliseconds to delay between retry */
+
+/*
+ * Per-lockspace thread: joins the lm lockspace, then loops processing
+ * actions queued on ls->actions until thread_stop is set, after which
+ * it clears remaining locks, leaves the lm lockspace, and flushes any
+ * leftover actions back to clients as results.
+ */
+static void *lockspace_thread_main(void *arg_in)
+{
+	struct lockspace *ls = arg_in;
+	struct resource *r, *r2;
+	struct action *add_act, *act, *safe;
+	struct list_head tmp_act;
+	struct list_head act_close;
+	int free_vg = 0;
+	int error = 0;
+	int retry;
+	int rv;
+
+	INIT_LIST_HEAD(&act_close);
+
+	/* first action may be client add */
+	pthread_mutex_lock(&ls->mutex);
+	act = NULL;
+	add_act = NULL;
+	if (!list_empty(&ls->actions)) {
+		act = list_first_entry(&ls->actions, struct action, list);
+		if (act->op == LD_OP_START) {
+			add_act = act;
+			list_del(&add_act->list);
+		}
+	}
+	pthread_mutex_unlock(&ls->mutex);
+
+	log_debug("S %s lm_add_lockspace %s", ls->name, lm_str(ls->lm_type));
+
+	if (add_act && !(add_act->flags & LD_AF_WAIT)) {
+		/* send partial join result back to client */
+		add_act->result = 0;
+		pthread_mutex_lock(&client_mutex);
+		list_add_tail(&add_act->list, &client_results);
+		pthread_cond_signal(&client_cond);
+		pthread_mutex_unlock(&client_mutex);
+		add_act = NULL;
+	}
+
+	/* the lm join can take a while */
+
+	error = lm_add_lockspace(ls, add_act);
+
+	log_debug("S %s lm_add_lockspace done %d", ls->name, error);
+
+	/* a second sanlock lockspace with gl enabled means duplicate gls */
+	if (ls->sanlock_gl_enabled && gl_lsname_sanlock[0] &&
+	    strcmp(ls->name, gl_lsname_sanlock))
+		sanlock_gl_dup = 1;
+
+	if (add_act) {
+		/* send synchronous join result back to client */
+		add_act->result = error;
+		pthread_mutex_lock(&client_mutex);
+		list_add_tail(&add_act->list, &client_results);
+		pthread_cond_signal(&client_cond);
+		pthread_mutex_unlock(&client_mutex);
+	}
+
+	pthread_mutex_lock(&ls->mutex);
+	if (error) {
+		ls->thread_stop = 1;
+		ls->create_fail = 1;
+	} else {
+		ls->create_done = 1;
+	}
+	pthread_mutex_unlock(&ls->mutex);
+
+	if (error)
+		goto out_act;
+
+	/* main loop: wait for work, distribute actions to resources,
+	   then process each resource */
+	while (1) {
+		pthread_mutex_lock(&ls->mutex);
+		while (!ls->thread_work) {
+			if (ls->thread_stop) {
+				pthread_mutex_unlock(&ls->mutex);
+				goto out_rem;
+			}
+			pthread_cond_wait(&ls->cond, &ls->mutex);
+		}
+
+		/* client thread queues actions on ls->actions, we move
+		   ls->actions to r->actions, then process the resources */
+
+		while (1) {
+			if (list_empty(&ls->actions)) {
+				ls->thread_work = 0;
+				break;
+			}
+
+			act = list_first_entry(&ls->actions, struct action, list);
+
+			if (sanlock_gl_dup && ls->sanlock_gl_enabled)
+				act->flags |= LD_AF_DUP_GL_LS;
+
+			if (act->op == LD_OP_STOP) {
+				ls->thread_work = 0;
+				break;
+			}
+
+			if (act->op == LD_OP_FREE && act->rt == LD_RT_VG) {
+				/* vgremove */
+				log_debug("S %s checking for lockspace hosts", ls->name);
+				rv = lm_hosts(ls, 1);
+				if (rv) {
+					/*
+					 * Checking for hosts here in addition to after the
+					 * main loop allows vgremove to fail and be rerun
+					 * after the ls is stopped on other hosts.
+					 */
+					log_error("S %s lockspace hosts %d", ls->name, rv);
+					list_del(&act->list);
+					act->result = -EBUSY;
+					add_client_result(act);
+					continue;
+				}
+				ls->thread_work = 0;
+				ls->thread_stop = 1;
+				free_vg = 1;
+				break;
+			}
+
+			list_del(&act->list);
+
+			/* applies to all resources */
+			if (act->op == LD_OP_CLOSE) {
+				list_add(&act->list, &act_close);
+				continue;
+			}
+
+			/*
+			 * Find the specific resource this action refers to;
+			 * creates resource if not found.
+			 */
+
+			r = find_resource_act(ls, act, (act->op == LD_OP_FREE) ? 1 : 0);
+			if (!r) {
+				act->result = (act->op == LD_OP_FREE) ? -ENOENT : -ENOMEM;
+				add_client_result(act);
+				continue;
+			}
+
+			list_add_tail(&act->list, &r->actions);
+
+			log_debug("S %s R %s action %s %s", ls->name, r->name,
+				  op_str(act->op), mode_str(act->mode));
+		}
+		pthread_mutex_unlock(&ls->mutex);
+
+		retry = 0;
+
+		list_for_each_entry_safe(r, r2, &ls->resources, list)
+			res_process(ls, r, &act_close, &retry);
+
+		list_for_each_entry_safe(act, safe, &act_close, list) {
+			list_del(&act->list);
+			free_action(act);
+		}
+
+		/* a lock action hit a transient conflict; re-run after a delay */
+		if (retry) {
+			ls->thread_work = 1;
+			usleep(LOCK_RETRY_MS * 1000);
+		}
+	}
+
+out_rem:
+	log_debug("S %s stopping", ls->name);
+
+	/*
+	 * For sanlock, we need to unlock any existing locks
+	 * before removing the lockspace, otherwise the sanlock
+	 * daemon will kill us when the lockspace goes away.
+	 * For dlm, we leave with force, so all locks will
+	 * automatically be dropped when we leave the lockspace,
+	 * so unlocking all before leaving could be skipped.
+	 *
+	 * Blindly dropping all existing locks must only be
+	 * allowed in emergency/force situations, otherwise it's
+	 * obviously dangerous, since the lock holders are still
+	 * operating under the assumption that they hold the lock.
+	 *
+	 * For vgremove of a sanlock vg, the vg lock will be held,
+	 * and possibly the gl lock if this vg holds the gl.
+	 * sanlock vgremove wants to unlock-rename these locks.
+	 */
+
+	log_debug("S %s clearing locks", ls->name);
+
+	rv = clear_locks(ls, free_vg);
+
+	/*
+	 * Tell any other hosts in the lockspace to leave it
+	 * before we remove it (for vgremove).  We do this
+	 * before leaving the lockspace ourself because we
+	 * need to be in the lockspace to see others.
+	 */
+
+	if (free_vg) {
+		log_debug("S %s checking for lockspace hosts", ls->name);
+		rv = lm_hosts(ls, 1);
+		if (rv)
+			log_error("S %s other lockspace hosts %d", ls->name, rv);
+	}
+
+	/*
+	 * Leave the lockspace.
+	 */
+
+	rv = lm_rem_lockspace(ls, NULL, free_vg);
+
+	log_debug("S %s rem_lockspace done %d", ls->name, rv);
+
+out_act:
+	/*
+	 * Move remaining actions to results; this will usually (always?)
+	 * be only the stop action.
+	 */
+	INIT_LIST_HEAD(&tmp_act);
+
+	pthread_mutex_lock(&ls->mutex);
+	list_for_each_entry_safe(act, safe, &ls->actions, list) {
+		if (act->op == LD_OP_FREE)
+			act->result = 0;
+		else if (act->op == LD_OP_STOP)
+			act->result = 0;
+		else
+			act->result = -ENOLS;
+		list_del(&act->list);
+		list_add_tail(&act->list, &tmp_act);
+	}
+	pthread_mutex_unlock(&ls->mutex);
+
+	/* hand the collected results to the client thread */
+	pthread_mutex_lock(&client_mutex);
+	list_for_each_entry_safe(act, safe, &tmp_act, list) {
+		list_del(&act->list);
+		list_add_tail(&act->list, &client_results);
+	}
+	pthread_cond_signal(&client_cond);
+	pthread_mutex_unlock(&client_mutex);
+
+	/* after thread_done is set we may no longer touch ls without
+	   holding lockspaces_mutex (see comment above this function) */
+	pthread_mutex_lock(&lockspaces_mutex);
+	ls->thread_done = 1;
+	pthread_mutex_unlock(&lockspaces_mutex);
+
+	/*
+	 * worker_thread will join this thread, and move the
+	 * ls struct from lockspaces list to lockspaces_inactive.
+	 */
+	pthread_mutex_lock(&worker_mutex);
+	worker_wake = 1;
+	pthread_cond_signal(&worker_cond);
+	pthread_mutex_unlock(&worker_mutex);
+
+	return NULL;
+}
+
+/*
+ * Process queued lock actions for a local vg (a lockspace struct
+ * reused for local vgs; it carries a single vg resource and no lock
+ * manager).  Unlock actions are handled first, then lock requests are
+ * granted in queue order until one is incompatible with what is
+ * currently held.
+ */
+static void process_local_ls(struct lockspace *ls)
+{
+	struct resource *r = list_first_entry(&ls->resources, struct resource, list);
+	struct action *act, *act_safe;
+	struct lock *lk;
+	int prev_mode;
+	int result;
+
+	/* handle unlock actions first: drop this client's lock if found */
+	list_for_each_entry_safe(act, act_safe, &ls->actions, list) {
+		if (act->op != LD_OP_LOCK)
+			continue;
+		if (act->mode != LD_LK_UN)
+			continue;
+
+		result = -ENOENT;
+
+		list_for_each_entry(lk, &r->locks, list) {
+			if (lk->client_id != act->client_id)
+				continue;
+			list_del(&lk->list);
+			free_lock(lk);
+			result = 0;
+			break;
+		}
+
+		act->result = result;
+		list_del(&act->list);
+		add_client_result(act);
+	}
+
+	/* prev_mode tracks the mode currently granted on r */
+	prev_mode = LD_LK_UN;
+
+	if (!list_empty(&r->locks)) {
+		lk = list_first_entry(&r->locks, struct lock, list);
+		/* an ex holder blocks everything; nothing more to grant */
+		if (lk->mode == LD_LK_EX)
+			return;
+
+		/* sanity check */
+		if (lk->mode != LD_LK_SH) {
+			log_error("process_local_ls bad lk mode %d", lk->mode);
+			return;
+		}
+
+		prev_mode = LD_LK_SH;
+	}
+
+	/* grant lock requests until we reach one that's one not compat with prev_mode */
+
+	list_for_each_entry_safe(act, act_safe, &ls->actions, list) {
+
+		if (act->mode == LD_LK_EX && prev_mode == LD_LK_UN) {
+			/* grant it and return because no more can be granted */
+
+			lk = alloc_lock();
+			if (!lk)
+				return;
+				/* NOTE(review): on alloc failure the act stays
+				   queued — presumably retried on the next pass;
+				   TODO confirm */
+
+			lk->client_id = act->client_id;
+			lk->mode = LD_LK_EX;
+			list_add(&lk->list, &r->locks);
+
+			act->result = 0;
+			list_del(&act->list);
+			add_client_result(act);
+			return;
+
+		} else if (act->mode == LD_LK_EX && prev_mode == LD_LK_SH) {
+
+			/* we'll process this act and try to grant it the
+			   next we come through here. */
+
+			return;
+
+		} else if (act->mode == LD_LK_SH) {
+			prev_mode = LD_LK_SH;
+
+			/* grant it and continue */
+
+			lk = alloc_lock();
+			if (!lk)
+				return;
+
+			lk->client_id = act->client_id;
+			lk->mode = LD_LK_SH;
+			list_add_tail(&lk->list, &r->locks);
+
+			act->result = 0;
+			list_del(&act->list);
+			add_client_result(act);
+		}
+	}
+}
+
+/*
+ * Drop every lock and pending action owned by client_id in every
+ * local vg (each local vg holds a single vg resource).
+ */
+static void purge_local_client(uint32_t client_id)
+{
+	struct lock *lk, *lk2;
+	struct action *act, *act2;
+	struct lockspace *ls;
+	struct resource *r;
+
+	list_for_each_entry(ls, &local_vgs, list) {
+		r = list_first_entry(&ls->resources, struct resource, list);
+
+		list_for_each_entry_safe(lk, lk2, &r->locks, list) {
+			if (lk->client_id == client_id) {
+				list_del(&lk->list);
+				free_lock(lk);
+			}
+		}
+
+		list_for_each_entry_safe(act, act2, &ls->actions, list) {
+			if (act->client_id == client_id) {
+				list_del(&act->list);
+				free_action(act);
+			}
+		}
+	}
+}
+
+/*
+ * Thread handling lock requests for local vgs (no lock manager
+ * involved).  Waits for local_thread_work, first purges locks and
+ * actions of closed clients, then processes pending actions in each
+ * local vg.  Exits when local_thread_stop is set.
+ */
+static void *local_thread_main(void *arg_in)
+{
+	struct lockspace *ls;
+	struct action *act, *act_safe;
+
+	while (1) {
+		pthread_mutex_lock(&local_thread_mutex);
+		while (!local_thread_work) {
+			if (local_thread_stop) {
+				pthread_mutex_unlock(&local_thread_mutex);
+				goto out;
+			}
+			pthread_cond_wait(&local_thread_cond, &local_thread_mutex);
+		}
+
+		/* close actions: clear all locks and actions in all lockspaces for client */
+		list_for_each_entry_safe(act, act_safe, &local_thread_actions, list) {
+			if (act->op != LD_OP_CLOSE)
+				continue;
+			purge_local_client(act->client_id);
+			list_del(&act->list);
+			free_action(act);
+		}
+
+		list_for_each_entry(ls, &local_vgs, list) {
+			if (list_empty(&ls->actions))
+				continue;
+			process_local_ls(ls);
+		}
+
+		local_thread_work = 0;
+		pthread_mutex_unlock(&local_thread_mutex);
+	}
+out:
+	return NULL;
+}
+
+/* Return nonzero when no lockspaces exist (thread-safe check). */
+int lockspaces_empty(void)
+{
+	int empty;
+
+	pthread_mutex_lock(&lockspaces_mutex);
+	empty = list_empty(&lockspaces);
+	pthread_mutex_unlock(&lockspaces_mutex);
+
+	return empty;
+}
+
+/*
+ * lockspaces_mutex is locked
+ *
+ * When duplicate sanlock global locks have been seen,
+ * this function has a secondary job of counting the
+ * number of lockspaces that exist with the gl enabled,
+ * with the side effect of setting sanlock_gl_dup back to
+ * zero when the duplicates have been removed/disabled.
+ */
+
+static struct lockspace *find_lockspace_name(char *ls_name)
+{
+	struct lockspace *ls_found = NULL;
+	struct lockspace *ls;
+	int gl_count = 0;
+
+	list_for_each_entry(ls, &lockspaces, list) {
+		if (!strcmp(ls->name, ls_name))
+			ls_found = ls;
+
+		/* fast path: no duplicate-gl bookkeeping needed, return early */
+		if (!sanlock_gl_dup && ls_found)
+			return ls_found;
+
+		/* secondary job (see comment above): keep scanning to count
+		   lockspaces with the gl enabled */
+		if (sanlock_gl_dup && ls->sanlock_gl_enabled)
+			gl_count++;
+	}
+
+	/* this is the side effect we want from this function */
+	if (sanlock_gl_dup && gl_count < 2)
+		sanlock_gl_dup = 0;
+
+	return ls_found;
+}
+
+/* local_thread_mutex is locked */
+/* local_thread_mutex is locked */
+static struct lockspace *find_local_vg(const char *name, const char *uuid)
+{
+	struct lockspace *ls;
+	int have_name = (name && name[0]);
+	int have_uuid = (uuid && uuid[0]);
+
+	/* either a non-empty name or a non-empty uuid may match */
+	list_for_each_entry(ls, &local_vgs, list) {
+		if (have_name && !strcmp(ls->vg_name, name))
+			return ls;
+		if (have_uuid && !strcmp(ls->vg_uuid, uuid))
+			return ls;
+	}
+
+	return NULL;
+}
+
+/*
+ * vgcreate/vgremove of local vgs do add_local/rem_local, which
+ * updates local_vgs on the local host.  Other hosts' local_vgs
+ * are updated with these changes asynchronously when they see
+ * the n_version change in the global lock lvb, and do
+ * update_local_vgs.
+ *
+ * So, the global lock n_version and update_local_vgs are about
+ * asynchronous propagation of add_local/rem_local to other hosts.
+ * Because these are local vgs, they are not used concurrently
+ * by multiple hosts, but will be used only by the host in the
+ * vg's system_id, which is doing the add_local/rem_local.
+ *
+ * A local vg created on host1 does not need to be immediately
+ * usable on host2, and is not locked between hosts anyway.
+ * So, returning a not-found error on host2 for a while is ok.
+ * Once host2 asynchronously updates its local_vgs, it will know
+ * about the new local vg created on host1.  Then lockd_vg on
+ * this vg would change from "not found" ENOLS (as above) to
+ * -EOTHERVG (or ELOCALVG if no sysid is set, but hosts shouldn't
+ * be actively sharing a vg with no lock_type, so an async delay
+ * in this case is not a problem).
+ */
+
+/* local_thread_mutex is locked */
+/* local_thread_mutex is locked */
+/*
+ * Register (or update) a local vg entry in local_vgs.
+ * vg_name and vg_uuid are required; vg_sysid may be NULL or "none".
+ * For an existing entry only the sysid is refreshed.
+ */
+static void add_local_vg(const char *vg_name, const char *vg_uuid, const char *vg_sysid)
+{
+	struct lockspace *ls;
+	struct resource *r;
+
+	/* not really a lockspace, we're just reusing the struct */
+
+	if (!vg_name || !vg_uuid || !vg_name[0] || !vg_uuid[0]) {
+		log_error("add_local_vg incomplete %s %s",
+			  vg_name ? vg_name : "no-name",
+			  vg_uuid ? vg_uuid : "no-uuid");
+
+		return;
+	}
+
+	ls = find_local_vg(vg_name, vg_uuid);
+	if (ls) {
+		/* existing entry: only the sysid may need updating */
+		if (vg_sysid && ls->vg_sysid[0] && !strcmp(vg_sysid, "none")) {
+			log_debug("add_local_vg %s %s clear sysid", vg_name, vg_uuid);
+			memset(&ls->vg_sysid, 0, MAX_NAME);
+		} else if (vg_sysid && strcmp(ls->vg_sysid, vg_sysid)) {
+			log_debug("add_local_vg %s %s update %s", vg_name, vg_uuid, vg_sysid);
+			strncpy(ls->vg_sysid, vg_sysid, MAX_NAME);
+		}
+		return;
+	}
+
+	ls = malloc(sizeof(struct lockspace));
+	if (!ls)
+		return;
+
+	r = alloc_resource();
+	if (!r) {
+		free(ls);
+		return;
+	}
+
+	memset(ls, 0, sizeof(struct lockspace));
+	strncpy(ls->vg_name, vg_name, MAX_NAME);
+	strncpy(ls->vg_uuid, vg_uuid, 64);
+
+	/*
+	 * vg_sysid may be NULL (the log below already allows for that);
+	 * passing NULL to strncpy is undefined behavior, so guard it.
+	 */
+	if (vg_sysid)
+		strncpy(ls->vg_sysid, vg_sysid, MAX_NAME);
+
+	INIT_LIST_HEAD(&ls->actions);
+	INIT_LIST_HEAD(&ls->resources);
+
+	/* each local vg carries a single vg resource */
+	r->type = LD_RT_VG;
+	r->mode = LD_LK_UN;
+	strncpy(r->name, R_NAME_VG, MAX_NAME);
+	INIT_LIST_HEAD(&r->locks);
+	INIT_LIST_HEAD(&r->actions);
+	list_add_tail(&r->list, &ls->resources);
+
+	list_add(&ls->list, &local_vgs);
+
+	log_debug("add_local_vg %s %s %s", vg_name, vg_uuid, vg_sysid ?: "");
+}
+
+/* local_thread_mutex is locked */
+/*
+ * Remove a local vg entry (vgremove): free its locks, its single vg
+ * resource, any queued actions, then the entry itself.
+ */
+static void rem_local_vg(const char *vg_name, const char *vg_uuid)
+{
+	struct lockspace *ls;
+	struct resource *r;
+	struct lock *lk, *lk_safe;
+	struct action *act, *act_safe;
+
+	log_debug("rem_local_vg %s %s", vg_name, vg_uuid);
+
+	ls = find_local_vg(vg_name, vg_uuid);
+	if (!ls)
+		return;
+
+	/* a local vg always holds exactly one vg resource (see add_local_vg) */
+	r = list_first_entry(&ls->resources, struct resource, list);
+
+	list_for_each_entry_safe(lk, lk_safe, &r->locks, list) {
+		list_del(&lk->list);
+		free_lock(lk);
+	}
+
+	list_del(&r->list);
+	free_resource(r);
+
+	list_for_each_entry_safe(act, act_safe, &ls->actions, list) {
+		list_del(&act->list);
+		free_action(act);
+	}
+
+	list_del(&ls->list);
+	free(ls);
+}
+
+/* Return the entry in head whose vg name and uuid both match, or NULL. */
+static struct lockspace *find_update_vg(struct list_head *head, const char *name, const char *uuid)
+{
+	struct lockspace *ls;
+
+	list_for_each_entry(ls, head, list) {
+		if (strcmp(ls->vg_name, name))
+			continue;
+		if (strcmp(ls->vg_uuid, uuid))
+			continue;
+		return ls;
+	}
+
+	return NULL;
+}
+
+/*
+ * called by worker_thread. the work action is queued when we see that another
+ * host has changed the global lock n_version, which means they have changed the
+ * global vg name list, so our local_vgs list may need updating.
+ *
+ * Handle the issue where a lot of devices all appear together,
+ * pvscan is run for each of them to populate lvmetad, each pvscan
+ * triggers an update_local, and we end up calling this function many
+ * times in a row. We only really need/want one update_local when all
+ * the pvscans are done, and this is a rough approximation of that.
+ * If we're asked to do update_local within one second of the previous run,
+ * then push it off to the delayed work list, so it will be called in a
+ * couple seconds. Ignore more update_local actions while a delayed
+ * update_local action exists. IOW, if we see two quick back to back
+ * update_local actions, delay the second one for a couple seconds in
+ * an attempt to buffer more of them which can be eliminated.
+ */
+
+static uint64_t last_update_local;
+
+/*
+ * Rebuild local_vgs from lvmetad's vg list.  Called by worker_thread
+ * (see the comment above) when another host has changed the global vg
+ * name list.  Returns -EAGAIN when rate-limited, otherwise 0.
+ */
+static int work_update_local_vgs(void)
+{
+	struct list_head update_vgs;
+	daemon_reply reply;
+	struct dm_config_node *cn;
+	struct dm_config_node *metadata;
+	struct lockspace *lls, *uls, *safe;
+	const char *vg_name;
+	const char *vg_uuid;
+	const char *lock_type;
+	const char *system_id;
+	int mutex_unlocked = 0;
+
+	INIT_LIST_HEAD(&update_vgs);
+
+	/* rate limit back-to-back requests (many pvscans at once);
+	   caller defers the work when it sees -EAGAIN */
+	if (monotime() - last_update_local <= 1)
+		return -EAGAIN;
+
+	last_update_local = monotime();
+
+	/* get a list of all vg uuids from lvmetad */
+
+	pthread_mutex_lock(&lvmetad_mutex);
+	reply = daemon_send_simple(lvmetad_handle, "vg_list",
+				   "token = %s", "skip",
+				   NULL);
+
+	if (!(cn = dm_config_find_node(reply.cft->root, "volume_groups"))) {
+		log_error("work_update_local no vgs");
+		/* fix: this reply was leaked on the error paths */
+		daemon_reply_destroy(reply);
+		goto out;
+	}
+
+	/* create an update_vgs list of all vg uuids */
+
+	for (cn = cn->child; cn; cn = cn->sib) {
+		vg_uuid = cn->key;
+
+		uls = malloc(sizeof(struct lockspace));
+		if (!uls) {
+			daemon_reply_destroy(reply);
+			goto out;
+		}
+
+		memset(uls, 0, sizeof(struct lockspace));
+		strncpy(uls->vg_uuid, vg_uuid, 64);
+		list_add_tail(&uls->list, &update_vgs);
+		log_debug("work_update_local %s", vg_uuid);
+	}
+
+	daemon_reply_destroy(reply);
+
+	/* get vg_name and system_id for each vg uuid entry in update_vgs */
+
+	list_for_each_entry(uls, &update_vgs, list) {
+		reply = daemon_send_simple(lvmetad_handle, "vg_lookup",
+					   "token = %s", "skip",
+					   "uuid = %s", uls->vg_uuid,
+					   NULL);
+
+		vg_name = daemon_reply_str(reply, "name", NULL);
+		if (!vg_name) {
+			log_error("work_update_local %s no name", uls->vg_uuid);
+			goto next;
+		}
+
+		strncpy(uls->vg_name, vg_name, MAX_NAME);
+
+		metadata = dm_config_find_node(reply.cft->root, "metadata");
+		if (!metadata) {
+			log_error("work_update_local %s name %s no metadata",
+				  uls->vg_uuid, uls->vg_name);
+			goto next;
+		}
+
+		lock_type = dm_config_find_str(metadata, "metadata/lock_type", NULL);
+		uls->lm_type = str_to_lm(lock_type);
+
+		system_id = dm_config_find_str(metadata, "metadata/system_id", NULL);
+		if (system_id)
+			strncpy(uls->vg_sysid, system_id, MAX_NAME);
+next:
+		daemon_reply_destroy(reply);
+
+		log_debug("work_update_local %s lock_type %s %d sysid %s %s",
+			  uls->vg_name, lock_type ?: "NULL", uls->lm_type, uls->vg_sysid, uls->vg_uuid);
+
+		/* bail out on a partial entry (lookup failed above) */
+		if (!vg_name || !metadata)
+			goto out;
+	}
+	pthread_mutex_unlock(&lvmetad_mutex);
+	mutex_unlocked = 1;
+
+	/* remove local_vgs entries that no longer exist in update_vgs */
+
+	pthread_mutex_lock(&local_thread_mutex);
+
+	list_for_each_entry_safe(lls, safe, &local_vgs, list) {
+		uls = find_update_vg(&update_vgs, lls->vg_name, lls->vg_uuid);
+		if (!uls) {
+			log_debug("work_update_local remove local_vg %s %s",
+				  lls->vg_name, lls->vg_uuid);
+			list_del(&lls->list);
+			free(lls);
+
+		} else if (uls->lm_type != LD_LM_NONE) {
+			/* the vg acquired a lock_type; it is no longer local */
+			log_debug("work_update_local remove local_vg %s %s new lm_type %d",
+				  lls->vg_name, lls->vg_uuid, uls->lm_type);
+			list_del(&lls->list);
+			free(lls);
+		}
+	}
+
+	/* add local_vgs entries for any new non-lockd entries in update_vgs */
+
+	list_for_each_entry_safe(uls, safe, &update_vgs, list) {
+		if (uls->lm_type != LD_LM_NONE)
+			continue;
+		/* add_local_vg doesn't add any that already exist, it may update sysid */
+		add_local_vg(uls->vg_name, uls->vg_uuid, uls->vg_sysid);
+	}
+	pthread_mutex_unlock(&local_thread_mutex);
+out:
+	/* free the temporary update_vgs list */
+	list_for_each_entry_safe(uls, safe, &update_vgs, list) {
+		list_del(&uls->list);
+		free(uls);
+	}
+
+	/* error paths above jump here with lvmetad_mutex still held */
+	if (!mutex_unlocked)
+		pthread_mutex_unlock(&lvmetad_mutex);
+
+	return 0;
+}
+
+/*
+ * TODO: we don't use the reply here, so it would be more
+ * efficient to send without waiting for a reply.
+ */
+
+/*
+ * Send set_vg_info with version 0 to lvmetad for this vg; presumably
+ * this marks the cached metadata as out of date so commands reread it
+ * — TODO confirm against lvmetad's set_vg_info handling.
+ */
+static void invalidate_lvmetad_vg(struct lockspace *ls)
+{
+	daemon_reply reply;
+
+	pthread_mutex_lock(&lvmetad_mutex);
+	reply = daemon_send_simple(lvmetad_handle, "set_vg_info",
+				   "token = %s", "skip",
+				   "uuid = %s", ls->vg_uuid,
+				   "version = %d", 0,
+				   NULL);
+	pthread_mutex_unlock(&lvmetad_mutex);
+	daemon_reply_destroy(reply);
+}
+
+/* TODO: handle lvm_<vg_name> longer than max lockspace name? Use vg uuid? */
+
+/*
+ * Build the lockspace name "lvm_<vg_name>" in ls_name.
+ * ls_name must have room for MAX_NAME+1 bytes (callers declare
+ * char ls_name[MAX_NAME+1]).
+ * Returns 0 on success, -1 if the name would not fit.
+ */
+static int vg_ls_name(const char *vg_name, char *ls_name)
+{
+	/* "lvm_" prefix is 4 chars; total must fit in MAX_NAME chars */
+	if (strlen(vg_name) + 4 > MAX_NAME) {
+		log_error("vg name too long %s", vg_name);
+		return -1;
+	}
+
+	/*
+	 * Size MAX_NAME+1 so a result of exactly MAX_NAME chars is not
+	 * silently truncated (snprintf reserves one byte for the NUL);
+	 * with size MAX_NAME the check above admitted names one char
+	 * longer than snprintf would actually write.
+	 */
+	snprintf(ls_name, MAX_NAME + 1, "lvm_%s", vg_name);
+	return 0;
+}
+
+/* TODO: add mutex for gl_lsname_ ? */
+
+/*
+ * Copy the global lockspace name (dlm or sanlock) into ls_name.
+ * Returns 0 on success, -1 if no global lock manager is in use.
+ */
+static int gl_ls_name(char *ls_name)
+{
+	const char *src = NULL;
+
+	if (gl_use_dlm)
+		src = gl_lsname_dlm;
+	else if (gl_use_sanlock)
+		src = gl_lsname_sanlock;
+
+	if (!src) {
+		log_error("gl_ls_name: global lockspace type unknown");
+		return -1;
+	}
+
+	memcpy(ls_name, src, MAX_NAME);
+	return 0;
+}
+
+/*
+ * When this function returns an error, the caller needs to deal
+ * with act (in the cases where act exists).
+ */
+
+/*
+ * Allocate a lockspace struct with its vg resource, register it on
+ * the lockspaces list, and start its lockspace_thread.
+ * Returns 0, -ENOMEM, -EEXIST (already running), -EAGAIN (previous
+ * instance still stopping), or a negative errno from pthread_create.
+ */
+static int add_lockspace_thread(const char *ls_name,
+				const char *vg_name,
+				const char *vg_uuid,
+				int lm_type, const char *vg_args,
+				struct action *act)
+{
+	struct lockspace *ls, *ls2;
+	struct resource *r;
+	uint32_t version = 0;
+	int rv;
+
+	if (act)
+		version = act->version;
+
+	log_debug("add_lockspace_thread %s %s version %u",
+		  lm_str(lm_type), ls_name, version);
+
+	ls = malloc(sizeof(struct lockspace));
+	if (!ls)
+		return -ENOMEM;
+
+	memset(ls, 0, sizeof(struct lockspace));
+
+	strncpy(ls->name, ls_name, MAX_NAME);
+	ls->lm_type = lm_type;
+
+	if (act)
+		ls->start_client_id = act->client_id;
+
+	if (vg_uuid)
+		strncpy(ls->vg_uuid, vg_uuid, 64);
+
+	if (vg_name)
+		strncpy(ls->vg_name, vg_name, MAX_NAME);
+
+	if (vg_args)
+		strncpy(ls->vg_args, vg_args, MAX_ARGS);
+
+	if (act)
+		ls->host_id = act->host_id;
+
+	pthread_mutex_init(&ls->mutex, NULL);
+	pthread_cond_init(&ls->cond, NULL);
+	INIT_LIST_HEAD(&ls->actions);
+	INIT_LIST_HEAD(&ls->resources);
+
+	/* every lockspace starts with its single vg resource */
+	r = alloc_resource();
+	if (!r) {
+		free(ls);
+		return -ENOMEM;
+	}
+
+	r->type = LD_RT_VG;
+	r->mode = LD_LK_UN;
+	r->version = version;
+	strncpy(r->name, R_NAME_VG, MAX_NAME);
+	INIT_LIST_HEAD(&r->locks);
+	INIT_LIST_HEAD(&r->actions);
+	list_add_tail(&r->list, &ls->resources);
+
+	pthread_mutex_lock(&lockspaces_mutex);
+	ls2 = find_lockspace_name(ls->name);
+	if (ls2) {
+		/* -EAGAIN: the old instance is still shutting down */
+		if (ls2->thread_stop)
+			rv = -EAGAIN;
+		else
+			rv = -EEXIST;
+		pthread_mutex_unlock(&lockspaces_mutex);
+		free_resource(r);
+		free(ls);
+		return rv;
+	}
+
+	/*
+	 * act will be null when this lockspace is added automatically/internally
+	 * and not by an explicit client action that wants a result.
+	 */
+	if (act)
+		list_add(&act->list, &ls->actions);
+
+	clear_lockspace_inactive(ls->name);
+
+	list_add_tail(&ls->list, &lockspaces);
+	pthread_mutex_unlock(&lockspaces_mutex);
+
+	rv = pthread_create(&ls->thread, NULL, lockspace_thread_main, ls);
+	if (rv) {
+		/*
+		 * pthread_create returns a positive error number on failure,
+		 * so the previous (rv < 0) test could never detect it and a
+		 * dead lockspace would have stayed on the list.
+		 */
+		pthread_mutex_lock(&lockspaces_mutex);
+		list_del(&ls->list);
+		pthread_mutex_unlock(&lockspaces_mutex);
+		/* detach act from the ls being freed; caller handles act */
+		if (act)
+			list_del(&act->list);
+		free_resource(r);
+		free(ls);
+		return -rv;
+	}
+
+	return 0;
+}
+
+/*
+ * There is no add_sanlock_global_lockspace or
+ * rem_sanlock_global_lockspace because with sanlock,
+ * the global lockspace is one of the vg lockspaces.
+ */
+
+static int add_dlm_global_lockspace(struct action *act)
+{
+	int rv;
+
+	if (gl_running_dlm)
+		return -EEXIST;
+
+	gl_running_dlm = 1;
+
+	/* Keep track of whether we automatically added the global ls
+	   (no client action), so we know to automatically remove it. */
+	gl_auto_dlm = act ? 0 : 1;
+
+	/*
+	 * There's a short period after which a previous gl lockspace thread
+	 * has set gl_running_dlm = 0, but before its ls struct has been
+	 * deleted, during which this add_lockspace_thread() can fail with
+	 * -EAGAIN.
+	 */
+	rv = add_lockspace_thread(gl_lsname_dlm, NULL, NULL, LD_LM_DLM, NULL, act);
+	if (rv < 0) {
+		log_error("add_dlm_global_lockspace add_lockspace_thread %d", rv);
+		gl_running_dlm = 0;
+		gl_auto_dlm = 0;
+	}
+
+	return rv;
+}
+
+/*
+ * If dlm gl lockspace is the only one left, then stop it.
+ * This is not used for an explicit rem_lockspace action from
+ * the client, only for auto remove.
+ */
+
+/*
+ * Stop the dlm global lockspace, but only when no other (non-stopping)
+ * lockspace remains.  Returns 0 on success, -EAGAIN when other
+ * lockspaces still exist, -ENOENT when the gl lockspace is not found.
+ */
+static int rem_dlm_global_lockspace(void)
+{
+	struct lockspace *ls, *ls_gl = NULL;
+	int others = 0;
+	int rv = 0;
+
+	pthread_mutex_lock(&lockspaces_mutex);
+	list_for_each_entry(ls, &lockspaces, list) {
+		if (!strcmp(ls->name, gl_lsname_dlm)) {
+			ls_gl = ls;
+			continue;
+		}
+		/* lockspaces already stopping don't count as "others" */
+		if (ls->thread_stop)
+			continue;
+		others++;
+		break;
+	}
+
+	if (others) {
+		rv = -EAGAIN;
+		goto out;
+	}
+
+	if (!ls_gl) {
+		rv = -ENOENT;
+		goto out;
+	}
+
+	/* ask the gl lockspace thread to stop itself */
+	ls = ls_gl;
+	pthread_mutex_lock(&ls->mutex);
+	ls->thread_stop = 1;
+	ls->thread_work = 1;
+	pthread_cond_signal(&ls->cond);
+	pthread_mutex_unlock(&ls->mutex);
+	rv = 0;
+out:
+	pthread_mutex_unlock(&lockspaces_mutex);
+	return rv;
+}
+
+/*
+ * When the first dlm lockspace is added for a vg,
+ * automatically add a separate dlm lockspace for the
+ * global lock if it hasn't been done explicitly.
+ * This is to make the dlm global lockspace work similarly to
+ * the sanlock global lockspace, which is "automatic" by
+ * nature of being one of the vg lockspaces.
+ *
+ * For sanlock, a separate lockspace is not used for
+ * the global lock, but the gl lock lives in a vg
+ * lockspace, (although it's recommended to create a
+ * special vg dedicated to holding the gl).
+ *
+ * N.B. for dlm, if this is an add+WAIT action for a vg
+ * lockspace, and this triggered the automatic addition
+ * of the global lockspace, then the action may complete
+ * for the vg ls add, while the gl ls add is still in
+ * progress. If the caller wants to ensure that the
+ * gl ls add is complete, they should explicitly add+WAIT
+ * the gl ls.
+ *
+ * If this function returns and error, the caller
+ * will queue the act with that error for the client.
+ */
+
+/*
+ * Start a lockspace for the gl (dlm only) or for a vg (see the long
+ * comment above).  On error the caller queues act with the error for
+ * the client.
+ */
+static int add_lockspace(struct action *act)
+{
+	struct lockspace *ls;
+	char ls_name[MAX_NAME+1];
+	int rv;
+
+	if (local_thread_only) {
+		log_error("add_lockspace not allowed local_thread_only");
+		return -EINVAL;
+	}
+
+	/*
+	 * This should not generally happen, but does happen when a vg
+	 * lock_type is changed from none to sanlock.
+	 */
+	pthread_mutex_lock(&local_thread_mutex);
+	ls = find_local_vg(act->vg_name, NULL);
+	if (ls) {
+		/* the vg is becoming a lockd vg; drop its local entry */
+		log_error("add_lockspace vg %s remove matching local_vg", act->vg_name);
+		list_del(&ls->list);
+		free_ls_resources(ls);
+		free(ls);
+	}
+	pthread_mutex_unlock(&local_thread_mutex);
+
+	memset(ls_name, 0, sizeof(ls_name));
+
+	if (act->rt == LD_RT_GL) {
+		/* an explicit gl lockspace exists only for dlm */
+		if (gl_use_dlm) {
+			rv = add_dlm_global_lockspace(act);
+			return rv;
+		} else {
+			return -EINVAL;
+		}
+	}
+
+	if (act->rt == LD_RT_VG) {
+		/* the first dlm vg lockspace auto-adds the dlm gl lockspace */
+		if (gl_use_dlm) {
+			rv = add_dlm_global_lockspace(NULL);
+			if (rv < 0 && rv != -EEXIST)
+				return rv;
+		}
+
+		vg_ls_name(act->vg_name, ls_name);
+
+		rv = add_lockspace_thread(ls_name, act->vg_name, act->vg_uuid,
+					  act->lm_type, act->vg_args,
+					  act);
+
+		if (rv)
+			log_error("add_lockspace %s add_lockspace_thread %d", ls_name, rv);
+		return rv;
+	}
+
+	log_error("add_lockspace bad type %d", act->rt);
+	return -1;
+}
+
+/*
+ * vgchange --lock-stop vgname will lock the vg ex, then send a stop,
+ * so we expect to find the ex vg lock held here, and will automatically
+ * unlock it when stopping.
+ *
+ * TODO: if the vg contains the gl lock, should we also automatically
+ * unlock that when other lockspaces exist?  Or return an error about
+ * stopping the vg with the gl lock while other lockspaces are running,
+ * and require a force to do that?
+ */
+
+/*
+ * Stop a lockspace (vgchange --lock-stop, or the dlm gl).  The
+ * lockspace thread clears remaining locks and leaves the lm
+ * lockspace.  Returns 0 on success, or -EINVAL/-ENOLS/-ESTALE/-EBUSY.
+ */
+static int rem_lockspace(struct action *act)
+{
+	struct lockspace *ls;
+	char ls_name[MAX_NAME+1];
+	int force = act->flags & LD_AF_FORCE;
+	int rt = act->rt;
+
+	/* only the dlm has a separate gl lockspace to stop; sanlock's
+	   gl lives inside a vg lockspace */
+	if (act->rt == LD_RT_GL && act->lm_type != LD_LM_DLM)
+		return -EINVAL;
+
+	memset(ls_name, 0, sizeof(ls_name));
+
+	if (act->rt == LD_RT_GL)
+		gl_ls_name(ls_name);
+	else
+		vg_ls_name(act->vg_name, ls_name);
+
+	pthread_mutex_lock(&lockspaces_mutex);
+	ls = find_lockspace_name(ls_name);
+	if (!ls) {
+		pthread_mutex_unlock(&lockspaces_mutex);
+		return -ENOLS;
+	}
+
+	pthread_mutex_lock(&ls->mutex);
+	if (ls->thread_stop) {
+		/* already being stopped */
+		pthread_mutex_unlock(&ls->mutex);
+		pthread_mutex_unlock(&lockspaces_mutex);
+		return -ESTALE;
+	}
+
+	/* refuse to stop while lv locks are held, unless forced */
+	if (!force && for_each_lock(ls, LOCKS_EXIST_LV)) {
+		pthread_mutex_unlock(&ls->mutex);
+		pthread_mutex_unlock(&lockspaces_mutex);
+		return -EBUSY;
+	}
+	ls->thread_work = 1;
+	ls->thread_stop = 1;
+	/* act is always non-NULL here (it was dereferenced at entry);
+	   the former "if (act)" guard was dead code */
+	list_add_tail(&act->list, &ls->actions);
+	pthread_cond_signal(&ls->cond);
+	pthread_mutex_unlock(&ls->mutex);
+	pthread_mutex_unlock(&lockspaces_mutex);
+
+	/*
+	 * If the dlm global lockspace was automatically added when
+	 * the first dlm vg lockspace was added, then reverse that
+	 * by automatically removing the dlm global lockspace when
+	 * the last dlm vg lockspace is removed.
+	 */
+
+	if (rt == LD_RT_VG && gl_use_dlm && gl_auto_dlm)
+		rem_dlm_global_lockspace();
+
+	return 0;
+}
+
+/*
+ * count how many lockspaces started by this client are still starting;
+ * the client will use this to wait for all its start operations to finish
+ * (START_WAIT).
+ */
+
+static int count_lockspace_starting(uint32_t client_id)
+{
+	struct lockspace *ls;
+	int starting = 0;
+	int done_count = 0;
+	int fail_count = 0;
+
+	/* only lockspaces started by this client are of interest */
+	pthread_mutex_lock(&lockspaces_mutex);
+	list_for_each_entry(ls, &lockspaces, list) {
+		if (ls->start_client_id != client_id)
+			continue;
+
+		if (ls->create_done)
+			done_count++;
+		if (ls->create_fail)
+			fail_count++;
+		if (!ls->create_done && !ls->create_fail)
+			starting++;
+	}
+	pthread_mutex_unlock(&lockspaces_mutex);
+
+	log_debug("count_lockspace_starting client %u count %d done %d fail %d",
+		  client_id, starting, done_count, fail_count);
+
+	return starting;
+}
+
+/* lockspaces_mutex is held */
+static struct lockspace *find_lockspace_inactive(char *ls_name)
+{
+	struct lockspace *inactive;
+
+	/* linear scan of the inactive list for a matching name */
+	list_for_each_entry(inactive, &lockspaces_inactive, list) {
+		if (!strcmp(inactive->name, ls_name))
+			return inactive;
+	}
+
+	return NULL;
+}
+
+/* lockspaces_mutex is held */
+static void clear_lockspace_inactive(char *ls_name)
+{
+	struct lockspace *found;
+
+	/* remove and free the inactive entry if one exists */
+	found = find_lockspace_inactive(ls_name);
+	if (!found)
+		return;
+
+	list_del(&found->list);
+	free(found);
+}
+
+static void free_lockspaces_inactive(void)
+{
+	struct lockspace *pos, *next;
+
+	/* drain the whole inactive list under the lockspaces lock */
+	pthread_mutex_lock(&lockspaces_mutex);
+	list_for_each_entry_safe(pos, next, &lockspaces_inactive, list) {
+		list_del(&pos->list);
+		free(pos);
+	}
+	pthread_mutex_unlock(&lockspaces_mutex);
+}
+
+/*
+ * Loop through all lockspaces, and:
+ * - if do_stop is set, stop any that are not stopped
+ * - if do_free is set, join any that are done stopping (and free ls)
+ *
+ * do_stop will not stop an ls with lv locks unless force is set.
+ *
+ * This function does not block or wait for anything.
+ *
+ * do_stop (no do_free):
+ * returns count of lockspaces that need stop (have locks and no force)
+ *
+ * do_free (no do_stop):
+ * returns count of lockspaces that are stopped and need freeing
+ *
+ * do_stop and do_free:
+ * returns sum of the previous two
+ */
+
+static int for_each_lockspace(int do_stop, int do_free, int do_force)
+{
+	struct lockspace *ls, *safe;
+	int need_stop = 0;
+	int need_free = 0;
+	int stop_count = 0;
+	int free_count = 0;
+	int done;
+	int stop;
+
+	pthread_mutex_lock(&lockspaces_mutex);
+
+	/* phase 1: ask each running lockspace thread to stop */
+	if (do_stop) {
+		list_for_each_entry(ls, &lockspaces, list) {
+
+			pthread_mutex_lock(&ls->mutex);
+			if (ls->thread_stop) {
+				/* already stopping; nothing to do */
+				pthread_mutex_unlock(&ls->mutex);
+				continue;
+			}
+
+			/* existing locks block the stop unless forced */
+			if (!do_force && for_each_lock(ls, LOCKS_EXIST_ANY)) {
+				need_stop++;
+			} else {
+				ls->thread_work = 1;
+				ls->thread_stop = 1;
+				pthread_cond_signal(&ls->cond);
+				stop_count++;
+			}
+			pthread_mutex_unlock(&ls->mutex);
+		}
+	}
+
+	/* phase 2: join finished threads and move the ls to the inactive list */
+	if (do_free) {
+		list_for_each_entry_safe(ls, safe, &lockspaces, list) {
+
+			pthread_mutex_lock(&ls->mutex);
+			done = ls->thread_done;
+			stop = ls->thread_stop;
+			pthread_mutex_unlock(&ls->mutex);
+
+			/* This ls has locks and force is not set. */
+			if (!stop)
+				continue;
+
+			/*
+			 * Once thread_done is set, we know that the lockspace_thread
+			 * will not be using/touching the ls struct.  Any other
+			 * thread touches the ls struct under lockspaces_mutex.
+			 */
+			if (done) {
+				pthread_join(ls->thread, NULL);
+				list_del(&ls->list);
+
+				/* TODO: remove this if unneeded */
+				if (!list_empty(&ls->actions))
+					log_error("TODO: free ls actions");
+
+				free_ls_resources(ls);
+				list_add(&ls->list, &lockspaces_inactive);
+				free_count++;
+			} else {
+				need_free++;
+			}
+		}
+	}
+
+	/* with no lockspaces left, the gl manager choice can be made afresh */
+	if (list_empty(&lockspaces)) {
+		if (!gl_type_static) {
+			gl_use_dlm = 0;
+			gl_use_sanlock = 0;
+		}
+	}
+	pthread_mutex_unlock(&lockspaces_mutex);
+
+	if (stop_count || free_count || need_stop || need_free) {
+		log_debug("for_each_lockspace do_stop %d do_free %d "
+			  "stop_count %d free_count %d need_stop %d need_free %d",
+			  do_stop, do_free, stop_count, free_count, need_stop, need_free);
+	}
+
+	return need_stop + need_free;
+}
+
+/*
+ * This is only called when the daemon is exiting so the sleep/retry
+ * loop doesn't have any adverse impact.
+ */
+
+static void for_each_lockspace_retry(int do_stop, int do_free, int do_force)
+{
+	int remaining;
+
+	/* poll until every lockspace has been stopped and freed */
+	for (;;) {
+		remaining = for_each_lockspace(do_stop, do_free, do_force);
+		if (!remaining)
+			return;
+
+		log_debug("for_each_lockspace_retry remaining %d", remaining);
+		sleep(1);
+	}
+}
+
+/* Initialize lock-manager state for a new vg; dispatches on lock manager type. */
+static int work_init_vg(struct action *act)
+{
+	char ls_name[MAX_NAME+1];
+	int rv;
+
+	memset(ls_name, 0, sizeof(ls_name));
+	vg_ls_name(act->vg_name, ls_name);
+
+	switch (act->lm_type) {
+	case LD_LM_SANLOCK:
+		rv = lm_init_vg_sanlock(ls_name, act->vg_name, act->flags, act->vg_args);
+		break;
+	case LD_LM_DLM:
+		rv = lm_init_vg_dlm(ls_name, act->vg_name, act->flags, act->vg_args);
+		break;
+	default:
+		rv = -EINVAL;
+		break;
+	}
+
+	return rv;
+}
+
+/*
+ * Scan running sanlock lockspaces for one with the global lock
+ * enabled, and record its name in gl_lsname_sanlock so subsequent
+ * gl requests can be directed at it.
+ */
+static void work_test_gl(void)
+{
+	struct lockspace *ls;
+	int is_enabled = 0;
+
+	pthread_mutex_lock(&lockspaces_mutex);
+	list_for_each_entry(ls, &lockspaces, list) {
+		if (ls->lm_type != LD_LM_SANLOCK)
+			continue;
+
+		/* only fully started, non-stopping lockspaces are candidates */
+		pthread_mutex_lock(&ls->mutex);
+		if (ls->create_done && !ls->thread_stop) {
+			is_enabled = lm_gl_is_enabled(ls);
+			if (is_enabled) {
+				log_debug("S %s worker found gl_is_enabled", ls->name);
+				strncpy(gl_lsname_sanlock, ls->name, MAX_NAME);
+			}
+		}
+		pthread_mutex_unlock(&ls->mutex);
+
+		/* first enabled gl wins */
+		if (is_enabled)
+			break;
+	}
+
+	if (!is_enabled)
+		log_debug("worker found no gl_is_enabled");
+	pthread_mutex_unlock(&lockspaces_mutex);
+}
+
+/*
+ * Initialize lock-manager state for a new lv.  The vg's lm_type and
+ * vg_args are taken from the running lockspace when available, else
+ * from the client's action; the client's lm_type must agree.
+ * For sanlock, the resulting lv lock args are copied back into act
+ * for the reply; dlm needs no per-lv initialization.
+ */
+static int work_init_lv(struct action *act)
+{
+	struct lockspace *ls;
+	char ls_name[MAX_NAME+1];
+	char vg_args[MAX_ARGS];
+	char lv_args[MAX_ARGS];
+	int lm_type = 0;
+	int rv = 0;
+
+	memset(ls_name, 0, sizeof(ls_name));
+	memset(vg_args, 0, MAX_ARGS);
+	memset(lv_args, 0, MAX_ARGS);
+
+	vg_ls_name(act->vg_name, ls_name);
+
+	/* prefer the lm_type/vg_args of the running lockspace */
+	pthread_mutex_lock(&lockspaces_mutex);
+	ls = find_lockspace_name(ls_name);
+	if (ls) {
+		lm_type = ls->lm_type;
+		memcpy(vg_args, ls->vg_args, MAX_ARGS);
+	}
+	pthread_mutex_unlock(&lockspaces_mutex);
+
+	/* no running lockspace: fall back to what the client sent */
+	if (!ls) {
+		lm_type = act->lm_type;
+		memcpy(vg_args, act->vg_args, MAX_ARGS);
+	}
+
+	if (act->lm_type != lm_type) {
+		log_error("init_lv ls_name %s wrong lm_type %d %d",
+			  ls_name, act->lm_type, lm_type);
+		return -EINVAL;
+	}
+
+	/* from here act->lm_type == lm_type; use lm_type consistently */
+	if (lm_type == LD_LM_SANLOCK) {
+		rv = lm_init_lv_sanlock(ls_name, act->vg_name, act->lv_name,
+					vg_args, lv_args);
+
+		/* return the new lv lock args to the client */
+		memcpy(act->lv_args, lv_args, MAX_ARGS);
+		return rv;
+
+	} else if (lm_type == LD_LM_DLM) {
+		/* dlm does not use per-lv lock args */
+		return 0;
+	} else {
+		log_error("init_lv ls_name %s bad lm_type %d", ls_name, lm_type);
+		return -EINVAL;
+	}
+}
+
+/*
+ * When an action is queued for the worker_thread, it is processed right away.
+ * After processing, some actions need to be retried again in a short while.
+ * These actions are put on the delayed_list, and the worker_thread will
+ * process these delayed actions again in SHORT_DELAY_PERIOD.
+ */
+
+#define SHORT_DELAY_PERIOD 2
+#define LONG_DELAY_PERIOD 60
+
+/*
+ * Main loop of the worker thread: executes queued actions that do not
+ * belong to a particular lockspace thread, retries delayed actions
+ * every SHORT_DELAY_PERIOD, and reaps stopped lockspace threads.
+ */
+static void *worker_thread_main(void *arg_in)
+{
+	struct list_head delayed_list;
+	struct timespec ts;
+	struct action *act, *safe;
+	uint64_t last_delayed_time = 0;
+	int delayed_update_local = 0;
+	int delay_sec = LONG_DELAY_PERIOD;
+	int rv;
+
+	INIT_LIST_HEAD(&delayed_list);
+
+	while (1) {
+		pthread_mutex_lock(&worker_mutex);
+		clock_gettime(CLOCK_REALTIME, &ts);
+		ts.tv_sec += delay_sec;
+		rv = 0;
+		act = NULL;
+
+		/* sleep until new work arrives, we are told to stop, or the delay expires */
+		while (list_empty(&worker_list) && !worker_stop && !worker_wake && !rv) {
+			rv = pthread_cond_timedwait(&worker_cond, &worker_mutex, &ts);
+		}
+		worker_wake = 0;
+
+		if (worker_stop) {
+			pthread_mutex_unlock(&worker_mutex);
+			goto out;
+		}
+
+		/* take one action at a time off the queue */
+		if (!list_empty(&worker_list)) {
+			act = list_first_entry(&worker_list, struct action, list);
+			list_del(&act->list);
+		}
+		pthread_mutex_unlock(&worker_mutex);
+
+		/*
+		 * Do new work actions before processing delayed work actions.
+		 */
+
+		if (!act)
+			goto delayed_work;
+
+		if ((act->op == LD_OP_LOCK) && (act->flags & LD_AF_SEARCH_LS)) {
+			/*
+			 * worker_thread used as a helper to search existing
+			 * sanlock vgs for an enabled gl.
+			 */
+			log_debug("work search for gl");
+			work_test_gl();
+
+			/* try again to find a gl lockspace for this act */
+			rv = add_lock_action(act);
+			if (rv < 0) {
+				act->result = rv;
+				add_client_result(act);
+			}
+
+		} else if ((act->op == LD_OP_INIT) && (act->rt == LD_RT_VG)) {
+			log_debug("work init_vg %s", act->vg_name);
+			act->result = work_init_vg(act);
+			add_client_result(act);
+
+		} else if ((act->op == LD_OP_INIT) && (act->rt == LD_RT_LV)) {
+			log_debug("work init_lv %s/%s", act->vg_name, act->lv_name);
+			act->result = work_init_lv(act);
+			add_client_result(act);
+
+		} else if (act->op == LD_OP_UPDATE_LOCAL) {
+			/* only one update_local is kept pending at a time */
+			if (delayed_update_local) {
+				log_debug("work update_local ignore repeat");
+				free_action(act);
+			} else {
+				log_debug("work update_local");
+				rv = work_update_local_vgs();
+				if (rv == -EAGAIN) {
+					/* could not do it now; retry from delayed_list */
+					delayed_update_local = 1;
+					list_add(&act->list, &delayed_list);
+				} else {
+					free_action(act);
+				}
+			}
+
+		} else if (act->op == LD_OP_START_WAIT) {
+			/* reply once no lockspaces from this client are still starting */
+			act->result = count_lockspace_starting(act->client_id);
+			if (!act->result)
+				add_client_result(act);
+			else
+				list_add(&act->list, &delayed_list);
+
+		} else if (act->op == LD_OP_STOP_ALL) {
+			act->result = for_each_lockspace(DO_STOP, DO_FREE, (act->flags & LD_AF_FORCE) ? DO_FORCE : NO_FORCE);
+			if (!act->result || !(act->flags & LD_AF_WAIT))
+				add_client_result(act);
+			else
+				list_add(&act->list, &delayed_list);
+
+		} else {
+			log_error("work unknown op %d", act->op);
+			act->result = -EINVAL;
+			add_client_result(act);
+		}
+
+ delayed_work:
+		/*
+		 * We may want to track retry times per action so that
+		 * we can delay different actions by different amounts.
+		 */
+
+		/* rate-limit delayed processing to once per SHORT_DELAY_PERIOD */
+		if (monotime() - last_delayed_time < SHORT_DELAY_PERIOD) {
+			delay_sec = 1;
+			continue;
+		}
+		last_delayed_time = monotime();
+
+		list_for_each_entry_safe(act, safe, &delayed_list, list) {
+			if (act->op == LD_OP_START_WAIT) {
+				log_debug("work delayed start_wait for client %u", act->client_id);
+				act->result = count_lockspace_starting(act->client_id);
+				if (!act->result) {
+					list_del(&act->list);
+					add_client_result(act);
+				}
+
+			} else if (act->op == LD_OP_UPDATE_LOCAL) {
+				log_debug("work delayed update_local");
+				rv = work_update_local_vgs();
+				if (rv == -EAGAIN)
+					continue;
+				list_del(&act->list);
+				free_action(act);
+				delayed_update_local = 0;
+
+			} else if (act->op == LD_OP_STOP_ALL) {
+				log_debug("work delayed stop_all");
+				act->result = for_each_lockspace(DO_STOP, DO_FREE, (act->flags & LD_AF_FORCE) ? DO_FORCE : NO_FORCE);
+				if (!act->result) {
+					list_del(&act->list);
+					act->result = 0;
+					add_client_result(act);
+				}
+			}
+		}
+
+		/*
+		 * This is not explicitly queued work, and not delayed work,
+		 * but lockspace thread cleanup that's needed when a
+		 * lockspace has been stopped/removed or failed to start.
+		 */
+
+		for_each_lockspace(NO_STOP, DO_FREE, NO_FORCE);
+
+		/* wake sooner while delayed actions are pending */
+		if (list_empty(&delayed_list))
+			delay_sec = LONG_DELAY_PERIOD;
+		else
+			delay_sec = 1;
+	}
+out:
+	/* shutting down: discard any remaining delayed and queued actions */
+	list_for_each_entry_safe(act, safe, &delayed_list, list) {
+		list_del(&act->list);
+		free_action(act);
+	}
+
+	pthread_mutex_lock(&worker_mutex);
+	list_for_each_entry_safe(act, safe, &worker_list, list) {
+		list_del(&act->list);
+		free_action(act);
+	}
+	pthread_mutex_unlock(&worker_mutex);
+	return NULL;
+}
+
+/* Initialize worker state and launch the worker thread; -1 on failure. */
+static int setup_worker_thread(void)
+{
+	INIT_LIST_HEAD(&worker_list);
+
+	pthread_mutex_init(&worker_mutex, NULL);
+	pthread_cond_init(&worker_cond, NULL);
+
+	if (pthread_create(&worker_thread, NULL, worker_thread_main, NULL))
+		return -1;
+
+	return 0;
+}
+
+/* Signal the worker thread to stop and wait for it to exit. */
+static void close_worker_thread(void)
+{
+	pthread_mutex_lock(&worker_mutex);
+	worker_stop = 1;
+	pthread_cond_signal(&worker_cond);
+	pthread_mutex_unlock(&worker_mutex);
+
+	pthread_join(worker_thread, NULL);
+}
+
+/* client_mutex is locked */
+static struct client *find_client_work(void)
+{
+	struct client *c;
+
+	/* a client needs service when it has data pending or has died */
+	list_for_each_entry(c, &client_list, list) {
+		if (c->recv)
+			return c;
+		if (c->dead)
+			return c;
+	}
+	return NULL;
+}
+
+/* client_mutex is locked */
+static struct client *find_client_id(uint32_t id)
+{
+	struct client *c;
+
+	/* look the client up by its daemon-assigned id */
+	list_for_each_entry(c, &client_list, list) {
+		if (c->id == id)
+			return c;
+	}
+	return NULL;
+}
+
+/* client_mutex is locked */
+static struct client *find_client_pi(int pi)
+{
+	struct client *c;
+
+	/* look the client up by its pollfd index */
+	list_for_each_entry(c, &client_list, list) {
+		if (c->pi == pi)
+			return c;
+	}
+	return NULL;
+}
+
+/*
+ * wake up poll() because we have added an fd
+ * back into pollfd and poll() needs to be restarted
+ * to recognize it.
+ */
+static void restart_poll(void)
+{
+	/*
+	 * Check the write() result: a self-pipe write should not fail,
+	 * but silently ignoring it hides errors and trips
+	 * warn_unused_result on glibc.
+	 */
+	if (write(restart_fds[1], "w", 1) == -1)
+		log_debug("restart_poll write error %d", errno);
+}
+
+/* poll will take requests from client again, cl->mutex must be held */
+static void client_resume(struct client *cl)
+{
+	if (cl->dead)
+		return;
+
+	/* resume only makes sense after the fd was parked with poll_ignore */
+	if (!cl->poll_ignore || cl->fd == -1 || cl->pi == -1) {
+		/* shouldn't happen */
+		log_error("client_resume %d bad state ig %d fd %d pi %d",
+			  cl->id, cl->poll_ignore, cl->fd, cl->pi);
+		return;
+	}
+
+	/* restore the real fd in the pollfd slot so poll() watches it again */
+	pthread_mutex_lock(&pollfd_mutex);
+	if (pollfd[cl->pi].fd != POLL_FD_IGNORE) {
+		log_error("client_resume %d pi %d fd %d not IGNORE",
+			  cl->id, cl->pi, cl->fd);
+	}
+	pollfd[cl->pi].fd = cl->fd;
+	pollfd[cl->pi].events = POLLIN;
+	pthread_mutex_unlock(&pollfd_mutex);
+
+	restart_poll();
+}
+
+/* called from client_thread, cl->mutex is held */
+static void client_send_result(struct client *cl, struct action *act)
+{
+	response res;
+	char result_flags[128];
+
+	if (cl->dead) {
+		log_debug("client send %d skip dead", cl->id);
+		return;
+	}
+
+	memset(result_flags, 0, sizeof(result_flags));
+
+	buffer_init(&res.buffer);
+
+	/*
+	 * EUNATCH is returned when the global lock existed,
+	 * but had been disabled when we tried to lock it,
+	 * so we removed it, and no longer have a gl to lock.
+	 */
+
+	if (act->result == -EUNATCH)
+		act->result = -ENOLS;
+
+	/*
+	 * init_vg with dlm|sanlock returns vg_args
+	 * init_lv with sanlock returns lv_args
+	 */
+
+	if (act->result == -ENOLS) {
+		/*
+		 * The lockspace could not be found, in which case
+		 * the caller may want to know if any lockspaces exist
+		 * or if lockspaces exist, but not one with the global lock.
+		 * Given this detail, it may be able to proceed without
+		 * the lock.
+		 */
+		pthread_mutex_lock(&lockspaces_mutex);
+		if (list_empty(&lockspaces))
+			strcat(result_flags, "NO_LOCKSPACES,");
+		pthread_mutex_unlock(&lockspaces_mutex);
+
+		/*
+		 * NOTE(review): all three branches below append the same
+		 * "NO_GL_LS," string, so the if/else chain is redundant as
+		 * written — confirm whether distinct flags were intended.
+		 */
+		if (gl_use_sanlock && !gl_lsname_sanlock[0])
+			strcat(result_flags, "NO_GL_LS,");
+		else if (gl_use_dlm && !gl_lsname_dlm[0])
+			strcat(result_flags, "NO_GL_LS,");
+		else
+			strcat(result_flags, "NO_GL_LS,");
+	}
+
+	if (act->flags & LD_AF_LOCAL_LS)
+		strcat(result_flags, "LOCAL_LS,");
+
+	if (act->flags & LD_AF_DUP_GL_LS)
+		strcat(result_flags, "DUP_GL_LS,");
+
+	if (act->flags & LD_AF_INACTIVE_LS)
+		strcat(result_flags, "INACTIVE_LS,");
+
+	if (act->flags & LD_AF_ADD_LS_ERROR)
+		strcat(result_flags, "ADD_LS_ERROR,");
+
+	if (act->op == LD_OP_INIT) {
+		/*
+		 * init is a special case where lock args need
+		 * to be passed back to the client.
+		 */
+		const char *vg_args = "none";
+		const char *lv_args = "none";
+
+		if (act->vg_args[0])
+			vg_args = act->vg_args;
+
+		if (act->lv_args[0])
+			lv_args = act->lv_args;
+
+		log_debug("send %s[%d.%u] %s %s rv %d vg_args %s lv_args %s",
+			  cl->name[0] ? cl->name : "client", cl->pid, cl->id,
+			  op_str(act->op), rt_str(act->rt),
+			  act->result, vg_args ? vg_args : "", lv_args ? lv_args : "");
+
+		res = daemon_reply_simple("OK",
+					  "op = %d", act->op,
+					  "op_result = %d", act->result,
+					  "lm_result = %d", act->lm_rv,
+					  "vg_lock_args = %s", vg_args,
+					  "lv_lock_args = %s", lv_args,
+					  "result_flags = %s", result_flags[0] ? result_flags : "none",
+					  NULL);
+	} else {
+		/*
+		 * A normal reply.
+		 */
+
+		log_debug("send %s[%d.%u] %s %s rv %d %s %s",
+			  cl->name[0] ? cl->name : "client", cl->pid, cl->id,
+			  op_str(act->op), rt_str(act->rt),
+			  act->result, (act->result == -ENOLS) ? "ENOLS" : "", result_flags);
+
+		res = daemon_reply_simple("OK",
+					  "op = %d", act->op,
+					  "lock_type = %s", lm_str(act->lm_type),
+					  "op_result = %d", act->result,
+					  "lm_result = %d", act->lm_rv,
+					  "result_flags = %s", result_flags[0] ? result_flags : "none",
+					  NULL);
+	}
+
+	buffer_write(cl->fd, &res.buffer);
+	buffer_destroy(&res.buffer);
+
+	/* reply sent; let poll() accept the client's next request */
+	client_resume(cl);
+}
+
+/*
+ * TODO: optimize common case where a client has only locked
+ * a couple of lockspaces. Keep two lockspace ids in cl->ls_ids
+ * and queue OP_CLOSE in only these lockspaces. If more than two
+ * are used, then clear ls_ids and queue for all.
+ */
+
+/* called from client_thread */
+/*
+ * A client has disconnected: queue an OP_CLOSE action in every running
+ * lockspace (and the local thread, when enabled) so each can release
+ * any locks the client still held.
+ */
+static void client_purge(struct client *cl)
+{
+	struct lockspace *ls;
+	struct action *act;
+
+	pthread_mutex_lock(&lockspaces_mutex);
+	list_for_each_entry(ls, &lockspaces, list) {
+		/* allocation failure: skip this ls rather than block purge */
+		act = alloc_action();
+		if (!act)
+			continue;
+
+		act->op = LD_OP_CLOSE;
+		act->client_id = cl->id;
+		act->flags |= LD_AF_CLIENT_DEAD;
+
+		pthread_mutex_lock(&ls->mutex);
+		if (!ls->thread_stop) {
+			list_add_tail(&act->list, &ls->actions);
+			ls->thread_work = 1;
+			pthread_cond_signal(&ls->cond);
+		} else {
+			/* a stopping lockspace will release everything anyway */
+			free_action(act);
+		}
+		pthread_mutex_unlock(&ls->mutex);
+	}
+	pthread_mutex_unlock(&lockspaces_mutex);
+
+	if (local_thread_also) {
+		act = alloc_action();
+		if (!act)
+			return;
+
+		act->op = LD_OP_CLOSE;
+		act->client_id = cl->id;
+		act->flags |= LD_AF_CLIENT_DEAD;
+
+		pthread_mutex_lock(&local_thread_mutex);
+		list_add_tail(&act->list, &local_thread_actions);
+		local_thread_work = 1;
+		pthread_cond_signal(&local_thread_cond);
+		pthread_mutex_unlock(&local_thread_mutex);
+	}
+}
+
+/*
+ * Route a lock action to the lockspace it belongs to, queueing it on
+ * that lockspace's action list for the lockspace thread to process.
+ * Handles the cases where the target lockspace does not (yet) exist.
+ */
+static int add_lock_action(struct action *act)
+{
+	struct lockspace *ls = NULL;
+	char ls_name[MAX_NAME+1];
+
+	memset(ls_name, 0, sizeof(ls_name));
+
+	/* Determine which lockspace this action is for, and set ls_name. */
+
+	/* sanlock enable/disable of the gl operates on the vg's own lockspace */
+	if (act->rt == LD_RT_GL && gl_use_sanlock &&
+	    (act->op == LD_OP_ENABLE || act->op == LD_OP_DISABLE))
+		vg_ls_name(act->vg_name, ls_name);
+	else if (act->rt == LD_RT_GL)
+		gl_ls_name(ls_name);
+	else
+		vg_ls_name(act->vg_name, ls_name);
+
+ retry:
+	pthread_mutex_lock(&lockspaces_mutex);
+	if (ls_name[0])
+		ls = find_lockspace_name(ls_name);
+	if (!ls) {
+		int ls_inactive = 0;
+		int ls_create_fail = 0;
+
+		/* remember whether the lockspace existed before and why it ended */
+		ls = find_lockspace_inactive(ls_name);
+		if (ls) {
+			ls_inactive = 1;
+			ls_create_fail = ls->create_fail;
+			ls = NULL;
+		}
+		pthread_mutex_unlock(&lockspaces_mutex);
+
+		if (act->op == LD_OP_UPDATE && act->rt == LD_RT_VG) {
+			log_debug("lockspace not found ignored for vg update");
+			return -ENOLS;
+
+		} else if (act->flags & LD_AF_SEARCH_LS) {
+			/* fail if we've already tried searching for the ls */
+			log_error("lockspace search repeated %s", ls_name);
+			return -ENOLS;
+
+		} else if (act->op == LD_OP_LOCK && act->rt == LD_RT_GL && gl_use_sanlock) {
+			/* gl may have been enabled in an existing vg */
+			log_debug("gl lockspace not found check sanlock vgs");
+			act->flags |= LD_AF_SEARCH_LS;
+			add_work_action(act);
+			return 0;
+
+		} else if (act->op == LD_OP_LOCK && act->rt == LD_RT_GL && gl_use_dlm) {
+			/* start the dlm gl lockspace on demand and retry the lookup */
+			log_debug("gl lockspace not found add dlm global");
+			act->flags |= LD_AF_SEARCH_LS;
+			act->flags |= LD_AF_WAIT_STARTING;
+			add_dlm_global_lockspace(NULL);
+			gl_ls_name(ls_name);
+			goto retry;
+
+		} else if (act->op == LD_OP_LOCK && act->mode == LD_LK_UN) {
+			log_debug("lockspace not found ignored for unlock");
+			return -ENOLS;
+
+		} else if (act->op == LD_OP_LOCK && act->rt == LD_RT_VG && ls_inactive) {
+			/* ls has been stopped or previously failed to start */
+			log_debug("lockspace inactive create_fail %d %s",
+				  ls_create_fail, ls_name);
+			act->flags |= LD_AF_INACTIVE_LS;
+			if (ls_create_fail)
+				act->flags |= LD_AF_ADD_LS_ERROR;
+			return -ENOLS;
+
+		} else {
+			log_error("lockspace not found %s", ls_name);
+			return -ENOLS;
+		}
+	}
+
+	if (act->lm_type == LD_LM_NONE) {
+		/* return to the command the type we are using */
+		act->lm_type = ls->lm_type;
+	} else if (act->lm_type != ls->lm_type) {
+		/* should not happen */
+		log_error("S %s add_lock_action bad lm_type %d ls %d",
+			  ls_name, act->lm_type, ls->lm_type);
+		return -EINVAL;
+	}
+
+	/* ls->mutex is taken while still holding lockspaces_mutex (lock order) */
+	pthread_mutex_lock(&ls->mutex);
+	if (ls->thread_stop) {
+		pthread_mutex_unlock(&ls->mutex);
+		pthread_mutex_unlock(&lockspaces_mutex);
+		log_error("lockspace is stopping %s", ls_name);
+		return -ESTALE;
+	}
+
+	/* a still-starting lockspace only accepts actions that opt to wait */
+	if (!ls->create_fail && !ls->create_done && !(act->flags & LD_AF_WAIT_STARTING)) {
+		pthread_mutex_unlock(&ls->mutex);
+		pthread_mutex_unlock(&lockspaces_mutex);
+		log_debug("lockspace is starting %s", ls_name);
+		return -ESTARTING;
+	}
+
+	list_add_tail(&act->list, &ls->actions);
+	ls->thread_work = 1;
+	pthread_cond_signal(&ls->cond);
+	pthread_mutex_unlock(&ls->mutex);
+	pthread_mutex_unlock(&lockspaces_mutex);
+
+	/* lockspace_thread_main / res_process take it from here */
+
+	return 0;
+}
+
+/*
+ * Queue act for the local thread: on the given local vg's list when ls
+ * is set, otherwise on the shared local gl list (local_thread_only mode).
+ *
+ * NOTE(review): when ls is NULL and local_thread_only is not set, act is
+ * queued nowhere but the local thread is still signalled and 0 returned —
+ * confirm callers never reach that combination, else act is lost.
+ */
+static int add_local_lock_action(struct lockspace *ls, struct action *act)
+{
+	act->flags |= LD_AF_LOCAL_LS;
+	pthread_mutex_lock(&local_thread_mutex);
+	if (!ls && local_thread_only)
+		list_add_tail(&act->list, &local_thread_gls->actions);
+	else if (ls)
+		list_add_tail(&act->list, &ls->actions);
+	local_thread_work = 1;
+	pthread_cond_signal(&local_thread_cond);
+	pthread_mutex_unlock(&local_thread_mutex);
+	return 0;
+}
+
+/*
+ * Map a request name from the client protocol to an operation (*op)
+ * and resource type (*rt).  Returns 0 on success, -1 for an unknown
+ * or NULL name (in which case *op and *rt are untouched).
+ *
+ * Every successful branch must set both outputs: callers pass
+ * uninitialized locals, so leaving *rt unset (as the add_local /
+ * rem_local / update_local branches previously did) lets the caller
+ * read an indeterminate value.
+ */
+static int str_to_op_rt(const char *req_name, int *op, int *rt)
+{
+	if (!req_name)
+		goto out;
+
+	if (!strcmp(req_name, "hello")) {
+		*op = LD_OP_HELLO;
+		*rt = 0;
+		return 0;
+	}
+	if (!strcmp(req_name, "quit")) {
+		*op = LD_OP_QUIT;
+		*rt = 0;
+		return 0;
+	}
+	if (!strcmp(req_name, "info")) {
+		*op = LD_OP_DUMP_INFO;
+		*rt = 0;
+		return 0;
+	}
+	if (!strcmp(req_name, "dump")) {
+		*op = LD_OP_DUMP_LOG;
+		*rt = 0;
+		return 0;
+	}
+	if (!strcmp(req_name, "init_vg")) {
+		*op = LD_OP_INIT;
+		*rt = LD_RT_VG;
+		return 0;
+	}
+	if (!strcmp(req_name, "init_lv")) {
+		*op = LD_OP_INIT;
+		*rt = LD_RT_LV;
+		return 0;
+	}
+	if (!strcmp(req_name, "free_vg")) {
+		*op = LD_OP_FREE;
+		*rt = LD_RT_VG;
+		return 0;
+	}
+	if (!strcmp(req_name, "free_lv")) {
+		*op = LD_OP_FREE;
+		*rt = LD_RT_LV;
+		return 0;
+	}
+	if (!strcmp(req_name, "start_vg")) {
+		*op = LD_OP_START;
+		*rt = LD_RT_VG;
+		return 0;
+	}
+	if (!strcmp(req_name, "stop_vg")) {
+		*op = LD_OP_STOP;
+		*rt = LD_RT_VG;
+		return 0;
+	}
+	if (!strcmp(req_name, "start_wait")) {
+		*op = LD_OP_START_WAIT;
+		*rt = 0;
+		return 0;
+	}
+	if (!strcmp(req_name, "stop_all")) {
+		*op = LD_OP_STOP_ALL;
+		*rt = 0;
+		return 0;
+	}
+	if (!strcmp(req_name, "lock_gl")) {
+		*op = LD_OP_LOCK;
+		*rt = LD_RT_GL;
+		return 0;
+	}
+	if (!strcmp(req_name, "lock_vg")) {
+		*op = LD_OP_LOCK;
+		*rt = LD_RT_VG;
+		return 0;
+	}
+	if (!strcmp(req_name, "lock_lv")) {
+		*op = LD_OP_LOCK;
+		*rt = LD_RT_LV;
+		return 0;
+	}
+	if (!strcmp(req_name, "vg_update")) {
+		*op = LD_OP_UPDATE;
+		*rt = LD_RT_VG;
+		return 0;
+	}
+	if (!strcmp(req_name, "enable_gl")) {
+		*op = LD_OP_ENABLE;
+		*rt = LD_RT_GL;
+		return 0;
+	}
+	if (!strcmp(req_name, "disable_gl")) {
+		*op = LD_OP_DISABLE;
+		*rt = LD_RT_GL;
+		return 0;
+	}
+	if (!strcmp(req_name, "add_local")) {
+		*op = LD_OP_ADD_LOCAL;
+		*rt = 0;
+		return 0;
+	}
+	if (!strcmp(req_name, "rem_local")) {
+		*op = LD_OP_REM_LOCAL;
+		*rt = 0;
+		return 0;
+	}
+	if (!strcmp(req_name, "update_local")) {
+		*op = LD_OP_UPDATE_LOCAL;
+		*rt = 0;
+		return 0;
+	}
+out:
+	return -1;
+}
+
+/* Map a lock mode string to an LD_LK_* value; LD_LK_IV for unknown/NULL. */
+static int str_to_mode(const char *str)
+{
+	if (str) {
+		if (!strcmp(str, "un"))
+			return LD_LK_UN;
+		if (!strcmp(str, "nl"))
+			return LD_LK_NL;
+		if (!strcmp(str, "sh"))
+			return LD_LK_SH;
+		if (!strcmp(str, "ex"))
+			return LD_LK_EX;
+	}
+	return LD_LK_IV;
+}
+
+/* Map a lock manager name to an LD_LM_* value; -2 for unknown. */
+static int str_to_lm(const char *str)
+{
+	if (!str)
+		return LD_LM_NONE;
+	if (!strcmp(str, "none"))
+		return LD_LM_NONE;
+	if (!strcmp(str, "sanlock"))
+		return LD_LM_SANLOCK;
+	if (!strcmp(str, "dlm"))
+		return LD_LM_DLM;
+	return -2;
+}
+
+/*
+ * Parse the comma-separated opts string from a request into LD_AF_* flags.
+ *
+ * NOTE(review): matching is by substring (strstr), so option names that
+ * contain other option names both match — e.g. "ex_disable" also sets
+ * LD_AF_DISABLE because it contains "disable".  Confirm callers never
+ * combine such options, or switch to whole-token matching.
+ */
+static uint32_t str_to_opts(const char *str)
+{
+	uint32_t flags = 0;
+
+	if (!str)
+		goto out;
+	if (strstr(str, "persistent"))
+		flags |= LD_AF_PERSISTENT;
+	if (strstr(str, "unlock_cancel"))
+		flags |= LD_AF_UNLOCK_CANCEL;
+	if (strstr(str, "next_version"))
+		flags |= LD_AF_NEXT_VERSION;
+	if (strstr(str, "wait"))
+		flags |= LD_AF_WAIT;
+	if (strstr(str, "force"))
+		flags |= LD_AF_FORCE;
+	if (strstr(str, "ex_disable"))
+		flags |= LD_AF_EX_DISABLE;
+	if (strstr(str, "enable"))
+		flags |= LD_AF_ENABLE;
+	if (strstr(str, "disable"))
+		flags |= LD_AF_DISABLE;
+	if (strstr(str, "update_names"))
+		flags |= LD_AF_UPDATE_NAMES_VERSION;
+out:
+	return flags;
+}
+
+/* Return 1 when the local vg's system_id is set and differs from ours. */
+static int is_other_sysid(struct lockspace *lls)
+{
+	/* without both ids there is nothing to compare; treat as ours */
+	if (!our_system_id)
+		return 0;
+	if (!lls->vg_sysid[0])
+		return 0;
+
+	return strcmp(lls->vg_sysid, our_system_id) ? 1 : 0;
+}
+
+
+/*
+ * dump info
+ * client_list: each client struct
+ * local_vgs: each lockspace struct (representing a local vg)
+ * lockspaces: each lockspace struct
+ * lockspace actions: each action struct
+ * lockspace resources: each resource struct
+ * lockspace resource actions: each action struct
+ * lockspace resource locks: each lock struct
+ */
+
+/*
+ * Create the datagram socket used to send dump output, and fill in
+ * dump_addr/dump_addrlen for sendto().  Returns the fd, or a negative
+ * errno-style value from socket() on failure.
+ */
+static int setup_dump_socket(void)
+{
+	int s;
+
+	s = socket(AF_LOCAL, SOCK_DGRAM, 0);
+	if (s < 0)
+		return s;
+
+	memset(&dump_addr, 0, sizeof(dump_addr));
+	dump_addr.sun_family = AF_LOCAL;
+	/* sun_path[0] stays NUL: Linux abstract socket namespace */
+	strcpy(&dump_addr.sun_path[1], DUMP_SOCKET_NAME);
+	dump_addrlen = sizeof(sa_family_t) + strlen(dump_addr.sun_path+1) + 1;
+
+	return s;
+}
+
+/*
+ * Send dump_len bytes of dump_buf to the dump socket, looping on
+ * partial sends.  Returns 0 on success, or the failing sendto result.
+ */
+static int send_dump_buf(int fd, int dump_len)
+{
+	int sent = 0;
+	int rv;
+
+	/* do/while so a zero-length dump still sends one (empty) datagram */
+	do {
+		rv = sendto(fd, dump_buf + sent, dump_len - sent,
+			    MSG_DONTWAIT | MSG_NOSIGNAL,
+			    (struct sockaddr *)&dump_addr, dump_addrlen);
+		if (rv <= 0)
+			return rv;
+		sent += rv;
+	} while (sent < dump_len);
+
+	return 0;
+}
+
+/* Format one client struct into dump_buf at pos; returns the snprintf result. */
+static int print_client(struct client *cl, const char *prefix, int pos, int len)
+{
+	return snprintf(dump_buf + pos, len - pos,
+			"info=%s "
+			"pid=%d "
+			"fd=%d "
+			"pi=%d "
+			"id=%u "
+			"name=%s\n",
+			prefix,
+			cl->pid,
+			cl->fd,
+			cl->pi,
+			cl->id,
+			cl->name[0] ? cl->name : ".");
+}
+
+/* Format one local vg entry into dump_buf at pos; returns the snprintf result. */
+static int print_local_vg(struct lockspace *ls, const char *prefix, int pos, int len)
+{
+	return snprintf(dump_buf + pos, len - pos,
+			"info=%s "
+			"vg_name=%s "
+			"vg_uuid=%s "
+			"vg_sysid=%s\n",
+			prefix,
+			ls->vg_name,
+			ls->vg_uuid,
+			ls->vg_sysid[0] ? ls->vg_sysid : ".");
+}
+
+/* Format one lockspace struct into dump_buf at pos; returns the snprintf result. */
+static int print_lockspace(struct lockspace *ls, const char *prefix, int pos, int len)
+{
+	return snprintf(dump_buf + pos, len - pos,
+			"info=%s "
+			"ls_name=%s "
+			"vg_name=%s "
+			"vg_uuid=%s "
+			"vg_sysid=%s "
+			"vg_args=%s "
+			"lm_type=%s "
+			"host_id=%llu "
+			"names_version=%u "
+			"create_fail=%d "
+			"create_done=%d "
+			"thread_work=%d "
+			"thread_stop=%d "
+			"thread_done=%d "
+			"update_local_vgs=%d "
+			"update_names_version=%d "
+			"sanlock_gl_enabled=%d "
+			"sanlock_gl_dup=%d\n",
+			prefix,
+			ls->name,
+			ls->vg_name,
+			ls->vg_uuid,
+			ls->vg_sysid[0] ? ls->vg_sysid : ".",
+			ls->vg_args,
+			lm_str(ls->lm_type),
+			(unsigned long long)ls->host_id,
+			ls->names_version,
+			ls->create_fail ? 1 : 0,
+			ls->create_done ? 1 : 0,
+			ls->thread_work ? 1 : 0,
+			ls->thread_stop ? 1 : 0,
+			ls->thread_done ? 1 : 0,
+			ls->update_local_vgs ? 1 : 0,
+			ls->update_names_version ? 1 : 0,
+			ls->sanlock_gl_enabled ? 1 : 0,
+			ls->sanlock_gl_dup ? 1 : 0);
+}
+
+/* Format one action struct into dump_buf at pos; returns the snprintf result. */
+static int print_action(struct action *act, const char *prefix, int pos, int len)
+{
+	return snprintf(dump_buf + pos, len - pos,
+			"info=%s "
+			"client_id=%u "
+			"flags=0x%x "
+			"version=%u "
+			"op=%s "
+			"rt=%s "
+			"mode=%s "
+			"lm_type=%s "
+			"result=%d "
+			"lm_rv=%d\n",
+			prefix,
+			act->client_id,
+			act->flags,
+			act->version,
+			op_str(act->op),
+			rt_str(act->rt),
+			mode_str(act->mode),
+			lm_str(act->lm_type),
+			act->result,
+			act->lm_rv);
+}
+
+/* Format one resource struct into dump_buf at pos; returns the snprintf result. */
+static int print_resource(struct resource *r, const char *prefix, int pos, int len)
+{
+	return snprintf(dump_buf + pos, len - pos,
+			"info=%s "
+			"name=%s "
+			"type=%s "
+			"mode=%s "
+			"sh_count=%d "
+			"version=%u\n",
+			prefix,
+			r->name,
+			rt_str(r->type),
+			mode_str(r->mode),
+			r->sh_count,
+			r->version);
+}
+
+/* Format one lock struct into dump_buf at pos; returns the snprintf result. */
+static int print_lock(struct lock *lk, const char *prefix, int pos, int len)
+{
+	return snprintf(dump_buf + pos, len - pos,
+			"info=%s "
+			"mode=%s "
+			"version=%u "
+			"flags=0x%x "
+			"client_id=%u\n",
+			prefix,
+			mode_str(lk->mode),
+			lk->version,
+			lk->flags,
+			lk->client_id);
+}
+
+/*
+ * Fill dump_buf with the daemon's state: clients, local vgs, then each
+ * lockspace with its actions, resources, locks and resource actions.
+ * *dump_len is set to the number of bytes written; returns 0, or
+ * -ENOSPC when dump_buf filled up (partial output remains valid).
+ */
+static int dump_info(int *dump_len)
+{
+	struct client *cl;
+	struct lockspace *ls;
+	struct resource *r;
+	struct lock *lk;
+	struct action *act;
+	int len, pos, ret;
+	int rv = 0;
+
+	memset(dump_buf, 0, sizeof(dump_buf));
+	len = sizeof(dump_buf);
+	pos = 0;
+
+	/*
+	 * clients
+	 */
+
+	pthread_mutex_lock(&client_mutex);
+	list_for_each_entry(cl, &client_list, list) {
+		ret = print_client(cl, "client", pos, len);
+		/* snprintf-style truncation check: stop when out of space */
+		if (ret >= len - pos) {
+			rv = -ENOSPC;
+			break;
+		}
+		pos += ret;
+	}
+	pthread_mutex_unlock(&client_mutex);
+
+	if (rv < 0)
+		return rv;
+
+	/*
+	 * local vgs
+	 */
+
+	pthread_mutex_lock(&lockspaces_mutex);
+	list_for_each_entry(ls, &local_vgs, list) {
+		ret = print_local_vg(ls, "local_vg", pos, len);
+		if (ret >= len - pos) {
+			rv = -ENOSPC;
+			break;
+		}
+		pos += ret;
+	}
+	pthread_mutex_unlock(&lockspaces_mutex);
+
+	if (rv < 0)
+		return rv;
+
+	/*
+	 * lockspaces with their action/resource/lock info
+	 */
+
+	pthread_mutex_lock(&lockspaces_mutex);
+	list_for_each_entry(ls, &lockspaces, list) {
+
+		ret = print_lockspace(ls, "ls", pos, len);
+		if (ret >= len - pos) {
+			rv = -ENOSPC;
+			goto out;
+		}
+		pos += ret;
+
+		list_for_each_entry(act, &ls->actions, list) {
+			ret = print_action(act, "ls_action", pos, len);
+			if (ret >= len - pos) {
+				rv = -ENOSPC;
+				goto out;
+			}
+			pos += ret;
+		}
+
+		list_for_each_entry(r, &ls->resources, list) {
+			ret = print_resource(r, "r", pos, len);
+			if (ret >= len - pos) {
+				rv = -ENOSPC;
+				goto out;
+			}
+			pos += ret;
+
+			list_for_each_entry(lk, &r->locks, list) {
+				ret = print_lock(lk, "lk", pos, len);
+				if (ret >= len - pos) {
+					rv = -ENOSPC;
+					goto out;
+				}
+				pos += ret;
+			}
+
+			list_for_each_entry(act, &r->actions, list) {
+				ret = print_action(act, "r_action", pos, len);
+				if (ret >= len - pos) {
+					rv = -ENOSPC;
+					goto out;
+				}
+				pos += ret;
+			}
+		}
+	}
+out:
+	pthread_mutex_unlock(&lockspaces_mutex);
+
+	*dump_len = pos;
+
+	return rv;
+}
+
+/*
+ * Read and dispatch one request from a client.
+ * Called from client_thread; cl->mutex is held.
+ *
+ * hello/quit/dump requests are answered inline; lock requests on
+ * local (non-lockd) vgs get an immediate reply; everything else is
+ * packaged into a struct action and queued for a lockspace/worker
+ * thread, which replies later via add_client_result().
+ */
+static void client_recv_action(struct client *cl)
+{
+	request req;
+	response res;
+	struct lockspace *lls = NULL;
+	struct action *act;
+	const char *cl_name;
+	const char *vg_name;
+	const char *vg_uuid;
+	const char *vg_sysid;
+	const char *str;
+	int64_t val;
+	uint32_t opts = 0;
+	int result = 0;
+	int cl_pid;
+	int op, rt, lm, mode;
+	int rv;
+
+	buffer_init(&req.buffer);
+
+	rv = buffer_read(cl->fd, &req.buffer);
+	if (!rv) {
+		if (errno == ECONNRESET) {
+			log_debug("client recv %d ECONNRESET", cl->id);
+			cl->dead = 1;
+		} else {
+			log_error("client recv %d buffer_read error %d", cl->id, errno);
+		}
+		buffer_destroy(&req.buffer);
+		client_resume(cl);
+		return;
+	}
+
+	req.cft = dm_config_from_string(req.buffer.mem);
+	if (!req.cft) {
+		log_error("client recv %d config_from_string error", cl->id);
+		buffer_destroy(&req.buffer);
+		client_resume(cl);
+		return;
+	}
+
+	str = daemon_request_str(req, "request", NULL);
+	rv = str_to_op_rt(str, &op, &rt);
+	if (rv < 0) {
+		log_error("client recv %d bad request name \"%s\"", cl->id, str ? str : "");
+		dm_config_destroy(req.cft);
+		buffer_destroy(&req.buffer);
+		client_resume(cl);
+		return;
+	}
+
+	if (op == LD_OP_HELLO || op == LD_OP_QUIT ||
+	    op == LD_OP_DUMP_INFO || op == LD_OP_DUMP_LOG) {
+
+		/*
+		 * TODO: add the client command name to the hello messages
+		 * so it can be saved in cl->name here.
+		 */
+
+		result = 0;
+
+		/* quit is refused while any lockspace is still running */
+		if (op == LD_OP_QUIT) {
+			log_debug("op quit");
+			pthread_mutex_lock(&lockspaces_mutex);
+			if (list_empty(&lockspaces)) {
+				daemon_quit = 1;
+			} else {
+				result = -EBUSY;
+			}
+			pthread_mutex_unlock(&lockspaces_mutex);
+		}
+
+		buffer_init(&res.buffer);
+
+		if (op == LD_OP_DUMP_INFO || op == LD_OP_DUMP_LOG) {
+			int dump_len = 0;
+			int fd;
+
+			fd = setup_dump_socket();
+			if (fd < 0)
+				result = fd;
+			else if (op == LD_OP_DUMP_INFO)
+				result = dump_info(&dump_len);
+			else if (op == LD_OP_DUMP_LOG)
+				result = dump_log(&dump_len);
+			else
+				result = -EINVAL;
+
+			res = daemon_reply_simple("OK",
+						  "result = %d", result,
+						  "dump_len = %d", dump_len,
+						  NULL);
+			if (fd >= 0) {
+				send_dump_buf(fd, dump_len);
+				close(fd);
+			}
+
+		} else {
+			res = daemon_reply_simple("OK",
+						  "result = %d", result,
+						  "protocol = %s", lvmlockd_protocol,
+						  "version = %d", lvmlockd_protocol_version,
+						  NULL);
+		}
+
+		buffer_write(cl->fd, &res.buffer);
+		buffer_destroy(&res.buffer);
+		dm_config_destroy(req.cft);
+		buffer_destroy(&req.buffer);
+		client_resume(cl);
+		return;
+	}
+
+	cl_name = daemon_request_str(req, "cmd", NULL);
+	cl_pid = daemon_request_int(req, "pid", 0);
+	vg_name = daemon_request_str(req, "vg_name", NULL);
+	vg_uuid = daemon_request_str(req, "vg_uuid", NULL);
+	vg_sysid = daemon_request_str(req, "vg_sysid", NULL);
+	str = daemon_request_str(req, "mode", NULL);
+	mode = str_to_mode(str);
+	str = daemon_request_str(req, "opts", NULL);
+	opts = str_to_opts(str);
+	str = daemon_request_str(req, "vg_lock_type", NULL);
+	lm = str_to_lm(str);
+
+	if (cl_pid && cl_pid != cl->pid)
+		log_error("client recv bad message pid %d client %d", cl_pid, cl->pid);
+
+	/* TODO: do this in hello message instead */
+	if (!cl->name[0] && cl_name)
+		strncpy(cl->name, cl_name, MAX_NAME-1);
+
+	if (!our_system_id) {
+		str = daemon_request_str(req, "our_system_id", NULL);
+		if (str && strcmp(str, "none"))
+			our_system_id = strdup(str);
+	}
+
+	/*
+	 * Detect the common case of a lock op on a local vg and queue
+	 * a reply immediately without going through a thread.
+	 */
+
+	if (rt == LD_RT_VG && op == LD_OP_LOCK) {
+		pthread_mutex_lock(&local_thread_mutex);
+		lls = find_local_vg(vg_name, vg_uuid);
+		pthread_mutex_unlock(&local_thread_mutex);
+		if (lls)
+			result = is_other_sysid(lls) ? -EOTHERVG : -ELOCALVG;
+	}
+
+	/*
+	 * A local vg with no sysid, accessible from multiple hosts, can be
+	 * modified without coordination if a user is not careful.  The best we
+	 * can do is disable the lvmetad cache for these vgs so any problems are
+	 * detected earlier, and not masked by lvmetad caching.
+	 */
+
+	if (lls && (result == -ELOCALVG) && !lls->vg_sysid[0])
+		invalidate_lvmetad_vg(lls);
+
+	if ((result == -EOTHERVG) || (result == -ELOCALVG && !local_thread_also)) {
+		const char *sysid = lls->vg_sysid[0] ? lls->vg_sysid : "none";
+
+		log_debug("local vg %s result %d %s sysid %s", vg_name, result,
+			  (result == -EOTHERVG) ? "other" : "local", sysid);
+
+		buffer_init(&res.buffer);
+		res = daemon_reply_simple("OK",
+					  "op_result = %d", result,
+					  "vg_sysid = %s", sysid,
+					  "lock_type = %s", "none",
+					  "result_flags = %s", "LOCAL_LS",
+					  NULL);
+		buffer_write(cl->fd, &res.buffer);
+		buffer_destroy(&res.buffer);
+		dm_config_destroy(req.cft);
+		buffer_destroy(&req.buffer);
+		client_resume(cl);
+		return;
+	}
+
+	if (!gl_use_dlm && !gl_use_sanlock && (lm > 0)) {
+		if (lm == LD_LM_DLM)
+			gl_use_dlm = 1;
+		else if (lm == LD_LM_SANLOCK)
+			gl_use_sanlock = 1;
+
+		log_debug("set gl_use_%s", lm_str(lm));
+	}
+
+	act = alloc_action();
+	if (!act) {
+		/* TODO: return an error to the client */
+		dm_config_destroy(req.cft);
+		buffer_destroy(&req.buffer);
+		client_resume(cl);
+		return;
+	}
+
+	act->client_id = cl->id;
+	act->op = op;
+	act->rt = rt;
+	act->mode = mode;
+	act->flags = opts;
+	act->lm_type = lm;
+
+	/* fix: copy at most size-1 bytes so the destination strings stay
+	   NUL-terminated even for maximum-length input (the previous code
+	   passed the full buffer size to strncpy) */
+
+	if (vg_name && strcmp(vg_name, "none"))
+		strncpy(act->vg_name, vg_name, MAX_NAME-1);
+
+	if (vg_uuid && strcmp(vg_uuid, "none"))
+		strncpy(act->vg_uuid, vg_uuid, 63);
+
+	if (vg_sysid && strcmp(vg_sysid, "none"))
+		strncpy(act->vg_sysid, vg_sysid, MAX_NAME-1);
+
+	str = daemon_request_str(req, "lv_name", NULL);
+	if (str && strcmp(str, "none"))
+		strncpy(act->lv_name, str, MAX_NAME-1);
+
+	val = daemon_request_int(req, "version", 0);
+	if (val)
+		act->version = (uint32_t)val;
+
+	str = daemon_request_str(req, "vg_lock_args", NULL);
+	if (str && strcmp(str, "none"))
+		strncpy(act->vg_args, str, MAX_ARGS-1);
+
+	str = daemon_request_str(req, "lv_lock_args", NULL);
+	if (str && strcmp(str, "none"))
+		strncpy(act->lv_args, str, MAX_ARGS-1);
+
+	/* start_vg will include lvmlocal.conf local/host_id here */
+	val = daemon_request_int(req, "host_id", 0);
+	if (val)
+		act->host_id = val;
+
+	act->max_retries = daemon_request_int(req, "max_retries", DEFAULT_MAX_RETRIES);
+
+	dm_config_destroy(req.cft);
+	buffer_destroy(&req.buffer);
+
+	log_debug("recv %s[%d.%u] %s %s \"%s\" mode %s flags %x",
+		  cl->name[0] ? cl->name : "client", cl->pid, cl->id,
+		  op_str(act->op), rt_str(act->rt), act->vg_name, mode_str(act->mode), opts);
+
+	/*
+	 * local lock on local vg (lls) is done when local locking is enabled.
+	 * local lock on gl is done when local locking is enabled and lockd is not.
+	 */
+	if ((local_thread_also && lls) ||
+	    (local_thread_only && rt == LD_RT_GL && op == LD_OP_LOCK)) {
+		add_local_lock_action(lls, act);
+		return;
+	}
+
+	switch (act->op) {
+	case LD_OP_START:
+		rv = add_lockspace(act);
+		break;
+	case LD_OP_STOP:
+		rv = rem_lockspace(act);
+		break;
+	case LD_OP_INIT:
+	case LD_OP_UPDATE_LOCAL:
+	case LD_OP_START_WAIT:
+	case LD_OP_STOP_ALL:
+		add_work_action(act);
+		rv = 0;
+		break;
+	case LD_OP_LOCK:
+	case LD_OP_UPDATE:
+	case LD_OP_ENABLE:
+	case LD_OP_DISABLE:
+	case LD_OP_FREE:
+		rv = add_lock_action(act);
+		break;
+	case LD_OP_ADD_LOCAL:
+		pthread_mutex_lock(&local_thread_mutex);
+		add_local_vg(act->vg_name, act->vg_uuid, act->vg_sysid);
+		pthread_mutex_unlock(&local_thread_mutex);
+		act->result = 0;
+		add_client_result(act);
+		rv = 0;
+		break;
+	case LD_OP_REM_LOCAL:
+		pthread_mutex_lock(&local_thread_mutex);
+		rem_local_vg(act->vg_name, act->vg_uuid);
+		pthread_mutex_unlock(&local_thread_mutex);
+		act->result = 0;
+		add_client_result(act);
+		rv = 0;
+		break;
+	default:
+		rv = -EINVAL;
+	};
+
+	if (rv < 0) {
+		act->result = rv;
+		add_client_result(act);
+	}
+}
+
+/*
+ * Client worker thread: waits on client_cond under client_mutex
+ * until main_loop flags work (client_work) or a result is queued
+ * (client_results).  Results are sent before new work is taken.
+ * Exits when client_stop is set by close_client_thread().
+ */
+static void *client_thread_main(void *arg_in)
+{
+	struct client *cl;
+	struct action *act;
+
+	while (1) {
+		pthread_mutex_lock(&client_mutex);
+		while (!client_work && list_empty(&client_results)) {
+			if (client_stop) {
+				pthread_mutex_unlock(&client_mutex);
+				goto out;
+			}
+			pthread_cond_wait(&client_cond, &client_mutex);
+		}
+
+		/*
+		 * Send outgoing results back to clients
+		 */
+
+		if (!list_empty(&client_results)) {
+			act = list_first_entry(&client_results, struct action, list);
+			list_del(&act->list);
+			cl = find_client_id(act->client_id);
+			pthread_mutex_unlock(&client_mutex);
+
+			if (cl) {
+				pthread_mutex_lock(&cl->mutex);
+				client_send_result(cl, act);
+				pthread_mutex_unlock(&cl->mutex);
+			} else {
+				log_debug("no client for result");
+			}
+			free_action(act);
+			continue;
+		}
+
+		/*
+		 * Queue incoming actions for lockspace threads
+		 */
+
+		if (client_work) {
+			cl = find_client_work();
+			if (!cl)
+				client_work = 0;
+			pthread_mutex_unlock(&client_mutex);
+
+			if (!cl)
+				continue;
+
+			pthread_mutex_lock(&cl->mutex);
+
+			if (cl->recv) {
+				cl->recv = 0;
+				client_recv_action(cl);
+			}
+
+			if (cl->dead) {
+				/*
+				log_debug("client rem %d pi %d fd %d ig %d",
+					  cl->id, cl->pi, cl->fd, cl->poll_ignore);
+				*/
+				/*
+				 * If cl->dead was set in main_loop, then the
+				 * fd has already been closed and the pollfd
+				 * entry is already unused.
+				 * main_loop set dead=1, ignore=0, pi=-1, fd=-1
+				 *
+				 * if cl->dead was not set in main_loop, but
+				 * set in client_recv_action, then the main_loop
+				 * should be ignoring this client fd.
+				 * main_loop set ignore=1
+				 */
+
+				if (cl->poll_ignore) {
+					log_debug("client close %d pi %d fd %d",
+						  cl->id, cl->pi, cl->fd);
+					/* assert cl->pi != -1 */
+					/* assert pollfd[pi].fd == FD_IGNORE */
+					close(cl->fd);
+					rem_pollfd(cl->pi);
+					cl->pi = -1;
+					cl->fd = -1;
+					cl->poll_ignore = 0;
+				} else {
+					/* main thread should have closed */
+					if (cl->pi != -1 || cl->fd != -1) {
+						log_error("client %d bad state pi %d fd %d",
+							  cl->id, cl->pi, cl->fd);
+					}
+				}
+				pthread_mutex_unlock(&cl->mutex);
+
+				pthread_mutex_lock(&client_mutex);
+				list_del(&cl->list);
+				pthread_mutex_unlock(&client_mutex);
+
+				client_purge(cl);
+
+				free_client(cl);
+			} else {
+				pthread_mutex_unlock(&cl->mutex);
+			}
+		}
+		pthread_mutex_unlock(&client_mutex);
+	}
+out:
+	return NULL;
+}
+
+/*
+ * Initialize the client list, result queue and their synchronization
+ * primitives, then start client_thread_main.  Returns 0 on success,
+ * -1 if the thread could not be created.
+ */
+static int setup_client_thread(void)
+{
+	INIT_LIST_HEAD(&client_list);
+	INIT_LIST_HEAD(&client_results);
+
+	pthread_mutex_init(&client_mutex, NULL);
+	pthread_cond_init(&client_cond, NULL);
+
+	if (pthread_create(&client_thread, NULL, client_thread_main, NULL))
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Ask client_thread_main to exit: set client_stop under client_mutex,
+ * wake the thread, then wait for it to finish.
+ */
+static void close_client_thread(void)
+{
+	pthread_mutex_lock(&client_mutex);
+	client_stop = 1;
+	pthread_cond_signal(&client_cond);
+	pthread_mutex_unlock(&client_mutex);
+
+	pthread_join(client_thread, NULL);
+}
+
+/*
+ * Start the local lock thread (only when --local-also was given).
+ * In --local-only mode, additionally create the permanent pseudo
+ * lockspace local_thread_gls holding one unlocked GL resource.
+ * Returns 0 on success, -ENOMEM or -1 on failure.
+ */
+static int setup_local_thread(void)
+{
+	struct lockspace *ls;
+	struct resource *r;
+
+	if (!local_thread_also)
+		return 0;
+
+	if (local_thread_only) {
+		/* calloc zero-fills, replacing the old malloc+memset pair */
+		if (!(ls = calloc(1, sizeof(struct lockspace))))
+			return -ENOMEM;
+
+		if (!(r = alloc_resource())) {
+			free(ls);
+			return -ENOMEM;
+		}
+
+		strcpy(ls->name, "local_thread_gls");
+		INIT_LIST_HEAD(&ls->actions);
+		INIT_LIST_HEAD(&ls->resources);
+
+		r->type = LD_RT_GL;
+		r->mode = LD_LK_UN;
+		strncpy(r->name, R_NAME_GL, MAX_NAME);
+		INIT_LIST_HEAD(&r->locks);
+		INIT_LIST_HEAD(&r->actions);
+		list_add_tail(&r->list, &ls->resources);
+
+		list_add(&ls->list, &local_vgs);
+		local_thread_gls = ls;
+	}
+
+	if (pthread_create(&local_thread, NULL, local_thread_main, NULL))
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Stop the local lock thread if it was started: set local_thread_stop
+ * under local_thread_mutex, wake the thread, and join it.
+ */
+static void close_local_thread(void)
+{
+	if (!local_thread_also)
+		return;
+
+	pthread_mutex_lock(&local_thread_mutex);
+	local_thread_stop = 1;
+	pthread_cond_signal(&local_thread_cond);
+	pthread_mutex_unlock(&local_thread_mutex);
+
+	pthread_join(local_thread, NULL);
+}
+
+#if 0
+/*
+ * Disabled: the listening socket is created by daemon_start() and
+ * handed to main_loop via ds->socket_fd.  Kept for reference.
+ * fix: declared int, not void -- the body returns status values and
+ * would not compile if this block were enabled.
+ */
+static int setup_listener(void)
+{
+	struct sockaddr_un addr;
+	int rv, fd;
+
+	rv = lvmlockd_socket_address(&addr);
+	if (rv < 0)
+		return rv;
+
+	fd = socket(AF_LOCAL, SOCK_STREAM, 0);
+	if (fd < 0)
+		return fd;
+
+	unlink(addr.sun_path);
+	rv = bind(fd, (struct sockaddr *) &addr, sizeof(struct sockaddr_un));
+	if (rv < 0)
+		goto exit_fail;
+
+	rv = chmod(addr.sun_path, DEFAULT_SOCKET_MODE);
+	if (rv < 0)
+		goto exit_fail;
+
+	rv = chown(addr.sun_path, com.uid, com.gid);
+	if (rv < 0) {
+		log_error("could not set socket %s permissions: %s",
+			  addr.sun_path, strerror(errno));
+		goto exit_fail;
+	}
+
+	rv = listen(fd, 5);
+	if (rv < 0)
+		goto exit_fail;
+
+	fcntl(fd, F_SETFL, fcntl(fd, F_GETFL, 0) | O_NONBLOCK);
+
+	listen_pi = add_pollfd(fd);
+
+	return 0;
+
+exit_fail:
+	close(fd);
+	return -1;
+}
+#endif
+
+/*
+ * Return the pid of the peer connected on a unix socket fd, or -1
+ * if the SO_PEERCRED credentials cannot be read.
+ */
+static int get_peer_pid(int fd)
+{
+	struct ucred cred;
+	socklen_t len = sizeof(cred);   /* fix: POSIX getsockopt takes socklen_t, not unsigned int */
+
+	if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &cred, &len) != 0)
+		return -1;
+
+	return cred.pid;
+}
+
+/*
+ * Accept a new connection on the listening socket, register the fd
+ * with poll, and add the new client to client_list.
+ */
+static void process_listener(int poll_fd)
+{
+	struct client *cl;
+	int fd, pi;
+
+	/* assert poll_fd == listen_fd */
+
+	fd = accept(listen_fd, NULL, NULL);
+	if (fd < 0)
+		return;
+
+	cl = alloc_client();
+	if (!cl) {
+		/* fix: do not leak the accepted fd when allocation fails */
+		close(fd);
+		return;
+	}
+	memset(cl, 0, sizeof(struct client));
+
+	pi = add_pollfd(fd);
+	if (pi < 0) {
+		log_error("process_listener add_pollfd error %d", pi);
+		free_client(cl);
+		/* fix: the fd was never registered; close it to avoid a leak */
+		close(fd);
+		return;
+	}
+
+	cl->pi = pi;
+	cl->fd = fd;
+	cl->pid = get_peer_pid(fd);
+
+	pthread_mutex_init(&cl->mutex, NULL);
+
+	pthread_mutex_lock(&client_mutex);
+	cl->id = ++client_ids;
+	list_add_tail(&cl->list, &client_list);
+	pthread_mutex_unlock(&client_mutex);
+
+	/* log_debug("client add %d pi %d fd %d", cl->id, cl->pi, cl->fd); */
+}
+
+/*
+ * main loop polls on pipe[0] so that a thread can
+ * restart the poll by writing to pipe[1].
+ */
+/*
+ * Create the self-pipe whose read end sits in pollfd so that other
+ * threads can interrupt/restart the main poll() (see process_restart).
+ */
+static int setup_restart(void)
+{
+	if (pipe(restart_fds)) {
+		log_error("setup_restart pipe error %d", errno);
+		return -1;
+	}
+
+	restart_pi = add_pollfd(restart_fds[0]);
+	if (restart_pi < 0) {
+		/* fix: do not leak the pipe fds when add_pollfd fails */
+		close(restart_fds[0]);
+		close(restart_fds[1]);
+		return restart_pi;
+	}
+
+	return 0;
+}
+
+/*
+ * thread wrote 'w' to restart_fds[1] to restart poll()
+ * after adding an fd back into pollfd.
+ */
+/* Drain one wakeup byte written by a thread to restart poll(). */
+static void process_restart(int fd)
+{
+	char wake[1];
+	int rv;
+
+	/* assert fd == restart_fds[0] */
+
+	/* fix: check the read result so a pipe error is not silently
+	   ignored (and -Wunused-result is satisfied) */
+	rv = read(restart_fds[0], wake, 1);
+	if (rv < 0)
+		log_debug("process_restart read error %d", errno);
+}
+
+/* SIGTERM handler: only sets a flag (async-signal-safe); main_loop
+   acts on daemon_quit when poll() returns with EINTR. */
+static void sigterm_handler(int sig __attribute__((unused)))
+{
+	daemon_quit = 1;
+}
+
+/*
+ * Daemon main loop (runs as daemon_main under daemon_start).
+ * Initializes global state and helper threads, then polls the
+ * listening socket, the restart pipe, and all client fds.  Client
+ * fds with activity are marked (recv/dead) and handed to
+ * client_thread via client_work/client_cond; a readable client fd
+ * is temporarily removed from poll (POLL_FD_IGNORE) until
+ * client_resume re-adds it.  Shutdown is driven by daemon_quit.
+ */
+static int main_loop(daemon_state *ds_arg)
+{
+	struct client *cl;
+	int i, rv, is_recv, is_dead;
+
+	signal(SIGTERM, &sigterm_handler);
+
+	setup_structs();
+
+	/* TODO: avoid possible vg name collision */
+	strcpy(gl_lsname_dlm, S_NAME_GL_DLM);
+
+	INIT_LIST_HEAD(&local_vgs);
+	INIT_LIST_HEAD(&local_thread_actions);
+	pthread_mutex_init(&local_thread_mutex, NULL);
+	pthread_cond_init(&local_thread_cond, NULL);
+	INIT_LIST_HEAD(&lockspaces);
+	INIT_LIST_HEAD(&lockspaces_inactive);
+	pthread_mutex_init(&lockspaces_mutex, NULL);
+	pthread_mutex_init(&pollfd_mutex, NULL);
+	pthread_mutex_init(&log_mutex, NULL);
+
+	openlog("lvmlockd", LOG_CONS | LOG_PID, LOG_DAEMON);
+	log_warn("lvmlockd started");
+
+	listen_fd = ds_arg->socket_fd;
+	listen_pi = add_pollfd(listen_fd);
+
+	setup_client_thread();
+	setup_worker_thread();
+	setup_local_thread();
+	setup_restart();
+
+	pthread_mutex_init(&lvmetad_mutex, NULL);
+	lvmetad_handle = lvmetad_open(NULL);
+	if (lvmetad_handle.error || lvmetad_handle.socket_fd < 0)
+		log_error("lvmetad_open error %d", lvmetad_handle.error);
+	else
+		lvmetad_connected = 1;
+
+	/* add entries to local_vgs */
+	create_work_action(LD_OP_UPDATE_LOCAL);
+
+	while (1) {
+		rv = poll(pollfd, pollfd_maxi + 1, -1);
+		if (rv == -1 && errno == EINTR) {
+			if (daemon_quit) {
+				int count;
+				/* first sigterm would trigger stops, and
+				   second sigterm may finish the joins. */
+				count = for_each_lockspace(DO_STOP, DO_FREE, NO_FORCE);
+				if (!count)
+					break;
+				log_debug("ignore shutdown for %d lockspaces", count);
+				daemon_quit = 0;
+			}
+			continue;
+		}
+		if (rv < 0) {
+			log_error("poll errno %d", errno);
+			break;
+		}
+
+		for (i = 0; i <= pollfd_maxi; i++) {
+			if (pollfd[i].fd < 0)
+				continue;
+
+			is_recv = 0;
+			is_dead = 0;
+
+			if (pollfd[i].revents & POLLIN)
+				is_recv = 1;
+			if (pollfd[i].revents & (POLLERR | POLLHUP | POLLNVAL))
+				is_dead = 1;
+
+			if (!is_recv && !is_dead)
+				continue;
+
+			if (i == listen_pi) {
+				process_listener(pollfd[i].fd);
+				continue;
+			}
+
+			if (i == restart_pi) {
+				process_restart(pollfd[i].fd);
+				continue;
+			}
+
+			/*
+			log_debug("poll pi %d fd %d revents %x",
+				  i, pollfd[i].fd, pollfd[i].revents);
+			*/
+
+			pthread_mutex_lock(&client_mutex);
+			cl = find_client_pi(i);
+			if (cl) {
+				pthread_mutex_lock(&cl->mutex);
+
+				if (cl->recv) {
+					/* should not happen */
+					log_error("main client %d already recv", cl->id);
+
+				} else if (cl->dead) {
+					/* should not happen */
+					log_error("main client %d already dead", cl->id);
+
+				} else if (is_dead) {
+					log_debug("close %s[%d.%u] fd %d",
+						  cl->name[0] ? cl->name : "client",
+						  cl->pid, cl->id, cl->fd);
+					cl->dead = 1;
+					cl->pi = -1;
+					cl->fd = -1;
+					cl->poll_ignore = 0;
+					close(pollfd[i].fd);
+					pollfd[i].fd = POLL_FD_UNUSED;
+					pollfd[i].events = 0;
+					pollfd[i].revents = 0;
+
+				} else if (is_recv) {
+					cl->recv = 1;
+					cl->poll_ignore = 1;
+					pollfd[i].fd = POLL_FD_IGNORE;
+					pollfd[i].events = 0;
+					pollfd[i].revents = 0;
+				}
+
+				pthread_mutex_unlock(&cl->mutex);
+
+				client_work = 1;
+				pthread_cond_signal(&client_cond);
+
+				/* client_thread will pick up and work on any
+				   client with cl->recv or cl->dead set */
+
+			} else {
+				/* don't think this can happen */
+				log_error("no client for index %d fd %d",
+					  i, pollfd[i].fd);
+				close(pollfd[i].fd);
+				pollfd[i].fd = POLL_FD_UNUSED;
+				pollfd[i].events = 0;
+				pollfd[i].revents = 0;
+			}
+			pthread_mutex_unlock(&client_mutex);
+
+			/* TODO?: after set_dead, scan pollfd for last unused
+			   slot and reduce pollfd_maxi */
+		}
+	}
+
+	for_each_lockspace_retry(DO_STOP, DO_FREE, DO_FORCE);
+	free_lockspaces_inactive();
+	close_worker_thread();
+	close_client_thread();
+	close_local_thread();
+	closelog();
+	daemon_close(lvmetad_handle);
+	return 0;
+}
+
+/* Print command-line help for lvmlockd to the given stream. */
+static void usage(char *prog, FILE *file)
+{
+	fprintf(file, "Usage:\n");
+	fprintf(file, "%s [options]\n\n", prog);
+	fprintf(file, "  --help | -h\n"
+		      "      Show this help information.\n");
+	fprintf(file, "  --version | -V\n"
+		      "      Show version of lvmlockd.\n");
+	fprintf(file, "  --test | -T\n"
+		      "      Test mode, do not call lock manager.\n");
+	fprintf(file, "  --foreground | -f\n"
+		      "      Don't fork.\n");
+	fprintf(file, "  --daemon-debug | -D\n"
+		      "      Don't fork and print debugging to stdout.\n");
+	fprintf(file, "  --pid-file | -p <path>\n"
+		      "      Set path to the pid file. [%s]\n", LVMLOCKD_PIDFILE);
+	fprintf(file, "  --socket-path | -s <path>\n"
+		      "      Set path to the socket to listen on. [%s]\n", LVMLOCKD_SOCKET);
+	fprintf(file, "  --log-config | -l <str>\n"
+		      "      Set log config.\n");
+	fprintf(file, "  --local-also | -a\n"
+		      "      Manage locks between pids for local vgs.\n");
+	fprintf(file, "  --local-only | -o\n"
+		      "      Only manage locks for local vgs, not dlm|sanlock vgs.\n");
+	fprintf(file, "  --gl-type | -g <str>\n"
+		      "      Set global lock type to be dlm|sanlock.\n");
+	fprintf(file, "  --system-id | -y <str>\n"
+		      "      Set the local system id.\n");
+	fprintf(file, "  --host-id | -i <num>\n"
+		      "      Set the local sanlock host id.\n");
+	fprintf(file, "  --host-id-file | -F <path>\n"
+		      "      A file containing the local sanlock host_id.\n");
+}
+
+/*
+ * Parse options, fill in daemon_state, and hand control to
+ * daemon_start(), which runs main_loop as daemon_main.
+ */
+int main(int argc, char *argv[])
+{
+	daemon_state ds;
+
+	/* fix: zero the whole struct; fields like ds.foreground were
+	   previously read uninitialized when -f/-D were not given */
+	memset(&ds, 0, sizeof(ds));
+
+	ds.daemon_main = main_loop;
+	ds.daemon_init = NULL;
+	ds.daemon_fini = NULL;
+	ds.pidfile = getenv("LVM_LVMLOCKD_PIDFILE");
+	ds.socket_path = getenv("LVM_LVMLOCKD_SOCKET");
+	ds.protocol = lvmlockd_protocol;
+	ds.protocol_version = lvmlockd_protocol_version;
+	ds.name = "lvmlockd";
+
+	static struct option long_options[] = {
+		{"help",        no_argument,       0, 'h' },
+		{"version",     no_argument,       0, 'V' },
+		{"test",        no_argument,       0, 'T' },
+		{"foreground",  no_argument,       0, 'f' },
+		{"daemon-debug",no_argument,       0, 'D' },
+		{"pid-file",    required_argument, 0, 'p' },
+		{"socket-path", required_argument, 0, 's' },
+		{"local-also",  no_argument,       0, 'a' },
+		{"local-only",  no_argument,       0, 'o' },
+		{"gl-type",     required_argument, 0, 'g' },
+		{"system-id",   required_argument, 0, 'y' },
+		{"host-id",     required_argument, 0, 'i' },
+		{"host-id-file",required_argument, 0, 'F' },
+		{0, 0, 0, 0 }
+	};
+
+	while (1) {
+		int c;
+		int lm;
+		int option_index = 0;
+
+		/* fix: optstring now matches the cases handled below --
+		   'y:', 'i:', 'F:' were documented in usage() and handled
+		   in the switch but missing here, while 'S:' and 'I:'
+		   were accepted but never handled */
+		c = getopt_long(argc, argv, "hVTfDp:s:l:aog:y:i:F:",
+				long_options, &option_index);
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 0:
+			/* fix: getopt_long returns 0 (not '0') for long
+			   options that set a flag */
+			break;
+		case 'h':
+			usage(argv[0], stdout);
+			exit(EXIT_SUCCESS);
+		case 'V':
+			printf("lvmlockd version: " LVM_VERSION "\n");
+			exit(EXIT_SUCCESS);
+		case 'T':
+			daemon_test = 1;
+			break;
+		case 'f':
+			ds.foreground = 1;
+			break;
+		case 'D':
+			ds.foreground = 1;
+			daemon_debug = 1;
+			break;
+		case 'p':
+			ds.pidfile = strdup(optarg);
+			break;
+		case 's':
+			ds.socket_path = strdup(optarg);
+			break;
+		case 'a':
+			local_thread_also = 1;
+			break;
+		case 'o':
+			local_thread_also = 1;
+			local_thread_only = 1;
+			break;
+		case 'g':
+			lm = str_to_lm(optarg);
+			if (lm == LD_LM_DLM)
+				gl_use_dlm = 1;
+			else if (lm == LD_LM_SANLOCK)
+				gl_use_sanlock = 1;
+			else {
+				fprintf(stderr, "invalid gl-type option");
+				exit(EXIT_FAILURE);
+			}
+			break;
+		case 'y':
+			our_system_id = strdup(optarg);
+			break;
+		case 'i':
+			daemon_host_id = atoi(optarg);
+			break;
+		case 'F':
+			daemon_host_id_file = strdup(optarg);
+			break;
+		case '?':
+		default:
+			usage(argv[0], stdout);
+			exit(EXIT_FAILURE);
+		}
+	}
+
+	if (!ds.pidfile)
+		ds.pidfile = LVMLOCKD_PIDFILE;
+
+	if (!ds.socket_path)
+		ds.socket_path = LVMLOCKD_SOCKET;
+
+	/* runs daemon_main/main_loop */
+	daemon_start(ds);
+
+	return 0;
+}
diff --git a/daemons/lvmlockd/lvmlockd-dlm.c b/daemons/lvmlockd/lvmlockd-dlm.c
new file mode 100644
index 000000000..bd0691d5e
--- /dev/null
+++ b/daemons/lvmlockd/lvmlockd-dlm.c
@@ -0,0 +1,529 @@
+
+#define _XOPEN_SOURCE 500 /* pthread */
+#define _ISOC99_SOURCE
+
+#include <assert.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <poll.h>
+#include <errno.h>
+#include <string.h>
+#include <endian.h>
+#include <fcntl.h>
+#include <byteswap.h>
+#include <syslog.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+
+#include "configure.h"
+#include "daemon-server.h"
+#include "daemon-log.h"
+
+#include "lvmlockd-internal.h"
+
+/*
+ * Using synchronous _wait dlm apis so do not define _REENTRANT and
+ * link with non-threaded version of library, libdlm_lt.
+ */
+#include "libdlm.h"
+
+/* Per-lockspace dlm state: handle from dlm_new/open_lockspace. */
+struct lm_dlm {
+	dlm_lshandle_t *dh;
+};
+
+/* Per-resource dlm state: the lksb, plus an optional lock value
+   block (vb) allocated for GL/VG resources to carry versions. */
+struct rd_dlm {
+	struct dlm_lksb lksb;
+	struct val_blk *vb;
+};
+
+/*
+ * lock_args format
+ *
+ * vg_lock_args format for dlm is
+ * vg_version_string:undefined:cluster_name
+ *
+ * lv_lock_args are not used for dlm
+ *
+ * version_string is MAJOR.MINOR.PATCH
+ * undefined may contain ":"
+ */
+
+#define VG_LOCK_ARGS_MAJOR 1
+#define VG_LOCK_ARGS_MINOR 0
+#define VG_LOCK_ARGS_PATCH 0
+
+/* Extract the cluster name (the last ":"-separated field) from
+   dlm vg_lock_args; thin wrapper over last_string_from_args(). */
+static int cluster_name_from_args(char *vg_args, char *clustername)
+{
+	return last_string_from_args(vg_args, clustername);
+}
+
+/*
+ * Verify the major number of the vg_lock_args version string is one
+ * this daemon understands (<= VG_LOCK_ARGS_MAJOR).  Returns 0 if OK,
+ * negative on parse error or unsupported version.
+ */
+static int check_args_version(char *vg_args)
+{
+	unsigned int major = 0;
+	int rv;
+
+	rv = version_from_args(vg_args, &major, NULL, NULL);
+	if (rv < 0) {
+		log_error("check_args_version %s error %d", vg_args, rv);
+		return rv;
+	}
+
+	if (major > VG_LOCK_ARGS_MAJOR) {
+		/* fix: major is unsigned int, log it with %u to avoid a
+		   -Wformat type mismatch */
+		log_error("check_args_version %s major %u %d", vg_args, major, VG_LOCK_ARGS_MAJOR);
+		return -1;
+	}
+
+	return 0;
+}
+
+/* This will be set after dlm_controld is started. */
+#define DLM_CLUSTER_NAME_PATH "/sys/kernel/config/dlm/cluster/cluster_name"
+
+/*
+ * Read the running cluster name from dlm configfs into clustername
+ * (caller provides a buffer of at least MAX_ARGS bytes); the name
+ * "test" is used in test mode.  Returns 0 on success, negative on
+ * error; the path only exists once dlm_controld is running.
+ */
+static int read_cluster_name(char *clustername)
+{
+	char *n;
+	int fd;
+	int rv;
+
+	if (daemon_test) {
+		sprintf(clustername, "%s", "test");
+		return 0;
+	}
+
+	fd = open(DLM_CLUSTER_NAME_PATH, O_RDONLY);
+	if (fd < 0) {
+		/* fix: log errno, not the -1 fd, so the real reason is visible */
+		log_error("read_cluster_name: open error %d, check dlm_controld", errno);
+		return fd;
+	}
+
+	rv = read(fd, clustername, MAX_ARGS - 1);
+	if (rv < 0) {
+		/* fix: log errno instead of the unrelated fd value */
+		log_error("read_cluster_name: cluster name read error %d, check dlm_controld", errno);
+		close(fd);
+		return rv;
+	}
+	/* fix: terminate explicitly rather than relying on the caller
+	   having zeroed the buffer */
+	clustername[rv] = '\0';
+
+	n = strstr(clustername, "\n");
+	if (n)
+		*n = '\0';
+	close(fd);
+	return 0;
+}
+
+/*
+ * Build the vg_lock_args string for a new dlm VG:
+ * "MAJOR.MINOR.PATCH:clustername" (see lock_args format above).
+ * Returns 0 on success or a negative errno-style value.
+ */
+int lm_init_vg_dlm(char *ls_name, char *vg_name, uint32_t flags, char *vg_args)
+{
+	char clustername[MAX_ARGS] = { 0 };
+	char lock_args_version[MAX_ARGS] = { 0 };
+	int rv;
+
+	snprintf(lock_args_version, MAX_ARGS, "%u.%u.%u",
+		 VG_LOCK_ARGS_MAJOR, VG_LOCK_ARGS_MINOR, VG_LOCK_ARGS_PATCH);
+
+	if ((rv = read_cluster_name(clustername)) < 0)
+		return rv;
+
+	if (strlen(clustername) + strlen(lock_args_version) + 2 > MAX_ARGS) {
+		log_error("init_vg_dlm args too long");
+		return -ENAMETOOLONG;
+	}
+
+	snprintf(vg_args, MAX_ARGS, "%s:%s", lock_args_version, clustername);
+
+	log_debug("init_vg_dlm done %s vg_args %s", ls_name, vg_args);
+	return 0;
+}
+
+/*
+ * Join (or create) the dlm lockspace backing ls and attach the
+ * lm_dlm state to ls->lm_data.  For VG lockspaces, first verify
+ * the vg_args version and that the recorded cluster name matches
+ * the running cluster; the global lockspace has no vg_args.
+ */
+int lm_add_lockspace_dlm(struct lockspace *ls)
+{
+	char sys_clustername[MAX_ARGS];
+	char arg_clustername[MAX_ARGS];
+	struct lm_dlm *lmd;
+	int rv;
+
+	memset(sys_clustername, 0, sizeof(sys_clustername));
+	memset(arg_clustername, 0, sizeof(arg_clustername));
+
+	rv = read_cluster_name(sys_clustername);
+	if (rv < 0)
+		return rv;
+
+	if (!ls->vg_args[0]) {
+		/* global lockspace has no vg args */
+		goto skip_args;
+	}
+
+	rv = check_args_version(ls->vg_args);
+	if (rv < 0)
+		return rv;
+
+	rv = cluster_name_from_args(ls->vg_args, arg_clustername);
+	if (rv < 0) {
+		log_error("add_lockspace_dlm %s no cluster name from args %s", ls->name, ls->vg_args);
+		return rv;
+	}
+
+	if (strcmp(sys_clustername, arg_clustername)) {
+		log_error("add_lockspace_dlm %s mismatching cluster names sys %s arg %s",
+			  ls->name, sys_clustername, arg_clustername);
+		return -1;
+	}
+
+ skip_args:
+	lmd = malloc(sizeof(struct lm_dlm));
+	if (!lmd) {
+		rv = -ENOMEM;
+		goto out;
+	}
+	/* fix: zero the struct so lmd->dh is NULL in daemon_test mode
+	   instead of an indeterminate pointer */
+	memset(lmd, 0, sizeof(struct lm_dlm));
+
+	if (daemon_test)
+		goto data;
+
+	lmd->dh = dlm_new_lockspace(ls->name, 0600, DLM_LSFL_NEWEXCL);
+	if (!lmd->dh && errno == EEXIST) {
+		lmd->dh = dlm_open_lockspace(ls->name);
+	}
+
+	if (!lmd->dh) {
+		/* fix: log errno, which holds the real failure reason,
+		   instead of the constant -1 */
+		log_error("add_lockspace_dlm new error %d", errno);
+		free(lmd);
+		rv = -1;
+		goto out;
+	}
+
+ data:
+	ls->lm_data = lmd;
+	rv = 0;
+ out:
+	return rv;
+}
+
+/*
+ * Leave the dlm lockspace backing ls and free its lm state.
+ * NOTE(review): on dlm_release_lockspace failure this returns
+ * without freeing lmd or clearing ls->lm_data -- presumably so
+ * the caller can retry the stop; confirm intended ownership.
+ */
+int lm_rem_lockspace_dlm(struct lockspace *ls, int free_vg)
+{
+	struct lm_dlm *lmd = ls->lm_data;
+	int rv;
+
+	if (daemon_test)
+		goto out;
+
+	/*
+	 * TODO: if free_vg is set, it means we are doing vgremove,
+	 * and we may want to tell any other nodes to leave the lockspace.
+	 * This is not really necessary since there should be no harm in
+	 * having an unused lockspace sitting around.
+	 */
+
+	rv = dlm_release_lockspace(ls->name, lmd->dh, 1);
+	if (rv < 0) {
+		log_error("rem_lockspace_dlm error %d", rv);
+		return rv;
+	}
+ out:
+	free(lmd);
+	ls->lm_data = NULL;
+
+	/* removing the dlm global lockspace disables dlm gl usage */
+	if (!strcmp(ls->name, gl_lsname_dlm)) {
+		gl_running_dlm = 0;
+		gl_auto_dlm = 0;
+	}
+
+	return 0;
+}
+
+/*
+ * Create the per-resource dlm state (rd_dlm) and take an initial NL
+ * lock on the resource.  GL/VG resources also get a lock value block
+ * buffer for carrying version numbers.
+ */
+static int lm_add_resource_dlm(struct lockspace *ls, struct resource *r)
+{
+	struct lm_dlm *lmd = ls->lm_data;
+	struct rd_dlm *rdd;
+	uint32_t flags = 0;
+	char *buf;
+	int rv;
+
+	rdd = malloc(sizeof(struct rd_dlm));
+	if (!rdd)
+		return -ENOMEM;
+
+	memset(rdd, 0, sizeof(struct rd_dlm));
+
+	if (r->type == LD_RT_GL || r->type == LD_RT_VG) {
+		buf = malloc(sizeof(struct val_blk) + DLM_LVB_LEN);
+		if (!buf) {
+			free(rdd);
+			return -ENOMEM;
+		}
+		memset(buf, 0, sizeof(struct val_blk) + DLM_LVB_LEN);
+
+		rdd->vb = (struct val_blk *)buf;
+		rdd->lksb.sb_lvbptr = buf + sizeof(struct val_blk);
+
+		flags |= LKF_VALBLK;
+	}
+
+	/* because this is a new NL lock request */
+	flags |= LKF_EXPEDITE;
+
+	if (daemon_test)
+		goto out;
+
+	rv = dlm_ls_lock_wait(lmd->dh, LKM_NLMODE, &rdd->lksb, flags,
+			      r->name, strlen(r->name),
+			      0, NULL, NULL, NULL);
+	if (rv < 0) {
+		log_error("S %s R %s add_resource_dlm lock error %d", ls->name, r->name, rv);
+		/* fix: also free the val_blk buffer allocated above,
+		   which was leaked on this path */
+		free(rdd->vb);
+		free(rdd);
+		return rv;
+	}
+ out:
+	r->lm_data = rdd;
+	return 0;
+}
+
+/*
+ * Release the NL lock held on resource r and free its rd_dlm state.
+ * Returns the dlm unlock result (0 in test mode or when there is no
+ * lm state to remove).
+ */
+int lm_rem_resource_dlm(struct lockspace *ls, struct resource *r)
+{
+	struct lm_dlm *lmd = ls->lm_data;
+	struct rd_dlm *rdd = r->lm_data;
+	struct dlm_lksb *lksb;
+	int rv = 0;
+
+	/* fix: check for missing lm state before the daemon_test branch;
+	   previously a NULL rdd was dereferenced at the out: label when
+	   daemon_test was set */
+	if (!rdd)
+		return 0;
+
+	if (daemon_test)
+		goto out;
+
+	lksb = &rdd->lksb;
+
+	rv = dlm_ls_unlock_wait(lmd->dh, lksb->sb_lkid, 0, lksb);
+	if (rv < 0) {
+		log_error("S %s R %s rem_resource_dlm unlock error %d", ls->name, r->name, rv);
+	}
+ out:
+	free(rdd->vb);
+	free(rdd);
+	r->lm_data = NULL;
+	return rv;
+}
+
+/* Map an LD_LK_* lock mode to the corresponding dlm LKM_* mode;
+   returns -1 for modes that are never requested from the dlm. */
+static int to_dlm_mode(int ld_mode)
+{
+	if (ld_mode == LD_LK_EX)
+		return LKM_EXMODE;
+	if (ld_mode == LD_LK_SH)
+		return LKM_PRMODE;
+	return -1;
+}
+
+/*
+ * Use PERSISTENT so that if lvmlockd exits while holding locks,
+ * the locks will remain orphaned in the dlm, still protecting what
+ * they were acquired to protect These orphaned locks should be
+ * cleared by resetting the host (or possibly some complicated
+ * manual clearing of things).
+ *
+ * TODO: define manual steps to safely clear/reset things after
+ * lvmlockd failure, if possible.
+ *
+ * TODO: in the future, allow lvmlockd to be restarted, purge
+ * orphans, and reacquire locks that are needed.
+ */
+
+/*
+ * Convert resource r to ld_mode (EX or SH).  For GL/VG resources the
+ * r_version/n_version values are read back from the dlm lock value
+ * block; both are returned as 0 if the LVB was invalidated
+ * (VALNOTVALID) or if its version field is from a newer, unknown
+ * layout.  Returns 0 on success, -EAGAIN when contended (NOQUEUE is
+ * set), or another negative dlm error.
+ */
+int lm_lock_dlm(struct lockspace *ls, struct resource *r, int ld_mode,
+		uint32_t *r_version, uint32_t *n_version)
+{
+	struct lm_dlm *lmd = ls->lm_data;
+	struct rd_dlm *rdd;
+	struct dlm_lksb *lksb;
+	struct val_blk vb;
+	uint32_t flags = 0;
+	uint16_t vb_version;
+	int mode;
+	int rv;
+
+	/* lazily create the rd_dlm state and initial NL lock */
+	if (!r->lm_data) {
+		rv = lm_add_resource_dlm(ls, r);
+		if (rv < 0)
+			return rv;
+	}
+
+	rdd = r->lm_data;
+	lksb = &rdd->lksb;
+
+	flags |= LKF_CONVERT;
+	flags |= LKF_NOQUEUE;
+	flags |= LKF_PERSISTENT;
+
+	if (rdd->vb)
+		flags |= LKF_VALBLK;
+
+	mode = to_dlm_mode(ld_mode);
+	if (mode < 0) {
+		log_error("lock_dlm invalid mode %d", ld_mode);
+		return -EINVAL;
+	}
+
+	log_debug("S %s R %s lock_dlm", ls->name, r->name);
+
+	if (daemon_test) {
+		*r_version = 0;
+		*n_version = 0;
+		return 0;
+	}
+
+	rv = dlm_ls_lock_wait(lmd->dh, mode, lksb, flags,
+			      r->name, strlen(r->name),
+			      0, NULL, NULL, NULL);
+	if (rv == -EAGAIN) {
+		/* TODO: what case is this? what should be done? */
+		log_error("S %s R %s lock_dlm mode %d rv EAGAIN", ls->name, r->name, mode);
+		return -EAGAIN;
+	}
+	if (rv < 0) {
+		log_error("S %s R %s lock_dlm error %d", ls->name, r->name, rv);
+		return rv;
+	}
+
+	if (rdd->vb) {
+		if (lksb->sb_flags & DLM_SBF_VALNOTVALID) {
+			log_debug("S %s R %s lock_dlm VALNOTVALID", ls->name, r->name);
+			memset(rdd->vb, 0, sizeof(struct val_blk));
+			*r_version = 0;
+			*n_version = 0;
+			goto out;
+		}
+
+		memcpy(&vb, lksb->sb_lvbptr, sizeof(struct val_blk));
+		vb_version = le16_to_cpu(vb.version);
+
+		/* an unknown newer major vb layout: stop using the vb */
+		if (vb_version && ((vb_version & 0xFF00) > (VAL_BLK_VERSION & 0xFF00))) {
+			log_error("S %s R %s lock_dlm ignore vb_version %x",
+				  ls->name, r->name, vb_version);
+			*r_version = 0;
+			*n_version = 0;
+			free(rdd->vb);
+			rdd->vb = NULL;
+			lksb->sb_lvbptr = NULL;
+			goto out;
+		}
+
+		*r_version = le32_to_cpu(vb.r_version);
+		*n_version = le32_to_cpu(vb.n_version);
+		memcpy(rdd->vb, &vb, sizeof(vb)); /* rdd->vb saved as le */
+
+		log_debug("S %s R %s lock_dlm get r_version %u n_version %u",
+			  ls->name, r->name, *r_version, *n_version);
+	}
+out:
+	return 0;
+}
+
+/*
+ * Convert resource r to ld_mode.  When dropping from EX with a lock
+ * value block, write the new r_version into the LVB as part of the
+ * conversion.  Returns 0, -EAGAIN when contended, -EINVAL for an
+ * unusable mode, or another negative dlm error.
+ */
+int lm_convert_dlm(struct lockspace *ls, struct resource *r,
+		   int ld_mode, uint32_t r_version)
+{
+	struct lm_dlm *lmd = ls->lm_data;
+	struct rd_dlm *rdd = r->lm_data;
+	struct dlm_lksb *lksb = &rdd->lksb;
+	int mode;
+	uint32_t flags = 0;
+	int rv;
+
+	log_debug("S %s R %s convert_dlm", ls->name, r->name);
+
+	flags |= LKF_CONVERT;
+	flags |= LKF_NOQUEUE;
+	flags |= LKF_PERSISTENT;
+
+	if (rdd->vb && r_version && (r->mode == LD_LK_EX)) {
+		if (!rdd->vb->version) {
+			/* first time vb has been written */
+			rdd->vb->version = cpu_to_le16(VAL_BLK_VERSION);
+		}
+		rdd->vb->r_version = cpu_to_le32(r_version);
+		memcpy(lksb->sb_lvbptr, rdd->vb, sizeof(struct val_blk));
+
+		log_debug("S %s R %s convert_dlm set r_version %u",
+			  ls->name, r->name, r_version);
+
+		flags |= LKF_VALBLK;
+	}
+
+	mode = to_dlm_mode(ld_mode);
+	if (mode < 0) {
+		/* fix: reject invalid modes instead of passing (uint32_t)-1
+		   to the dlm, matching the check in lm_lock_dlm */
+		log_error("S %s R %s convert_dlm invalid mode %d", ls->name, r->name, ld_mode);
+		return -EINVAL;
+	}
+
+	if (daemon_test)
+		return 0;
+
+	rv = dlm_ls_lock_wait(lmd->dh, mode, lksb, flags,
+			      r->name, strlen(r->name),
+			      0, NULL, NULL, NULL);
+	if (rv == -EAGAIN) {
+		/* TODO: what case is this? what should be done? */
+		log_error("S %s R %s convert_dlm mode %d rv EAGAIN", ls->name, r->name, mode);
+		return -EAGAIN;
+	}
+	if (rv < 0) {
+		log_error("S %s R %s convert_dlm error %d", ls->name, r->name, rv);
+	}
+	return rv;
+}
+
+/*
+ * "Unlock" resource r by converting it back to NL, optionally
+ * publishing r_version/n_version through the lock value block when
+ * dropping from EX.  PERSISTENT is deliberately not set: an orphaned
+ * NL lock would protect nothing.
+ */
+int lm_unlock_dlm(struct lockspace *ls, struct resource *r,
+		  uint32_t r_version, uint32_t n_version, uint32_t lmuf_flags)
+{
+	struct lm_dlm *lmd = ls->lm_data;
+	struct rd_dlm *rdd = r->lm_data;
+	struct dlm_lksb *lksb = &rdd->lksb;
+	uint32_t flags = LKF_CONVERT;
+	int rv;
+
+	log_debug("S %s R %s unlock_dlm r_version %u flags %x",
+		  ls->name, r->name, r_version, lmuf_flags);
+
+	if (rdd->vb && r_version && (r->mode == LD_LK_EX)) {
+		/* first write initializes the vb version field */
+		if (!rdd->vb->version)
+			rdd->vb->version = cpu_to_le16(VAL_BLK_VERSION);
+		rdd->vb->r_version = cpu_to_le32(r_version);
+		if (n_version)
+			rdd->vb->n_version = cpu_to_le32(n_version);
+		memcpy(lksb->sb_lvbptr, rdd->vb, sizeof(struct val_blk));
+
+		log_debug("S %s R %s unlock_dlm set r_version %u n_version %u",
+			  ls->name, r->name, r_version, n_version);
+
+		flags |= LKF_VALBLK;
+	}
+
+	if (daemon_test)
+		return 0;
+
+	rv = dlm_ls_lock_wait(lmd->dh, LKM_NLMODE, lksb, flags,
+			      r->name, strlen(r->name),
+			      0, NULL, NULL, NULL);
+	if (rv < 0)
+		log_error("S %s R %s unlock_dlm error %d", ls->name, r->name, rv);
+
+	return rv;
+}
+
diff --git a/daemons/lvmlockd/lvmlockd-internal.h b/daemons/lvmlockd/lvmlockd-internal.h
new file mode 100644
index 000000000..0c84a798a
--- /dev/null
+++ b/daemons/lvmlockd/lvmlockd-internal.h
@@ -0,0 +1,317 @@
+/*
+ * Copyright (C) 2013 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ */
+
+
+#ifndef _LVM_LVMLOCKD_INTERNAL_H
+#define _LVM_LVMLOCKD_INTERNAL_H
+
+/* TODO: figure out real restraints/requirements for these */
+#define MAX_NAME 64
+#define MAX_ARGS 64
+
+#define R_NAME_GL_DISABLED "_GLLK_disabled"
+#define R_NAME_GL "GLLK"
+#define R_NAME_VG "VGLK"
+#define S_NAME_GL_DLM "lvmglobal"
+/* global lockspace name for sanlock is a vg name */
+
+/* lock manager types */
+enum {
+ LD_LM_NONE = 0,
+ LD_LM_UNUSED = 1, /* place holder so values match lib/locking/lvmlockd.h */
+ LD_LM_DLM = 2,
+ LD_LM_SANLOCK = 3,
+};
+
+/* operation types */
+enum {
+ LD_OP_HELLO = 1,
+ LD_OP_QUIT,
+ LD_OP_INIT,
+ LD_OP_FREE,
+ LD_OP_START,
+ LD_OP_STOP,
+ LD_OP_LOCK,
+ LD_OP_UPDATE,
+ LD_OP_CLOSE,
+ LD_OP_ENABLE,
+ LD_OP_DISABLE,
+ LD_OP_ADD_LOCAL,
+ LD_OP_REM_LOCAL,
+ LD_OP_UPDATE_LOCAL,
+ LD_OP_START_WAIT,
+ LD_OP_STOP_ALL,
+ LD_OP_DUMP_INFO,
+ LD_OP_DUMP_LOG,
+};
+
+/* resource types */
+enum {
+ LD_RT_GL = 1,
+ LD_RT_VG,
+ LD_RT_LV,
+};
+
+/* lock modes, more restrictive must be larger value */
+enum {
+ LD_LK_IV = -1,
+ LD_LK_UN = 0,
+ LD_LK_NL = 1,
+ LD_LK_SH = 2,
+ LD_LK_EX = 3,
+};
+
+/* Minimal doubly-linked list node (see container_of below). */
+struct list_head {
+	struct list_head *next, *prev;
+};
+
+/* State for one client connection to the daemon socket. */
+struct client {
+	struct list_head list;
+	pthread_mutex_t mutex;
+	int pid;
+	int fd;
+	int pi;
+	uint32_t id;
+	unsigned int recv : 1;
+	unsigned int dead : 1;
+	unsigned int poll_ignore : 1;
+	char name[MAX_NAME];
+};
+
+/* action flags (action.flags) */
+#define LD_AF_PERSISTENT           0x00000001
+#define LD_AF_CLIENT_DEAD          0x00000002
+#define LD_AF_UNLOCK_CANCEL        0x00000004
+#define LD_AF_NEXT_VERSION         0x00000008
+#define LD_AF_WAIT                 0x00000010
+#define LD_AF_FORCE                0x00000020
+#define LD_AF_EX_DISABLE           0x00000040
+#define LD_AF_ENABLE               0x00000080
+#define LD_AF_DISABLE              0x00000100
+#define LD_AF_SEARCH_LS            0x00000200
+#define LD_AF_LOCAL_LS             0x00000400
+#define LD_AF_UPDATE_NAMES_VERSION 0x00000800
+#define LD_AF_WAIT_STARTING        0x00001000
+#define LD_AF_DUP_GL_LS            0x00002000
+#define LD_AF_INACTIVE_LS          0x00004000
+#define LD_AF_ADD_LS_ERROR         0x00008000
+
+/*
+ * Number of times to repeat a lock request after
+ * a lock conflict (-EAGAIN) if unspecified in the
+ * request.
+ */
+#define DEFAULT_MAX_RETRIES 4
+
+/* One client request, queued on client/lockspace action lists. */
+struct action {
+	struct list_head list;
+	uint32_t client_id;
+	uint32_t flags;			/* LD_AF_ */
+	uint32_t version;
+	uint64_t host_id;
+	int8_t op;			/* operation type LD_OP_ */
+	int8_t rt;			/* resource type LD_RT_ */
+	int8_t mode;			/* lock mode LD_LK_ */
+	int8_t lm_type;			/* lock manager: LM_DLM, LM_SANLOCK */
+	int retries;
+	int max_retries;
+	int result;
+	int lm_rv;			/* return value from lm_ function */
+	char vg_uuid[64];
+	char vg_name[MAX_NAME+1];
+	char lv_name[MAX_NAME+1];
+	char vg_args[MAX_ARGS];
+	char lv_args[MAX_ARGS];
+	char vg_sysid[MAX_NAME+1];
+};
+
+/* Lock state for one gl/vg/lv resource within a lockspace. */
+struct resource {
+	struct list_head list;		/* lockspace.resources */
+	char name[MAX_NAME+1];		/* vg name or lv name */
+	int8_t type;			/* resource type LD_RT_ */
+	int8_t mode;
+	unsigned int sh_count;		/* number of sh locks on locks list */
+	uint32_t version;
+	struct list_head locks;
+	struct list_head actions;
+	void *lm_data;			/* lock-manager-private state (rd_dlm/rd_sanlock) */
+};
+
+/* lock flags (lock.flags) */
+#define LD_LF_PERSISTENT 0x00000001
+
+/* One granted lock on a resource (entry in resource.locks). */
+struct lock {
+	struct list_head list;		/* resource.locks */
+	int8_t mode;			/* lock mode LD_LK_ */
+	uint32_t version;
+	uint32_t flags;			/* LD_LF_ */
+	uint32_t client_id;		/* may be 0 for persistent or internal locks */
+};
+
+/* One started vg lockspace, served by its own thread. */
+struct lockspace {
+	struct list_head list;		/* lockspaces */
+	char name[MAX_NAME+1];
+	char vg_name[MAX_NAME+1];
+	char vg_uuid[64];
+	char vg_args[MAX_ARGS];		/* lock manager specific args */
+	char vg_sysid[MAX_NAME+1];
+	int8_t lm_type;			/* lock manager: LM_DLM, LM_SANLOCK */
+	void *lm_data;			/* lock-manager-private state (lm_dlm/lm_sanlock) */
+	uint64_t host_id;
+	uint32_t names_version;		/* read/write from/to gl val_blk n_version */
+
+	uint32_t start_client_id;	/* client_id that started the lockspace */
+	pthread_t thread;		/* makes synchronous lock requests */
+	pthread_cond_t cond;
+	pthread_mutex_t mutex;
+	unsigned int create_fail : 1;
+	unsigned int create_done : 1;
+	unsigned int thread_work : 1;
+	unsigned int thread_stop : 1;
+	unsigned int thread_done : 1;
+	unsigned int update_local_vgs : 1;
+	unsigned int update_names_version: 1;
+	unsigned int sanlock_gl_enabled: 1;
+	unsigned int sanlock_gl_dup: 1;
+
+	struct list_head actions;	/* new client actions */
+	struct list_head resources;	/* resource/lock state for gl/vg/lv */
+	/* TODO: should probably be tree */
+};
+
+#define VAL_BLK_VERSION 0x0101
+
+/*
+ * Contents of the lock value block exchanged through the lock manager.
+ * Fields are stored little-endian (see the cpu_to_le/le_to_cpu macros
+ * below); version is 0 until the block is first written, then
+ * VAL_BLK_VERSION.
+ */
+struct val_blk {
+	uint16_t version;	/* VAL_BLK_VERSION once written, 0 before */
+	uint16_t flags;
+	uint32_t r_version;	/* resource version (see lm_unlock_* r_version) */
+	uint32_t n_version;	/* names version; see lockspace.names_version */
+};
+
+/* lm_unlock flags */
+#define LMUF_FREE_VG 0x00000001
+
+int lockspaces_empty(void);
+int last_string_from_args(char *args_in, char *last);
+int version_from_args(char *args, unsigned int *major, unsigned int *minor, unsigned int *patch);
+
+int lm_init_vg_dlm(char *ls_name, char *vg_name, uint32_t flags, char *vg_args);
+int lm_add_lockspace_dlm(struct lockspace *ls);
+int lm_rem_lockspace_dlm(struct lockspace *ls, int free_vg);
+int lm_lock_dlm(struct lockspace *ls, struct resource *r, int ld_mode,
+ uint32_t *r_version, uint32_t *n_version);
+int lm_convert_dlm(struct lockspace *ls, struct resource *r,
+ int ld_mode, uint32_t r_version);
+int lm_unlock_dlm(struct lockspace *ls, struct resource *r,
+ uint32_t r_version, uint32_t n_version, uint32_t lmu_flags);
+int lm_rem_resource_dlm(struct lockspace *ls, struct resource *r);
+
+int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_args);
+int lm_init_lv_sanlock(char *ls_name, char *vg_name, char *lv_name, char *vg_args, char *lv_args);
+int lm_free_lv_sanlock(struct lockspace *ls, struct resource *r);
+int lm_add_lockspace_sanlock(struct lockspace *ls);
+int lm_rem_lockspace_sanlock(struct lockspace *ls, int free_vg);
+int lm_lock_sanlock(struct lockspace *ls, struct resource *r, int ld_mode,
+ char *lv_args, uint32_t *r_version, uint32_t *n_version, int *retry);
+int lm_convert_sanlock(struct lockspace *ls, struct resource *r,
+ int ld_mode, uint32_t r_version);
+int lm_unlock_sanlock(struct lockspace *ls, struct resource *r,
+ uint32_t r_version, uint32_t n_version, uint32_t lmu_flags);
+int lm_able_gl_sanlock(struct lockspace *ls, int enable);
+int lm_ex_disable_gl_sanlock(struct lockspace *ls);
+int lm_hosts_sanlock(struct lockspace *ls, int notify);
+int lm_rem_resource_sanlock(struct lockspace *ls, struct resource *r);
+int lm_gl_is_enabled(struct lockspace *ls);
+
+/*
+ * Convert between cpu and little-endian representations for val_blk
+ * fields.  Exactly one of the two blocks below is selected by the
+ * platform's __BYTE_ORDER; on little-endian hosts the conversions are
+ * identity macros.
+ */
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define le16_to_cpu(x) (bswap_16((x)))
+#define le32_to_cpu(x) (bswap_32((x)))
+#define le64_to_cpu(x) (bswap_64((x)))
+#define cpu_to_le16(x) (bswap_16((x)))
+#define cpu_to_le32(x) (bswap_32((x)))
+#define cpu_to_le64(x) (bswap_64((x)))
+#endif
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define le16_to_cpu(x) (x)
+#define le32_to_cpu(x) (x)
+#define le64_to_cpu(x) (x)
+#define cpu_to_le16(x) (x)
+#define cpu_to_le32(x) (x)
+#define cpu_to_le64(x) (x)
+#endif
+
+#define container_of(ptr, type, member) ({ \
+ const typeof( ((type *)0)->member ) *__mptr = (ptr); \
+ (type *)( (char *)__mptr - offsetof(type,member) );})
+
+/* to improve readability */
+#define WAIT 1
+#define NO_WAIT 0
+#define FORCE 1
+#define NO_FORCE 0
+
+/*
+ * global variables
+ */
+
+#ifndef EXTERN
+#define EXTERN extern
+#define INIT(X)
+#else
+#undef EXTERN
+#define EXTERN
+#define INIT(X) =X
+#endif
+
+/*
+ * gl_type_static and gl_use_ are set by command line or config file
+ * to specify whether the global lock comes from dlm or sanlock.
+ * Without a static setting, lvmlockd will figure out where the
+ * global lock should be (but it could get mixed up in cases where
+ * both sanlock and dlm vgs exist.)
+ *
+ * gl_use_dlm means that the gl should come from lockspace gl_lsname_dlm
+ * gl_use_sanlock means that the gl should come from lockspace gl_lsname_sanlock
+ *
+ * gl_use_dlm has precedence over gl_use_sanlock, so if a node sees both
+ * dlm and sanlock vgs, it will use the dlm gl.
+ *
+ * gl_use_ is set when the first evidence of that lm_type is seen
+ * in any command.
+ *
+ * gl_lsname_sanlock is set when the first vg is seen in which an
+ * enabled gl exists, or when init_vg creates a vg with gl enabled,
+ * or when enable_gl is used.
+ *
+ * gl_lsname_sanlock is cleared when free_vg deletes a vg with gl enabled
+ * or when disable_gl matches.
+ */
+
+EXTERN int gl_type_static;
+EXTERN int gl_use_dlm;
+EXTERN int gl_use_sanlock;
+EXTERN pthread_mutex_t gl_type_mutex;
+
+EXTERN char gl_lsname_dlm[MAX_NAME+1];
+EXTERN char gl_lsname_sanlock[MAX_NAME+1];
+
+EXTERN int gl_running_dlm;
+EXTERN int gl_auto_dlm;
+
+EXTERN int daemon_test; /* run as much as possible without a live lock manager */
+EXTERN int daemon_debug;
+EXTERN int daemon_host_id;
+EXTERN const char *daemon_host_id_file;
+
+void log_level(int level, const char *fmt, ...) __attribute__((format(printf, 2, 3)));
+#define log_debug(fmt, args...) log_level(LOG_DEBUG, fmt, ##args)
+#define log_error(fmt, args...) log_level(LOG_ERR, fmt, ##args)
+#define log_warn(fmt, args...) log_level(LOG_WARNING, fmt, ##args)
+
+#endif
diff --git a/daemons/lvmlockd/lvmlockd-sanlock.c b/daemons/lvmlockd/lvmlockd-sanlock.c
new file mode 100644
index 000000000..d8c8df9e8
--- /dev/null
+++ b/daemons/lvmlockd/lvmlockd-sanlock.c
@@ -0,0 +1,1453 @@
+#define _XOPEN_SOURCE 500 /* pthread */
+#define _ISOC99_SOURCE
+
+#include <assert.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <poll.h>
+#include <errno.h>
+#include <string.h>
+#include <syslog.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include "configure.h"
+#include "daemon-server.h"
+#include "daemon-log.h"
+
+#include "lvmlockd-internal.h"
+#include "lvmlockd-client.h"
+
+#include "sanlock.h"
+#include "sanlock_rv.h"
+#include "sanlock_admin.h"
+#include "sanlock_resource.h"
+
+/*
+ * If access to the pv containing the vg's leases is lost, sanlock cannot renew
+ * the leases we have acquired for locked LVs. This means that we could soon
+ * lose the lease to another host which could activate our LV exclusively. We
+ * do not want to get to the point of two hosts having the same LV active
+ * exclusively (it obviously violates the purpose of LV locks.)
+ *
+ * The default method of preventing this problem is for lvmlockd to do nothing,
+ * which produces a safe but potentially inconvenient result. Doing nothing
+ * leads to our LV leases not being released, which leads to sanlock using the
+ * local watchdog to reset us before another host can acquire our lock. It
+ * would often be preferable to avoid the abrupt hard reset from the watchdog.
+ *
+ * There are other options to avoid being reset by our watchdog. If we can
+ * quickly stop using the LVs in question and release the locks for them, then
+ * we could avoid a reset (there's a certain grace period of about 40 seconds
+ * in which we can attempt this.) To do this, we can tell sanlock to run a
+ * specific program when it has lost access to our leases. We could use this
+ * program to:
+ *
+ * 1. Deactivate all lvs in the affected vg. If all the lvs are
+ * deactivated, then our LV locks would be released and sanlock would no longer
+ * use the watchdog to reset us. If file systems are mounted on the active
+ * lvs, then deactivating them would fail, so this option would be of limited
+ * usefulness.
+ *
+ * 2. Option 1 could be extended to kill pids using the fs on the lv, unmount
+ * the fs, and deactivate the lv. This is probably out of scope for lvm
+ * directly, and would likely need the help of another system service.
+ *
+ * 3. Use dmsetup suspend to block access to lvs in the affected vg. If this
+ * was successful, the local host could no longer write to the lvs, we could
+ * safely release the LV locks, and sanlock would no longer reset us. At this
+ * point, with suspended lvs, the host would be in a fairly hobbled state, and
+ * would almost certainly need a manual, forcible reset.
+ *
+ * 4. Option 3 could be extended to monitor the lost storage, and if it is
+ * reconnected, the leases could be reacquired, and the suspended lvs resumed
+ * (reacquiring leases will fail if another host has acquired them since they
+ * were released.) The complexity of this option, combined with the fact that
+ * the error conditions are often not as simple as storage being lost and then
+ * later reconnecting, will result in this option being too unreliable.
+ *
+ * TODO: add a config option that we could use to select a different behavior
+ * than the default. Then implement one of the simpler options as a proof of
+ * concept, which could be extended if needed.
+ */
+
+/*
+ * Each lockspace thread has its own sanlock daemon connection.
+ * If they shared one, sanlock acquire/release calls would be
+ * serialized. Some aspects of sanlock expect a single connection
+ * from each pid: signals due to a sanlock_request, and
+ * acquire/release/convert/inquire. The later can probably be
+ * addressed with a flag to indicate that the pid field should be
+ * interpretted as 'ci' (which the caller would need to figure
+ * out somehow.)
+ */
+
+/* Per-lockspace sanlock state (lockspace.lm_data). */
+struct lm_sanlock {
+	struct sanlk_lockspace ss;
+	int align_size;		/* sanlock_align() of the host_id disk */
+	int sock;		/* sanlock daemon connection */
+};
+
+/* Per-resource sanlock state (resource.lm_data); the union reserves
+   room for the one sanlk_disk that follows sanlk_resource in memory. */
+struct rd_sanlock {
+	union {
+		struct sanlk_resource rs;
+		char buf[sizeof(struct sanlk_resource) + sizeof(struct sanlk_disk)];
+	};
+	struct val_blk *vb;
+};
+
+/* Stack-allocatable sanlk_resource with space for one disk entry. */
+struct sanlk_resourced {
+	union {
+		struct sanlk_resource rs;
+		char buf[sizeof(struct sanlk_resource) + sizeof(struct sanlk_disk)];
+	};
+};
+
+/*
+ * lock_args format
+ *
+ * vg_lock_args format for sanlock is
+ * vg_version_string:undefined:lock_lv_name
+ *
+ * lv_lock_args format for sanlock is
+ * lv_version_string:undefined:offset
+ *
+ * version_string is MAJOR.MINOR.PATCH
+ * undefined may contain ":"
+ *
+ * If a new version of the lock_args string cannot be
+ * handled by an old version of lvmlockd, then the
+ * new lock_args string should contain a larger major number.
+ */
+
+#define VG_LOCK_ARGS_MAJOR 1
+#define VG_LOCK_ARGS_MINOR 0
+#define VG_LOCK_ARGS_PATCH 0
+
+#define LV_LOCK_ARGS_MAJOR 1
+#define LV_LOCK_ARGS_MINOR 0
+#define LV_LOCK_ARGS_PATCH 0
+
+/*
+ * offset 0 is lockspace
+ * offset align_size * 1 is unused
+ * offset align_size * 2 is unused
+ * ...
+ * offset align_size * 64 is unused
+ * offset align_size * 65 is gl lock
+ * offset align_size * 66 is vg lock
+ * offset align_size * 67 is first lv lock
+ * offset align_size * 68 is second lv lock
+ * ...
+ */
+
+#define LS_BEGIN 0
+#define GL_LOCK_BEGIN 65
+#define VG_LOCK_BEGIN 66
+#define LV_LOCK_BEGIN 67
+
+/* vg_args is "version:undefined:lock_lv_name" (see lock_args format
+   comment above); the lock lv name is the final ':'-separated field. */
+static int lock_lv_name_from_args(char *vg_args, char *lock_lv_name)
+{
+	return last_string_from_args(vg_args, lock_lv_name);
+}
+
+/*
+ * lv_args is "version:undefined:offset" (see lock_args format comment
+ * above); parse the trailing field as the lv lease offset.
+ *
+ * Returns 0 and sets *lock_lv_offset on success, or the negative
+ * error from last_string_from_args().
+ *
+ * NOTE(review): strtoull failure (non-numeric field) is not detected
+ * here and silently yields 0 - confirm callers can tolerate that.
+ */
+static int lock_lv_offset_from_args(char *lv_args, uint64_t *lock_lv_offset)
+{
+	char offset_str[MAX_ARGS];
+	int rv;
+
+	memset(offset_str, 0, sizeof(offset_str));
+
+	rv = last_string_from_args(lv_args, offset_str);
+	if (rv < 0)
+		return rv;
+
+	*lock_lv_offset = strtoull(offset_str, NULL, 10);
+	return 0;
+}
+
+/*
+ * Verify a lock_args string is compatible: its major version must not
+ * be greater than the major version we were built with (see the
+ * lock_args format comment above).
+ *
+ * Returns 0 if compatible, the negative error from version_from_args()
+ * on a parse failure, or -1 if the args are from a newer major version.
+ *
+ * NOTE(review): passes NULL for minor/patch - assumes
+ * version_from_args() accepts NULL out-parameters; confirm.
+ */
+static int check_args_version(char *args, unsigned int our_major)
+{
+	unsigned int major = 0;
+	int rv;
+
+	rv = version_from_args(args, &major, NULL, NULL);
+	if (rv < 0) {
+		log_error("check_args_version %s error %d", args, rv);
+		return rv;
+	}
+
+	if (major > our_major) {
+		log_error("check_args_version %s major %u %u", args, major, our_major);
+		return -1;
+	}
+
+	return 0;
+}
+
+#define MAX_LINE 64
+
+/*
+ * Read the local sanlock host_id from the configured host_id file
+ * (daemon_host_id_file).  The file holds "key = value" lines; lines
+ * starting with '#' and blank lines are ignored.  Only the "host_id"
+ * key is used.
+ *
+ * Returns the host_id found, or 0 if the file cannot be opened or no
+ * host_id line exists.
+ */
+static int read_host_id_file(void)
+{
+	FILE *file;
+	char line[MAX_LINE];
+	char key_str[MAX_LINE];
+	char val_str[MAX_LINE];
+	char *key, *val, *sep;
+	int host_id = 0;
+
+	file = fopen(daemon_host_id_file, "r");
+	if (!file)
+		goto out;
+
+	while (fgets(line, MAX_LINE, file)) {
+		if (line[0] == '#' || line[0] == '\n')
+			continue;
+
+		key = line;
+		sep = strstr(line, "=");
+
+		/*
+		 * Check sep before using it: the previous code computed
+		 * sep + 1 first, which is undefined behavior when strstr()
+		 * returns NULL (and the old "!val" test could never catch it).
+		 */
+		if (!sep)
+			continue;
+		val = sep + 1;
+
+		*sep = '\0';
+		memset(key_str, 0, sizeof(key_str));
+		memset(val_str, 0, sizeof(val_str));
+		/* bound the reads (MAX_LINE - 1) so a long token cannot
+		   overflow key_str/val_str */
+		sscanf(key, "%63s", key_str);
+		sscanf(val, "%63s", val_str);
+
+		if (!strcmp(key_str, "host_id")) {
+			host_id = atoi(val_str);
+			break;
+		}
+	}
+	fclose(file);
+out:
+	log_debug("host_id %d from %s", host_id, daemon_host_id_file);
+	return host_id;
+}
+
+/*
+ * vgcreate
+ *
+ * For init_vg, vgcreate passes the internal lv name as vg_args.
+ * This constructs the full/proper vg_args format, containing the
+ * version and lv name, and returns the real lock_args in vg_args.
+ */
+
+/*
+ * vgcreate: initialize sanlock leases for a new vg.  Writes the
+ * lockspace (delta lease) area, the gl and vg lock resources, clears
+ * all lv lease slots to "#unused", and returns the final lock_args
+ * string ("version:lock_lv_name") in vg_args.  On input vg_args holds
+ * only the internal lock lv name (see comment above).
+ *
+ * The gl resource is written under the enabled name only if this is
+ * the first sanlock vg and no static/dlm gl choice overrides it.
+ *
+ * Returns 0 on success or a negative errno/sanlock error.
+ */
+int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_args)
+{
+	struct sanlk_lockspace ss;
+	struct sanlk_resourced rd;
+	struct sanlk_disk disk;
+	char lock_lv_name[MAX_ARGS];
+	char lock_args_version[MAX_ARGS];
+	const char *gl_name = NULL;
+	uint64_t offset;
+	int align_size;
+	int i, rv;
+
+	memset(&ss, 0, sizeof(ss));
+	memset(&rd, 0, sizeof(rd));
+	memset(&disk, 0, sizeof(disk));
+	memset(lock_lv_name, 0, sizeof(lock_lv_name));
+	memset(lock_args_version, 0, sizeof(lock_args_version));
+
+	if (!vg_args || !vg_args[0] || !strcmp(vg_args, "none")) {
+		log_error("S %s init_vg_san vg_args missing", ls_name);
+		return -EINVAL;
+	}
+
+	snprintf(lock_args_version, MAX_ARGS, "%u.%u.%u",
+		 VG_LOCK_ARGS_MAJOR, VG_LOCK_ARGS_MINOR, VG_LOCK_ARGS_PATCH);
+
+	/* see comment above about input vg_args being only lock_lv_name */
+	snprintf(lock_lv_name, MAX_ARGS, "%s", vg_args);
+
+	if (strlen(lock_lv_name) + strlen(lock_args_version) + 2 > MAX_ARGS)
+		return -ENAMETOOLONG;
+
+	snprintf(disk.path, SANLK_PATH_LEN, "/dev/mapper/%s-%s", vg_name, lock_lv_name);
+
+	log_debug("S %s init_vg_san path %s", ls_name, disk.path);
+
+	if (daemon_test) {
+		if (!gl_lsname_sanlock[0])
+			strncpy(gl_lsname_sanlock, ls_name, MAX_NAME);
+		goto out;
+	}
+
+	align_size = sanlock_align(&disk);
+	if (align_size <= 0) {
+		log_error("S %s init_vg_san bad align size %d %s",
+			  ls_name, align_size, disk.path);
+		return -EINVAL;
+	}
+
+	strncpy(ss.name, ls_name, SANLK_NAME_LEN);
+	memcpy(ss.host_id_disk.path, disk.path, SANLK_PATH_LEN);
+	ss.host_id_disk.offset = LS_BEGIN * align_size;
+
+	rv = sanlock_write_lockspace(&ss, 0, 0, 0);
+	if (rv < 0) {
+		log_error("S %s init_vg_san write_lockspace error %d %s",
+			  ls_name, rv, ss.host_id_disk.path);
+		return rv;
+	}
+
+	/*
+	 * We want to create the global lock in the first sanlock vg.
+	 * If other sanlock vgs exist, then one of them must contain
+	 * the gl.  If gl_lsname_sanlock is not set, then perhaps
+	 * the sanlock vg with the gl has been removed or has not yet
+	 * been seen. (Would vgcreate get this far in that case?)
+	 * If dlm vgs exist, then we choose to use the dlm gl and
+	 * not a sanlock gl.
+	 */
+
+	if (flags & LD_AF_ENABLE)
+		gl_name = R_NAME_GL;
+	else if (flags & LD_AF_DISABLE)
+		gl_name = R_NAME_GL_DISABLED;
+	else if (!gl_use_sanlock || gl_lsname_sanlock[0] || !lockspaces_empty())
+		gl_name = R_NAME_GL_DISABLED;
+	else
+		gl_name = R_NAME_GL;
+
+	memcpy(rd.rs.lockspace_name, ss.name, SANLK_NAME_LEN);
+	strncpy(rd.rs.name, gl_name, SANLK_NAME_LEN);
+	memcpy(rd.rs.disks[0].path, disk.path, SANLK_PATH_LEN);
+	rd.rs.disks[0].offset = align_size * GL_LOCK_BEGIN;
+	rd.rs.num_disks = 1;
+
+	rv = sanlock_write_resource(&rd.rs, 0, 0, 0);
+	if (rv < 0) {
+		log_error("S %s init_vg_san write_resource gl error %d %s",
+			  ls_name, rv, rd.rs.disks[0].path);
+		return rv;
+	}
+
+	memcpy(rd.rs.lockspace_name, ss.name, SANLK_NAME_LEN);
+	strncpy(rd.rs.name, R_NAME_VG, SANLK_NAME_LEN);
+	memcpy(rd.rs.disks[0].path, disk.path, SANLK_PATH_LEN);
+	rd.rs.disks[0].offset = align_size * VG_LOCK_BEGIN;
+	rd.rs.num_disks = 1;
+
+	rv = sanlock_write_resource(&rd.rs, 0, 0, 0);
+	if (rv < 0) {
+		log_error("S %s init_vg_san write_resource vg error %d %s",
+			  ls_name, rv, rd.rs.disks[0].path);
+		return rv;
+	}
+
+	if (!strcmp(gl_name, R_NAME_GL))
+		strncpy(gl_lsname_sanlock, ls_name, MAX_NAME);
+ out:
+	snprintf(vg_args, MAX_ARGS, "%s:%s", lock_args_version, lock_lv_name);
+
+	log_debug("S %s init_vg_san done vg_args %s", ls_name, vg_args);
+
+	/*
+	 * In daemon_test mode no leases were written above: align_size is
+	 * uninitialized and there is no device, so stop here rather than
+	 * falling into the clearing loop (previously this path read the
+	 * uninitialized align_size and called sanlock_write_resource).
+	 */
+	if (daemon_test)
+		return 0;
+
+	/*
+	 * Go through all lv resource slots and initialize them with the
+	 * correct lockspace name but a special resource name that indicates
+	 * it is unused.
+	 */
+
+	memset(&rd, 0, sizeof(rd));
+	rd.rs.num_disks = 1;
+	memcpy(rd.rs.disks[0].path, disk.path, SANLK_PATH_LEN);
+	strncpy(rd.rs.lockspace_name, ls_name, SANLK_NAME_LEN);
+	strcpy(rd.rs.name, "#unused");
+
+	offset = align_size * LV_LOCK_BEGIN;
+
+	log_debug("S %s init_vg_san clearing lv lease areas", ls_name);
+
+	for (i = 0; i < LVMLOCKD_SANLOCK_MAX_LVS_IN_VG; i++) {
+		rd.rs.disks[0].offset = offset;
+
+		rv = sanlock_write_resource(&rd.rs, 0, 0, 0);
+		if (rv) {
+			log_error("clear lv resource area %llu error %d",
+				  (unsigned long long)offset, rv);
+			break;
+		}
+		offset += align_size;
+	}
+
+	return 0;
+}
+
+/*
+ * lvcreate
+ *
+ * The offset at which the lv lease is written is passed
+ * all the way back to the lvcreate command so that it
+ * can be saved in the lv's lock_args in the vg metadata.
+ */
+
+/*
+ * lvcreate: allocate an on-disk lease slot for a new lv.  Scans the
+ * resource areas from LV_LOCK_BEGIN for a slot named "#unused", writes
+ * the lv's name there, and returns "version:offset" in lv_args so the
+ * command can store it in the lv's lock_args metadata.
+ *
+ * Returns 0 with lv_args set, -EEXIST if a slot already carries this
+ * lv name, -ENOENT if all slots are used, or a negative sanlock error.
+ */
+int lm_init_lv_sanlock(char *ls_name, char *vg_name, char *lv_name,
+		       char *vg_args, char *lv_args)
+{
+	struct sanlk_resourced rd;
+	char lock_lv_name[MAX_ARGS];
+	char lock_args_version[MAX_ARGS];
+	uint64_t offset;
+	int align_size;
+	int lv_count = 0;
+	int rv;
+
+	memset(&rd, 0, sizeof(rd));
+	memset(lock_lv_name, 0, sizeof(lock_lv_name));
+	memset(lock_args_version, 0, sizeof(lock_args_version));
+
+	rv = lock_lv_name_from_args(vg_args, lock_lv_name);
+	if (rv < 0) {
+		log_error("S %s init_lv_san lock_lv_name_from_args error %d %s",
+			  ls_name, rv, vg_args);
+		return rv;
+	}
+
+	snprintf(lock_args_version, MAX_ARGS, "%u.%u.%u",
+		 LV_LOCK_ARGS_MAJOR, LV_LOCK_ARGS_MINOR, LV_LOCK_ARGS_PATCH);
+
+	strncpy(rd.rs.lockspace_name, ls_name, SANLK_NAME_LEN);
+	rd.rs.num_disks = 1;
+	snprintf(rd.rs.disks[0].path, SANLK_PATH_LEN, "/dev/mapper/%s-%s", vg_name, lock_lv_name);
+
+	/*
+	 * In daemon_test mode there is no lease lv, so return placeholder
+	 * args before touching the disk (previously sanlock_align() ran
+	 * first, which opens the device and would fail in test mode).
+	 */
+	if (daemon_test) {
+		snprintf(lv_args, MAX_ARGS, "%s:%llu",
+			 lock_args_version, (unsigned long long)1111);
+		return 0;
+	}
+
+	align_size = sanlock_align(&rd.rs.disks[0]);
+	if (align_size <= 0) {
+		log_error("S %s init_lv_san align error %d", ls_name, align_size);
+		return -EINVAL;
+	}
+
+	offset = align_size * LV_LOCK_BEGIN;
+	rd.rs.disks[0].offset = offset;
+
+	while (1) {
+		rd.rs.disks[0].offset = offset;
+
+		memset(rd.rs.name, 0, SANLK_NAME_LEN);
+
+		rv = sanlock_read_resource(&rd.rs, 0);
+		if (rv) {
+			log_error("S %s init_lv_san read error %d offset %llu",
+				  ls_name, rv, (unsigned long long)offset);
+			break;
+		}
+
+		if (!strncmp(rd.rs.name, lv_name, SANLK_NAME_LEN)) {
+			log_error("S %s init_lv_san resource name %s already exists at %llu",
+				  ls_name, lv_name, (unsigned long long)offset);
+			return -EEXIST;
+		}
+
+		if (!strcmp(rd.rs.name, "#unused")) {
+			log_debug("S %s init_lv_san %s found unused area at %llu",
+				  ls_name, lv_name, (unsigned long long)offset);
+
+			strncpy(rd.rs.name, lv_name, SANLK_NAME_LEN);
+
+			rv = sanlock_write_resource(&rd.rs, 0, 0, 0);
+			if (!rv) {
+				snprintf(lv_args, MAX_ARGS, "%s:%llu",
+					 lock_args_version, (unsigned long long)offset);
+			} else {
+				/* fixed: the offset argument previously passed rv */
+				log_error("S %s init_lv_san write error %d offset %llu",
+					  ls_name, rv, (unsigned long long)offset);
+			}
+			break;
+		}
+
+		offset += align_size;
+
+		if (lv_count++ >= LVMLOCKD_SANLOCK_MAX_LVS_IN_VG) {
+			log_error("S %s init_lv_san too many lvs %d", ls_name, lv_count);
+			rv = -ENOENT;
+			break;
+		}
+	}
+
+	return rv;
+}
+
+/*
+ * lvremove: return the lv's lease slot to the free pool by renaming
+ * the on-disk resource to "#unused", the name lm_init_lv_sanlock()
+ * searches for when allocating a slot.
+ *
+ * Returns 0 on success (or in daemon_test mode), or the negative
+ * sanlock_write_resource() error.
+ */
+int lm_free_lv_sanlock(struct lockspace *ls, struct resource *r)
+{
+	struct rd_sanlock *rds = r->lm_data;
+	struct sanlk_resource *res = &rds->rs;
+	int result;
+
+	log_debug("S %s R %s free_lv_san", ls->name, r->name);
+
+	if (daemon_test)
+		return 0;
+
+	strcpy(res->name, "#unused");
+
+	result = sanlock_write_resource(res, 0, 0, 0);
+	if (result < 0)
+		log_error("S %s R %s free_lv_san write error %d",
+			  ls->name, r->name, result);
+
+	return result;
+}
+
+/*
+ * Forcibly disable the gl in this lockspace: acquire the gl resource
+ * EX, then release it with SANLK_REL_RENAME so it is rewritten on disk
+ * under the "disabled" name (rs2), after which normal gl acquires
+ * (which use R_NAME_GL) no longer match the on-disk name.
+ *
+ * NOTE(review): rd2, the rename target, carries only lockspace_name
+ * and name - no disk location; this assumes sanlock takes the location
+ * from the held resource rs1 for a rename.  Confirm against the
+ * sanlock_release() API.
+ *
+ * Returns 0 on success or a negative errno/sanlock error.
+ */
+int lm_ex_disable_gl_sanlock(struct lockspace *ls)
+{
+	struct lm_sanlock *lms = ls->lm_data;
+	struct sanlk_resourced rd1;
+	struct sanlk_resourced rd2;
+	struct sanlk_resource *rs1;
+	struct sanlk_resource *rs2;
+	struct sanlk_resource **rs_args;
+	int rv;
+
+	rs_args = malloc(2 * sizeof(struct sanlk_resource *));
+	if (!rs_args)
+		return -ENOMEM;
+
+	rs1 = &rd1.rs;
+	rs2 = &rd2.rs;
+
+	memset(&rd1, 0, sizeof(rd1));
+	memset(&rd2, 0, sizeof(rd2));
+
+	strncpy(rd1.rs.lockspace_name, ls->name, SANLK_NAME_LEN);
+	strncpy(rd1.rs.name, R_NAME_GL, SANLK_NAME_LEN);
+
+	strncpy(rd2.rs.lockspace_name, ls->name, SANLK_NAME_LEN);
+	strncpy(rd2.rs.name, R_NAME_GL_DISABLED, SANLK_NAME_LEN);
+
+	rd1.rs.num_disks = 1;
+	strncpy(rd1.rs.disks[0].path, lms->ss.host_id_disk.path, SANLK_PATH_LEN);
+	rd1.rs.disks[0].offset = lms->align_size * GL_LOCK_BEGIN;
+
+	rv = sanlock_acquire(lms->sock, -1, 0, 1, &rs1, NULL);
+	if (rv < 0) {
+		log_error("S %s ex_disable_gl_san acquire error %d",
+			  ls->name, rv);
+		goto out;
+	}
+
+	rs_args[0] = rs1;
+	rs_args[1] = rs2;
+
+	rv = sanlock_release(lms->sock, -1, SANLK_REL_RENAME, 2, rs_args);
+	if (rv < 0) {
+		log_error("S %s ex_disable_gl_san release_rename error %d",
+			  ls->name, rv);
+	}
+
+out:
+	free(rs_args);
+	return rv;
+}
+
+/*
+ * enable/disable exist because each vg contains a global lock,
+ * but we only want to use the gl from one of them. The first
+ * sanlock vg created, has its gl enabled, and subsequent
+ * sanlock vgs have their gl disabled. If the vg containing the
+ * gl is removed, the gl from another sanlock vg needs to be
+ * enabled. Or, if gl in multiple vgs are somehow enabled, we
+ * want to be able to disable one of them.
+ *
+ * Disable works by naming/renaming the gl resource to have a
+ * name that is different from the predefined name.
+ * When a host attempts to acquire the gl with its standard
+ * predefined name, it will fail because the resource's name
+ * on disk doesn't match.
+ */
+
+/*
+ * Enable (enable=1) or disable (enable=0) the gl in this lockspace by
+ * rewriting the gl resource on disk under the standard or "disabled"
+ * name (see the comment above), then updating the cached state:
+ * ls->sanlock_gl_enabled/sanlock_gl_dup and gl_lsname_sanlock.
+ *
+ * Returns 0 on success or the negative sanlock_write_resource() error.
+ */
+int lm_able_gl_sanlock(struct lockspace *ls, int enable)
+{
+	struct lm_sanlock *lms = ls->lm_data;
+	struct sanlk_resourced rd;
+	const char *gl_name;
+	int rv;
+
+	if (enable)
+		gl_name = R_NAME_GL;
+	else
+		gl_name = R_NAME_GL_DISABLED;
+
+	memset(&rd, 0, sizeof(rd));
+
+	strncpy(rd.rs.lockspace_name, ls->name, SANLK_NAME_LEN);
+	strncpy(rd.rs.name, gl_name, SANLK_NAME_LEN);
+
+	rd.rs.num_disks = 1;
+	strncpy(rd.rs.disks[0].path, lms->ss.host_id_disk.path, SANLK_PATH_LEN);
+	rd.rs.disks[0].offset = lms->align_size * GL_LOCK_BEGIN;
+
+	rv = sanlock_write_resource(&rd.rs, 0, 0, 0);
+	if (rv < 0) {
+		log_error("S %s able_gl %d write_resource gl error %d %s",
+			  ls->name, enable, rv, rd.rs.disks[0].path);
+		return rv;
+	}
+
+	log_debug("S %s able_gl %s", ls->name, gl_name);
+
+	ls->sanlock_gl_enabled = enable;
+	/* a vg can no longer be a duplicate gl holder once disabled */
+	if (ls->sanlock_gl_dup && !enable)
+		ls->sanlock_gl_dup = 0;
+
+	if (enable)
+		strncpy(gl_lsname_sanlock, ls->name, MAX_NAME);
+
+	/* only clear the global name if it pointed at this lockspace */
+	if (!enable && !strcmp(gl_lsname_sanlock, ls->name))
+		memset(gl_lsname_sanlock, 0, sizeof(gl_lsname_sanlock));
+
+	return 0;
+}
+
+/*
+ * Read the gl resource area from disk and report whether the gl is
+ * enabled in this lockspace, judged purely by the resource name found
+ * there: R_NAME_GL means enabled, R_NAME_GL_DISABLED means disabled.
+ *
+ * Returns 1 if enabled, 0 if disabled, -1 for an unrecognized name,
+ * or a negative sanlock_read_resource() error.
+ */
+static int gl_is_enabled(struct lockspace *ls, struct lm_sanlock *lms)
+{
+	char strname[SANLK_NAME_LEN + 1];
+	struct sanlk_resourced rd;
+	uint64_t offset;
+	int rv;
+
+	memset(&rd, 0, sizeof(rd));
+
+	strncpy(rd.rs.lockspace_name, ls->name, SANLK_NAME_LEN);
+
+	/* leave rs.name empty, it is what we're checking */
+
+	rd.rs.num_disks = 1;
+	strncpy(rd.rs.disks[0].path, lms->ss.host_id_disk.path, SANLK_PATH_LEN);
+
+	offset = lms->align_size * GL_LOCK_BEGIN;
+	rd.rs.disks[0].offset = offset;
+
+	rv = sanlock_read_resource(&rd.rs, 0);
+	if (rv < 0) {
+		log_error("gl_is_enabled read_resource error %d", rv);
+		return rv;
+	}
+
+	/* rs.name is a fixed-size field; copy into a NUL-terminated buffer
+	   before comparing */
+	memset(strname, 0, sizeof(strname));
+	memcpy(strname, rd.rs.name, SANLK_NAME_LEN);
+
+	if (!strcmp(strname, R_NAME_GL_DISABLED)) {
+		return 0;
+	}
+
+	if (!strcmp(strname, R_NAME_GL)) {
+		return 1;
+	}
+
+	log_error("gl_is_enabled invalid gl name %s", strname);
+	return -1;
+}
+
+/*
+ * Refresh ls->sanlock_gl_enabled from the on-disk gl resource name.
+ *
+ * Returns 1 if enabled, 0 if disabled, or a negative error from
+ * gl_is_enabled().
+ */
+int lm_gl_is_enabled(struct lockspace *ls)
+{
+	int rv;
+
+	rv = gl_is_enabled(ls, ls->lm_data);
+
+	/*
+	 * rv < 0 is an error, not a state: do not store a negative value
+	 * into the single-bit sanlock_gl_enabled field (it would be
+	 * truncated and read back as "enabled").
+	 */
+	if (rv < 0)
+		return rv;
+
+	ls->sanlock_gl_enabled = rv;
+	return rv;
+}
+
+/*
+ * host A: start_vg/add_lockspace
+ * host B: vgremove
+ *
+ * The global lock cannot always be held around start_vg
+ * on host A because the gl is in a vg that may not be
+ * started yet, or may be in the vg we are starting.
+ *
+ * If B removes the vg, destroying the delta leases,
+ * while A is a lockspace member, it will cause A's
+ * sanlock delta lease renewal to fail, and lockspace
+ * recovery.
+ *
+ * Possible way to mostly avoid problems:
+ *
+ * hostA: start_vg
+ *
+ * read vg metadata, lock_type/lock_args
+ * read and verify vglk lease name
+ * sanlock_add_lockspace reads valid delta lease
+ * sanlock_add_lockspace done, A is a member
+ * read and verify vglk lease name
+ *
+ * hostB: vgremove
+ *
+ * lock gl
+ * lock vg ex
+ * check sanlock for lockspace members
+ * lock lv ex (all)
+ * unlock lv ex (all)
+ * unlock rename vglk
+ * wait for max time that add_lockspace could take
+ * check sanlock for lockspace members
+ * sanlock_rem_lockspace
+ * destroy delta leases
+ * unlock gl
+ * remove vg
+ *
+ * hostA will fail in one of the places where it verifies
+ * the vglk lease name, or hostA will fail in one of the
+ * places where it checks sanlock lockspace members.
+ * And both can probably fail, but I suspect it would be
+ * very unlikely for both to succeed.
+ *
+ * (I think a similar situation is start_vg vs changing lock_type
+ * from sanlock to something else.)
+ */
+
+/*
+ * vgchange lock-start: join this vg's sanlock lockspace.  The vg's
+ * internal lease lv (named in vg_args) must already be active, since
+ * sanlock keeps its delta lease on it.  Registers a dedicated sanlock
+ * connection for this lockspace thread (see comment at top of file),
+ * determines the lease alignment, records whether this lockspace
+ * holds the global lock, joins the lockspace, and marks it
+ * used-by-orphans so it is not cleanly released while orphan leases
+ * still protect active lvs.
+ *
+ * Returns 0 on success with ls->lm_data set, or a negative error.
+ */
+int lm_add_lockspace_sanlock(struct lockspace *ls)
+{
+	struct stat st;
+	struct lm_sanlock *lms;
+	char lock_lv_name[MAX_ARGS];
+	char lsname[SANLK_NAME_LEN + 1];
+	char disk_path[SANLK_PATH_LEN];
+	int rv;
+
+	memset(disk_path, 0, sizeof(disk_path));
+	memset(lock_lv_name, 0, sizeof(lock_lv_name));
+
+	rv = check_args_version(ls->vg_args, VG_LOCK_ARGS_MAJOR);
+	if (rv < 0)
+		return rv;
+
+	rv = lock_lv_name_from_args(ls->vg_args, lock_lv_name);
+	if (rv < 0) {
+		log_error("S %s add_lockspace_san lock_lv_name_from_args error %d %s",
+			  ls->name, rv, ls->vg_args);
+		return rv;
+	}
+
+	snprintf(disk_path, SANLK_PATH_LEN, "/dev/mapper/%s-%s",
+		 ls->vg_name, lock_lv_name);
+
+	/*
+	 * When a vg is started, the internal sanlock lv should be
+	 * activated before lvmlockd is asked to add the lockspace.
+	 * (sanlock needs to use the lv.)
+	 *
+	 * TODO: can we ask something on the system to activate the
+	 * sanlock lv or should we just require that vgchange be used
+	 * to start sanlock vgs?
+	 * Should sanlock lvs be "auto-activated"?
+	 */
+
+	/* FIXME: remove this, device is not always ready for us here */
+	sleep(1);
+
+	rv = stat(disk_path, &st);
+	if (rv < 0) {
+		log_error("S %s add_lockspace_san stat error %d disk_path %s",
+			  ls->name, errno, disk_path);
+		return -1;
+	}
+
+	/* fall back to the configured host_id or the host_id file when the
+	   request did not supply one */
+	if (!ls->host_id) {
+		if (daemon_host_id)
+			ls->host_id = daemon_host_id;
+		else if (daemon_host_id_file)
+			ls->host_id = read_host_id_file();
+	}
+
+	/* host_id must be in 1..2000 (sanlock's supported range) */
+	if (!ls->host_id || ls->host_id > 2000) {
+		log_error("S %s add_lockspace_san invalid host_id %llu",
+			  ls->name, (unsigned long long)ls->host_id);
+		return -1;
+	}
+
+	lms = malloc(sizeof(struct lm_sanlock));
+	if (!lms)
+		return -ENOMEM;
+
+	/*
+	 * Zero the whole struct: sanlk_lockspace has fields beyond the
+	 * ones assigned below (previously passed to sanlock
+	 * uninitialized), and sock must not be garbage on the daemon_test
+	 * path where sanlock_register() is never called.
+	 */
+	memset(lms, 0, sizeof(struct lm_sanlock));
+
+	memset(lsname, 0, sizeof(lsname));
+	strncpy(lsname, ls->name, SANLK_NAME_LEN);
+
+	memcpy(lms->ss.name, lsname, SANLK_NAME_LEN);
+	lms->ss.host_id_disk.offset = 0;
+	lms->ss.host_id = ls->host_id;
+	strncpy(lms->ss.host_id_disk.path, disk_path, SANLK_PATH_LEN);
+
+	if (daemon_test) {
+		if (!gl_lsname_sanlock[0]) {
+			log_debug("S %s add_lockspace_san use global lock in", lsname);
+			strncpy(gl_lsname_sanlock, lsname, MAX_NAME);
+		}
+		goto out;
+	}
+
+	lms->sock = sanlock_register();
+	if (lms->sock < 0) {
+		log_error("S %s add_lockspace_san register error %d", lsname, lms->sock);
+		free(lms);
+		return -1;
+	}
+
+	rv = sanlock_restrict(lms->sock, SANLK_RESTRICT_SIGKILL);
+	if (rv < 0) {
+		log_error("S %s restrict error %d", lsname, rv);
+	}
+
+	lms->align_size = sanlock_align(&lms->ss.host_id_disk);
+	if (lms->align_size <= 0) {
+		log_error("S %s add_lockspace_san align error %d", lsname, lms->align_size);
+		close(lms->sock);
+		free(lms);
+		return -1;
+	}
+
+	rv = gl_is_enabled(ls, lms);
+	if (rv < 0) {
+		log_error("S %s add_lockspace_san gl_enabled error %d", lsname, rv);
+		close(lms->sock);
+		free(lms);
+		return rv;
+	}
+
+	ls->sanlock_gl_enabled = rv;
+
+	/* if this lockspace holds the gl, record it as the sanlock gl
+	   lockspace unless a dlm gl or another sanlock gl already exists */
+	if (rv) {
+		if (gl_use_dlm) {
+			log_error("S %s add_lockspace_san gl_use_dlm is set", lsname);
+		} else if (gl_lsname_sanlock[0] && strcmp(gl_lsname_sanlock, lsname)) {
+			log_error("S %s add_lockspace_san multiple sanlock global locks current %s",
+				  lsname, gl_lsname_sanlock);
+		} else {
+			log_debug("S %s add_lockspace_san use global lock", lsname);
+			strncpy(gl_lsname_sanlock, lsname, MAX_NAME);
+		}
+	}
+
+	rv = sanlock_add_lockspace(&lms->ss, 0);
+	if (rv < 0) {
+		/* TODO: retry for some errors */
+		log_error("S %s add_lockspace_san add_lockspace error %d", lsname, rv);
+		close(lms->sock);
+		free(lms);
+		return rv;
+	}
+
+	/*
+	 * Don't let the lockspace be cleanly released if orphan locks
+	 * exist, because the orphan locks are still protecting resources
+	 * that are being used on the host, e.g. active lvs.  If the
+	 * lockspace is cleanly released, another host could acquire the
+	 * orphan leases.
+	 *
+	 * TODO: eventually allow lvmlockd to be restarted, and to
+	 * reacquire the orphan leases for active lvs.
+	 */
+
+	rv = sanlock_set_config(lsname, 0, SANLK_CONFIG_USED_BY_ORPHANS, NULL);
+	if (rv < 0) {
+		log_error("S %s add_lockspace_san set_config error %d", lsname, rv);
+		sanlock_rem_lockspace(&lms->ss, 0);
+		close(lms->sock);
+		free(lms);
+		return rv;
+	}
+
+out:
+	log_debug("S %s add_lockspace_san done", lsname);
+
+	ls->lm_data = lms;
+	return 0;
+}
+
+/*
+ * Stop using the sanlock lockspace for a vg.
+ *
+ * free_vg is set when the vg itself is being removed (vgremove); the
+ * lockspace (delta lease) area is then rewritten with a dummy name so
+ * other hosts still joined to it fail and it cannot be rejoined.
+ *
+ * Returns 0 on success and frees ls->lm_data; returns the negative
+ * sanlock error if leaving the lockspace fails, leaving lm_data intact.
+ */
+int lm_rem_lockspace_sanlock(struct lockspace *ls, int free_vg)
+{
+	struct lm_sanlock *lms = ls->lm_data;
+	int rv;
+
+	if (daemon_test)
+		goto out;
+
+	rv = sanlock_rem_lockspace(&lms->ss, 0);
+	if (rv < 0) {
+		log_error("S %s rem_lockspace_san error %d", ls->name, rv);
+		return rv;
+	}
+
+	if (free_vg) {
+		/*
+		 * Destroy sanlock lockspace (delta leases). Forces failure for any
+		 * other host that is still using or attempts to use this lockspace.
+		 * This shouldn't be generally necessary, but there may some races
+		 * between nodes starting and removing a vg which this could help.
+		 */
+		strncpy(lms->ss.name, "#unused", SANLK_NAME_LEN);
+
+		rv = sanlock_write_lockspace(&lms->ss, 0, 0, 0);
+		if (rv < 0) {
+			log_error("S %s rem_lockspace free_vg write_lockspace error %d %s",
+				  ls->name, rv, lms->ss.host_id_disk.path);
+		}
+	}
+out:
+	/* NOTE(review): in daemon_test mode the add path returns before
+	   sanlock_register(), so lms->sock may be uninitialized here —
+	   confirm before closing it in test mode. */
+	close(lms->sock);
+
+	free(lms);
+	ls->lm_data = NULL;
+
+	/* TODO: should we only clear gl_lsname when doing free_vg? */
+
+	/* If this lockspace provided the sanlock global lock, forget it
+	   so another lockspace can become the gl lockspace. */
+	if (!strcmp(ls->name, gl_lsname_sanlock))
+		memset(gl_lsname_sanlock, 0, sizeof(gl_lsname_sanlock));
+
+	return 0;
+}
+
+#if 0
+/*
+ * Scan the internal lock lv for the lease slot whose resource name
+ * matches r->name, stepping one align_size at a time starting from
+ * LV_LOCK_BEGIN.  (Currently compiled out; kept for reference.)
+ *
+ * On success, stores the byte offset of the matching lease in
+ * *lv_args_offset and returns 0.  Returns -ENOENT when an empty slot
+ * (SANLK_LEADER_MAGIC) is hit before a match, or after scanning
+ * LVMLOCKD_SANLOCK_MAX_LVS_IN_VG slots; other negative values are
+ * sanlock read errors.
+ */
+static int find_lv_offset(struct lockspace *ls, struct resource *r,
+			  uint64_t *lv_args_offset)
+{
+	struct lm_sanlock *lms = ls->lm_data;
+	struct sanlk_resourced rd;
+	uint64_t offset;
+	int align_size;
+	int lv_count = 0;
+	int rv;
+
+	memset(&rd, 0, sizeof(rd));
+
+	strncpy(rd.rs.lockspace_name, ls->name, SANLK_NAME_LEN);
+	rd.rs.num_disks = 1;
+	memcpy(rd.rs.disks[0].path, lms->ss.host_id_disk.path, SANLK_PATH_LEN);
+
+	align_size = sanlock_align(&rd.rs.disks[0]);
+	if (align_size <= 0) {
+		log_error("find_lv_offset align error %d", align_size);
+		return -EINVAL;
+	}
+
+	offset = align_size * LV_LOCK_BEGIN;
+
+	while (1) {
+		rd.rs.disks[0].offset = offset;
+
+		memset(rd.rs.name, 0, SANLK_NAME_LEN);
+
+		rv = sanlock_read_resource(&rd.rs, 0);
+		if (!rv) {
+			if (!strncmp(rd.rs.name, r->name, SANLK_NAME_LEN)) {
+				log_debug("S %s R %s find_lv_offset found at %llu",
+					  ls->name, r->name, (unsigned long long)offset);
+
+				*lv_args_offset = offset;
+				return 0;
+			}
+
+			offset += align_size;
+
+			if (lv_count++ >= LVMLOCKD_SANLOCK_MAX_LVS_IN_VG) {
+				log_error("S %s R %s find_lv_offset too many lvs %d",
+					  ls->name, r->name, lv_count);
+				rv = -ENOENT;
+				break;
+			}
+			continue;
+		}
+		if (rv != SANLK_LEADER_MAGIC) {
+			log_error("S %s R %s find_lv_offset read error %d offset %llu",
+				  ls->name, r->name, rv, (unsigned long long)offset);
+			break;
+		}
+
+		/*
+		 * an empty slot means no more resources, assuming that
+		 * there are no gaps, so the lv was not found.
+		 */
+
+		log_debug("S %s R %s find_lv_offset not found", ls->name, r->name);
+		rv = -ENOENT;
+		break;
+	}
+	return rv;
+}
+#endif
+
+/*
+ * Allocate and initialize the per-resource sanlock state (rd_sanlock)
+ * for r and attach it as r->lm_data.
+ *
+ * GL and VG resources live at fixed, align-sized offsets on the lock
+ * lv (GL_LOCK_BEGIN / VG_LOCK_BEGIN) and get a val_blk buffer so the
+ * lock value block can be read/written.  LV resources get no vb, and
+ * their lease offset is filled in by each lm_lock_sanlock() call from
+ * lv_args.
+ *
+ * Returns 0 on success or -ENOMEM.
+ */
+static int lm_add_resource_sanlock(struct lockspace *ls, struct resource *r,
+				   char *lv_args)
+{
+	struct lm_sanlock *lms = ls->lm_data;
+	struct rd_sanlock *rds;
+
+#if 0
+	uint64_t lock_lv_offset;
+	int rv;
+	/* This case shouldn't be needed, lv_args should always be provided AFAICS. */
+	if ((r->type == LD_RT_LV) && (!lv_args[0] || !strcmp(lv_args, "none"))) {
+		rv = find_lv_offset(ls, r, &lock_lv_offset);
+		if (rv < 0)
+			return rv;
+	}
+#endif
+
+	rds = malloc(sizeof(struct rd_sanlock));
+	if (!rds)
+		return -ENOMEM;
+
+	memset(rds, 0, sizeof(struct rd_sanlock));
+
+	strncpy(rds->rs.lockspace_name, ls->name, SANLK_NAME_LEN);
+	strncpy(rds->rs.name, r->name, SANLK_NAME_LEN);
+	rds->rs.num_disks = 1;
+	memcpy(rds->rs.disks[0].path, lms->ss.host_id_disk.path, SANLK_PATH_LEN);
+
+	if (r->type == LD_RT_GL)
+		rds->rs.disks[0].offset = GL_LOCK_BEGIN * lms->align_size;
+	else if (r->type == LD_RT_VG)
+		rds->rs.disks[0].offset = VG_LOCK_BEGIN * lms->align_size;
+
+	/* LD_RT_LV offset is set in each lm_lock call from lv_args. */
+
+	if (r->type == LD_RT_GL || r->type == LD_RT_VG) {
+		rds->vb = malloc(sizeof(struct val_blk));
+		if (!rds->vb) {
+			free(rds);
+			return -ENOMEM;
+		}
+		memset(rds->vb, 0, sizeof(struct val_blk));
+	}
+
+	r->lm_data = rds;
+	return 0;
+}
+
+/*
+ * Free the per-resource sanlock state attached by
+ * lm_add_resource_sanlock().  Safe to call when no state was ever
+ * attached (r->lm_data NULL).  Always returns 0.
+ */
+int lm_rem_resource_sanlock(struct lockspace *ls, struct resource *r)
+{
+	struct rd_sanlock *rds = r->lm_data;
+
+	/* TODO: assert r->mode == UN or unlock if it's not? */
+
+	if (!rds)
+		return 0;
+	if (rds->vb)
+		free(rds->vb);
+	free(rds);
+	r->lm_data = NULL;
+	return 0;
+}
+
+/*
+ * Acquire the sanlock lease for resource r in mode ld_mode.
+ *
+ * lv_args: for LD_RT_LV resources, encodes the lease offset on the
+ * internal lock lv; it is re-read on every call because the lv may
+ * have been removed and recreated at a new offset.
+ *
+ * r_version/n_version: returned from the lock value block for
+ * resources that carry one (gl/vg); set to 0 when no lvb is read.
+ *
+ * retry: set to 1 when the caller should retry (a sh request that
+ * raced with other hosts), else 0.
+ *
+ * Returns 0 on success, -EAGAIN when the lock is held by another
+ * host, or another negative error.
+ */
+int lm_lock_sanlock(struct lockspace *ls, struct resource *r, int ld_mode, char *lv_args,
+		    uint32_t *r_version, uint32_t *n_version, int *retry)
+{
+	struct lm_sanlock *lms = ls->lm_data;
+	struct rd_sanlock *rds;
+	struct sanlk_resource *rs;
+	uint64_t lock_lv_offset;
+	uint32_t flags = 0;
+	struct val_blk vb;
+	uint16_t vb_version;
+	int added = 0;
+	int rv;
+
+	/* Lazily create per-resource state on first use. */
+	if (!r->lm_data) {
+		rv = lm_add_resource_sanlock(ls, r, lv_args);
+		if (rv < 0)
+			return rv;
+		added = 1;
+	}
+
+	rds = r->lm_data;
+	rs = &rds->rs;
+
+	if (r->type == LD_RT_LV) {
+		/* The lv may have been removed and recreated with a new lease
+		   offset so we need to get the offset from lv_args each time
+		   instead of reusing the value. */
+
+		rv = check_args_version(lv_args, LV_LOCK_ARGS_MAJOR);
+		if (rv < 0) {
+			log_error("S %s R %s lock_san wrong lv_args version %s",
+				  ls->name, r->name, lv_args);
+			return rv;
+		}
+
+		rv = lock_lv_offset_from_args(lv_args, &lock_lv_offset);
+		if (rv < 0) {
+			log_error("S %s R %s lock_san lv_offset_from_args error %d %s",
+				  ls->name, r->name, rv, lv_args);
+			return rv;
+		}
+
+		if (!added && (rds->rs.disks[0].offset != lock_lv_offset)) {
+			log_debug("S %s R %s lock_san offset old %llu new %llu",
+				  ls->name, r->name,
+				  (unsigned long long)rds->rs.disks[0].offset,
+				  (unsigned long long)lock_lv_offset);
+		}
+
+		rds->rs.disks[0].offset = lock_lv_offset;
+	}
+
+	if (ld_mode == LD_LK_SH) {
+		rs->flags |= SANLK_RES_SHARED;
+	} else if (ld_mode == LD_LK_EX) {
+		rs->flags &= ~SANLK_RES_SHARED;
+	} else {
+		log_error("lock_san invalid mode %d", ld_mode);
+		return -EINVAL;
+	}
+
+	/*
+	 * Use PERSISTENT because if lvmlockd exits while holding
+	 * a lock, it's not safe to simply clear/drop the lock while
+	 * a command or lv is using it.
+	 *
+	 * TODO: in the future, allow lvmlockd to be restarted after
+	 * an unclean exit, and reacquire orphan locks.
+	 */
+
+	rs->flags |= SANLK_RES_PERSISTENT;
+
+	log_debug("S %s R %s lock_san", ls->name, r->name);
+
+	if (daemon_test) {
+		*r_version = 0;
+		*n_version = 0;
+		return 0;
+	}
+
+	if (rds->vb)
+		flags |= SANLK_ACQUIRE_LVB;
+
+	rv = sanlock_acquire(lms->sock, -1, flags, 1, &rs, NULL);
+
+	if (rv == -EAGAIN) {
+		/*
+		 * It appears that sanlock_acquire returns EAGAIN when we request
+		 * a shared lock but the lock is held ex by another host.
+		 * There's no point in retrying this case, just return an error.
+		 *
+		 * TODO: verify the sanlock behavior here.
+		 */
+		log_debug("S %s R %s lock_san acquire mode %d rv EAGAIN", ls->name, r->name, ld_mode);
+		*retry = 0;
+		return -EAGAIN;
+	}
+
+	if (rv == SANLK_ACQUIRE_IDLIVE || rv == SANLK_ACQUIRE_OWNED || rv == SANLK_ACQUIRE_OTHER) {
+		/*
+		 * The lock is held by another host. These failures can
+		 * happen while multiple hosts are concurrently acquiring
+		 * shared locks. We want to retry a couple times in this
+		 * case because we'll probably get the sh lock.
+		 *
+		 * I believe these are also the errors when requesting an
+		 * ex lock that another host holds ex. We want to report
+		 * something like: "lock is held by another host" in this case.
+		 * Retry is pointless here.
+		 *
+		 * We can't distinguish between the two cases above,
+		 * so if requesting a sh lock, retry a couple times,
+		 * otherwise don't.
+		 *
+		 * TODO: verify sanlock behavior here.
+		 */
+		log_debug("S %s R %s lock_san acquire mode %d rv %d", ls->name, r->name, ld_mode, rv);
+		*retry = (ld_mode == LD_LK_SH) ? 1 : 0;
+		return -EAGAIN;
+	}
+
+	if (rv < 0) {
+		log_error("S %s R %s lock_san acquire error %d",
+			  ls->name, r->name, rv);
+
+		/* Undo the state created above so a later attempt starts clean. */
+		if (added) {
+			lm_rem_resource_sanlock(ls, r);
+			return rv;
+		}
+
+		/* if the gl has been disabled, remove and free the gl resource */
+		if ((rv == SANLK_LEADER_RESOURCE) && (r->type == LD_RT_GL)) {
+			if (!lm_gl_is_enabled(ls)) {
+				log_error("S %s R %s lock_san gl has been disabled",
+					  ls->name, r->name);
+				if (!strcmp(gl_lsname_sanlock, ls->name))
+					memset(gl_lsname_sanlock, 0, sizeof(gl_lsname_sanlock));
+				return -EUNATCH;
+			}
+		}
+
+		return rv;
+	}
+
+	if (rds->vb) {
+		/* NOTE(review): a get_lvb failure here returns the error to
+		   the caller even though the lease was acquired — confirm the
+		   caller releases the lock on this path. */
+		rv = sanlock_get_lvb(0, rs, (char *)&vb, sizeof(vb));
+		if (rv < 0) {
+			log_error("S %s R %s lock_san get_lvb error %d", ls->name, r->name, rv);
+			*r_version = 0;
+			*n_version = 0;
+			goto out;
+		}
+
+		vb_version = le16_to_cpu(vb.version);
+
+		/* A higher major version in the on-disk vb means it was
+		   written by a newer, incompatible format; stop using the
+		   vb for this resource. */
+		if (vb_version && ((vb_version & 0xFF00) > (VAL_BLK_VERSION & 0xFF00))) {
+			log_error("S %s R %s lock_san ignore vb_version %x",
+				  ls->name, r->name, vb_version);
+			*r_version = 0;
+			free(rds->vb);
+			rds->vb = NULL;
+			goto out;
+		}
+
+		*r_version = le32_to_cpu(vb.r_version);
+		*n_version = le32_to_cpu(vb.n_version);
+		memcpy(rds->vb, &vb, sizeof(vb)); /* rds->vb saved as le */
+
+		log_debug("S %s R %s lock_san get r_version %u n_version %u",
+			  ls->name, r->name, *r_version, *n_version);
+	}
+out:
+	return rv;
+}
+
+/*
+ * Change the mode of an already-held lock (sh <-> ex).
+ *
+ * When converting away from ex, write r_version into the lock value
+ * block first so other hosts see the updated version.  The SHARED
+ * flag update happens even in daemon_test mode (hence the two
+ * daemon_test checks).  Returns 0 on success, -EAGAIN when the
+ * convert cannot be granted, or another negative sanlock error.
+ */
+int lm_convert_sanlock(struct lockspace *ls, struct resource *r,
+		       int ld_mode, uint32_t r_version)
+{
+	struct lm_sanlock *lms = ls->lm_data;
+	struct rd_sanlock *rds = r->lm_data;
+	struct sanlk_resource *rs = &rds->rs;
+	struct val_blk vb;
+	uint32_t flags = 0;
+	int rv;
+
+	log_debug("S %s R %s convert_san", ls->name, r->name);
+
+	if (daemon_test)
+		goto rs_flag;
+
+	if (rds->vb && r_version && (r->mode == LD_LK_EX)) {
+		if (!rds->vb->version) {
+			/* first time vb has been written */
+			rds->vb->version = cpu_to_le16(VAL_BLK_VERSION);
+		}
+		if (r_version)
+			rds->vb->r_version = cpu_to_le32(r_version);
+		/* rds->vb is kept in le form; copy to a local before writing */
+		memcpy(&vb, rds->vb, sizeof(vb));
+
+		log_debug("S %s R %s convert_san set r_version %u",
+			  ls->name, r->name, r_version);
+
+		rv = sanlock_set_lvb(0, rs, (char *)&vb, sizeof(vb));
+		if (rv < 0) {
+			log_error("S %s R %s convert_san set_lvb error %d",
+				  ls->name, r->name, rv);
+		}
+	}
+
+ rs_flag:
+	if (ld_mode == LD_LK_SH)
+		rs->flags |= SANLK_RES_SHARED;
+	else
+		rs->flags &= ~SANLK_RES_SHARED;
+
+	if (daemon_test)
+		return 0;
+
+	rv = sanlock_convert(lms->sock, -1, flags, rs);
+	if (rv == -EAGAIN) {
+		/* TODO: what case is this? what should be done? */
+		log_error("S %s R %s convert_san EAGAIN", ls->name, r->name);
+		return -EAGAIN;
+	}
+	if (rv < 0) {
+		log_error("S %s R %s convert_san convert error %d", ls->name, r->name, rv);
+	}
+
+	return rv;
+}
+
+/*
+ * Release a lease while renaming it on disk to "invalid_removed" so
+ * the resource cannot be reacquired afterwards (used when the vg is
+ * being removed; see lm_unlock_sanlock).
+ *
+ * NOTE(review): casting struct rd_sanlock * to struct sanlk_resource *
+ * assumes rs is the first member of rd_sanlock — confirm against the
+ * struct definition.
+ *
+ * Returns 0 on success or a negative sanlock/ENOMEM error.
+ */
+static int release_rename(struct lockspace *ls, struct resource *r)
+{
+	struct rd_sanlock rd1;
+	struct rd_sanlock rd2;
+	struct sanlk_resource *res1;
+	struct sanlk_resource *res2;
+	struct sanlk_resource **res_args;
+	struct lm_sanlock *lms = ls->lm_data;
+	struct rd_sanlock *rds = r->lm_data;
+	int rv;
+
+	log_debug("S %s R %s release rename", ls->name, r->name);
+
+	res_args = malloc(2 * sizeof(struct sanlk_resource *));
+	if (!res_args)
+		return -ENOMEM;
+
+	/* rd1 keeps the current name; rd2 carries the replacement name. */
+	memcpy(&rd1, rds, sizeof(struct rd_sanlock));
+	memcpy(&rd2, rds, sizeof(struct rd_sanlock));
+
+	res1 = (struct sanlk_resource *)&rd1;
+	res2 = (struct sanlk_resource *)&rd2;
+
+	strcpy(res2->name, "invalid_removed");
+
+	res_args[0] = res1;
+	res_args[1] = res2;
+
+	rv = sanlock_release(lms->sock, -1, SANLK_REL_RENAME, 2, res_args);
+	if (rv < 0) {
+		log_error("S %s R %s unlock_san release rename error %d", ls->name, r->name, rv);
+	}
+
+	free(res_args);
+
+	return rv;
+}
+
+/*
+ * rds->vb is stored in le
+ *
+ * r_version is r->version
+ *
+ * for GL locks lvmlockd just increments this value
+ * each time the global lock is released from ex.
+ *
+ * for VG locks it is the seqno from the vg metadata.
+ *
+ * n_version is r->names_version
+ *
+ * n_version is only used in gl locks.
+ * lvmlockd increments this value each time
+ * the global lock is released from ex by a
+ * command that changes the list of vgs.
+ */
+
+/*
+ * Release the lease for r.
+ *
+ * Before releasing an ex lock, write the updated r_version/n_version
+ * into the lock value block (rds->vb is kept in little-endian form;
+ * see the comment block above for the meaning of the two versions).
+ * For vgremove (LMUF_FREE_VG), gl/vg leases are unlock-renamed via
+ * release_rename() instead so they cannot be reacquired.
+ *
+ * Returns 0 on success or a negative sanlock error.
+ */
+int lm_unlock_sanlock(struct lockspace *ls, struct resource *r,
+		      uint32_t r_version, uint32_t n_version, uint32_t lmu_flags)
+{
+	struct lm_sanlock *lms = ls->lm_data;
+	struct rd_sanlock *rds = r->lm_data;
+	struct sanlk_resource *rs = &rds->rs;
+	struct val_blk vb;
+	int rv;
+
+	log_debug("S %s R %s unlock_san r_version %u flags %x",
+		  ls->name, r->name, r_version, lmu_flags);
+
+	if (daemon_test)
+		return 0;
+
+	if (rds->vb && r_version && (r->mode == LD_LK_EX)) {
+		if (!rds->vb->version) {
+			/* first time vb has been written */
+			rds->vb->version = cpu_to_le16(VAL_BLK_VERSION);
+		}
+		if (r_version)
+			rds->vb->r_version = cpu_to_le32(r_version);
+		if (n_version)
+			rds->vb->n_version = cpu_to_le32(n_version);
+		memcpy(&vb, rds->vb, sizeof(vb));
+
+		log_debug("S %s R %s unlock_san set r_version %u n_version %u",
+			  ls->name, r->name, r_version, n_version);
+
+		rv = sanlock_set_lvb(0, rs, (char *)&vb, sizeof(vb));
+		if (rv < 0) {
+			log_error("S %s R %s unlock_san set_lvb error %d",
+				  ls->name, r->name, rv);
+		}
+	}
+
+	/*
+	 * For vgremove (FREE_VG) we unlock-rename the vg and gl locks
+	 * so they cannot be reacquired.
+	 */
+	if ((lmu_flags & LMUF_FREE_VG) &&
+	    (r->type == LD_RT_GL || r->type == LD_RT_VG)) {
+		return release_rename(ls, r);
+	}
+
+	rv = sanlock_release(lms->sock, -1, 0, 1, &rs);
+	if (rv < 0) {
+		log_error("S %s R %s unlock_san release error %d", ls->name, r->name, rv);
+	}
+
+	return rv;
+}
+
+/*
+ * Count other hosts that are live in the sanlock lockspace for ls.
+ *
+ * notify: when set and other live hosts are found, a host event would
+ * be sent asking them to stop the vg (currently compiled out).
+ *
+ * Returns the number of other live hosts, or 0 on any error (get_hosts
+ * failure, empty host list, or our own host_id missing from the list).
+ */
+int lm_hosts_sanlock(struct lockspace *ls, int notify)
+{
+	struct sanlk_host *hss = NULL;
+	struct sanlk_host *hs;
+	uint32_t state;
+	int hss_count = 0;
+	int found_self = 0;
+	int found_others = 0;
+	int i, rv;
+
+	rv = sanlock_get_hosts(ls->name, 0, &hss, &hss_count, 0);
+	if (rv < 0) {
+		log_error("S %s hosts_san get_hosts error %d", ls->name, rv);
+		return 0;
+	}
+
+	if (!hss || !hss_count) {
+		log_error("S %s hosts_san zero hosts", ls->name);
+		/* hss may be non-NULL with a zero count; don't leak it */
+		free(hss);
+		return 0;
+	}
+
+	/*
+	 * Advance hs unconditionally in the for header.  The previous
+	 * code incremented hs at the bottom of the loop body, so the
+	 * continue taken after finding our own host_id skipped the
+	 * increment and every remaining iteration re-examined the same
+	 * (self) entry, never counting hosts listed after ours.
+	 */
+	for (i = 0, hs = hss; i < hss_count; i++, hs++) {
+		log_debug("S %s hosts_san host_id %llu gen %llu flags %x",
+			  ls->name,
+			  (unsigned long long)hs->host_id,
+			  (unsigned long long)hs->generation,
+			  hs->flags);
+
+		if (hs->host_id == ls->host_id) {
+			found_self = 1;
+			continue;
+		}
+
+		state = hs->flags & SANLK_HOST_MASK;
+		if (state == SANLK_HOST_LIVE)
+			found_others++;
+	}
+	free(hss);
+
+	if (found_others && notify) {
+#if 0
+		struct sanlk_host_event he;
+		memset(&he, 0, sizeof(he));
+		/* fixed: previous draft assigned to undeclared "hm" */
+		he.host_id = 1;
+		he.generation = 0;
+		he.event = EVENT_VGSTOP;
+		sanlock_set_event(ls->name, &he, SANLK_SETEV_ALL_HOSTS);
+#endif
+		/*
+		 * We'll need to retry for a while before all the hosts see
+		 * this event and stop the vg.
+		 * We'll need to register for events from the lockspace
+		 * and add the registered fd to our poll set.
+		 */
+	}
+
+	if (!found_self) {
+		log_error("S %s hosts_san self not found others %d", ls->name, found_others);
+		return 0;
+	}
+
+	return found_others;
+}
+
diff --git a/man/lvmlockd.8.in b/man/lvmlockd.8.in
new file mode 100644
index 000000000..71cab290b
--- /dev/null
+++ b/man/lvmlockd.8.in
@@ -0,0 +1,829 @@
+.TH "LVMLOCKD" "8" "LVM TOOLS #VERSION#" "Red Hat, Inc" "\""
+
+.SH NAME
+lvmlockd \(em lvm locking daemon
+
+.SH DESCRIPTION
+lvm commands use lvmlockd to coordinate access to shared storage.
+.br
+When lvm is used on devices shared by multiple hosts, locks will:
+
+- coordinate reading and writing of lvm metadata
+.br
+- validate caching of lvm metadata
+.br
+- prevent concurrent activation of logical volumes
+
+lvmlockd uses an external lock manager to perform basic locking.
+.br
+Lock manager (lock type) options are:
+
+- sanlock: places locks on disk within lvm storage.
+.br
+- dlm: uses network communication and a cluster manager.
+
+.SH OPTIONS
+
+lvmlockd [options]
+
+For default settings, see lvmlockd -h.
+
+.B --help | -h
+ Show this help information.
+
+.B --version | -V
+ Show version of lvmlockd.
+
+.B --test | -T
+ Test mode, do not call lock manager.
+
+.B --foreground | -f
+ Don't fork.
+
+.B --daemon-debug | -D
+ Don't fork and print debugging to stdout.
+
+.B --pid-file | -p
+.I path
+ Set path to the pid file.
+
+.B --socket-path | -s
+.I path
+ Set path to the socket to listen on.
+
+.B --local-also | -a
+ Manage locks between pids for local VGs.
+
+.B --local-only | -o
+ Only manage locks for local VGs, not dlm|sanlock VGs.
+
+.B --gl-type | -g
+.I str
+ Set global lock type to be dlm|sanlock.
+
+.B --system-id | -y
+.I str
+ Set the local system id.
+
+.B --host-id | -i
+.I num
+ Set the local sanlock host id.
+
+.B --host-id-file | -F
+.I path
+ A file containing the local sanlock host_id.
+
+
+.SH USAGE
+
+.SS Initial set up
+
+Using lvm with lvmlockd for the first time includes some one-time set up
+steps:
+
+.SS 1. choose a lock manager
+
+.I dlm
+.br
+If dlm (or corosync) are already being used by other cluster
+software, then select dlm. dlm uses corosync which requires additional
+configuration beyond the scope of this document. See corosync and dlm
+documentation for instructions on configuration, setup and usage.
+
+.I sanlock
+.br
+Choose sanlock if dlm/corosync are not otherwise required.
+sanlock does not depend on any clustering software or configuration.
+
+.SS 2. configure hosts to use lvmlockd
+
+On all hosts running lvmlockd, configure lvm.conf:
+.nf
+locking_type = 1
+use_lvmlockd = 1
+use_lvmetad = 1
+.fi
+
+.I sanlock
+.br
+Assign each host a unique host_id in the range 1-2000 by setting
+.br
+/etc/lvm/lvmlocal.conf local/host_id = <num>
+
+.SS 3. start lvmlockd
+
+Use a service/init file if available, or just run "lvmlockd".
+
+.SS 4. start lock manager
+
+.I sanlock
+.br
+systemctl start wdmd sanlock
+
+.I dlm
+.br
+Follow external clustering documentation when applicable, otherwise:
+.br
+systemctl start corosync dlm
+
+.SS 5. create VGs on shared devices
+
+vgcreate --lock-type sanlock|dlm <vg_name> <devices>
+
+The vgcreate --lock-type option means that lvm commands will perform
+locking for the VG using lvmlockd and the specified lock manager.
+
+.SS 6. start VGs on all hosts
+
+vgchange --lock-start
+
+lvmlockd requires that VGs created with a lock type be "started" before
+being used. This is a lock manager operation to start/join the VG
+lockspace, and it may take some time. Until the start completes, locks
+are not available. Reading and reporting lvm commands are allowed while
+start is in progress.
+.br
+(A service/init file may be used to start VGs.)
+
+.SS 7. create and activate LVs
+
+An LV activated exclusively on one host cannot be activated on another.
+When multiple hosts need to use the same LV concurrently, the LV can be
+activated with a shared lock (see lvchange options -aey vs -asy.)
+(Shared locks are disallowed for certain LV types that cannot be used from
+multiple hosts.)
+
+.SS Subsequent start up
+
+.nf
+After initial set up, start up includes:
+
+- start lvmetad
+- start lvmlockd
+- start lock manager
+- vgchange --lock-start
+- activate LVs
+
+The shut down sequence is the reverse:
+
+- deactivate LVs
+- vgchange --lock-stop
+- stop lock manager
+- stop lvmlockd
+- stop lvmetad
+.fi
+
+
+.SH TOPICS
+
+.SS locking terms
+
+The following terms are used to distinguish VGs that require locking from
+those that do not. Also see
+.BR lvmsystemid (7).
+
+.I "lockd VG"
+
+A "lockd VG" is a shared VG that has a "lock type" of dlm or sanlock.
+Using it requires lvmlockd. These VGs exist on shared storage that is
+visible to multiple hosts. lvm commands use lvmlockd to perform locking
+for these VGs when they are used.
+
+If the lock manager for a lock type is not available (e.g. not started or
+failed), lvmlockd is not able to acquire locks from it, and lvm commands
+are unable to fully use VGs with the given lock type. Commands generally
+allow reading and reporting in this condition, but changes and activation
+are not allowed. Maintaining a properly running lock manager can require
+background knowledge not covered here.
+
+.I "local VG"
+
+A "local VG" is meant to be used by a single host. It has no lock type or
+lock type "none". lvm commands and lvmlockd do not perform locking for
+these VGs. A local VG typically exists on local (non-shared) devices and
+cannot be used concurrently from different hosts.
+
+If a local VG does exist on shared devices, it should be owned by a single
+host by having its system_id set. Only the host with a matching system_id
+can then use the local VG. A VG with no lock type and no system_id should
+be excluded from all but one host using lvm.conf filters. Without any of
+these protections, a local VG on shared devices can be easily damaged or
+destroyed.
+
+(When lvmlockd is enabled, it actively manages locks for lockd VGs, but
+also keeps a record of local VGs so it can quickly determine that no locks
+are needed for a given local VG.)
+
+.I "clvm VG"
+
+A "clvm VG" is a shared VG that has the CLUSTERED flag set (and may
+optionally have lock type "clvm"). Using it requires clvmd. These VGs
+cannot be used by hosts using lvmlockd, only by hosts using clvm. See
+below for converting a clvm VG to a lockd VG.
+
+The term "clustered" is widely used in other documentation, and refers to
+clvm VGs. Statements about "clustered" VGs usually do not apply to lockd
+VGs. A new set of rules, properties and descriptions apply to lockd VGs,
+created with a "lock type", as opposed to clvm VGs, created with the
+"clustered" flag.
+
+
+.SS locking activity
+
+To optimize the use of lvm with lvmlockd, consider the three kinds of lvm
+locks and when they are used:
+
+1.
+.I GL lock
+
+The global lock (GL lock) is associated with global information, which is
+information not isolated to a single VG. This is primarily:
+
+.nf
+- the list of all VG names
+- the list of PVs not allocated to a VG (orphan PVs)
+- properties of orphan PVs, e.g. PV size
+.fi
+
+The global lock is used in shared mode by commands that want to read this
+information, or in exclusive mode by commands that want to change this
+information.
+
+The vgs command acquires the global lock in shared mode because it reports
+the list of all VG names.
+
+The vgcreate command acquires the global lock in exclusive mode because it
+creates a new VG name, and it takes a PV from the list of unused PVs.
+
+When use_lvmlockd is enabled, many lvm commands attempt to acquire the
+global lock even if no lockd VGs exist. For this reason, lvmlockd should
+not be enabled unless lockd VGs will be used.
+
+2.
+.I VG lock
+
+A VG lock is associated with each VG. The VG lock is acquired in shared
+mode to read the VG and in exclusive mode to change the VG (write the VG
+metadata). This serializes modifications to a VG with all other lvm
+commands on the VG.
+
+"vgs" will not only acquire the GL lock (see above), but will acquire the
+VG lock for each VG prior to reading it.
+
+"vgs vg_name" does not acquire the GL lock (it does not need the list of
+all VG names), but will acquire the VG lock on each vg_name listed.
+
+3.
+.I LV lock
+
+An LV lock is acquired before the LV is activated, and is released after
+the LV is deactivated. If the LV lock cannot be acquired, the LV is not
+activated. LV locks are persistent and remain in place after the
+activation command is done. GL and VG locks are transient, and are held
+only while an lvm command is running.
+
+.I reporting
+
+Reporting commands can sometimes lead to unexpected and excessive locking
+activity. See below for optimizing reporting commands to avoid unwanted
+locking.
+
+If tags are used on the command line, all VGs must be read to search for
+matching tags. This implies acquiring the GL lock and each VG lock.
+
+
+.SS locking conflicts
+
+When a command asks lvmlockd to acquire a lock, lvmlockd submits a
+non-blocking lock request to the lock manager. This request will fail if
+the same lock is held by another host in an incompatible mode. In certain
+cases, lvmlockd may retry the request and hide simple transient conflicts
+from the command. In other cases, such as LV lock conflicts, the failure
+will be returned to the command immediately. The command will fail,
+reporting the conflict with another host.
+
+GL and VG locks are held for short periods, over the course of a single
+lvm command, so GL/VG lock conflicts can occur during a small window of
+time when two conflicting commands on different hosts happen to overlap
+each other. In these cases, retry attempts within lvmlockd will often
+mask the transient lock conflicts.
+
+Another factor that impacts lock conflicts is if lvm commands are
+coordinated by a user or program. If commands using conflicting GL/VG
+locks are not run concurrently on multiple hosts, they will not encounter
+lock conflicts. If no attempt is made to activate LVs exclusively on
+multiple hosts, then LV activation will not fail due to lock conflicts.
+
+Frequent, uncoordinated lvm commands, running concurrently on multiple
+hosts, that are making changes to the same lvm resources may occasionally
+fail due to locking conflicts. Internal retry attempts could be tuned to
+the level necessary to mask these conflicts. Or, retry attempts can be
+disabled if all command conflicts should be reported via a command
+failure.
+
+(Commands may report lock failures for reasons other than conflicts. See
+below for more cases, e.g. no GL lock exists, locking is not started,
+etc.)
+
+.SS local VGs on shared devices
+
+When local VGs exist on shared devices, no locking is performed for them
+by lvmlockd. The system_id should be set for these VGs to prevent
+multiple hosts from using them, or lvm.conf filters should be set to make
+the devices visible to only one host.
+
+The "owner" of a VG is the host with a matching system_id. When local VGs
+exist on shared devices, only the VG owner can read and write the local
+VG. lvm commands on all other hosts will fail to read or write the VG
+with an unmatching system_id.
+
+Example
+
+host-01 owns VG "vg0", which is visible to host-02. When host-02 runs
+the "vgs" command which reads vg0, the vgs command prints:
+.nf
+Skip VG vg0 with system id "host-01" from system id "host-02"
+.fi
+
+If a local VG on shared devices has no system_id, and filters are not used
+to make the devices visible to a single host, then all hosts are able to
+read and write it, which can easily corrupt the VG.
+
+(N.B. Changes to local VGs may not be immediately reflected on other hosts
+where they are visible. This is not a problem because the other hosts
+cannot use these VGs anyway. The relevant changes include VG renaming,
+uuid changes or changes to system_id.)
+
+
+.SS lockd VGs from hosts not using lvmlockd
+
+Only hosts that will use lockd VGs should be configured to run lvmlockd.
+However, lockd VGs may be visible from hosts not using lockd VGs and not
+running lvmlockd, much like local VGs with foreign system_id's may be
+visible. In this case, the lockd VGs are treated in a similar way to a
+local VG with an unmatching system_id.
+
+Example
+
+host-01 running lvmlockd is using "vg1" with lock type sanlock.
+host-02 is not running lvmlockd, but can see vg1. When host-02 runs
+the "vgs" command, which reads vg1, the vgs command prints:
+.nf
+Skip VG vg1 which requires lvmlockd, lock type sanlock.
+.fi
+
+
+.SS vgcreate
+
+Forms of the vgcreate command:
+
+.B vgcreate <vg_name> <devices>
+.br
+- creates a local VG
+.br
+- If lvm.conf system_id_source = "none", the VG will have no system_id.
+ This is not recommended, especially for VGs on shared devices.
+.br
+- If lvm.conf system_id_source does not disable the system_id, the VG
+ will be owned by the host creating the VG.
+
+.B vgcreate --lock-type sanlock|dlm <vg_name> <devices>
+.br
+- creates a lockd vg
+.br
+- lvm commands will request locks from lvmlockd to use the VG
+.br
+- lvmlockd will obtain locks from the specified lock manager
+.br
+- this requires lvmlockd to be configured (use_lvmlockd=1)
+.br
+- run vgchange --lock-start on other hosts to start the new VG
+
+.B vgcreate -cy <vg_name> <devices>
+.br
+- creates a clvm VG when clvm is configured
+.br
+- creates a lockd VG when lvmlockd is configured
+ (the --lock-type option is preferred in this case)
+.br
+- this clustered option originally created a clvm VG,
+ but will be translated to a lock type when appropriate.
+.br
+- if use_lvmlockd=1, -cy is translated to --lock-type <type>,
+ where <type> comes from lvm.conf:vgcreate_cy_lock_type,
+ which can be set to either sanlock or dlm.
+
+
+After lvm.conf use_lvmlockd=1 is set, and before the first lockd VG is
+created, no global lock will exist, and lvm commands will try and fail
+to acquire it. lvm commands will report this error until the first
+lockd VG is created: "Skipping global lock: not found".
+
+lvm commands that only read VGs are allowed to continue in this state,
+without the shared GL lock, but commands that attempt to acquire the GL
+lock exclusively to make changes will fail.
+
+
+.SS starting and stopping VGs
+
+Starting a lockd VG (vgchange --lock-start) causes the lock manager to
+start or join the lockspace for the VG. This makes locks for the VG
+accessible to the host. Stopping the VG leaves the lockspace and makes
+locks for the VG inaccessible to the host.
+
+Lockspaces should be started as early as possible because starting
+(joining) a lockspace can take a long time (potentially minutes after a
+host failure when using sanlock.) A VG can be started after all the
+following are true:
+
+.nf
+- lvmlockd is running
+- lock manager is running
+- VG is visible to the system
+.fi
+
+All lockd VGs can be started/stopped using:
+.br
+vgchange --lock-start
+.br
+vgchange --lock-stop
+
+
+Individual VGs can be started/stopped using:
+.br
+vgchange --lock-start <vg_name> ...
+.br
+vgchange --lock-stop <vg_name> ...
+
+To make vgchange wait for start to complete:
+.br
+vgchange --lock-start-wait
+.br
+vgchange --lock-start-wait <vg_name>
+
+To stop all lockspaces and wait for all to complete:
+.br
+lvmlock --stop-lockspaces --wait
+
+To start only selected lockd VGs, use the lvm.conf
+activation/lock_start_list. When defined, only VG names in this list are
+started by vgchange. If the list is not defined (the default), all
+visible lockd VGs are started. To start only "vg1", use the following
+lvm.conf configuration:
+
+.nf
+activation {
+ lock_start_list = [ "vg1" ]
+ ...
+}
+.fi
+
+
+.SS automatic starting and automatic activation
+
+Scripts or programs on a host that automatically start VGs will use an
+"auto" version of the normal --lock-start option to indicate that the
+command is being run automatically by the system:
+
+vgchange --lock-start-auto [vg_name ...]
+.br
+vgchange --lock-start-auto-wait [vg_name ...]
+
+By default, these "auto" variations have identical behavior to the
+--lock-start and --lock-start-wait options.
+
+However, when the lvm.conf activation/auto_lock_start_list is defined, the
+auto start commands apply an additional filtering phase to all VGs being
+started, testing each VG name against the auto_lock_start_list. The
+auto_lock_start_list defines lockd VGs that will be started by the auto
+start command. Visible lockd VGs not included in the list are ignored by
+the auto start command. If the list is undefined, all VG names pass this
+filter. (The lock_start_list is also still used to filter all VGs.)
+
+The auto_lock_start_list allows a user to select certain lockd VGs that
+should be automatically started by the system (or indirectly, those that
+should not).
+
+To use auto activation of lockd LVs (see auto_activation_volume_list),
+auto starting of the corresponding lockd VGs is necessary.
+
+
+.SS sanlock global lock
+
+There are some special cases related to the global lock in sanlock VGs.
+
+The global lock exists in one of the sanlock VGs. The first sanlock VG
+created will contain the global lock. Subsequent sanlock VGs will each
+contain disabled global locks that can be enabled later if necessary.
+
+The VG containing the global lock must be visible to all hosts using
+sanlock VGs. This can be a reason to create a small sanlock VG, visible
+to all hosts, and dedicated to just holding the global lock. While not
+required, this strategy can help to avoid extra work in the future if VGs
+are moved or removed.
+
+The vgcreate command typically acquires the global lock, but in the case
+of the first sanlock VG, there will be no global lock to acquire until the
+initial vgcreate is complete. So, creating the first sanlock VG is a
+special case that skips the global lock.
+
+vgcreate for a sanlock VG determines it is the first one to exist if no
+other sanlock VGs are visible. It is possible that other sanlock VGs do
+exist but are not visible or started on the host running vgcreate. This
+raises the possibility of more than one global lock existing. If this
+happens, commands will warn of the condition, and it should be manually
+corrected.
+
+If the situation arises where more than one sanlock VG contains a global
+lock, the global lock should be manually disabled in all but one of them
+with the command:
+
+lvmlock --gl-disable <vg_name>
+
+(The one VG with the global lock enabled must be visible to all hosts.)
+
+An opposite problem can occur if the VG holding the global lock is
+removed. In this case, no global lock will exist following the vgremove,
+and subsequent lvm commands will fail to acquire it. In this case, the
+global lock needs to be manually enabled in one of the remaining sanlock
+VGs with the command:
+
+lvmlock --gl-enable <vg_name>
+
+Or, a new VG can be created with an enabled GL lock with the command:
+.br
+vgcreate --lock-type sanlock --lock-gl enable
+
+A small sanlock VG dedicated to holding the global lock can avoid the case
+where the GL lock must be manually enabled after a vgremove.
+
+
+
+.SS changing lock type
+
+To change a local VG to a lockd VG:
+
+vgchange --lock-type sanlock|dlm <vg_name>
+
+All LVs must be inactive to change the lock type.
+
+To change a clvm VG to a lockd VG:
+
+vgchange --lock-type sanlock|dlm <vg_name>
+
+Changing a lockd VG to a local VG is not yet generally allowed.
+(It can be done partially in certain recovery cases.)
+
+
+
+.SS limitations of lockd VGs
+
+LV types that are not yet allowed within lockd VGs:
+.br
+- snapshot (old style copy on write)
+
+lvm commands that are not yet allowed for lockd VGs:
+.br
+- vgrename
+.br
+- vgsplit
+.br
+- vgmerge
+.br
+- lvrename
+
+sanlock VGs can contain up to 190 LVs. This limit is due to the size of
+the internal lvmlock LV used to hold sanlock leases.
+
+
+.SS vgremove of a sanlock VG
+
+vgremove of a sanlock VG will fail if other hosts have the VG started.
+Run vgchange --lock-stop <vg_name> on all other hosts before vgremove.
+
+(It may take several seconds before vgremove recognizes that all hosts
+have stopped.)
+
+
+.SS shared LVs
+
+When an LV is used concurrently from multiple hosts (e.g. by a
+multi-host/cluster application or file system), the LV can be activated on
+multiple hosts concurrently using a shared lock.
+
+To activate the LV with a shared lock: lvchange -asy vg/lv.
+
+The default activation mode is always exclusive (-ay defaults to -aey).
+
+If the LV type does not allow the LV to be used concurrently from multiple
+hosts, then a shared activation lock is not allowed and the lvchange
+command will report an error. LV types that cannot be used concurrently
+from multiple hosts include thin, cache, raid, mirror, and snapshot.
+
+lvextend on an LV with shared locks is not allowed. Deactivate the LV
+everywhere, or activate it exclusively to run lvextend.
+
+
+.SS recover from lost pv holding sanlock locks
+
+In a sanlock VG, the locks are stored on a PV within the VG. If this PV
+is lost, the locks need to be reconstructed as follows:
+
+1. Enable the unsafe lock modes option in lvm.conf so that default locking requirements can be overridden.
+
+\&
+
+.nf
+allow_override_lock_modes = 1
+.fi
+
+2. Remove missing PVs and partial LVs from the VG.
+
+\&
+
+.nf
+vgreduce --removemissing --force --lock-gl na --lock-vg na <vg>
+.fi
+
+3. If step 2 does not remove the internal/hidden "lvmlock" lv, it should be removed.
+
+\&
+
+.nf
+lvremove --lock-vg na --lock-lv na <vg>/lvmlock
+.fi
+
+4. Change the lock type to none.
+
+\&
+
+.nf
+vgchange --lock-type none --force --lock-gl na --lock-vg na <vg>
+.fi
+
+5. VG space is needed to recreate the locks. If there is not enough space, vgextend the vg.
+
+6. Change the lock type back to sanlock. This creates a new internal
+lvmlock lv, and recreates locks.
+
+\&
+
+.nf
+vgchange --lock-type sanlock <vg>
+.fi
+
+
+.SS locking system failures
+
+.B lvmlockd failure
+
+If lvmlockd was holding any locks, the host should be rebooted. When
+lvmlockd fails, the locks it holds are orphaned in the lock manager, and
+still protect the resources used by the host. If lvmlockd is restarted,
+it does not yet have the ability to reacquire previously orphaned locks.
+
+.B dlm/corosync failure
+
+If dlm or corosync fail, the clustering system will fence the host using a
+method configured within the dlm/corosync clustering environment.
+
+lvm commands on other hosts will be blocked from acquiring any locks until
+the dlm/corosync recovery process is complete.
+
+.B sanlock lock storage failure
+
+If access to the device containing the VG's locks is lost, sanlock cannot
+renew its leases for locked LVs. This means that the host could soon lose
+the lease to another host which could activate the LV exclusively.
+sanlock is designed to never reach the point where two hosts hold the
+same lease exclusively at once, so the same LV should never be active on
+two hosts at once when activated exclusively.
+
+The sanlock method of preventing this involves lvmlockd doing nothing,
+which produces a safe but potentially inconvenient result. Doing nothing
+from lvmlockd leads to the host's LV locks not being released, which leads
+to sanlock using the local watchdog to reset the host before another host
+can acquire any locks held by the local host.
+
+lvm commands on other hosts will be blocked from acquiring locks held by
+the failed/reset host until the sanlock recovery time expires (2-4
+minutes). This includes activation of any LVs that were locked by the
+failed host. It also includes GL/VG locks held by any lvm commands that
+happened to be running on the failed host at the time of the failure.
+
+.B sanlock daemon failure
+
+If the sanlock daemon fails or exits while a lockspace is started, the
+local watchdog will reset the host. See previous section for the impact
+on other hosts.
+
+
+.SS overriding, disabling, testing locking
+
+Special options to manually override or disable default locking:
+
+Disable use_lvmlockd for an individual command. Return success to all
+lockd calls without attempting to contact lvmlockd:
+
+<lvm_command> --config 'global { use_lvmlockd = 0 }'
+
+Ignore error if lockd call failed to connect to lvmlockd or did not get a
+valid response to its request:
+
+<lvm_command> --sysinit
+.br
+<lvm_command> --ignorelockingfailure
+
+Specifying "na" as the lock mode will cause the lockd_xy() call to do
+nothing (like the --config):
+
+<lvm_command> --lock-gl na
+.br
+<lvm_command> --lock-vg na
+.br
+<lvm_command> --lock-lv na
+
+(This will not be permitted unless lvm.conf:allow_override_lock_modes=1.)
+
+Exercise all locking code in client and daemon, for each specific
+lock_type, but return success at a step that would fail because the
+specific locking system is not running:
+
+lvmlockd --test
+
+
+.SS locking between local processes
+
+With the --local-also option, lvmlockd will handle VG locking between
+local processes for local VGs. The standard internal lockd_vg calls,
+typically used for locking lockd VGs, are applied to local VGs. The
+global lock behavior does not change and applies to both lockd VGs and
+local VGs as usual.
+
+The --lock-only option extends the --local-also option to include a
+special "global lock" for local VGs. This option should be used when
+only local VGs exist and no lockd VGs exist. It allows the internal
+lockd_gl calls to provide GL locking between local processes.
+
+
+.SS changing dlm cluster name
+
+When a dlm VG is created, the cluster name is saved in the VG metadata for
+the new VG. To use the VG, a host must be in the named cluster. If the
+cluster name is changed, or the VG is moved to a different cluster, the
+cluster name for the dlm VG must be changed. To do this:
+
+1. Ensure the VG is not being used by any hosts.
+
+2. The new cluster must be active on the node making the change.
+.br
+ The current dlm cluster name can be seen by:
+.br
+ cat /sys/kernel/config/dlm/cluster/cluster_name
+
+3. Change the VG lock type to none:
+.br
+ vgchange --lock-type none --force <vg_name>
+
+4. Change the VG lock type back to dlm which sets the new cluster name:
+.br
+ vgchange --lock-type dlm <vg_name>
+
+
+(The cluster name is not checked or enforced when using clvmd which can
+lead to hosts corrupting a clvm VG if they are in different clusters.)
+
+
+.SS clvm comparison
+
+User visible or command level differences between lockd VGs (with
+lvmlockd) and clvm VGs (with clvmd):
+
+lvmlockd includes the sanlock lock manager option.
+
+lvmlockd does not require all hosts to see all the same shared devices.
+
+lvmlockd defaults to the exclusive activation mode in all VGs.
+
+lvmlockd commands may fail from lock conflicts with other commands.
+
+lvmlockd commands always apply to the local host, and never have an effect
+on a remote host. (The activation option 'l' is not used.)
+
+lvmlockd works with lvmetad.
+
+lvmlockd works with thin and cache pools and LVs.
+
+lvmlockd allows VG ownership by system id (also works when lvmlockd is not
+used).
+
+lvmlockd saves the cluster name for a lockd VG using dlm. Only hosts in
+the matching cluster can use the VG.
+
+lvmlockd prefers the new vgcreate --lock-type option in place of the
+--clustered (-c) option.
+
+lvmlockd requires starting/stopping lockd VGs with vgchange --lock-start
+and --lock-stop.
+
+