Diffstat (limited to 'daemons/lvmlockd/lvmlockd-dlm.c')
-rw-r--r--  daemons/lvmlockd/lvmlockd-dlm.c  666
1 file changed, 666 insertions, 0 deletions
diff --git a/daemons/lvmlockd/lvmlockd-dlm.c b/daemons/lvmlockd/lvmlockd-dlm.c
new file mode 100644
index 000000000..554296884
--- /dev/null
+++ b/daemons/lvmlockd/lvmlockd-dlm.c
@@ -0,0 +1,666 @@
+/*
+ * Copyright (C) 2014 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ */
+
+#define _XOPEN_SOURCE 500 /* pthread */
+#define _ISOC99_SOURCE
+#define _GNU_SOURCE
+
+#include <assert.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <poll.h>
+#include <errno.h>
+#include <string.h>
+#include <endian.h>
+#include <fcntl.h>
+#include <byteswap.h>
+#include <syslog.h>
+#include <dirent.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+
+#include "configure.h"
+#include "daemon-server.h"
+#include "daemon-log.h"
+#include "xlate.h"
+
+#include "lvmlockd-internal.h"
+#include "lvmlockd-client.h"
+
+/*
+ * Using the synchronous _wait dlm APIs, so do not define _REENTRANT;
+ * link with the non-threaded version of the library, libdlm_lt.
+ */
+#include "libdlm.h"
+
+struct lm_dlm {
+ dlm_lshandle_t *dh;
+};
+
+struct rd_dlm {
+ struct dlm_lksb lksb;
+ struct val_blk *vb;
+};
+
+int lm_data_size_dlm(void)
+{
+ return sizeof(struct rd_dlm);
+}
+
+/*
+ * lock_args format
+ *
+ * vg_lock_args format for dlm is
+ * vg_version_string:undefined:cluster_name
+ *
+ * lv_lock_args are not used for dlm
+ *
+ * version_string is MAJOR.MINOR.PATCH
+ * undefined may contain ":"
+ */
+
+#define VG_LOCK_ARGS_MAJOR 1
+#define VG_LOCK_ARGS_MINOR 0
+#define VG_LOCK_ARGS_PATCH 0
+
+static int cluster_name_from_args(char *vg_args, char *clustername)
+{
+ return last_string_from_args(vg_args, clustername);
+}
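
As a point of reference for the lock_args format comment above, here is a minimal sketch of pulling the cluster name out of a vg_args string. The real helper, last_string_from_args(), is defined elsewhere in lvmlockd, so this only illustrates the expected behavior (taking the text after the last ':'), not its actual implementation:

    /* Illustration only: copy the text after the last ':' of args into buf. */
    static int example_last_field(const char *args, char *buf, size_t len)
    {
            const char *p = strrchr(args, ':');

            if (!p || !p[1])
                    return -EINVAL;
            snprintf(buf, len, "%s", p + 1);
            return 0;
    }

Called on "1.0.0:mycluster" (a made-up value in the format described above), this would leave "mycluster" in buf, which is what cluster_name_from_args() is expected to return.
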
+
+static int check_args_version(char *vg_args)
+{
+ unsigned int major = 0;
+ int rv;
+
+ rv = version_from_args(vg_args, &major, NULL, NULL);
+ if (rv < 0) {
+ log_error("check_args_version %s error %d", vg_args, rv);
+ return rv;
+ }
+
+ if (major > VG_LOCK_ARGS_MAJOR) {
+ log_error("check_args_version %s major %d %d", vg_args, major, VG_LOCK_ARGS_MAJOR);
+ return -1;
+ }
+
+ return 0;
+}
+
+/* This will be set after dlm_controld is started. */
+#define DLM_CLUSTER_NAME_PATH "/sys/kernel/config/dlm/cluster/cluster_name"
+
+static int read_cluster_name(char *clustername)
+{
+ char *n;
+ int fd;
+ int rv;
+
+ if (daemon_test) {
+ sprintf(clustername, "%s", "test");
+ return 0;
+ }
+
+ fd = open(DLM_CLUSTER_NAME_PATH, O_RDONLY);
+ if (fd < 0) {
+ log_debug("read_cluster_name: open error %d, check dlm_controld", fd);
+ return fd;
+ }
+
+ rv = read(fd, clustername, MAX_ARGS - 1);
+ if (rv < 0) {
+ log_error("read_cluster_name: cluster name read error %d, check dlm_controld", rv);
+ close(fd);
+ return rv;
+ }
+
+ n = strstr(clustername, "\n");
+ if (n)
+ *n = '\0';
+ close(fd);
+ return 0;
+}
+
+int lm_init_vg_dlm(char *ls_name, char *vg_name, uint32_t flags, char *vg_args)
+{
+ char clustername[MAX_ARGS];
+ char lock_args_version[MAX_ARGS];
+ int rv;
+
+ memset(clustername, 0, sizeof(clustername));
+ memset(lock_args_version, 0, sizeof(lock_args_version));
+
+ snprintf(lock_args_version, MAX_ARGS, "%u.%u.%u",
+ VG_LOCK_ARGS_MAJOR, VG_LOCK_ARGS_MINOR, VG_LOCK_ARGS_PATCH);
+
+ rv = read_cluster_name(clustername);
+ if (rv < 0)
+ return -EMANAGER;
+
+ if (strlen(clustername) + strlen(lock_args_version) + 2 > MAX_ARGS) {
+ log_error("init_vg_dlm args too long");
+ return -EARGS;
+ }
+
+ snprintf(vg_args, MAX_ARGS, "%s:%s", lock_args_version, clustername);
+ rv = 0;
+
+ log_debug("init_vg_dlm done %s vg_args %s", ls_name, vg_args);
+ return rv;
+}
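
For illustration, assuming the cluster name read from sysfs is "mycluster" (a made-up name), the vg_args written by this function would look like:

    char vg_args[MAX_ARGS];

    /* hypothetical values: lock_args version 1.0.0, cluster "mycluster" */
    snprintf(vg_args, MAX_ARGS, "%u.%u.%u:%s", 1, 0, 0, "mycluster");
    /* vg_args is now "1.0.0:mycluster" */
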
+
+int lm_prepare_lockspace_dlm(struct lockspace *ls)
+{
+ char sys_clustername[MAX_ARGS];
+ char arg_clustername[MAX_ARGS];
+ struct lm_dlm *lmd;
+ int rv;
+
+ memset(sys_clustername, 0, sizeof(sys_clustername));
+ memset(arg_clustername, 0, sizeof(arg_clustername));
+
+ rv = read_cluster_name(sys_clustername);
+ if (rv < 0)
+ return -EMANAGER;
+
+ if (!ls->vg_args[0]) {
+ /* global lockspace has no vg args */
+ goto skip_args;
+ }
+
+ rv = check_args_version(ls->vg_args);
+ if (rv < 0)
+ return -EARGS;
+
+ rv = cluster_name_from_args(ls->vg_args, arg_clustername);
+ if (rv < 0) {
+ log_error("prepare_lockspace_dlm %s no cluster name from args %s", ls->name, ls->vg_args);
+ return -EARGS;
+ }
+
+ if (strcmp(sys_clustername, arg_clustername)) {
+ log_error("prepare_lockspace_dlm %s mismatching cluster names sys %s arg %s",
+ ls->name, sys_clustername, arg_clustername);
+ return -EARGS;
+ }
+
+ skip_args:
+ lmd = malloc(sizeof(struct lm_dlm));
+ if (!lmd)
+ return -ENOMEM;
+
+ ls->lm_data = lmd;
+ return 0;
+}
+
+int lm_add_lockspace_dlm(struct lockspace *ls, int adopt)
+{
+ struct lm_dlm *lmd = (struct lm_dlm *)ls->lm_data;
+
+ if (daemon_test)
+ return 0;
+
+ if (adopt)
+ lmd->dh = dlm_open_lockspace(ls->name);
+ else
+ lmd->dh = dlm_new_lockspace(ls->name, 0600, DLM_LSFL_NEWEXCL);
+
+ if (!lmd->dh) {
+ log_error("add_lockspace_dlm %s adopt %d error", ls->name, adopt);
+ free(lmd);
+ ls->lm_data = NULL;
+ return -1;
+ }
+
+ return 0;
+}
+
+int lm_rem_lockspace_dlm(struct lockspace *ls, int free_vg)
+{
+ struct lm_dlm *lmd = (struct lm_dlm *)ls->lm_data;
+ int rv;
+
+ if (daemon_test)
+ goto out;
+
+ /*
+ * If free_vg is set, it means we are doing vgremove, and we may want
+ * to tell any other nodes to leave the lockspace. This is not really
+ * necessary since there should be no harm in having an unused
+ * lockspace sitting around. A new "notification lock" would need to
+ * be added with a callback to signal this.
+ */
+
+ rv = dlm_release_lockspace(ls->name, lmd->dh, 1);
+ if (rv < 0) {
+ log_error("rem_lockspace_dlm error %d", rv);
+ return rv;
+ }
+ out:
+ free(lmd);
+ ls->lm_data = NULL;
+
+ if (!strcmp(ls->name, gl_lsname_dlm)) {
+ gl_running_dlm = 0;
+ gl_auto_dlm = 0;
+ }
+
+ return 0;
+}
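
For context, a minimal standalone sketch of the libdlm lockspace lifecycle used by the two functions above; the lockspace name is made up and error handling is reduced to the bare minimum:

    #include "libdlm.h"

    static int example_lockspace_cycle(void)
    {
            dlm_lshandle_t dh;

            /* join: create the lockspace, failing if it already exists */
            dh = dlm_new_lockspace("lvm_example", 0600, DLM_LSFL_NEWEXCL);
            if (!dh) {
                    /* adopt: attach to a lockspace this node already joined */
                    dh = dlm_open_lockspace("lvm_example");
                    if (!dh)
                            return -1;
            }

            /* ... take and release locks here ... */

            /* leave the lockspace; the final argument is the force flag,
               passed as 1 above in lm_rem_lockspace_dlm() */
            return dlm_release_lockspace("lvm_example", dh, 1);
    }
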
+
+static int lm_add_resource_dlm(struct lockspace *ls, struct resource *r, int with_lock_nl)
+{
+ struct lm_dlm *lmd = (struct lm_dlm *)ls->lm_data;
+ struct rd_dlm *rdd = (struct rd_dlm *)r->lm_data;
+ uint32_t flags = 0;
+ char *buf;
+ int rv;
+
+ if (r->type == LD_RT_GL || r->type == LD_RT_VG) {
+ buf = malloc(sizeof(struct val_blk) + DLM_LVB_LEN);
+ if (!buf)
+ return -ENOMEM;
+ memset(buf, 0, sizeof(struct val_blk) + DLM_LVB_LEN);
+
+ rdd->vb = (struct val_blk *)buf;
+ rdd->lksb.sb_lvbptr = buf + sizeof(struct val_blk);
+
+ flags |= LKF_VALBLK;
+ }
+
+ if (!with_lock_nl)
+ goto out;
+
+ /* because this is a new NL lock request */
+ flags |= LKF_EXPEDITE;
+
+ if (daemon_test)
+ goto out;
+
+ rv = dlm_ls_lock_wait(lmd->dh, LKM_NLMODE, &rdd->lksb, flags,
+ r->name, strlen(r->name),
+ 0, NULL, NULL, NULL);
+ if (rv < 0) {
+ log_error("S %s R %s add_resource_dlm lock error %d", ls->name, r->name, rv);
+ return rv;
+ }
+ out:
+ return 0;
+}
+
+int lm_rem_resource_dlm(struct lockspace *ls, struct resource *r)
+{
+ struct lm_dlm *lmd = (struct lm_dlm *)ls->lm_data;
+ struct rd_dlm *rdd = (struct rd_dlm *)r->lm_data;
+ struct dlm_lksb *lksb;
+ int rv = 0;
+
+ if (daemon_test)
+ goto out;
+
+ lksb = &rdd->lksb;
+
+ if (!lksb->sb_lkid)
+ goto out;
+
+ rv = dlm_ls_unlock_wait(lmd->dh, lksb->sb_lkid, 0, lksb);
+ if (rv < 0) {
+ log_error("S %s R %s rem_resource_dlm unlock error %d", ls->name, r->name, rv);
+ }
+ out:
+ if (rdd->vb)
+ free(rdd->vb);
+
+ memset(rdd, 0, sizeof(struct rd_dlm));
+ r->lm_init = 0;
+ return rv;
+}
+
+static int to_dlm_mode(int ld_mode)
+{
+ switch (ld_mode) {
+ case LD_LK_EX:
+ return LKM_EXMODE;
+ case LD_LK_SH:
+ return LKM_PRMODE;
+ }
+ return -1;
+}
+
+static int lm_adopt_dlm(struct lockspace *ls, struct resource *r, int ld_mode,
+ uint32_t *r_version)
+{
+ struct lm_dlm *lmd = (struct lm_dlm *)ls->lm_data;
+ struct rd_dlm *rdd = (struct rd_dlm *)r->lm_data;
+ struct dlm_lksb *lksb;
+ uint32_t flags = 0;
+ int mode;
+ int rv;
+
+ *r_version = 0;
+
+ if (!r->lm_init) {
+ rv = lm_add_resource_dlm(ls, r, 0);
+ if (rv < 0)
+ return rv;
+ r->lm_init = 1;
+ }
+
+ lksb = &rdd->lksb;
+
+ flags |= LKF_PERSISTENT;
+ flags |= LKF_ORPHAN;
+
+ if (rdd->vb)
+ flags |= LKF_VALBLK;
+
+ mode = to_dlm_mode(ld_mode);
+ if (mode < 0) {
+ log_error("adopt_dlm invalid mode %d", ld_mode);
+ rv = -EINVAL;
+ goto fail;
+ }
+
+ log_debug("S %s R %s adopt_dlm", ls->name, r->name);
+
+ if (daemon_test)
+ return 0;
+
+ /*
+ * dlm returns 0 for success, -EAGAIN if an orphan is
+ * found with another mode, and -ENOENT if no orphan.
+ *
+ * cast/bast/param are (void *)1 because the kernel
+ * returns errors if some are null.
+ */
+
+ rv = dlm_ls_lockx(lmd->dh, mode, lksb, flags,
+ r->name, strlen(r->name), 0,
+ (void *)1, (void *)1, (void *)1,
+ NULL, NULL);
+
+ if (rv == -EAGAIN) {
+ log_debug("S %s R %s adopt_dlm adopt mode %d try other mode",
+ ls->name, r->name, ld_mode);
+ rv = -EUCLEAN;
+ goto fail;
+ }
+ if (rv < 0) {
+ log_debug("S %s R %s adopt_dlm mode %d flags %x error %d errno %d",
+ ls->name, r->name, mode, flags, rv, errno);
+ goto fail;
+ }
+
+ /*
+ * FIXME: For GL/VG locks we probably want to read the lvb,
+ * especially if adopting an ex lock, because when we
+ * release this adopted ex lock we may want to write new
+ * lvb values based on the current lvb values (at least
+ * in the GL case where we increment the current values).
+ *
+ * It should be possible to read the lvb by requesting
+ * this lock in the same mode it's already in.
+ */
+
+ return rv;
+
+ fail:
+ lm_rem_resource_dlm(ls, r);
+ return rv;
+}
+
+/*
+ * Use PERSISTENT so that if lvmlockd exits while holding locks,
+ * the locks will remain orphaned in the dlm, still protecting what
+ * they were acquired to protect.
+ */
+
+int lm_lock_dlm(struct lockspace *ls, struct resource *r, int ld_mode,
+ uint32_t *r_version, int adopt)
+{
+ struct lm_dlm *lmd = (struct lm_dlm *)ls->lm_data;
+ struct rd_dlm *rdd = (struct rd_dlm *)r->lm_data;
+ struct dlm_lksb *lksb;
+ struct val_blk vb;
+ uint32_t flags = 0;
+ uint16_t vb_version;
+ int mode;
+ int rv;
+
+ if (adopt) {
+ /* When adopting, we don't follow the normal method
+ of acquiring an NL lock and then converting it to the
+ desired mode. */
+ return lm_adopt_dlm(ls, r, ld_mode, r_version);
+ }
+
+ if (!r->lm_init) {
+ rv = lm_add_resource_dlm(ls, r, 1);
+ if (rv < 0)
+ return rv;
+ r->lm_init = 1;
+ }
+
+ lksb = &rdd->lksb;
+
+ flags |= LKF_CONVERT;
+ flags |= LKF_NOQUEUE;
+ flags |= LKF_PERSISTENT;
+
+ if (rdd->vb)
+ flags |= LKF_VALBLK;
+
+ mode = to_dlm_mode(ld_mode);
+ if (mode < 0) {
+ log_error("lock_dlm invalid mode %d", ld_mode);
+ return -EINVAL;
+ }
+
+ log_debug("S %s R %s lock_dlm", ls->name, r->name);
+
+ if (daemon_test) {
+ *r_version = 0;
+ return 0;
+ }
+
+ rv = dlm_ls_lock_wait(lmd->dh, mode, lksb, flags,
+ r->name, strlen(r->name),
+ 0, NULL, NULL, NULL);
+ if (rv == -EAGAIN) {
+ log_error("S %s R %s lock_dlm mode %d rv EAGAIN", ls->name, r->name, mode);
+ return -EAGAIN;
+ }
+ if (rv < 0) {
+ log_error("S %s R %s lock_dlm error %d", ls->name, r->name, rv);
+ return rv;
+ }
+
+ if (rdd->vb) {
+ if (lksb->sb_flags & DLM_SBF_VALNOTVALID) {
+ log_debug("S %s R %s lock_dlm VALNOTVALID", ls->name, r->name);
+ memset(rdd->vb, 0, sizeof(struct val_blk));
+ *r_version = 0;
+ goto out;
+ }
+
+ memcpy(&vb, lksb->sb_lvbptr, sizeof(struct val_blk));
+ vb_version = le16_to_cpu(vb.version);
+
+ if (vb_version && ((vb_version & 0xFF00) > (VAL_BLK_VERSION & 0xFF00))) {
+ log_error("S %s R %s lock_dlm ignore vb_version %x",
+ ls->name, r->name, vb_version);
+ *r_version = 0;
+ free(rdd->vb);
+ rdd->vb = NULL;
+ lksb->sb_lvbptr = NULL;
+ goto out;
+ }
+
+ *r_version = le32_to_cpu(vb.r_version);
+ memcpy(rdd->vb, &vb, sizeof(vb)); /* rdd->vb saved as le */
+
+ log_debug("S %s R %s lock_dlm get r_version %u",
+ ls->name, r->name, *r_version);
+ }
+out:
+ return 0;
+}
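
Putting the pieces together, the non-adopt path above boils down to a new NL lock followed by conversions. A rough standalone sketch of that sequence follows; the resource name is a placeholder, the LVB (LKF_VALBLK) handling and error logging are omitted, and the dlm_ls_lock_wait() arguments simply mirror the calls above:

    static int example_lock_cycle(dlm_lshandle_t dh)
    {
            struct dlm_lksb lksb;
            const char *name = "example-resource";   /* placeholder name */
            int rv;

            memset(&lksb, 0, sizeof(lksb));

            /* 1. add_resource: take a new NL lock; EXPEDITE because it is new */
            rv = dlm_ls_lock_wait(dh, LKM_NLMODE, &lksb, LKF_EXPEDITE,
                                  name, strlen(name), 0, NULL, NULL, NULL);
            if (rv < 0)
                    return rv;

            /* 2. lock: convert NL -> EX; PERSISTENT leaves an orphan if the
             *    daemon exits, NOQUEUE returns -EAGAIN instead of waiting */
            rv = dlm_ls_lock_wait(dh, LKM_EXMODE, &lksb,
                                  LKF_CONVERT | LKF_NOQUEUE | LKF_PERSISTENT,
                                  name, strlen(name), 0, NULL, NULL, NULL);
            if (rv < 0)
                    return rv;

            /* 3. unlock: convert back to NL (no PERSISTENT needed for NL) */
            return dlm_ls_lock_wait(dh, LKM_NLMODE, &lksb, LKF_CONVERT,
                                    name, strlen(name), 0, NULL, NULL, NULL);
    }

For the GL and VG resources, the code above additionally copies the val_blk into and out of lksb.sb_lvbptr around steps 2 and 3, with LKF_VALBLK set.
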
+
+int lm_convert_dlm(struct lockspace *ls, struct resource *r,
+ int ld_mode, uint32_t r_version)
+{
+ struct lm_dlm *lmd = (struct lm_dlm *)ls->lm_data;
+ struct rd_dlm *rdd = (struct rd_dlm *)r->lm_data;
+ struct dlm_lksb *lksb = &rdd->lksb;
+ uint32_t mode;
+ uint32_t flags = 0;
+ int rv;
+
+ log_debug("S %s R %s convert_dlm", ls->name, r->name);
+
+ flags |= LKF_CONVERT;
+ flags |= LKF_NOQUEUE;
+ flags |= LKF_PERSISTENT;
+
+ if (rdd->vb && r_version && (r->mode == LD_LK_EX)) {
+ if (!rdd->vb->version) {
+ /* first time vb has been written */
+ rdd->vb->version = cpu_to_le16(VAL_BLK_VERSION);
+ }
+ rdd->vb->r_version = cpu_to_le32(r_version);
+ memcpy(lksb->sb_lvbptr, rdd->vb, sizeof(struct val_blk));
+
+ log_debug("S %s R %s convert_dlm set r_version %u",
+ ls->name, r->name, r_version);
+
+ flags |= LKF_VALBLK;
+ }
+
+ mode = to_dlm_mode(ld_mode);
+
+ if (daemon_test)
+ return 0;
+
+ rv = dlm_ls_lock_wait(lmd->dh, mode, lksb, flags,
+ r->name, strlen(r->name),
+ 0, NULL, NULL, NULL);
+ if (rv == -EAGAIN) {
+ /* FIXME: When does this happen? Should something different be done? */
+ log_error("S %s R %s convert_dlm mode %d rv EAGAIN", ls->name, r->name, mode);
+ return -EAGAIN;
+ }
+ if (rv < 0) {
+ log_error("S %s R %s convert_dlm error %d", ls->name, r->name, rv);
+ }
+ return rv;
+}
+
+int lm_unlock_dlm(struct lockspace *ls, struct resource *r,
+ uint32_t r_version, uint32_t lmuf_flags)
+{
+ struct lm_dlm *lmd = (struct lm_dlm *)ls->lm_data;
+ struct rd_dlm *rdd = (struct rd_dlm *)r->lm_data;
+ struct dlm_lksb *lksb = &rdd->lksb;
+ uint32_t flags = 0;
+ int rv;
+
+ log_debug("S %s R %s unlock_dlm r_version %u flags %x",
+ ls->name, r->name, r_version, lmuf_flags);
+
+ /*
+ * Do not set PERSISTENT, because we don't need an orphan
+ * NL lock to protect anything.
+ */
+
+ flags |= LKF_CONVERT;
+
+ if (rdd->vb && r_version && (r->mode == LD_LK_EX)) {
+ if (!rdd->vb->version) {
+ /* first time vb has been written */
+ rdd->vb->version = cpu_to_le16(VAL_BLK_VERSION);
+ }
+ if (r_version)
+ rdd->vb->r_version = cpu_to_le32(r_version);
+ memcpy(lksb->sb_lvbptr, rdd->vb, sizeof(struct val_blk));
+
+ log_debug("S %s R %s unlock_dlm set r_version %u",
+ ls->name, r->name, r_version);
+
+ flags |= LKF_VALBLK;
+ }
+
+ if (daemon_test)
+ return 0;
+
+ rv = dlm_ls_lock_wait(lmd->dh, LKM_NLMODE, lksb, flags,
+ r->name, strlen(r->name),
+ 0, NULL, NULL, NULL);
+ if (rv < 0) {
+ log_error("S %s R %s unlock_dlm error %d", ls->name, r->name, rv);
+ }
+
+ return rv;
+}
+
+/*
+ * This list could be read from dlm_controld via libdlmcontrol,
+ * but it's simpler to get it from sysfs.
+ */
+
+#define DLM_LOCKSPACES_PATH "/sys/kernel/config/dlm/cluster/spaces"
+
+int lm_get_lockspaces_dlm(struct list_head *ls_rejoin)
+{
+ struct lockspace *ls;
+ struct dirent *de;
+ DIR *ls_dir;
+
+ if (!(ls_dir = opendir(DLM_LOCKSPACES_PATH)))
+ return -ECONNREFUSED;
+
+ while ((de = readdir(ls_dir))) {
+ if (de->d_name[0] == '.')
+ continue;
+
+ if (strncmp(de->d_name, LVM_LS_PREFIX, strlen(LVM_LS_PREFIX)))
+ continue;
+
+ if (!(ls = alloc_lockspace())) {
+ closedir(ls_dir);
+ return -ENOMEM;
+ }
+
+ ls->lm_type = LD_LM_DLM;
+ strncpy(ls->name, de->d_name, MAX_NAME);
+ strncpy(ls->vg_name, ls->name + strlen(LVM_LS_PREFIX), MAX_NAME);
+ list_add_tail(&ls->list, ls_rejoin);
+ }
+
+ closedir(ls_dir);
+ return 0;
+}
+
+int lm_is_running_dlm(void)
+{
+ char sys_clustername[MAX_ARGS];
+ int rv;
+
+ memset(sys_clustername, 0, sizeof(sys_clustername));
+
+ rv = read_cluster_name(sys_clustername);
+ if (rv < 0)
+ return 0;
+ return 1;
+}