summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPetr Rockai <prockai@redhat.com>2013-09-09 00:01:44 +0200
committerPetr Rockai <prockai@redhat.com>2013-09-18 21:17:48 +0200
commit25bed9968155f43ef0b8832383ef711e7ae1685d (patch)
tree56d48f1a5886f5b639c716becccea4f2ba69cfed
parent3df50d822b8064c5536968556c466c50a9a3ace5 (diff)
downloadlvm2-25bed9968155f43ef0b8832383ef711e7ae1685d.tar.gz
clvmd: Avoid a 3-way deadlock in dead-client cleanup.
-rw-r--r--daemons/clvmd/clvmd.c177
-rw-r--r--daemons/clvmd/clvmd.h1
2 files changed, 96 insertions, 82 deletions
diff --git a/daemons/clvmd/clvmd.c b/daemons/clvmd/clvmd.c
index a52b39d5a..bed8e68b3 100644
--- a/daemons/clvmd/clvmd.c
+++ b/daemons/clvmd/clvmd.c
@@ -119,6 +119,7 @@ static void *pre_and_post_thread(void *arg);
static int send_message(void *buf, int msglen, const char *csid, int fd,
const char *errtext);
static int read_from_local_sock(struct local_client *thisfd);
+static int cleanup_zombie(struct local_client *thisfd);
static int process_local_command(struct clvm_header *msg, int msglen,
struct local_client *client,
unsigned short xid);
@@ -698,6 +699,7 @@ static int local_rendezvous_callback(struct local_client *thisfd, char *buf,
newfd->bits.localsock.sent_out = FALSE;
newfd->bits.localsock.threadid = 0;
newfd->bits.localsock.finished = 0;
+ newfd->bits.localsock.cleanup_needed = 0;
newfd->bits.localsock.pipe_client = NULL;
newfd->bits.localsock.private = NULL;
newfd->bits.localsock.all_success = 1;
@@ -889,7 +891,7 @@ static void main_loop(int local_sock, int cmd_timeout)
for (thisfd = &local_client_head; thisfd != NULL;
thisfd = thisfd->next) {
- if (thisfd->removeme) {
+ if (thisfd->removeme && !cleanup_zombie(thisfd)) {
struct local_client *free_fd;
lastfd->next = thisfd->next;
free_fd = thisfd;
@@ -916,7 +918,6 @@ static void main_loop(int local_sock, int cmd_timeout)
/* Got error or EOF: Remove it from the list safely */
if (ret <= 0) {
- struct local_client *free_fd;
int type = thisfd->type;
/* If the cluster socket shuts down, so do we */
@@ -926,12 +927,7 @@ static void main_loop(int local_sock, int cmd_timeout)
DEBUGLOG("ret == %d, errno = %d. removing client\n",
ret, errno);
- lastfd->next = thisfd->next;
- free_fd = thisfd;
- safe_close(&(free_fd->fd));
-
- /* Queue cleanup, this also frees the client struct */
- add_to_lvmqueue(free_fd, NULL, 0, NULL);
+ thisfd->removeme = 1;
break;
}
@@ -1174,6 +1170,95 @@ static void dump_message(char *buf, int len)
}
}
+static int cleanup_zombie(struct local_client *thisfd)
+{
+ int *status;
+
+ if (thisfd->type != LOCAL_SOCK)
+ return 0;
+
+ if (!thisfd->bits.localsock.cleanup_needed)
+ return 0;
+
+ DEBUGLOG("EOF on local socket: inprogress=%d\n",
+ thisfd->bits.localsock.in_progress);
+
+ thisfd->bits.localsock.finished = 1;
+
+ /* If the client went away in mid command then tidy up */
+ if (thisfd->bits.localsock.in_progress) {
+ pthread_kill(thisfd->bits.localsock.threadid, SIGUSR2);
+ if (pthread_mutex_trylock(&thisfd->bits.localsock.mutex))
+ goto bail;
+ thisfd->bits.localsock.state = POST_COMMAND;
+ pthread_cond_signal(&thisfd->bits.localsock.cond);
+ pthread_mutex_unlock(&thisfd->bits.localsock.mutex);
+
+ /* Free any unsent buffers */
+ free_reply(thisfd);
+ }
+
+ /* Kill the subthread & free resources */
+ if (thisfd->bits.localsock.threadid) {
+ DEBUGLOG("Waiting for child thread\n");
+ pthread_mutex_lock(&thisfd->bits.localsock.mutex);
+ thisfd->bits.localsock.state = PRE_COMMAND;
+ pthread_cond_signal(&thisfd->bits.localsock.cond);
+ pthread_mutex_unlock(&thisfd->bits.localsock.mutex);
+
+ if ((errno = pthread_join(thisfd->bits.localsock.threadid,
+ (void **) &status)))
+ log_sys_error("pthread_join", "");
+
+ DEBUGLOG("Joined child thread\n");
+
+ thisfd->bits.localsock.threadid = 0;
+ pthread_cond_destroy(&thisfd->bits.localsock.cond);
+ pthread_mutex_destroy(&thisfd->bits.localsock.mutex);
+
+ /* Remove the pipe client */
+ if (thisfd->bits.localsock.pipe_client != NULL) {
+ struct local_client *newfd;
+ struct local_client *lastfd = NULL;
+ struct local_client *free_fd = NULL;
+
+ (void) close(thisfd->bits.localsock.pipe_client->fd); /* Close pipe */
+ (void) close(thisfd->bits.localsock.pipe);
+
+ /* Remove pipe client */
+ for (newfd = &local_client_head; newfd != NULL;
+ newfd = newfd->next) {
+ if (thisfd->bits.localsock.
+ pipe_client == newfd) {
+ thisfd->bits.localsock.
+ pipe_client = NULL;
+
+ lastfd->next = newfd->next;
+ free_fd = newfd;
+ newfd->next = lastfd;
+ free(free_fd);
+ break;
+ }
+ lastfd = newfd;
+ }
+ }
+ }
+
+ /* Free the command buffer */
+ free(thisfd->bits.localsock.cmd);
+
+ /* Clear out the cross-link */
+ if (thisfd->bits.localsock.pipe_client != NULL)
+ thisfd->bits.localsock.pipe_client->bits.pipe.client =
+ NULL;
+
+ safe_close(&(thisfd->fd));
+ thisfd->bits.localsock.cleanup_needed = 0;
+ return 0;
+bail:
+ return 1;
+}
+
/* Called when we have a read from the local socket.
was in the main loop but it's grown up and is a big girl now */
static int read_from_local_sock(struct local_client *thisfd)
@@ -1204,80 +1289,8 @@ static int read_from_local_sock(struct local_client *thisfd)
/* EOF or error on socket */
if (len <= 0) {
- int *status;
-
- DEBUGLOG("EOF on local socket: inprogress=%d\n",
- thisfd->bits.localsock.in_progress);
-
- thisfd->bits.localsock.finished = 1;
-
- /* If the client went away in mid command then tidy up */
- if (thisfd->bits.localsock.in_progress) {
- pthread_kill(thisfd->bits.localsock.threadid, SIGUSR2);
- pthread_mutex_lock(&thisfd->bits.localsock.mutex);
- thisfd->bits.localsock.state = POST_COMMAND;
- pthread_cond_signal(&thisfd->bits.localsock.cond);
- pthread_mutex_unlock(&thisfd->bits.localsock.mutex);
-
- /* Free any unsent buffers */
- free_reply(thisfd);
- }
-
- /* Kill the subthread & free resources */
- if (thisfd->bits.localsock.threadid) {
- DEBUGLOG("Waiting for child thread\n");
- pthread_mutex_lock(&thisfd->bits.localsock.mutex);
- thisfd->bits.localsock.state = PRE_COMMAND;
- pthread_cond_signal(&thisfd->bits.localsock.cond);
- pthread_mutex_unlock(&thisfd->bits.localsock.mutex);
-
- if ((errno = pthread_join(thisfd->bits.localsock.threadid,
- (void **) &status)))
- log_sys_error("pthread_join", "");
-
- DEBUGLOG("Joined child thread\n");
-
- thisfd->bits.localsock.threadid = 0;
- pthread_cond_destroy(&thisfd->bits.localsock.cond);
- pthread_mutex_destroy(&thisfd->bits.localsock.mutex);
-
- /* Remove the pipe client */
- if (thisfd->bits.localsock.pipe_client != NULL) {
- struct local_client *newfd;
- struct local_client *lastfd = NULL;
- struct local_client *free_fd = NULL;
-
- (void) close(thisfd->bits.localsock.pipe_client->fd); /* Close pipe */
- (void) close(thisfd->bits.localsock.pipe);
-
- /* Remove pipe client */
- for (newfd = &local_client_head; newfd != NULL;
- newfd = newfd->next) {
- if (thisfd->bits.localsock.
- pipe_client == newfd) {
- thisfd->bits.localsock.
- pipe_client = NULL;
-
- lastfd->next = newfd->next;
- free_fd = newfd;
- newfd->next = lastfd;
- free(free_fd);
- break;
- }
- lastfd = newfd;
- }
- }
- }
-
- /* Free the command buffer */
- free(thisfd->bits.localsock.cmd);
-
- /* Clear out the cross-link */
- if (thisfd->bits.localsock.pipe_client != NULL)
- thisfd->bits.localsock.pipe_client->bits.pipe.client =
- NULL;
-
- safe_close(&(thisfd->fd));
+ thisfd->bits.localsock.cleanup_needed = 1;
+ cleanup_zombie(thisfd); /* we ignore errors here */
return 0;
} else {
int comms_pipe[2];
diff --git a/daemons/clvmd/clvmd.h b/daemons/clvmd/clvmd.h
index fb0589d5f..0f837dd65 100644
--- a/daemons/clvmd/clvmd.h
+++ b/daemons/clvmd/clvmd.h
@@ -53,6 +53,7 @@ struct localsock_bits {
int finished; /* Flag to tell subthread to exit */
int all_success; /* Set to 0 if any node (or the pre_command)
failed */
+ int cleanup_needed; /* helper for cleanup_zombie */
struct local_client *pipe_client;
pthread_t threadid;
enum { PRE_COMMAND, POST_COMMAND, QUIT } state;