summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSalvatore Sanfilippo <antirez@gmail.com>2012-05-15 02:16:55 -0700
committerSalvatore Sanfilippo <antirez@gmail.com>2012-05-15 02:16:55 -0700
commit404223547c2b3f7eef0295cf22be4bfdbc63846a (patch)
tree86a0a7f864a5edb5d133e0cf3aeeb060d4821610
parente67d014d9a4a5a7c9dbab3f2ace52f25661e7a64 (diff)
parentd318803f9439e9e4a129727b69c8d5103ae994fd (diff)
downloadredis-404223547c2b3f7eef0295cf22be4bfdbc63846a.tar.gz
Merge pull request #510 from pietern/2.6-eventport
Support for Illumos event ports
-rw-r--r--src/ae.c14
-rw-r--r--src/ae_evport.c289
-rw-r--r--src/config.h7
3 files changed, 305 insertions, 5 deletions
diff --git a/src/ae.c b/src/ae.c
index 668277a78..c58c4b74b 100644
--- a/src/ae.c
+++ b/src/ae.c
@@ -44,13 +44,17 @@
/* Include the best multiplexing layer supported by this system.
* The following should be ordered by performances, descending. */
-#ifdef HAVE_EPOLL
-#include "ae_epoll.c"
+#ifdef HAVE_EVPORT
+#include "ae_evport.c"
#else
- #ifdef HAVE_KQUEUE
- #include "ae_kqueue.c"
+ #ifdef HAVE_EPOLL
+ #include "ae_epoll.c"
#else
- #include "ae_select.c"
+ #ifdef HAVE_KQUEUE
+ #include "ae_kqueue.c"
+ #else
+ #include "ae_select.c"
+ #endif
#endif
#endif
diff --git a/src/ae_evport.c b/src/ae_evport.c
new file mode 100644
index 000000000..5b6602d02
--- /dev/null
+++ b/src/ae_evport.c
@@ -0,0 +1,289 @@
+/* ae.c module for illumos event ports.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Released under the BSD license. See the COPYING file for more info. */
+
+#include <assert.h>
+#include <errno.h>
+#include <port.h>
+#include <poll.h>
+
+#include <sys/types.h>
+#include <sys/time.h>
+
+#include <stdio.h>
+
+static int evport_debug = 0;
+
+/*
+ * This file implements the ae API using event ports, present on Solaris-based
+ * systems since Solaris 10. Using the event port interface, we associate file
+ * descriptors with the port. Each association also includes the set of poll(2)
+ * events that the consumer is interested in (e.g., POLLIN and POLLOUT).
+ *
+ * There's one tricky piece to this implementation: when we return events via
+ * aeApiPoll, the corresponding file descriptors become dissociated from the
+ * port. This is necessary because poll events are level-triggered, so if the
+ * fd didn't become dissociated, it would immediately fire another event since
+ * the underlying state hasn't changed yet. We must reassociate the file
+ * descriptor, but only after we know that our caller has actually read from it.
+ * The ae API does not tell us exactly when that happens, but we do know that
+ * it must happen by the time aeApiPoll is called again. Our solution is to
+ * keep track of the last fds returned by aeApiPoll and reassociate them next
+ * time aeApiPoll is invoked.
+ *
+ * To summarize, in this module, each fd association is EITHER (a) represented
+ * only via the in-kernel assocation OR (b) represented by pending_fds and
+ * pending_masks. (b) is only true for the last fds we returned from aeApiPoll,
+ * and only until we enter aeApiPoll again (at which point we restore the
+ * in-kernel association).
+ */
+#define MAX_EVENT_BATCHSZ 512
+
+typedef struct aeApiState {
+ int portfd; /* event port */
+ int npending; /* # of pending fds */
+ int pending_fds[MAX_EVENT_BATCHSZ]; /* pending fds */
+ int pending_masks[MAX_EVENT_BATCHSZ]; /* pending fds' masks */
+} aeApiState;
+
+static int aeApiCreate(aeEventLoop *eventLoop) {
+ int i;
+ aeApiState *state = zmalloc(sizeof(aeApiState));
+ if (!state) return -1;
+
+ state->portfd = port_create();
+ if (state->portfd == -1) {
+ zfree(state);
+ return -1;
+ }
+
+ state->npending = 0;
+
+ for (i = 0; i < MAX_EVENT_BATCHSZ; i++) {
+ state->pending_fds[i] = -1;
+ state->pending_masks[i] = AE_NONE;
+ }
+
+ eventLoop->apidata = state;
+ return 0;
+}
+
+static void aeApiFree(aeEventLoop *eventLoop) {
+ aeApiState *state = eventLoop->apidata;
+
+ close(state->portfd);
+ zfree(state);
+}
+
+static int aeApiLookupPending(aeApiState *state, int fd) {
+ int i;
+
+ for (i = 0; i < state->npending; i++) {
+ if (state->pending_fds[i] == fd)
+ return (i);
+ }
+
+ return (-1);
+}
+
+/*
+ * Helper function to invoke port_associate for the given fd and mask.
+ */
+static int aeApiAssociate(const char *where, int portfd, int fd, int mask) {
+ int events = 0;
+ int rv, err;
+
+ if (mask & AE_READABLE)
+ events |= POLLIN;
+ if (mask & AE_WRITABLE)
+ events |= POLLOUT;
+
+ if (evport_debug)
+ fprintf(stderr, "%s: port_associate(%d, 0x%x) = ", where, fd, events);
+
+ rv = port_associate(portfd, PORT_SOURCE_FD, fd, events,
+ (void *)(uintptr_t)mask);
+ err = errno;
+
+ if (evport_debug)
+ fprintf(stderr, "%d (%s)\n", rv, rv == 0 ? "no error" : strerror(err));
+
+ if (rv == -1) {
+ fprintf(stderr, "%s: port_associate: %s\n", where, strerror(err));
+
+ if (err == EAGAIN)
+ fprintf(stderr, "aeApiAssociate: event port limit exceeded.");
+ }
+
+ return rv;
+}
+
+static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) {
+ aeApiState *state = eventLoop->apidata;
+ int fullmask, pfd;
+
+ if (evport_debug)
+ fprintf(stderr, "aeApiAddEvent: fd %d mask 0x%x\n", fd, mask);
+
+ /*
+ * Since port_associate's "events" argument replaces any existing events, we
+ * must be sure to include whatever events are already associated when
+ * we call port_associate() again.
+ */
+ fullmask = mask | eventLoop->events[fd].mask;
+ pfd = aeApiLookupPending(state, fd);
+
+ if (pfd != -1) {
+ /*
+ * This fd was recently returned from aeApiPoll. It should be safe to
+ * assume that the consumer has processed that poll event, but we play
+ * it safer by simply updating pending_mask. The fd will be
+ * reassociated as usual when aeApiPoll is called again.
+ */
+ if (evport_debug)
+ fprintf(stderr, "aeApiAddEvent: adding to pending fd %d\n", fd);
+ state->pending_masks[pfd] |= fullmask;
+ return 0;
+ }
+
+ return (aeApiAssociate("aeApiAddEvent", state->portfd, fd, fullmask));
+}
+
+static void aeApiDelEvent(aeEventLoop *eventLoop, int fd, int mask) {
+ aeApiState *state = eventLoop->apidata;
+ int fullmask, pfd;
+
+ if (evport_debug)
+ fprintf(stderr, "del fd %d mask 0x%x\n", fd, mask);
+
+ pfd = aeApiLookupPending(state, fd);
+
+ if (pfd != -1) {
+ if (evport_debug)
+ fprintf(stderr, "deleting event from pending fd %d\n", fd);
+
+ /*
+ * This fd was just returned from aeApiPoll, so it's not currently
+ * associated with the port. All we need to do is update
+ * pending_mask appropriately.
+ */
+ state->pending_masks[pfd] &= ~mask;
+
+ if (state->pending_masks[pfd] == AE_NONE)
+ state->pending_fds[pfd] = -1;
+
+ return;
+ }
+
+ /*
+ * The fd is currently associated with the port. Like with the add case
+ * above, we must look at the full mask for the file descriptor before
+ * updating that association. We don't have a good way of knowing what the
+ * events are without looking into the eventLoop state directly. We rely on
+ * the fact that our caller has already updated the mask in the eventLoop.
+ */
+
+ fullmask = eventLoop->events[fd].mask;
+ if (fullmask == AE_NONE) {
+ /*
+ * We're removing *all* events, so use port_dissociate to remove the
+ * association completely. Failure here indicates a bug.
+ */
+ if (evport_debug)
+ fprintf(stderr, "aeApiDelEvent: port_dissociate(%d)\n", fd);
+
+ if (port_dissociate(state->portfd, PORT_SOURCE_FD, fd) != 0) {
+ perror("aeApiDelEvent: port_dissociate");
+ abort(); /* will not return */
+ }
+ } else if (aeApiAssociate("aeApiDelEvent", state->portfd, fd,
+ fullmask) != 0) {
+ /*
+ * ENOMEM is a potentially transient condition, but the kernel won't
+ * generally return it unless things are really bad. EAGAIN indicates
+ * we've reached an resource limit, for which it doesn't make sense to
+ * retry (counterintuitively). All other errors indicate a bug. In any
+ * of these cases, the best we can do is to abort.
+ */
+ abort(); /* will not return */
+ }
+}
+
+static int aeApiPoll(aeEventLoop *eventLoop, struct timeval *tvp) {
+ aeApiState *state = eventLoop->apidata;
+ struct timespec timeout, *tsp;
+ int mask, i;
+ uint_t nevents;
+ port_event_t event[MAX_EVENT_BATCHSZ];
+
+ /*
+ * If we've returned fd events before, we must reassociate them with the
+ * port now, before calling port_get(). See the block comment at the top of
+ * this file for an explanation of why.
+ */
+ for (i = 0; i < state->npending; i++) {
+ if (state->pending_fds[i] == -1)
+ /* This fd has since been deleted. */
+ continue;
+
+ if (aeApiAssociate("aeApiPoll", state->portfd,
+ state->pending_fds[i], state->pending_masks[i]) != 0) {
+ /* See aeApiDelEvent for why this case is fatal. */
+ abort();
+ }
+
+ state->pending_masks[i] = AE_NONE;
+ state->pending_fds[i] = -1;
+ }
+
+ state->npending = 0;
+
+ if (tvp != NULL) {
+ timeout.tv_sec = tvp->tv_sec;
+ timeout.tv_nsec = tvp->tv_usec * 1000;
+ tsp = &timeout;
+ } else {
+ tsp = NULL;
+ }
+
+ /*
+ * port_getn can return with errno == ETIME having returned some events (!).
+ * So if we get ETIME, we check nevents, too.
+ */
+ nevents = 1;
+ if (port_getn(state->portfd, event, MAX_EVENT_BATCHSZ, &nevents,
+ tsp) == -1 && (errno != ETIME || nevents == 0)) {
+ if (errno == ETIME || errno == EINTR)
+ return 0;
+
+ /* Any other error indicates a bug. */
+ perror("aeApiPoll: port_get");
+ abort();
+ }
+
+ state->npending = nevents;
+
+ for (i = 0; i < nevents; i++) {
+ mask = 0;
+ if (event[i].portev_events & POLLIN)
+ mask |= AE_READABLE;
+ if (event[i].portev_events & POLLOUT)
+ mask |= AE_WRITABLE;
+
+ eventLoop->fired[i].fd = event[i].portev_object;
+ eventLoop->fired[i].mask = mask;
+
+ if (evport_debug)
+ fprintf(stderr, "aeApiPoll: fd %d mask 0x%x\n",
+ (int)event[i].portev_object, mask);
+
+ state->pending_fds[i] = event[i].portev_object;
+ state->pending_masks[i] = (uintptr_t)event[i].portev_user;
+ }
+
+ return nevents;
+}
+
+static char *aeApiName(void) {
+ return "evport";
+}
diff --git a/src/config.h b/src/config.h
index 136fd40c4..28ef37d6e 100644
--- a/src/config.h
+++ b/src/config.h
@@ -38,6 +38,13 @@
#define HAVE_KQUEUE 1
#endif
+#ifdef __sun
+#include <sys/feature_tests.h>
+#ifdef _DTRACE_VERSION
+#define HAVE_EVPORT 1
+#endif
+#endif
+
/* Define aof_fsync to fdatasync() in Linux and fsync() for all the rest */
#ifdef __linux__
#define aof_fsync fdatasync