summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJeff Moyer <jmoyer@redhat.com>2017-12-14 15:40:44 -0500
committerJeff Moyer <jmoyer@redhat.com>2017-12-14 15:40:44 -0500
commita6f4056fbc921382e2be7d09758b6fd37430b484 (patch)
tree35ae51aac44e9ce72aeb716d7fb457d1ddccd64a
parent272ea61121fab2255f4a67770bf2575c2c4723f0 (diff)
downloadlibaio-a6f4056fbc921382e2be7d09758b6fd37430b484.tar.gz
Add support for preadv2/pwritev2
preadv2 and pwritev2 allow the caller to specify per-io flags. The main driver for this support in libaio is RWF_NOWAIT, which allows applications to specify that they don't want the submission thread to block waiting for things such as page cache invalidation, block instantiation, etc. This should help reduce unexpected latencies in the submission path. Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
-rw-r--r--harness/cases/21.t176
-rw-r--r--src/libaio.h28
2 files changed, 203 insertions, 1 deletions
diff --git a/harness/cases/21.t b/harness/cases/21.t
new file mode 100644
index 0000000..441eaa8
--- /dev/null
+++ b/harness/cases/21.t
@@ -0,0 +1,176 @@
+/*
+ * Copyright 2017, Red Hat, Inc.
+ *
+ * Test RWF_NOWAIT.
+ *
+ * RWF_NOWAIT will cause -EAGAIN to be returned in the io_event for
+ * any I/O that cannot be serviced without blocking the submission
+ * thread. Instances covered by the kernel at the time this test was
+ * written include:
+ * - O_DIRECT I/O to a file offset that has populated page cache pages
+ * - the submission context cannot obtain the inode lock
+ * - space allocation is necessary
+ * - we need to wait for other I/O (e.g. in the misaligned I/O case)
+ * - ...
+ *
+ * The easiest of these to test is direct I/O to a file offset that
+ * already has populated page cache pages (here, an O_DIRECT read
+ * issued right after a buffered write). We also test to ensure that
+ * we can perform I/O in the absence of the above conditions.
+ *
+ * Author: Jeff Moyer <jmoyer@redhat.com>
+ */
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <signal.h>
+#include <sched.h>
+#include <libaio.h>
+
+/* mkstemp() template used for the scratch file created by this test. */
+#define TEMPLATE "21.XXXXXX"
+/* Size in bytes of the I/O buffer and of every read/write issued below. */
+#define BUFLEN 4096
+
+/* Provide RWF_NOWAIT when the included headers do not define it. */
+#ifndef RWF_NOWAIT
+#define RWF_NOWAIT 0x00000008
+#endif
+
+/*
+ * Create a scratch file from TEMPLATE with mkstemp() and unlink its
+ * name immediately; the caller works only with the descriptor.
+ *
+ * Returns the open descriptor, or -1 (after perror()) if mkstemp() fails.
+ */
+int
+open_temp_file()
+{
+	/* mkstemp() rewrites the template in place, so it needs a writable copy. */
+	char path[] = TEMPLATE;
+	int tmpfd = mkstemp(path);
+
+	if (tmpfd >= 0) {
+		unlink(path);
+		return tmpfd;
+	}
+
+	perror("mkstemp");
+	return -1;
+}
+
+/*
+ * test_main - exercise RWF_NOWAIT through io_prep_preadv2().
+ *
+ * Steps:
+ *   1. Buffered write of BUFLEN bytes at offset 0 of a scratch file.
+ *   2. Switch the descriptor to O_DIRECT and submit an RWF_NOWAIT
+ *      vectored read of the same range; the completion is expected to
+ *      carry -EAGAIN.
+ *   3. Issue a synchronous O_DIRECT pwrite() over the range, then
+ *      resubmit the same iocb; it is expected to complete with BUFLEN.
+ *
+ * Returns 0 on success, and also when io_submit() fails with -EINVAL
+ * (treated as "RWF_NOWAIT not supported"); returns 1 on any failure.
+ * The file descriptor and the aio context are released on every path.
+ */
+int
+test_main()
+{
+	int fd, flags;
+	int ret;
+	int rc = 1;	/* pessimistic default; set to 0 only on success/skip */
+	io_context_t ctx;
+	struct iocb iocb, *iocbp = &iocb;
+	struct io_event event;
+	char buf[BUFLEN] __attribute__((aligned (4096)));
+	struct iovec iov;
+
+	fd = open_temp_file();
+	if (fd < 0)
+		return 1;
+
+	memset(&ctx, 0, sizeof(ctx));
+	ret = io_setup(1, &ctx);
+	if (ret != 0) {
+		fprintf(stderr, "io_setup failed with %d\n", ret);
+		goto out_close;
+	}
+
+	/*
+	 * Perform a buffered write to a file.  This instantiates the
+	 * block and adds the page to the page cache.
+	 */
+	memset(buf, 0xa, BUFLEN);
+	ret = write(fd, buf, BUFLEN);
+	if (ret != BUFLEN) {
+		perror("write");
+		goto out_destroy;
+	}
+
+	/*
+	 * Now attempt an aio/dio preadv2 with the RWF_NOWAIT flag
+	 * set.
+	 */
+	flags = fcntl(fd, F_GETFL);
+	if (flags < 0) {
+		perror("fcntl");
+		goto out_destroy;
+	}
+	ret = fcntl(fd, F_SETFL, flags | O_DIRECT);
+	if (ret != 0) {
+		perror("fcntl");
+		goto out_destroy;
+	}
+
+	memset(buf, 0, BUFLEN);
+	iov.iov_base = buf;
+	iov.iov_len = BUFLEN;
+	io_prep_preadv2(&iocb, fd, &iov, 1, 0, RWF_NOWAIT);
+
+	ret = io_submit(ctx, 1, &iocbp);
+
+	/*
+	 * io_submit will return -EINVAL if RWF_NOWAIT is not supported.
+	 */
+	if (ret != 1) {
+		if (ret == -EINVAL) {
+			fprintf(stderr, "RWF_NOWAIT not supported by kernel.\n");
+			/* just return success */
+			rc = 0;
+			goto out_destroy;
+		}
+		errno = -ret;
+		perror("io_submit");
+		goto out_destroy;
+	}
+
+	ret = io_getevents(ctx, 1, 1, &event, NULL);
+	if (ret != 1) {
+		errno = -ret;
+		perror("io_getevents");
+		goto out_destroy;
+	}
+
+	/*
+	 * We expect -EAGAIN due to the existence of a page cache page
+	 * for the file system block we are accessing.  event.res is
+	 * unsigned, so view it as signed for the compare and the message.
+	 */
+	if ((long)event.res != -EAGAIN) {
+		fprintf(stderr, "Expected -EAGAIN, got %ld\n", (long)event.res);
+		goto out_destroy;
+	}
+
+	/*
+	 * An O_DIRECT write to the page will force the page out of the
+	 * page cache, allowing the subsequent RWF_NOWAIT I/O to complete.
+	 */
+	ret = pwrite(fd, buf, BUFLEN, 0);
+	if (ret != BUFLEN) {
+		perror("pwrite");
+		goto out_destroy;
+	}
+
+	/*
+	 * Now retry the RWF_NOWAIT I/O.  This should succeed.
+	 */
+	ret = io_submit(ctx, 1, &iocbp);
+	if (ret != 1) {
+		errno = -ret;
+		perror("io_submit");
+		goto out_destroy;
+	}
+
+	ret = io_getevents(ctx, 1, 1, &event, NULL);
+	if (ret != 1) {
+		errno = -ret;
+		perror("io_getevents");
+		goto out_destroy;
+	}
+
+	if (event.res != BUFLEN) {
+		fprintf(stderr, "Expected %d, got %ld\n", BUFLEN, (long)event.res);
+		goto out_destroy;
+	}
+
+	rc = 0;
+
+out_destroy:
+	io_destroy(ctx);
+out_close:
+	close(fd);
+	return rc;
+}
+/*
+ * Local variables:
+ * mode: c
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/libaio.h b/src/libaio.h
index 4a4e0f5..a5cd2e1 100644
--- a/src/libaio.h
+++ b/src/libaio.h
@@ -119,7 +119,9 @@ struct io_iocb_vector {
struct iocb {
PADDEDptr(void *data, __pad1); /* Return in the io completion event */
- PADDED(unsigned key, __pad2); /* For use in identifying io requests */
+ /* key: For use in identifying io requests */
+ /* aio_rw_flags: RWF_* flags (such as RWF_NOWAIT) */
+ PADDED(unsigned key, aio_rw_flags);
short aio_lio_opcode;
short aio_reqprio;
@@ -210,6 +212,30 @@ static inline void io_prep_pwritev(struct iocb *iocb, int fd, const struct iovec
iocb->u.c.offset = offset;
}
+/*
+ * Fill in @iocb for a vectored read (IO_CMD_PREADV) on @fd at @offset,
+ * using the @iovcnt segments in @iov, with the per-I/O RWF_* @flags
+ * stored in aio_rw_flags.  The iocb is zeroed first, so any field not
+ * assigned here is left as 0.
+ */
+static inline void io_prep_preadv2(struct iocb *iocb, int fd, const struct iovec *iov, int iovcnt, long long offset, int flags)
+{
+	memset(iocb, 0, sizeof(*iocb));
+	iocb->aio_lio_opcode = IO_CMD_PREADV;
+	iocb->aio_reqprio = 0;
+	iocb->aio_fildes = fd;
+	iocb->aio_rw_flags = flags;
+	iocb->u.c.offset = offset;
+	/* buf carries the iovec array and nbytes the number of entries. */
+	iocb->u.c.buf = (void *)iov;
+	iocb->u.c.nbytes = iovcnt;
+}
+
+/*
+ * Fill in @iocb for a vectored write (IO_CMD_PWRITEV) on @fd at @offset,
+ * using the @iovcnt segments in @iov, with the per-I/O RWF_* @flags
+ * stored in aio_rw_flags.  The iocb is zeroed first, so any field not
+ * assigned here is left as 0.
+ */
+static inline void io_prep_pwritev2(struct iocb *iocb, int fd, const struct iovec *iov, int iovcnt, long long offset, int flags)
+{
+	memset(iocb, 0, sizeof(*iocb));
+	iocb->aio_lio_opcode = IO_CMD_PWRITEV;
+	iocb->aio_reqprio = 0;
+	iocb->aio_fildes = fd;
+	iocb->aio_rw_flags = flags;
+	iocb->u.c.offset = offset;
+	/* buf carries the iovec array and nbytes the number of entries. */
+	iocb->u.c.buf = (void *)iov;
+	iocb->u.c.nbytes = iovcnt;
+}
+
/* Jeff Moyer says this was implemented in Red Hat AS2.1 and RHEL3.
* AFAICT, it was never in mainline, and should not be used. --RR */
static inline void io_prep_poll(struct iocb *iocb, int fd, int events)