diff options
author | Jeff Moyer <jmoyer@redhat.com> | 2017-12-14 15:40:44 -0500 |
---|---|---|
committer | Jeff Moyer <jmoyer@redhat.com> | 2017-12-14 15:40:44 -0500 |
commit | a6f4056fbc921382e2be7d09758b6fd37430b484 (patch) | |
tree | 35ae51aac44e9ce72aeb716d7fb457d1ddccd64a | |
parent | 272ea61121fab2255f4a67770bf2575c2c4723f0 (diff) | |
download | libaio-a6f4056fbc921382e2be7d09758b6fd37430b484.tar.gz |
Add support for preadv2/pwritev2
preadv2 and pwritev2 allow the caller to specify per-io flags.
The main driver for this support in libaio is RWF_NOWAIT, which
allows applications to specify that they don't want the submission
thread to block waiting for things such as page cache invalidation,
block instantiation, etc. This should help reduce unexpected
latencies in the submission path.
Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
-rw-r--r-- | harness/cases/21.t | 176 | ||||
-rw-r--r-- | src/libaio.h | 28 |
2 files changed, 203 insertions, 1 deletions
```diff
diff --git a/harness/cases/21.t b/harness/cases/21.t
new file mode 100644
index 0000000..441eaa8
--- /dev/null
+++ b/harness/cases/21.t
@@ -0,0 +1,176 @@
+/*
+ * Copyright 2017, Red Hat, Inc.
+ *
+ * Test RWF_NOWAIT.
+ *
+ * RWF_NOWAIT will cause -EAGAIN to be returned in the io_event for
+ * any I/O that cannot be serviced without blocking the submission
+ * thread. Instances covered by the kernel at the time this test was
+ * written include:
+ * - O_DIRECT I/O to a file offset that has populated page cache pages
+ * - the submission context cannot obtain the inode lock
+ * - space allocation is necessary
+ * - we need to wait for other I/O (e.g. in the misaligned I/O case)
+ * - ...
+ *
+
+ * The easiest of these to test is that a direct I/O is writing to a
+ * file offset with populated page cache. We also test to ensure that
+ * we can perform I/O in the absence of the above conditions.
+ *
+ * Author: Jeff Moyer <jmoyer@redhat.com>
+ */
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <signal.h>
+#include <sched.h>
+#include <libaio.h>
+
+#define TEMPLATE "21.XXXXXX"
+#define BUFLEN 4096
+
+#ifndef RWF_NOWAIT
+#define RWF_NOWAIT 0x00000008
+#endif
+
+int
+open_temp_file()
+{
+	int fd;
+	char temp_file[sizeof(TEMPLATE)];
+
+	strncpy(temp_file, TEMPLATE, sizeof(TEMPLATE));
+	fd = mkstemp(temp_file);
+	if (fd < 0) {
+		perror("mkstemp");
+		return -1;
+	}
+	unlink(temp_file);
+	return fd;
+}
+
+int
+test_main()
+{
+	int fd, flags;
+	int ret;
+	io_context_t ctx;
+	struct iocb iocb, *iocbp = &iocb;
+	struct io_event event;
+	char buf[BUFLEN] __attribute__((aligned (4096)));
+	struct iovec iov;
+
+	fd = open_temp_file();
+	if (fd < 0)
+		return 1;
+
+	memset(&ctx, 0, sizeof(ctx));
+	ret = io_setup(1, &ctx);
+	if (ret != 0) {
+		fprintf(stderr, "io_setup failed with %d\n", ret);
+		return 1;
+	}
+
+	/*
+	 * Perform a buffered write to a file. This instantiates the
+	 * block and adds the page to the page cache.
+	 */
+	memset(buf, 0xa, BUFLEN);
+	ret = write(fd, buf, BUFLEN);
+	if (ret != BUFLEN) {
+		perror("write");
+		return 1;
+	}
+
+	/*
+	 * Now attempt an aio/dio pwritev2 with the RWF_NONBLOCK flag
+	 * set.
+	 */
+	flags = fcntl(fd, F_GETFL);
+	ret = fcntl(fd, F_SETFL, flags | O_DIRECT);
+	if (ret != 0) {
+		perror("fcntl");
+		return 1;
+	}
+
+	memset(buf, 0, BUFLEN);
+	iov.iov_base = buf;
+	iov.iov_len = BUFLEN;
+	io_prep_preadv2(&iocb, fd, &iov, 1, 0, RWF_NOWAIT);
+
+	ret = io_submit(ctx, 1, &iocbp);
+
+	/*
+	 * io_submit will return -EINVAL if RWF_NOWAIT is not supported.
+	 */
+	if (ret != 1) {
+		if (ret == -EINVAL) {
+			fprintf(stderr, "RWF_NOWAIT not supported by kernel.\n");
+			/* just return success */
+			return 0;
+		}
+		errno = -ret;
+		perror("io_submit");
+		return 1;
+	}
+
+	ret = io_getevents(ctx, 1, 1, &event, NULL);
+	if (ret != 1) {
+		errno = -ret;
+		perror("io_getevents");
+		return 1;
+	}
+
+	/*
+	 * We expect -EAGAIN due to the existence of a page cache page
+	 * for the file system block we are writing.
+	 */
+	if (event.res != -EAGAIN) {
+		fprintf(stderr, "Expected -EAGAIN, got %lu\n", event.res);
+		return 1;
+	}
+
+	/*
+	 * An O_DIRECT write to the page will force the page out of the
+	 * page cache, allowing the subsequent RWF_NOWAIT I/O to complete.
+	 */
+	ret = pwrite(fd, buf, BUFLEN, 0);
+	if (ret != BUFLEN) {
+		perror("write");
+		return 1;
+	}
+
+	/*
+	 * Now retry the RWF_NOWAIT I/O. This should succeed.
+	 */
+	ret = io_submit(ctx, 1, &iocbp);
+	if (ret != 1) {
+		errno = -ret;
+		perror("io_submit");
+		return 1;
+	}
+
+	ret = io_getevents(ctx, 1, 1, &event, NULL);
+	if (ret != 1) {
+		errno = -ret;
+		perror("io_getevents");
+		return 1;
+	}
+
+	if (event.res != BUFLEN) {
+		fprintf(stderr, "Expected %d, got %lu\n", BUFLEN, event.res);
+		return 1;
+	}
+
+	return 0;
+}
+/*
+ * Local variables:
+ * mode: c
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/libaio.h b/src/libaio.h
index 4a4e0f5..a5cd2e1 100644
--- a/src/libaio.h
+++ b/src/libaio.h
@@ -119,7 +119,9 @@ struct io_iocb_vector {
 
 struct iocb {
 	PADDEDptr(void *data, __pad1); /* Return in the io completion event */
-	PADDED(unsigned key, __pad2); /* For use in identifying io requests */
+	/* key: For use in identifying io requests */
+	/* aio_rw_flags: RWF_* flags (such as RWF_NOWAIT) */
+	PADDED(unsigned key, aio_rw_flags);
 
 	short aio_lio_opcode;
 	short aio_reqprio;
@@ -210,6 +212,30 @@ static inline void io_prep_pwritev(struct iocb *iocb, int fd, const struct iovec
 	iocb->u.c.offset = offset;
 }
 
+static inline void io_prep_preadv2(struct iocb *iocb, int fd, const struct iovec *iov, int iovcnt, long long offset, int flags)
+{
+	memset(iocb, 0, sizeof(*iocb));
+	iocb->aio_fildes = fd;
+	iocb->aio_lio_opcode = IO_CMD_PREADV;
+	iocb->aio_reqprio = 0;
+	iocb->aio_rw_flags = flags;
+	iocb->u.c.buf = (void *)iov;
+	iocb->u.c.nbytes = iovcnt;
+	iocb->u.c.offset = offset;
+}
+
+static inline void io_prep_pwritev2(struct iocb *iocb, int fd, const struct iovec *iov, int iovcnt, long long offset, int flags)
+{
+	memset(iocb, 0, sizeof(*iocb));
+	iocb->aio_fildes = fd;
+	iocb->aio_lio_opcode = IO_CMD_PWRITEV;
+	iocb->aio_reqprio = 0;
+	iocb->aio_rw_flags = flags;
+	iocb->u.c.buf = (void *)iov;
+	iocb->u.c.nbytes = iovcnt;
+	iocb->u.c.offset = offset;
+}
+
 /* Jeff Moyer says this was implemented in Red Hat AS2.1 and RHEL3.
  * AFAICT, it was never in mainline, and should not be used. --RR */
 static inline void io_prep_poll(struct iocb *iocb, int fd, int events)
```