diff options
author | Keith Bostic <keith@wiredtiger.com> | 2013-05-11 10:15:58 -0400 |
---|---|---|
committer | Keith Bostic <keith@wiredtiger.com> | 2013-05-11 10:15:58 -0400 |
commit | bc674d1a6e42a470958823eae92220630dd09369 (patch) | |
tree | c7afbc60d3fc711f7c42e2b6760825815d914a54 | |
parent | d821e9a54b5725cddb45b307831e66a2ef530523 (diff) | |
download | mongo-bc674d1a6e42a470958823eae92220630dd09369.tar.gz |
Increase the Linux default buffer alignment to 4KB: there are known cases
where I/O to/from buffers with smaller alignment causes reads to go through
the buffer cache, leading to multiple versions of a block in the system.
Add asserts to the read/write calls to ensure we detect misaligned buffers.
-rw-r--r-- | build_posix/configure.ac.in | 4 | ||||
-rw-r--r-- | dist/api_data.py | 5 | ||||
-rw-r--r-- | src/include/wiredtiger.in | 6 | ||||
-rw-r--r-- | src/os_posix/os_rw.c | 12 |
4 files changed, 19 insertions, 8 deletions
diff --git a/build_posix/configure.ac.in b/build_posix/configure.ac.in index 7b469b673ca..1b4afeb30ec 100644 --- a/build_posix/configure.ac.in +++ b/build_posix/configure.ac.in @@ -100,13 +100,13 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <pthread.h>]],[[ AC_C_BIGENDIAN -# Linux requires _GNU_SOURCE to be defined and buffers aligned to 512 byte +# Linux requires _GNU_SOURCE to be defined and buffers aligned to 4KB # boundaries for O_DIRECT to work. BUFFER_ALIGNMENT=0 if test "$ac_cv_func_posix_memalign" = "yes" ; then case "`uname -s`" in Linux) AM_CFLAGS="$AM_CFLAGS -D_GNU_SOURCE" - BUFFER_ALIGNMENT=512 + BUFFER_ALIGNMENT=4096 ;; esac fi diff --git a/dist/api_data.py b/dist/api_data.py index 1b2d1e0e834..efbb1e2108c 100644 --- a/dist/api_data.py +++ b/dist/api_data.py @@ -532,9 +532,8 @@ methods = { 'wiredtiger_open' : Method(connection_runtime_config + [ Config('buffer_alignment', '-1', r''' in-memory alignment (in bytes) for buffers used for I/O. The - default value of -1 indicates that a platform-specific - alignment value should be used (512 bytes on Linux systems, - zero elsewhere)''', + default value of -1 indicates a platform-specific alignment + value should be used (4KB on Linux systems, zero elsewhere)''', min='-1', max='1MB'), Config('checkpoint', '', r''' periodically checkpoint the database''', diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index 24a0598bc0d..fd495f8d914 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -1320,9 +1320,9 @@ struct __wt_connection { * handler is installed that writes error messages to stderr * @configstart{wiredtiger_open, see dist/api_data.py} * @config{buffer_alignment, in-memory alignment (in bytes) for buffers used for - * I/O. The default value of -1 indicates that a platform-specific alignment - * value should be used (512 bytes on Linux systems\, zero elsewhere)., an - * integer between -1 and 1MB; default \c -1.} + * I/O. 
The default value of -1 indicates a platform-specific alignment value + * should be used (4KB on Linux systems\, zero elsewhere)., an integer between + * -1 and 1MB; default \c -1.} * @config{cache_size, maximum heap memory to allocate for the cache. A * database should configure either a cache_size or a shared_cache not both., an * integer between 1MB and 10TB; default \c 100MB.} diff --git a/src/os_posix/os_rw.c b/src/os_posix/os_rw.c index 2b83d961592..2bdde670256 100644 --- a/src/os_posix/os_rw.c +++ b/src/os_posix/os_rw.c @@ -21,6 +21,12 @@ __wt_read(WT_SESSION_IMPL *session, "%s: read %" PRIu32 " bytes at offset %" PRIuMAX, fh->name, bytes, (uintmax_t)offset); + WT_ASSERT(session, /* Assert aligned I/O is aligned. */ + !fh->direct_io || + S2C(session)->buffer_alignment == 0 || + !((uintptr_t)buf & + (uintptr_t)(S2C(session)->buffer_alignment - 1))); + if (pread(fh->fd, buf, (size_t)bytes, offset) != (ssize_t)bytes) WT_RET_MSG(session, __wt_errno(), "%s read error: failed to read %" PRIu32 @@ -44,6 +50,12 @@ __wt_write(WT_SESSION_IMPL *session, "%s: write %" PRIu32 " bytes at offset %" PRIuMAX, fh->name, bytes, (uintmax_t)offset); + WT_ASSERT(session, /* Assert aligned I/O is aligned. */ + !fh->direct_io || + S2C(session)->buffer_alignment == 0 || + !((uintptr_t)buf & + (uintptr_t)(S2C(session)->buffer_alignment - 1))); + if (pwrite(fh->fd, buf, (size_t)bytes, offset) != (ssize_t)bytes) WT_RET_MSG(session, __wt_errno(), "%s write error: failed to write %" PRIu32 |