summary refs log tree commit diff
diff options
context:
space:
mode:
author Alex Gorrod <alexg@wiredtiger.com> 2013-05-29 04:07:10 -0700
committer Alex Gorrod <alexg@wiredtiger.com> 2013-05-29 04:07:10 -0700
commit 2f7a8b3cf8db7304d105a1bf2034874c1901d66e (patch)
tree 8c21306902309c4ffd43388fca0973ee2f5a0d04
parent 5ceb0ac461912f33a7dc0b7bed9bfeb6346588b2 (diff)
parent 663eeb52fd4aba7a9f0a94248c254269e2debe36 (diff)
download mongo-2f7a8b3cf8db7304d105a1bf2034874c1901d66e.tar.gz
Merge pull request #538 from wiredtiger/warm-cache-compress
Preload internal pages into file system cache with compression.
-rw-r--r-- dist/stat_data.py 1
-rw-r--r-- src/block/block_mgr.c 2
-rw-r--r-- src/block/block_read.c 51
-rw-r--r-- src/btree/bt_handle.c 42
-rw-r--r-- src/include/block.h 1
-rw-r--r-- src/include/extern.h 4
-rw-r--r-- src/include/stat.h 1
-rw-r--r-- src/include/wiredtiger.in 104
-rw-r--r-- src/support/stat.c 2
9 files changed, 127 insertions, 81 deletions
diff --git a/dist/stat_data.py b/dist/stat_data.py
index 2fc0373a6a3..be5efbe1caa 100644
--- a/dist/stat_data.py
+++ b/dist/stat_data.py
@@ -47,6 +47,7 @@ connection_stats = [
Stat('block_byte_read', 'bytes read by the block manager'),
Stat('block_byte_write', 'bytes written by the block manager'),
Stat('block_map_read', 'mapped blocks read by the block manager'),
+ Stat('block_preload', 'blocks pre-loaded by the block manager'),
Stat('block_read', 'blocks read by the block manager'),
Stat('block_write', 'blocks written by the block manager'),
diff --git a/src/block/block_mgr.c b/src/block/block_mgr.c
index f91a3079c3c..46bc223f70c 100644
--- a/src/block/block_mgr.c
+++ b/src/block/block_mgr.c
@@ -328,6 +328,7 @@ __bm_method_set(WT_BM *bm, int readonly)
(WT_BM *, WT_SESSION_IMPL *, int, int *))__bm_readonly;
bm->free = (int (*)(WT_BM *,
WT_SESSION_IMPL *, const uint8_t *, uint32_t))__bm_readonly;
+ bm->preload = __wt_bm_preload;
bm->read = __wt_bm_read;
bm->salvage_end = (int (*)
(WT_BM *, WT_SESSION_IMPL *))__bm_readonly;
@@ -358,6 +359,7 @@ __bm_method_set(WT_BM *bm, int readonly)
bm->compact_page_skip = __bm_compact_page_skip;
bm->compact_skip = __bm_compact_skip;
bm->free = __bm_free;
+ bm->preload = __wt_bm_preload;
bm->read = __wt_bm_read;
bm->salvage_end = __bm_salvage_end;
bm->salvage_next = __bm_salvage_next;
diff --git a/src/block/block_read.c b/src/block/block_read.c
index bfb39dc567d..7e50435aac6 100644
--- a/src/block/block_read.c
+++ b/src/block/block_read.c
@@ -8,6 +8,52 @@
#include "wt_internal.h"
/*
+ * __wt_bm_preload --
+ * Pre-load a page: ask for the block referenced by an address cookie to
+ * be brought in ahead of use, without handing its contents back to the
+ * caller (there is no output buffer parameter).
+ *
+ * The cookie is decoded into an offset, size and checksum. If the file is
+ * mapped and the block lies inside the mapping, the mapped range is passed
+ * to __wt_mmap_preload. Otherwise, when HAVE_POSIX_FADVISE is defined,
+ * posix_fadvise(POSIX_FADV_WILLNEED) is issued on the block's file
+ * descriptor and a non-zero return is reported with a message; when it is
+ * not defined, the block is read into a scratch buffer that is freed
+ * straight after the read.
+ *
+ * addr_size is unused. The block_preload statistic is incremented only
+ * after the chosen path completes; the function then returns 0. Failures
+ * are passed back through the WT_RET/WT_RET_MSG macros.
+ */
+int
+__wt_bm_preload(WT_BM *bm,
+ WT_SESSION_IMPL *session, const uint8_t *addr, uint32_t addr_size)
+{
+ WT_BLOCK *block;
+ WT_DECL_RET;
+ off_t offset;
+ uint32_t cksum, size;
+ int mapped;
+
+ WT_UNUSED(addr_size);
+ block = bm->block;
+
+ /* Crack the cookie: decode addr into offset, size and checksum. */
+ WT_RET(__wt_block_buffer_to_addr(block, addr, &offset, &size, &cksum));
+
+ /*
+ * Check for a mapped block: a mapping must exist and the whole block
+ * (offset + size) must fall within its length.
+ */
+ mapped = bm->map != NULL && offset + size <= (off_t)bm->maplen;
+ if (mapped)
+ WT_RET(__wt_mmap_preload(
+ session, (uint8_t *)bm->map + offset, size));
+ else {
+#ifdef HAVE_POSIX_FADVISE
+ if ((ret = posix_fadvise(block->fh->fd,
+ (off_t)offset, (off_t)size, POSIX_FADV_WILLNEED)) != 0)
+ WT_RET_MSG(
+ session, ret, "%s: posix_fadvise", block->name);
+#else
+ WT_DECL_ITEM(tmp);
+ WT_RET(__wt_scr_alloc(session, size, &tmp));
+ ret = __wt_block_read_off(
+ session, block, tmp, offset, size, cksum);
+ __wt_scr_free(&tmp);
+ WT_RET(ret);
+#endif
+ }
+
+ WT_CSTAT_INCR(session, block_preload);
+
+ return (0);
+}
+
+/*
* __wt_bm_read --
* Map or read address cookie referenced block into a buffer.
*/
@@ -16,9 +62,9 @@ __wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session,
WT_ITEM *buf, const uint8_t *addr, uint32_t addr_size)
{
WT_BLOCK *block;
- off_t offset;
- uint32_t size, cksum;
int mapped;
+ off_t offset;
+ uint32_t cksum, size;
WT_UNUSED(addr_size);
block = bm->block;
@@ -48,7 +94,6 @@ __wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session,
buf->data = buf->mem;
buf->size = size;
F_SET(buf, WT_ITEM_MAPPED);
-
WT_RET(__wt_mmap_preload(session, buf->mem, buf->size));
WT_CSTAT_INCR(session, block_map_read);
diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c
index 6ba3ec198f1..f8f58221c18 100644
--- a/src/btree/bt_handle.c
+++ b/src/btree/bt_handle.c
@@ -10,8 +10,8 @@
static int __btree_conf(WT_SESSION_IMPL *, WT_CKPT *ckpt);
static int __btree_get_last_recno(WT_SESSION_IMPL *);
static int __btree_page_sizes(WT_SESSION_IMPL *);
+static int __btree_preload(WT_SESSION_IMPL *);
static int __btree_tree_open_empty(WT_SESSION_IMPL *, int);
-static int __btree_warm_cache(WT_SESSION_IMPL *);
static int pse1(WT_SESSION_IMPL *, const char *, uint32_t, uint32_t);
static int pse2(WT_SESSION_IMPL *, const char *, uint32_t, uint32_t, uint32_t);
@@ -108,7 +108,7 @@ __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[])
session, root_addr, root_addr_size));
/* Warm the cache, if possible. */
- WT_ERR(__btree_warm_cache(session));
+ WT_ERR(__btree_preload(session));
/* Get the last record number in a column-store file. */
if (btree->type != BTREE_ROW)
@@ -490,39 +490,27 @@ __wt_btree_evictable(WT_SESSION_IMPL *session, int on)
}
/*
- * __btree_warm_cache --
- * Pre-load internal pages from a checkpoint.
+ * __btree_preload --
+ * Pre-load internal pages: for each child reference of the root page,
+ * look up its address cookie and pass it to the block manager's preload
+ * method. Returns 0 once every reference has been handed off; a preload
+ * failure is passed back through WT_RET.
*/
static int
-__btree_warm_cache(WT_SESSION_IMPL *session)
+__btree_preload(WT_SESSION_IMPL *session)
{
- WT_BTREE *btree;
WT_BM *bm;
- WT_PAGE *page;
+ WT_BTREE *btree;
+ WT_REF *ref;
+ uint32_t addr_size, i;
+ const uint8_t *addr;
btree = S2BT(session);
bm = btree->bm;
- if (bm->map == NULL || btree->compressor != NULL)
- return (0);
-
- /*
- * If the file is memory mapped, find the first leaf page. Assuming
- * the file was created with a bulk load, the internal pages will be at
- * the end of the file, starting with the parent of the left-most
- * child, ending with the root.
- */
- page = NULL;
- WT_RET(__wt_tree_walk(session, &page, 0));
- if (page == NULL)
- return (WT_NOTFOUND);
-
- if (page->parent->dsk < btree->root_page->dsk)
- WT_RET(__wt_mmap_preload(
- session, page->parent->dsk,
- WT_PTRDIFF(btree->root_page->dsk, page->parent->dsk)));
-
- return (__wt_page_release(session, page));
+ /* Pre-load the second-level internal pages (the root page's children). */
+ WT_REF_FOREACH(btree->root_page, ref, i) {
+ __wt_get_addr(btree->root_page, ref, &addr, &addr_size);
+ WT_RET(bm->preload(bm, session, addr, addr_size));
+ }
+ return (0);
}
/*
diff --git a/src/include/block.h b/src/include/block.h
index e41064d7134..277f6e871c4 100644
--- a/src/include/block.h
+++ b/src/include/block.h
@@ -151,6 +151,7 @@ struct __wt_bm {
(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, uint32_t, int *);
int (*compact_skip)(WT_BM *, WT_SESSION_IMPL *, int, int *);
int (*free)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, uint32_t);
+ int (*preload)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, uint32_t);
int (*read)
(WT_BM *, WT_SESSION_IMPL *, WT_ITEM *, const uint8_t *, uint32_t);
int (*salvage_end)(WT_BM *, WT_SESSION_IMPL *);
diff --git a/src/include/extern.h b/src/include/extern.h
index 656173e6e06..244cbc22ad5 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -149,6 +149,10 @@ extern int __wt_desc_init(WT_SESSION_IMPL *session,
extern void __wt_block_stat(WT_SESSION_IMPL *session,
WT_BLOCK *block,
WT_DSRC_STATS *stats);
+extern int __wt_bm_preload(WT_BM *bm,
+ WT_SESSION_IMPL *session,
+ const uint8_t *addr,
+ uint32_t addr_size);
extern int __wt_bm_read(WT_BM *bm,
WT_SESSION_IMPL *session,
WT_ITEM *buf,
diff --git a/src/include/stat.h b/src/include/stat.h
index e6e598f5b19..83083874319 100644
--- a/src/include/stat.h
+++ b/src/include/stat.h
@@ -200,6 +200,7 @@ struct __wt_connection_stats {
WT_STATS block_byte_read;
WT_STATS block_byte_write;
WT_STATS block_map_read;
+ WT_STATS block_preload;
WT_STATS block_read;
WT_STATS block_write;
WT_STATS cache_bytes_dirty;
diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in
index fbe181b0d5c..2005d208d44 100644
--- a/src/include/wiredtiger.in
+++ b/src/include/wiredtiger.in
@@ -2252,108 +2252,110 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_BLOCK_BYTE_WRITE 2
/*! mapped blocks read by the block manager */
#define WT_STAT_CONN_BLOCK_MAP_READ 3
+/*! blocks pre-loaded by the block manager */
+#define WT_STAT_CONN_BLOCK_PRELOAD 4
/*! blocks read by the block manager */
-#define WT_STAT_CONN_BLOCK_READ 4
+#define WT_STAT_CONN_BLOCK_READ 5
/*! blocks written by the block manager */
-#define WT_STAT_CONN_BLOCK_WRITE 5
+#define WT_STAT_CONN_BLOCK_WRITE 6
/*! cache: tracked dirty bytes in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_DIRTY 6
+#define WT_STAT_CONN_CACHE_BYTES_DIRTY 7
/*! cache: bytes currently in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_INUSE 7
+#define WT_STAT_CONN_CACHE_BYTES_INUSE 8
/*! cache: maximum bytes configured */
-#define WT_STAT_CONN_CACHE_BYTES_MAX 8
+#define WT_STAT_CONN_CACHE_BYTES_MAX 9
/*! cache: bytes read into cache */
-#define WT_STAT_CONN_CACHE_BYTES_READ 9
+#define WT_STAT_CONN_CACHE_BYTES_READ 10
/*! cache: bytes written from cache */
-#define WT_STAT_CONN_CACHE_BYTES_WRITE 10
+#define WT_STAT_CONN_CACHE_BYTES_WRITE 11
/*! cache: checkpoint blocked page eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_CHECKPOINT 11
+#define WT_STAT_CONN_CACHE_EVICTION_CHECKPOINT 12
/*! cache: unmodified pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 12
+#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 13
/*! cache: modified pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 13
+#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 14
/*! cache: pages selected for eviction unable to be evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_FAIL 14
+#define WT_STAT_CONN_CACHE_EVICTION_FAIL 15
/*! cache: hazard pointer blocked page eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 15
+#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 16
/*! cache: internal pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 16
+#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 17
/*! cache: internal page merge operations completed */
-#define WT_STAT_CONN_CACHE_EVICTION_MERGE 17
+#define WT_STAT_CONN_CACHE_EVICTION_MERGE 18
/*! cache: internal page merge attempts that could not complete */
-#define WT_STAT_CONN_CACHE_EVICTION_MERGE_FAIL 18
+#define WT_STAT_CONN_CACHE_EVICTION_MERGE_FAIL 19
/*! cache: internal levels merged */
-#define WT_STAT_CONN_CACHE_EVICTION_MERGE_LEVELS 19
+#define WT_STAT_CONN_CACHE_EVICTION_MERGE_LEVELS 20
/*! cache: eviction server unable to reach eviction goal */
-#define WT_STAT_CONN_CACHE_EVICTION_SLOW 20
+#define WT_STAT_CONN_CACHE_EVICTION_SLOW 21
/*! cache: pages walked for eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_WALK 21
+#define WT_STAT_CONN_CACHE_EVICTION_WALK 22
/*! cache: tracked dirty pages in the cache */
-#define WT_STAT_CONN_CACHE_PAGES_DIRTY 22
+#define WT_STAT_CONN_CACHE_PAGES_DIRTY 23
/*! cache: pages currently held in the cache */
-#define WT_STAT_CONN_CACHE_PAGES_INUSE 23
+#define WT_STAT_CONN_CACHE_PAGES_INUSE 24
/*! cache: pages read into cache */
-#define WT_STAT_CONN_CACHE_READ 24
+#define WT_STAT_CONN_CACHE_READ 25
/*! cache: pages written from cache */
-#define WT_STAT_CONN_CACHE_WRITE 25
+#define WT_STAT_CONN_CACHE_WRITE 26
/*! pthread mutex condition wait calls */
-#define WT_STAT_CONN_COND_WAIT 26
+#define WT_STAT_CONN_COND_WAIT 27
/*! cursor creation */
-#define WT_STAT_CONN_CURSOR_CREATE 27
+#define WT_STAT_CONN_CURSOR_CREATE 28
/*! Btree cursor insert calls */
-#define WT_STAT_CONN_CURSOR_INSERT 28
+#define WT_STAT_CONN_CURSOR_INSERT 29
/*! Btree cursor next calls */
-#define WT_STAT_CONN_CURSOR_NEXT 29
+#define WT_STAT_CONN_CURSOR_NEXT 30
/*! Btree cursor prev calls */
-#define WT_STAT_CONN_CURSOR_PREV 30
+#define WT_STAT_CONN_CURSOR_PREV 31
/*! Btree cursor remove calls */
-#define WT_STAT_CONN_CURSOR_REMOVE 31
+#define WT_STAT_CONN_CURSOR_REMOVE 32
/*! Btree cursor reset calls */
-#define WT_STAT_CONN_CURSOR_RESET 32
+#define WT_STAT_CONN_CURSOR_RESET 33
/*! Btree cursor search calls */
-#define WT_STAT_CONN_CURSOR_SEARCH 33
+#define WT_STAT_CONN_CURSOR_SEARCH 34
/*! Btree cursor search near calls */
-#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 34
+#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 35
/*! Btree cursor update calls */
-#define WT_STAT_CONN_CURSOR_UPDATE 35
+#define WT_STAT_CONN_CURSOR_UPDATE 36
/*! files currently open */
-#define WT_STAT_CONN_FILE_OPEN 36
+#define WT_STAT_CONN_FILE_OPEN 37
/*! rows merged in an LSM tree */
-#define WT_STAT_CONN_LSM_ROWS_MERGED 37
+#define WT_STAT_CONN_LSM_ROWS_MERGED 38
/*! total heap memory allocations */
-#define WT_STAT_CONN_MEMORY_ALLOCATION 38
+#define WT_STAT_CONN_MEMORY_ALLOCATION 39
/*! total heap memory frees */
-#define WT_STAT_CONN_MEMORY_FREE 39
+#define WT_STAT_CONN_MEMORY_FREE 40
/*! total heap memory re-allocations */
-#define WT_STAT_CONN_MEMORY_GROW 40
+#define WT_STAT_CONN_MEMORY_GROW 41
/*! total read I/Os */
-#define WT_STAT_CONN_READ_IO 41
+#define WT_STAT_CONN_READ_IO 42
/*! page reconciliation calls */
-#define WT_STAT_CONN_REC_PAGES 42
+#define WT_STAT_CONN_REC_PAGES 43
/*! page reconciliation calls for eviction */
-#define WT_STAT_CONN_REC_PAGES_EVICTION 43
+#define WT_STAT_CONN_REC_PAGES_EVICTION 44
/*! reconciliation failed because an update could not be included */
-#define WT_STAT_CONN_REC_SKIPPED_UPDATE 44
+#define WT_STAT_CONN_REC_SKIPPED_UPDATE 45
/*! pthread mutex shared lock read-lock calls */
-#define WT_STAT_CONN_RWLOCK_READ 45
+#define WT_STAT_CONN_RWLOCK_READ 46
/*! pthread mutex shared lock write-lock calls */
-#define WT_STAT_CONN_RWLOCK_WRITE 46
+#define WT_STAT_CONN_RWLOCK_WRITE 47
/*! open cursor count */
-#define WT_STAT_CONN_SESSION_CURSOR_OPEN 47
+#define WT_STAT_CONN_SESSION_CURSOR_OPEN 48
/*! ancient transactions */
-#define WT_STAT_CONN_TXN_ANCIENT 48
+#define WT_STAT_CONN_TXN_ANCIENT 49
/*! transactions */
-#define WT_STAT_CONN_TXN_BEGIN 49
+#define WT_STAT_CONN_TXN_BEGIN 50
/*! transaction checkpoints */
-#define WT_STAT_CONN_TXN_CHECKPOINT 50
+#define WT_STAT_CONN_TXN_CHECKPOINT 51
/*! transactions committed */
-#define WT_STAT_CONN_TXN_COMMIT 51
+#define WT_STAT_CONN_TXN_COMMIT 52
/*! transaction failures due to cache overflow */
-#define WT_STAT_CONN_TXN_FAIL_CACHE 52
+#define WT_STAT_CONN_TXN_FAIL_CACHE 53
/*! transactions rolled-back */
-#define WT_STAT_CONN_TXN_ROLLBACK 53
+#define WT_STAT_CONN_TXN_ROLLBACK 54
/*! total write I/Os */
-#define WT_STAT_CONN_WRITE_IO 54
+#define WT_STAT_CONN_WRITE_IO 55
/*!
* @}
diff --git a/src/support/stat.c b/src/support/stat.c
index d62ec8dfbaf..a3ba49155ed 100644
--- a/src/support/stat.c
+++ b/src/support/stat.c
@@ -296,6 +296,7 @@ __wt_stat_init_connection_stats(WT_CONNECTION_STATS *stats)
stats->block_byte_read.desc = "bytes read by the block manager";
stats->block_byte_write.desc = "bytes written by the block manager";
stats->block_map_read.desc = "mapped blocks read by the block manager";
+ stats->block_preload.desc = "blocks pre-loaded by the block manager";
stats->block_read.desc = "blocks read by the block manager";
stats->block_write.desc = "blocks written by the block manager";
stats->cache_bytes_dirty.desc =
@@ -373,6 +374,7 @@ __wt_stat_clear_connection_stats(void *stats_arg)
stats->block_byte_read.v = 0;
stats->block_byte_write.v = 0;
stats->block_map_read.v = 0;
+ stats->block_preload.v = 0;
stats->block_read.v = 0;
stats->block_write.v = 0;
stats->cache_bytes_dirty.v = 0;