diff options
Diffstat (limited to 'src/include')
51 files changed, 449 insertions, 256 deletions
diff --git a/src/include/api.h b/src/include/api.h index 4821b450f9e..c6a5af40698 100644 --- a/src/include/api.h +++ b/src/include/api.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/async.h b/src/include/async.h index fb9a64e774d..7a415a4a17a 100644 --- a/src/include/async.h +++ b/src/include/async.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/bitstring.i b/src/include/bitstring.i index 5449ffe6209..0d30e55d1ef 100644 --- a/src/include/bitstring.i +++ b/src/include/bitstring.i @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2015 MongoDB, Inc. + * Public Domain 2014-2016 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/src/include/block.h b/src/include/block.h index 4bff6c82783..27a140b73a4 100644 --- a/src/include/block.h +++ b/src/include/block.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * @@ -173,6 +173,7 @@ struct __wt_bm { int (*compact_skip)(WT_BM *, WT_SESSION_IMPL *, bool *); int (*compact_start)(WT_BM *, WT_SESSION_IMPL *); int (*free)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t); + bool (*is_mapped)(WT_BM *, WT_SESSION_IMPL *); int (*preload)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t); int (*read) (WT_BM *, WT_SESSION_IMPL *, WT_ITEM *, const uint8_t *, size_t); @@ -182,6 +183,7 @@ struct __wt_bm { int (*salvage_start)(WT_BM *, WT_SESSION_IMPL *); int (*salvage_valid) (WT_BM *, WT_SESSION_IMPL *, uint8_t *, size_t, bool); + int (*size)(WT_BM *, WT_SESSION_IMPL *, wt_off_t *); int (*stat)(WT_BM *, WT_SESSION_IMPL *, WT_DSRC_STATS *stats); int (*sync)(WT_BM *, WT_SESSION_IMPL *, bool); int (*verify_addr)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t); @@ -244,7 +246,10 @@ struct __wt_block { bool ckpt_inprogress;/* Live checkpoint in progress */ /* Compaction support */ - int compact_pct_tenths; /* Percent to compact */ + int compact_pct_tenths; /* Percent to compact */ + uint64_t compact_pages_reviewed;/* Pages reviewed */ + uint64_t compact_pages_skipped; /* Pages skipped */ + uint64_t compact_pages_written; /* Pages rewritten */ /* Salvage support */ wt_off_t slvg_off; /* Salvage file offset */ diff --git a/src/include/bloom.h b/src/include/bloom.h index a673ee9add2..ddc2d64a118 100644 --- a/src/include/bloom.h +++ b/src/include/bloom.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/btmem.h b/src/include/btmem.h index 6ee74c61a38..cfbd87f0cae 100644 --- a/src/include/btmem.h +++ b/src/include/btmem.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * @@ -305,7 +305,7 @@ struct __wt_page_modify { struct { /* * Appended items to column-stores: there is only a single one - * of these per column-store tree. + * of these active at a time per column-store tree. */ WT_INSERT_HEAD **append; @@ -319,9 +319,18 @@ struct __wt_page_modify { * huge. */ WT_INSERT_HEAD **update; + + /* + * Split-saved last column-store page record. If a column-store + * page is split, we save the first record number moved so that + * during reconciliation we know the page's last record and can + * write any implicitly created deleted records for the page. + */ + uint64_t split_recno; } leaf; #define mod_append u2.leaf.append #define mod_update u2.leaf.update +#define mod_split_recno u2.leaf.split_recno } u2; /* @@ -478,7 +487,7 @@ struct __wt_page { #define pg_row_ins u.row.ins #undef pg_row_upd #define pg_row_upd u.row.upd -#define pg_row_entries u.row.entries +#undef pg_row_entries #define pg_row_entries u.row.entries /* Fixed-length column-store leaf page. */ @@ -544,8 +553,8 @@ struct __wt_page { #define WT_PAGE_DISK_MAPPED 0x04 /* Disk image in mapped memory */ #define WT_PAGE_EVICT_LRU 0x08 /* Page is on the LRU queue */ #define WT_PAGE_OVERFLOW_KEYS 0x10 /* Page has overflow keys */ -#define WT_PAGE_SPLIT_INSERT 0x20 /* A leaf page was split for append */ -#define WT_PAGE_SPLIT_BLOCK 0x40 /* Split blocking eviction and splits */ +#define WT_PAGE_SPLIT_BLOCK 0x20 /* Split blocking eviction and splits */ +#define WT_PAGE_SPLIT_INSERT 0x40 /* A leaf page was split for append */ #define WT_PAGE_UPDATE_IGNORE 0x80 /* Ignore updates on page discard */ uint8_t flags_atomic; /* Atomic flags, use F_*_ATOMIC */ @@ -1049,7 +1058,7 @@ struct __wt_insert_head { uint64_t __prev_split_gen = (session)->split_gen; \ if (__prev_split_gen == 0) \ do { \ - WT_PUBLISH((session)->split_gen, \ + WT_PUBLISH((session)->split_gen, \ S2C(session)->split_gen); \ } while ((session)->split_gen != S2C(session)->split_gen) diff --git a/src/include/btree.h b/src/include/btree.h index a1d8e395cfc..703de0f2fc6 100644 --- a/src/include/btree.h +++ b/src/include/btree.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * @@ -153,16 +153,18 @@ struct __wt_btree { #define WT_BTREE_NO_CHECKPOINT 0x00800 /* Disable checkpoints */ #define WT_BTREE_NO_EVICTION 0x01000 /* Disable eviction */ #define WT_BTREE_NO_LOGGING 0x02000 /* Disable logging */ -#define WT_BTREE_SALVAGE 0x04000 /* Handle is for salvage */ -#define WT_BTREE_SKIP_CKPT 0x08000 /* Handle skipped checkpoint */ -#define WT_BTREE_UPGRADE 0x10000 /* Handle is for upgrade */ -#define WT_BTREE_VERIFY 0x20000 /* Handle is for verify */ +#define WT_BTREE_REBALANCE 0x04000 /* Handle is for rebalance */ +#define WT_BTREE_SALVAGE 0x08000 /* Handle is for salvage */ +#define WT_BTREE_SKIP_CKPT 0x10000 /* Handle skipped checkpoint */ +#define WT_BTREE_UPGRADE 0x20000 /* Handle is for upgrade */ +#define WT_BTREE_VERIFY 0x40000 /* Handle is for verify */ uint32_t flags; }; /* Flags that make a btree handle special (not for normal use). */ #define WT_BTREE_SPECIAL_FLAGS \ - (WT_BTREE_BULK | WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY) + (WT_BTREE_BULK | WT_BTREE_REBALANCE | \ + WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY) /* * WT_SALVAGE_COOKIE -- diff --git a/src/include/btree.i b/src/include/btree.i index 23e0dfea2cd..94111397abd 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * @@ -1046,15 +1046,16 @@ __wt_leaf_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page) * do it without making the appending threads wait. See if it's worth * doing a split to let the threads continue before doing eviction. * - * Ignore anything other than large, dirty row-store leaf pages. The - * split code only supports row-store pages, and we depend on the page - * being dirty for correctness (the page must be reconciled again + * Ignore anything other than large, dirty leaf pages. We depend on the + * page being dirty for correctness (the page must be reconciled again * before being evicted after the split, information from a previous * reconciliation will be wrong, so we can't evict immediately). */ - if (page->type != WT_PAGE_ROW_LEAF || - page->memory_footprint < btree->splitmempage || - !__wt_page_is_modified(page)) + if (page->memory_footprint < btree->splitmempage) + return (false); + if (WT_PAGE_IS_INTERNAL(page)) + return (false); + if (!__wt_page_is_modified(page)) return (false); /* @@ -1071,9 +1072,11 @@ __wt_leaf_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page) #define WT_MIN_SPLIT_COUNT 30 #define WT_MIN_SPLIT_MULTIPLIER 16 /* At level 2, we see 1/16th entries */ - ins_head = page->pg_row_entries == 0 ? + ins_head = page->type == WT_PAGE_ROW_LEAF ? + (page->pg_row_entries == 0 ? WT_ROW_INSERT_SMALLEST(page) : - WT_ROW_INSERT_SLOT(page, page->pg_row_entries - 1); + WT_ROW_INSERT_SLOT(page, page->pg_row_entries - 1)) : + WT_COL_APPEND(page); if (ins_head == NULL) return (false); for (count = 0, size = 0, ins = ins_head->head[WT_MIN_SPLIT_DEPTH]; @@ -1280,8 +1283,8 @@ __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) * coupling up/down the tree. */ static inline int -__wt_page_swap_func(WT_SESSION_IMPL *session, WT_REF *held, - WT_REF *want, uint32_t flags +__wt_page_swap_func( + WT_SESSION_IMPL *session, WT_REF *held, WT_REF *want, uint32_t flags #ifdef HAVE_DIAGNOSTIC , const char *file, int line #endif @@ -1310,20 +1313,40 @@ __wt_page_swap_func(WT_SESSION_IMPL *session, WT_REF *held, #endif ); - /* Expected failures: page not found or restart. */ - if (ret == WT_NOTFOUND || ret == WT_RESTART) - return (ret); + /* + * Expected failures: page not found or restart. Our callers list the + * errors they're expecting to handle. + */ + if (LF_ISSET(WT_READ_NOTFOUND_OK) && ret == WT_NOTFOUND) + return (WT_NOTFOUND); + if (LF_ISSET(WT_READ_RESTART_OK) && ret == WT_RESTART) + return (WT_RESTART); - /* Discard the original held page. */ + /* Discard the original held page on either success or error. */ acquired = ret == 0; WT_TRET(__wt_page_release(session, held, flags)); + /* Fast-path expected success. */ + if (ret == 0) + return (0); + /* - * If there was an error discarding the original held page, discard - * the acquired page too, keeping it is never useful. + * If there was an error at any point that our caller isn't prepared to + * handle, discard any page we acquired. */ - if (acquired && ret != 0) + if (acquired) WT_TRET(__wt_page_release(session, want, flags)); + + /* + * If we're returning an error, don't let it be one our caller expects + * to handle as returned by page-in: the expectation includes the held + * page not having been released, and that's not the case. + */ + if (LF_ISSET(WT_READ_NOTFOUND_OK) && ret == WT_NOTFOUND) + return (EINVAL); + if (LF_ISSET(WT_READ_RESTART_OK) && ret == WT_RESTART) + return (EINVAL); + return (ret); } @@ -1437,17 +1460,54 @@ __wt_split_intl_race( * * There's a page-split race when we walk the tree: if we're splitting * an internal page into its parent, we update the parent's page index - * and then update the page being split, and it's not an atomic update. - * A thread could read the parent page's original page index, and then - * read the page's replacement index. Because internal page splits work - * by replacing the original page with the initial part of the original - * page, the result of this race is we will have a key that's past the - * end of the current page, and the parent's page index will have moved. + * before updating the split page's page index, and it's not an atomic + * update. A thread can read the parent page's original page index and + * then read the split page's replacement index. + * + * Because internal page splits work by truncating the original page to + * the initial part of the original page, the result of this race is we + * will have a search key that points past the end of the current page. + * This is only an issue when we search past the end of the page, if we + * find a WT_REF in the page with the namespace we're searching for, we + * don't care if the WT_REF moved or not while we were searching, we + * have the correct page. + * + * For example, imagine an internal page with 3 child pages, with the + * namespaces a-f, g-h and i-j; the first child page splits. The parent + * starts out with the following page-index: + * + * | ... | a | g | i | ... | + * + * which changes to this: + * + * | ... | a | c | e | g | i | ... | + * + * The child starts out with the following page-index: + * + * | a | b | c | d | e | f | + * + * which changes to this: + * + * | a | b | + * + * The thread searches the original parent page index for the key "cat", + * it couples to the "a" child page; if it uses the replacement child + * page index, it will search past the end of the page and couple to the + * "b" page, which is wrong. + * + * To detect the problem, we remember the parent page's page index used + * to descend the tree. Whenever we search past the end of a page, we + * check to see if the parent's page index has changed since our use of + * it during descent. As the problem only appears if we read the split + * page's replacement index, the parent page's index must already have + * changed, ensuring we detect the problem. * - * It's also possible a thread could read the parent page's replacement - * page index, and then read the page's original index. Because internal - * splits work by truncating the original page, the original page's old - * content is compatible, this isn't a problem and we ignore this race. + * It's possible for the opposite race to happen (a thread could read + * the parent page's replacement page index and then read the split + * page's original index). This isn't a problem because internal splits + * work by truncating the split page, so the split page search is for + * content the split page retains after the split, and we ignore this + * race. */ WT_INTL_INDEX_GET(session, parent, pindex); return (pindex != saved_pindex); diff --git a/src/include/btree_cmp.i b/src/include/btree_cmp.i index 8a7fe19a22f..1993c1be293 100644 --- a/src/include/btree_cmp.i +++ b/src/include/btree_cmp.i @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/buf.i b/src/include/buf.i index b8849396f01..95d945ec6d3 100644 --- a/src/include/buf.i +++ b/src/include/buf.i @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * @@ -92,18 +92,6 @@ __wt_buf_setstr(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *s) } /* - * __wt_buf_set_printable -- - * Set the contents of the buffer to a printable representation of a - * byte string. - */ -static inline int -__wt_buf_set_printable( - WT_SESSION_IMPL *session, WT_ITEM *buf, const void *from_arg, size_t size) -{ - return (__wt_raw_to_esc_hex(session, from_arg, size, buf)); -} - -/* * __wt_buf_free -- * Free a buffer. */ diff --git a/src/include/cache.h b/src/include/cache.h index a0440f23a00..a3961d6043e 100644 --- a/src/include/cache.h +++ b/src/include/cache.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/cache.i b/src/include/cache.i index 7cbd72853c3..ee13eee84c5 100644 --- a/src/include/cache.i +++ b/src/include/cache.i @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/cell.i b/src/include/cell.i index 1410b30bb82..481d2a29764 100644 --- a/src/include/cell.i +++ b/src/include/cell.i @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/column.i b/src/include/column.i index fc1f372b2a9..9f3e2101f6f 100644 --- a/src/include/column.i +++ b/src/include/column.i @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * @@ -176,6 +176,16 @@ __col_insert_search(WT_INSERT_HEAD *inshead, continue; } + /* + * When no exact match is found, the search returns the smallest + * key larger than the searched-for key, or the largest key + * smaller than the searched-for key, if there is no larger key. + * Our callers depend on that: specifically, the fixed-length + * column store cursor code interprets returning a key smaller + * than the searched-for key to mean the searched-for key is + * larger than any key on the page. Don't change that behavior, + * things will break. + */ ins_recno = WT_INSERT_RECNO(ret_ins); cmp = (recno == ins_recno) ? 0 : (recno < ins_recno) ? -1 : 1; @@ -204,9 +214,9 @@ __col_var_last_recno(WT_PAGE *page) WT_COL_RLE *repeat; /* - * If there's an append list (the last page), then there may be more - * records on the page. This function ignores those records, so our - * callers have to handle that explicitly, if they care. + * If there's an append list, there may be more records on the page. + * This function ignores those records, our callers must handle that + * explicitly, if they care. */ if (page->pg_var_nrepeats == 0) return (page->pg_var_entries == 0 ? 0 : @@ -225,9 +235,9 @@ static inline uint64_t __col_fix_last_recno(WT_PAGE *page) { /* - * If there's an append list (the last page), then there may be more - * records on the page. This function ignores those records, so our - * callers have to handle that explicitly, if they care. + * If there's an append list, there may be more records on the page. + * This function ignores those records, our callers must handle that + * explicitly, if they care. */ return (page->pg_fix_entries == 0 ? 0 : page->pg_fix_recno + (page->pg_fix_entries - 1)); @@ -282,7 +292,17 @@ __col_var_search(WT_PAGE *page, uint64_t recno, uint64_t *start_recnop) start_recno = repeat->recno + repeat->rle; } - if (recno >= start_recno + (page->pg_var_entries - start_indx)) + /* + * !!! + * The test could be written more simply as: + * + * (recno >= start_recno + (page->pg_var_entries - start_indx)) + * + * It's split into two parts because the simpler test will overflow if + * searching for large record numbers. + */ + if (recno >= start_recno && + recno - start_recno >= page->pg_var_entries - start_indx) return (NULL); return (page->pg_var_d + start_indx + (uint32_t)(recno - start_recno)); diff --git a/src/include/compact.h b/src/include/compact.h index 0698bf7b1a4..2bba52e7173 100644 --- a/src/include/compact.h +++ b/src/include/compact.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/config.h b/src/include/config.h index e836abaccba..e63db0e76cf 100644 --- a/src/include/config.h +++ b/src/include/config.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * @@ -72,25 +72,26 @@ struct __wt_config_parser_impl { #define WT_CONFIG_ENTRY_WT_SESSION_log_flush 20 #define WT_CONFIG_ENTRY_WT_SESSION_log_printf 21 #define WT_CONFIG_ENTRY_WT_SESSION_open_cursor 22 -#define WT_CONFIG_ENTRY_WT_SESSION_reconfigure 23 -#define WT_CONFIG_ENTRY_WT_SESSION_rename 24 -#define WT_CONFIG_ENTRY_WT_SESSION_reset 25 -#define WT_CONFIG_ENTRY_WT_SESSION_rollback_transaction 26 -#define WT_CONFIG_ENTRY_WT_SESSION_salvage 27 -#define WT_CONFIG_ENTRY_WT_SESSION_snapshot 28 -#define WT_CONFIG_ENTRY_WT_SESSION_strerror 29 -#define WT_CONFIG_ENTRY_WT_SESSION_transaction_sync 30 -#define WT_CONFIG_ENTRY_WT_SESSION_truncate 31 -#define WT_CONFIG_ENTRY_WT_SESSION_upgrade 32 -#define WT_CONFIG_ENTRY_WT_SESSION_verify 33 -#define WT_CONFIG_ENTRY_colgroup_meta 34 -#define WT_CONFIG_ENTRY_file_meta 35 -#define WT_CONFIG_ENTRY_index_meta 36 -#define WT_CONFIG_ENTRY_table_meta 37 -#define WT_CONFIG_ENTRY_wiredtiger_open 38 -#define WT_CONFIG_ENTRY_wiredtiger_open_all 39 -#define WT_CONFIG_ENTRY_wiredtiger_open_basecfg 40 -#define WT_CONFIG_ENTRY_wiredtiger_open_usercfg 41 +#define WT_CONFIG_ENTRY_WT_SESSION_rebalance 23 +#define WT_CONFIG_ENTRY_WT_SESSION_reconfigure 24 +#define WT_CONFIG_ENTRY_WT_SESSION_rename 25 +#define WT_CONFIG_ENTRY_WT_SESSION_reset 26 +#define WT_CONFIG_ENTRY_WT_SESSION_rollback_transaction 27 +#define WT_CONFIG_ENTRY_WT_SESSION_salvage 28 +#define WT_CONFIG_ENTRY_WT_SESSION_snapshot 29 +#define WT_CONFIG_ENTRY_WT_SESSION_strerror 30 +#define WT_CONFIG_ENTRY_WT_SESSION_transaction_sync 31 +#define WT_CONFIG_ENTRY_WT_SESSION_truncate 32 +#define WT_CONFIG_ENTRY_WT_SESSION_upgrade 33 +#define WT_CONFIG_ENTRY_WT_SESSION_verify 34 +#define WT_CONFIG_ENTRY_colgroup_meta 35 +#define WT_CONFIG_ENTRY_file_meta 36 +#define WT_CONFIG_ENTRY_index_meta 37 +#define WT_CONFIG_ENTRY_table_meta 38 +#define WT_CONFIG_ENTRY_wiredtiger_open 39 +#define WT_CONFIG_ENTRY_wiredtiger_open_all 40 +#define WT_CONFIG_ENTRY_wiredtiger_open_basecfg 41 +#define WT_CONFIG_ENTRY_wiredtiger_open_usercfg 42 /* * configuration section: END * DO NOT EDIT: automatically built by dist/flags.py. diff --git a/src/include/connection.h b/src/include/connection.h index 2367f5a0035..5d61f9456b3 100644 --- a/src/include/connection.h +++ b/src/include/connection.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * @@ -415,6 +415,7 @@ struct __wt_connection_impl { uint32_t direct_io; uint32_t write_through; /* FILE_FLAG_WRITE_THROUGH type flags */ bool mmap; /* mmap configuration */ + int page_size; /* OS page size for mmap alignment */ uint32_t verbose; uint32_t flags; diff --git a/src/include/cursor.h b/src/include/cursor.h index 275e2f2db46..7f7b5dceb79 100644 --- a/src/include/cursor.h +++ b/src/include/cursor.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * @@ -200,18 +200,23 @@ struct __wt_cursor_btree { uint8_t append_tree; /* Cursor appended to the tree */ +#ifdef HAVE_DIAGNOSTIC + /* Check that cursor next/prev never returns keys out-of-order. */ + WT_ITEM *lastkey, _lastkey; + uint64_t lastrecno; +#endif + #define WT_CBT_ACTIVE 0x01 /* Active in the tree */ #define WT_CBT_ITERATE_APPEND 0x02 /* Col-store: iterating append list */ #define WT_CBT_ITERATE_NEXT 0x04 /* Next iteration configuration */ #define WT_CBT_ITERATE_PREV 0x08 /* Prev iteration configuration */ -#define WT_CBT_MAX_RECORD 0x10 /* Col-store: past end-of-table */ -#define WT_CBT_NO_TXN 0x20 /* Non-transactional cursor +#define WT_CBT_NO_TXN 0x10 /* Non-transactional cursor (e.g. on a checkpoint) */ -#define WT_CBT_SEARCH_SMALLEST 0x40 /* Row-store: small-key insert list */ +#define WT_CBT_SEARCH_SMALLEST 0x20 /* Row-store: small-key insert list */ #define WT_CBT_POSITION_MASK /* Flags associated with position */ \ (WT_CBT_ITERATE_APPEND | WT_CBT_ITERATE_NEXT | WT_CBT_ITERATE_PREV | \ - WT_CBT_MAX_RECORD | WT_CBT_SEARCH_SMALLEST) + WT_CBT_SEARCH_SMALLEST) uint8_t flags; }; @@ -219,33 +224,32 @@ struct __wt_cursor_btree { struct __wt_cursor_bulk { WT_CURSOR_BTREE cbt; - WT_REF *ref; /* The leaf page */ - WT_PAGE *leaf; - /* * Variable-length column store compares values during bulk load as * part of RLE compression, row-store compares keys during bulk load * to avoid corruption. */ - WT_ITEM last; /* Last key/value seen */ + bool first_insert; /* First insert */ + WT_ITEM last; /* Last key/value inserted */ /* - * Variable-length column-store RLE counter (also overloaded to mean - * the first time through the bulk-load insert routine, when set to 0). + * Additional column-store bulk load support. */ - uint64_t rle; + uint64_t recno; /* Record number */ + uint64_t rle; /* Variable-length RLE counter */ /* - * Fixed-length column-store current entry in memory chunk count, and - * the maximum number of records per chunk. + * Additional fixed-length column store bitmap bulk load support: + * current entry in memory chunk count, and the maximum number of + * records per chunk. */ + bool bitmap; /* Bitmap bulk load */ uint32_t entry; /* Entry count */ uint32_t nrecs; /* Max records per chunk */ - /* Special bitmap bulk load for fixed-length column stores. */ - bool bitmap; - - void *reconcile; /* Reconciliation information */ + void *reconcile; /* Reconciliation support */ + WT_REF *ref; /* The leaf page */ + WT_PAGE *leaf; }; struct __wt_cursor_config { diff --git a/src/include/cursor.i b/src/include/cursor.i index 2e382591313..8ab96c0a69d 100644 --- a/src/include/cursor.i +++ b/src/include/cursor.i @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * @@ -246,8 +246,12 @@ __cursor_func_init(WT_CURSOR_BTREE *cbt, bool reenter) session = (WT_SESSION_IMPL *)cbt->iface.session; - if (reenter) + if (reenter) { +#ifdef HAVE_DIAGNOSTIC + __wt_cursor_key_order_reset(cbt); +#endif WT_RET(__curfile_leave(cbt)); + } /* * Any old insert position is now invalid. We rely on this being diff --git a/src/include/dhandle.h b/src/include/dhandle.h index 9a54b4ddb66..8b313428d06 100644 --- a/src/include/dhandle.h +++ b/src/include/dhandle.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * @@ -33,6 +33,10 @@ (F_ISSET(dhandle, WT_DHANDLE_DEAD) || \ !F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE | WT_DHANDLE_OPEN)) +/* The metadata cursor's data handle. */ +#define WT_SESSION_META_DHANDLE(s) \ + (((WT_CURSOR_BTREE *)((s)->meta_cursor))->btree->dhandle) + /* * WT_DATA_HANDLE -- * A handle for a generic named data source. diff --git a/src/include/dlh.h b/src/include/dlh.h index c374ec36fb0..9e49c2ff3cb 100644 --- a/src/include/dlh.h +++ b/src/include/dlh.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/error.h b/src/include/error.h index e721855ce7c..5f24d205af9 100644 --- a/src/include/error.h +++ b/src/include/error.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/extern.h b/src/include/extern.h index d84403cc16d..b71f4b12486 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -14,6 +14,7 @@ extern int __wt_block_buffer_to_addr(WT_BLOCK *block, const uint8_t *p, wt_off_t extern int __wt_block_addr_invalid(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size, bool live); extern int __wt_block_addr_string(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, const uint8_t *addr, size_t addr_size); extern int __wt_block_buffer_to_ckpt(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *p, WT_BLOCK_CKPT *ci); +extern int __wt_block_ckpt_decode(WT_SESSION *wt_session, size_t allocsize, const uint8_t *p, WT_BLOCK_CKPT *ci); extern int __wt_block_ckpt_to_buffer(WT_SESSION_IMPL *session, WT_BLOCK *block, uint8_t **pp, WT_BLOCK_CKPT *ci); extern int __wt_block_ckpt_init( WT_SESSION_IMPL *session, WT_BLOCK_CKPT *ci, const char *name); extern int __wt_block_checkpoint_load(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size, uint8_t *root_addr, size_t *root_addr_sizep, bool checkpoint); @@ -43,14 +44,15 @@ extern void __wt_block_extlist_free(WT_SESSION_IMPL *session, WT_EXTLIST *el); extern int __wt_block_map( WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapp, size_t *maplenp, void **mappingcookie); extern int __wt_block_unmap( WT_SESSION_IMPL *session, WT_BLOCK *block, void *map, size_t maplen, void **mappingcookie); extern int __wt_block_manager_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[], bool forced_salvage, bool readonly, uint32_t allocsize, WT_BM **bmp); -extern int __wt_block_manager_truncate( WT_SESSION_IMPL *session, const char *filename, uint32_t allocsize); +extern int __wt_block_manager_drop(WT_SESSION_IMPL *session, const char *filename); extern int __wt_block_manager_create( WT_SESSION_IMPL *session, const char *filename, uint32_t allocsize); extern void __wt_block_configure_first_fit(WT_BLOCK *block, bool on); extern int __wt_block_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[], bool forced_salvage, bool readonly, uint32_t allocsize, WT_BLOCK **blockp); extern int __wt_block_close(WT_SESSION_IMPL *session, WT_BLOCK *block); extern int __wt_desc_init(WT_SESSION_IMPL *session, WT_FH *fh, uint32_t allocsize); extern void __wt_block_stat(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_DSRC_STATS *stats); -extern int __wt_block_manager_size( WT_SESSION_IMPL *session, const char *filename, WT_DSRC_STATS *stats); +extern int __wt_block_manager_size(WT_BM *bm, WT_SESSION_IMPL *session, wt_off_t *sizep); +extern int __wt_block_manager_named_size( WT_SESSION_IMPL *session, const char *name, wt_off_t *sizep); extern int __wt_bm_preload( WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size); extern int __wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, size_t addr_size); extern int __wt_block_read_off_blind( WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t offset); @@ -89,6 +91,9 @@ extern int __wt_bloom_close(WT_BLOOM *bloom); extern int __wt_bloom_drop(WT_BLOOM *bloom, const char *config); extern int __wt_compact(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_compact_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp); +extern int __wt_cursor_key_order_check( WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, bool next); +extern int __wt_cursor_key_order_init(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt); +extern void __wt_cursor_key_order_reset(WT_CURSOR_BTREE *cbt); extern void __wt_btcur_iterate_setup(WT_CURSOR_BTREE *cbt); extern int __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating); extern int __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating); @@ -129,7 +134,7 @@ extern int __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[]); extern int __wt_btree_close(WT_SESSION_IMPL *session); extern void __wt_root_ref_init(WT_REF *root_ref, WT_PAGE *root, bool is_recno); extern int __wt_btree_tree_open( WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size); -extern int __wt_btree_new_leaf_page(WT_SESSION_IMPL *session, WT_PAGE **pagep); +extern int __wt_btree_new_leaf_page( WT_SESSION_IMPL *session, uint64_t recno, WT_PAGE **pagep); extern void __wt_btree_evictable(WT_SESSION_IMPL *session, bool on); extern int __wt_btree_huffman_open(WT_SESSION_IMPL *session); extern void __wt_btree_huffman_close(WT_SESSION_IMPL *session); @@ -139,6 +144,7 @@ extern const char *__wt_page_type_string(u_int type); extern const char *__wt_cell_type_string(uint8_t type); extern const char *__wt_page_addr_string(WT_SESSION_IMPL *session, WT_REF *ref, WT_ITEM *buf); extern const char *__wt_addr_string(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size, WT_ITEM *buf); +extern const char *__wt_buf_set_printable( WT_SESSION_IMPL *session, const void *p, size_t size, WT_ITEM *buf); extern int __wt_ovfl_read(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack, WT_ITEM *store); extern int __wt_ovfl_cache(WT_SESSION_IMPL *session, WT_PAGE *page, void *cookie, WT_CELL_UNPACK *vpack); extern int __wt_ovfl_discard(WT_SESSION_IMPL *session, WT_CELL *cell); @@ -151,6 +157,7 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags , const char *file, int line #endif ); +extern int __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd); extern int __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[]); extern void __wt_split_stash_discard(WT_SESSION_IMPL *session); @@ -170,7 +177,7 @@ extern int __wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flag extern int __wt_tree_walk_count(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *walkcntp, uint32_t flags); extern int __wt_tree_walk_skip(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *skipleafcntp, uint32_t flags); extern int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno, WT_ITEM *value, WT_UPDATE *upd_arg, bool is_remove); -extern int __wt_col_search(WT_SESSION_IMPL *session, uint64_t recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt); +extern int __wt_col_search(WT_SESSION_IMPL *session, uint64_t search_recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt); extern int __wt_row_leaf_keys(WT_SESSION_IMPL *session, WT_PAGE *page); extern int __wt_row_leaf_key_copy( WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip, WT_ITEM *key); extern int __wt_row_leaf_key_work(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip_arg, WT_ITEM *keyb, bool instantiate); @@ -192,7 +199,7 @@ extern int __wt_las_create(WT_SESSION_IMPL *session); extern int __wt_las_destroy(WT_SESSION_IMPL *session); extern void __wt_las_set_written(WT_SESSION_IMPL *session); extern bool __wt_las_is_written(WT_SESSION_IMPL *session); -extern int __wt_las_cursor_create(WT_SESSION_IMPL *session, WT_CURSOR **cursorp); +extern int __wt_las_cursor_open(WT_SESSION_IMPL *session, WT_CURSOR **cursorp); extern int __wt_las_cursor( WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t *session_flags); extern int __wt_las_cursor_close( WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t session_flags); extern int __wt_las_sweep(WT_SESSION_IMPL *session); @@ -255,7 +262,7 @@ extern int __wt_connection_init(WT_CONNECTION_IMPL *conn); extern int __wt_connection_destroy(WT_CONNECTION_IMPL *conn); extern int __wt_logmgr_reconfig(WT_SESSION_IMPL *session, const char **cfg); extern int __wt_log_truncate_files( WT_SESSION_IMPL *session, WT_CURSOR *cursor, const char *cfg[]); -extern int __wt_log_wrlsn(WT_SESSION_IMPL *session); +extern int __wt_log_wrlsn(WT_SESSION_IMPL *session, int *yield); extern int __wt_logmgr_create(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_logmgr_open(WT_SESSION_IMPL *session); extern int __wt_logmgr_destroy(WT_SESSION_IMPL *session); @@ -333,9 +340,10 @@ extern int __wt_evict_destroy(WT_SESSION_IMPL *session); extern int __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session, bool *evict_resetp); extern void __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session); extern int __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full); +extern void __wt_evict_priority_set(WT_SESSION_IMPL *session, uint64_t v); +extern void __wt_evict_priority_clear(WT_SESSION_IMPL *session); extern int __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile); extern int __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing); -extern int __wt_evict_page_clean_update( WT_SESSION_IMPL *session, WT_REF *ref, bool closing); extern int __wt_log_ckpt(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn); extern int __wt_log_flush_lsn(WT_SESSION_IMPL *session, WT_LSN *lsn, bool start); extern int __wt_log_background(WT_SESSION_IMPL *session, WT_LSN *lsn); @@ -362,23 +370,23 @@ extern int __wt_logrec_read(WT_SESSION_IMPL *session, const uint8_t **pp, const extern int __wt_logop_read(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *optypep, uint32_t *opsizep); extern int __wt_logop_col_put_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t recno, WT_ITEM *value); extern int __wt_logop_col_put_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, uint64_t *recnop, WT_ITEM *valuep); -extern int __wt_logop_col_put_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out); +extern int __wt_logop_col_put_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags); extern int __wt_logop_col_remove_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t recno); extern int __wt_logop_col_remove_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, uint64_t *recnop); -extern int __wt_logop_col_remove_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out); +extern int __wt_logop_col_remove_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags); extern int __wt_logop_col_truncate_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t start, uint64_t stop); extern int __wt_logop_col_truncate_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, uint64_t *startp, uint64_t *stopp); -extern int __wt_logop_col_truncate_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out); +extern int __wt_logop_col_truncate_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags); extern int __wt_logop_row_put_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *key, WT_ITEM *value); extern int __wt_logop_row_put_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, WT_ITEM *keyp, WT_ITEM *valuep); -extern int __wt_logop_row_put_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out); +extern int __wt_logop_row_put_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags); extern int __wt_logop_row_remove_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *key); extern int __wt_logop_row_remove_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, WT_ITEM *keyp); -extern int __wt_logop_row_remove_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out); +extern int __wt_logop_row_remove_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags); extern int __wt_logop_row_truncate_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *start, WT_ITEM *stop, uint32_t mode); extern int __wt_logop_row_truncate_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, WT_ITEM *startp, WT_ITEM *stopp, uint32_t *modep); -extern int __wt_logop_row_truncate_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out); -extern int __wt_txn_op_printlog( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out); +extern int __wt_logop_row_truncate_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags); +extern int __wt_txn_op_printlog( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags); extern void __wt_log_slot_activate(WT_SESSION_IMPL *session, WT_LOGSLOT *slot); extern int __wt_log_slot_switch( WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool retry, bool forced); extern int __wt_log_slot_new(WT_SESSION_IMPL *session); @@ -447,12 +455,13 @@ extern int __wt_ext_metadata_search(WT_EXTENSION_API *wt_api, WT_SESSION *wt_ses extern int __wt_ext_metadata_update(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *key, const char *value); extern int __wt_metadata_get_ckptlist( WT_SESSION *session, const char *name, WT_CKPT **ckptbasep); extern void __wt_metadata_free_ckptlist(WT_SESSION *session, WT_CKPT *ckptbase); -extern int __wt_metadata_open(WT_SESSION_IMPL *session); -extern int __wt_metadata_cursor( WT_SESSION_IMPL *session, const char *config, WT_CURSOR **cursorp); +extern int __wt_metadata_cursor_open( WT_SESSION_IMPL *session, const char *config, WT_CURSOR **cursorp); +extern int __wt_metadata_cursor(WT_SESSION_IMPL *session, WT_CURSOR **cursorp); +extern int __wt_metadata_cursor_release(WT_SESSION_IMPL *session, WT_CURSOR **cursorp); extern int __wt_metadata_insert( WT_SESSION_IMPL *session, const char *key, const char *value); extern int __wt_metadata_update( WT_SESSION_IMPL *session, const char *key, const char *value); extern int __wt_metadata_remove(WT_SESSION_IMPL *session, const char *key); -extern int __wt_metadata_search( WT_SESSION_IMPL *session, const char *key, char **valuep); +extern int __wt_metadata_search(WT_SESSION_IMPL *session, const char *key, char **valuep); extern void __wt_meta_track_discard(WT_SESSION_IMPL *session); extern int __wt_meta_track_on(WT_SESSION_IMPL *session); extern int __wt_meta_track_off(WT_SESSION_IMPL *session, bool need_sync, bool unroll); @@ -468,7 +477,7 @@ extern int __wt_meta_track_init(WT_SESSION_IMPL *session); extern int __wt_meta_track_destroy(WT_SESSION_IMPL *session); extern int __wt_turtle_init(WT_SESSION_IMPL *session); extern int __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep); -extern int __wt_turtle_update( WT_SESSION_IMPL *session, const char *key, const char *value); +extern int __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value); extern void __wt_abort(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); extern int __wt_calloc(WT_SESSION_IMPL *session, size_t number, size_t size, void *retp); extern int __wt_realloc(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp); @@ -488,7 +497,7 @@ extern int __wt_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep); extern int __wt_filesize_name(WT_SESSION_IMPL *session, const char *filename, bool silent, wt_off_t *sizep); extern int __wt_bytelock(WT_FH *fhp, wt_off_t byte, bool lock); extern int __wt_directory_sync_fh(WT_SESSION_IMPL *session, WT_FH *fh); -extern int __wt_directory_sync(WT_SESSION_IMPL *session, char *path); +extern int __wt_directory_sync(WT_SESSION_IMPL *session, const char *path); extern int __wt_fsync(WT_SESSION_IMPL *session, WT_FH *fh); extern int __wt_fsync_async(WT_SESSION_IMPL *session, WT_FH *fh); extern int __wt_ftruncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len); @@ -514,6 +523,7 @@ extern int __wt_rwlock_destroy(WT_SESSION_IMPL *session, WT_RWLOCK **rwlockp); extern int __wt_once(void (*init_routine)(void)); extern int __wt_open(WT_SESSION_IMPL *session, const char *name, bool ok_create, bool exclusive, int dio_type, WT_FH **fhp); extern int __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp); +extern int __wt_get_vm_pagesize(void); extern bool __wt_absolute_path(const char *path); extern const char *__wt_path_separator(void); extern bool __wt_has_priv(void); @@ -558,8 +568,9 @@ extern uint32_t __wt_split_page_size(WT_BTREE *btree, uint32_t maxpagesize); extern int __wt_bulk_init(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk); extern int __wt_bulk_wrapup(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk); extern int __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk); -extern int __wt_bulk_insert_fix(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk); -extern int __wt_bulk_insert_var(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk); +extern int __wt_bulk_insert_fix( WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool deleted); +extern int __wt_bulk_insert_fix_bitmap(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk); +extern int __wt_bulk_insert_var( WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool deleted); extern int __wt_schema_create_strip(WT_SESSION_IMPL *session, const char *v1, const char *v2, char **value_ret); extern int __wt_direct_io_size_check(WT_SESSION_IMPL *session, const char **cfg, const char *config_name, uint32_t *allocsizep); extern int __wt_schema_colgroup_source(WT_SESSION_IMPL *session, WT_TABLE *table, const char *cgname, const char *config, WT_ITEM *buf); @@ -606,6 +617,7 @@ extern int __wt_session_release_resources(WT_SESSION_IMPL *session); extern int __wt_open_cursor(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp); extern int __wt_session_create( WT_SESSION_IMPL *session, const char *uri, const char *config); extern int __wt_session_drop(WT_SESSION_IMPL *session, const char *uri, const char *cfg[]); +extern int __wt_session_range_truncate(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *start, WT_CURSOR *stop); extern int __wt_open_session(WT_CONNECTION_IMPL *conn, WT_EVENT_HANDLER *event_handler, const char *config, bool open_metadata, WT_SESSION_IMPL **sessionp); extern int __wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name, bool open_metadata, uint32_t session_flags, WT_SESSION_IMPL **sessionp); extern int __wt_compact_uri_analyze(WT_SESSION_IMPL *session, const char *uri, bool *skipp); @@ -639,8 +651,9 @@ extern int __wt_bad_object_type(WT_SESSION_IMPL *session, const char *uri); extern int __wt_filename(WT_SESSION_IMPL *session, const char *name, char **path); extern int __wt_nfilename( WT_SESSION_IMPL *session, const char *name, size_t namelen, char **path); extern int __wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name); -extern int __wt_sync_and_rename_fh( WT_SESSION_IMPL *session, WT_FH **fhp, const char *from, const char *to); -extern int __wt_sync_and_rename_fp( WT_SESSION_IMPL *session, FILE **fpp, const char *from, const char *to); +extern int __wt_rename_and_sync_directory( WT_SESSION_IMPL *session, const char *from, const char *to); +extern int __wt_fh_sync_and_rename( WT_SESSION_IMPL *session, WT_FH **fhp, const char *from, const char *to); +extern int __wt_sync_fp_and_rename( WT_SESSION_IMPL *session, FILE **fpp, const char *from, const char *to); extern int __wt_library_init(void); extern int __wt_breakpoint(void); extern void __wt_attach(WT_SESSION_IMPL *session); @@ -654,6 +667,7 @@ __wt_hazard_set(WT_SESSION_IMPL *session, WT_REF *ref, bool *busyp ); extern int __wt_hazard_clear(WT_SESSION_IMPL *session, WT_PAGE *page); extern void __wt_hazard_close(WT_SESSION_IMPL *session); +extern void __wt_fill_hex(const uint8_t *src, size_t src_max, uint8_t *dest, size_t dest_max, size_t *lenp); extern int __wt_raw_to_hex( WT_SESSION_IMPL *session, const uint8_t *from, size_t size, WT_ITEM *to); extern int __wt_raw_to_esc_hex( WT_SESSION_IMPL *session, const uint8_t *from, size_t size, WT_ITEM *to); extern int __wt_hex2byte(const u_char *from, u_char *to); @@ -671,6 +685,7 @@ extern uint32_t __wt_log2_int(uint32_t n); extern bool __wt_ispo2(uint32_t v); extern uint32_t __wt_rduppo2(uint32_t n, uint32_t po2); extern void __wt_random_init(WT_RAND_STATE volatile *rnd_state); +extern int __wt_random_init_seed( WT_SESSION_IMPL *session, WT_RAND_STATE volatile *rnd_state); extern uint32_t __wt_random(WT_RAND_STATE volatile *rnd_state); extern int __wt_buf_grow_worker(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size); extern int __wt_buf_fmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))); @@ -732,7 +747,7 @@ extern int __wt_txn_checkpoint_logread( WT_SESSION_IMPL *session, const uint8_t extern int __wt_txn_checkpoint_log( WT_SESSION_IMPL *session, bool full, uint32_t flags, WT_LSN *lsnp); extern int __wt_txn_truncate_log( WT_SESSION_IMPL *session, WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop); extern int __wt_txn_truncate_end(WT_SESSION_IMPL *session); -extern int __wt_txn_printlog(WT_SESSION *wt_session, FILE *out); +extern int __wt_txn_printlog(WT_SESSION *wt_session, FILE *out, uint32_t flags); extern int __wt_txn_named_snapshot_begin(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_txn_named_snapshot_drop(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_txn_named_snapshot_get(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *nameval); diff --git a/src/include/flags.h b/src/include/flags.h index bafff92fbc0..24fae4abccd 100644 --- a/src/include/flags.h +++ b/src/include/flags.h @@ -39,15 +39,17 @@ #define WT_LOG_SYNC_ENABLED 0x00000010 #define WT_READ_CACHE 0x00000001 #define WT_READ_COMPACT 0x00000002 -#define WT_READ_NO_EMPTY 0x00000004 -#define WT_READ_NO_EVICT 0x00000008 -#define WT_READ_NO_GEN 0x00000010 -#define WT_READ_NO_WAIT 0x00000020 -#define WT_READ_PREV 0x00000040 -#define WT_READ_SKIP_INTL 0x00000080 -#define WT_READ_SKIP_LEAF 0x00000100 -#define WT_READ_TRUNCATE 0x00000200 -#define WT_READ_WONT_NEED 0x00000400 +#define WT_READ_NOTFOUND_OK 0x00000004 +#define WT_READ_NO_EMPTY 0x00000008 +#define WT_READ_NO_EVICT 0x00000010 +#define WT_READ_NO_GEN 0x00000020 +#define WT_READ_NO_WAIT 0x00000040 +#define WT_READ_PREV 0x00000080 +#define WT_READ_RESTART_OK 0x00000100 +#define WT_READ_SKIP_INTL 0x00000200 +#define WT_READ_SKIP_LEAF 0x00000400 +#define WT_READ_TRUNCATE 0x00000800 +#define WT_READ_WONT_NEED 0x00001000 #define WT_SESSION_CAN_WAIT 0x00000001 #define WT_SESSION_CLEAR_EVICT_WALK 0x00000002 #define WT_SESSION_INTERNAL 0x00000004 @@ -57,15 +59,16 @@ #define WT_SESSION_LOCKED_SLOT 0x00000040 #define WT_SESSION_LOCKED_TABLE 0x00000080 #define WT_SESSION_LOCKED_TURTLE 0x00000100 -#define WT_SESSION_LOGGING_INMEM 0x00000200 -#define WT_SESSION_LOOKASIDE_CURSOR 0x00000400 -#define WT_SESSION_NO_CACHE 0x00000800 -#define WT_SESSION_NO_DATA_HANDLES 0x00001000 -#define WT_SESSION_NO_EVICTION 0x00002000 -#define WT_SESSION_NO_LOGGING 0x00004000 -#define WT_SESSION_NO_SCHEMA_LOCK 0x00008000 -#define WT_SESSION_QUIET_CORRUPT_FILE 0x00010000 -#define WT_SESSION_SERVER_ASYNC 0x00020000 +#define WT_SESSION_LOCK_NO_WAIT 0x00000200 +#define WT_SESSION_LOGGING_INMEM 0x00000400 +#define WT_SESSION_LOOKASIDE_CURSOR 0x00000800 +#define WT_SESSION_NO_CACHE 0x00001000 +#define WT_SESSION_NO_DATA_HANDLES 0x00002000 +#define WT_SESSION_NO_EVICTION 0x00004000 +#define WT_SESSION_NO_LOGGING 0x00008000 +#define WT_SESSION_NO_SCHEMA_LOCK 0x00010000 +#define WT_SESSION_QUIET_CORRUPT_FILE 0x00020000 +#define WT_SESSION_SERVER_ASYNC 0x00040000 #define WT_TXN_LOG_CKPT_CLEANUP 0x00000001 #define WT_TXN_LOG_CKPT_PREPARE 0x00000002 #define WT_TXN_LOG_CKPT_START 0x00000004 @@ -85,16 +88,17 @@ #define WT_VERB_MUTEX 0x00000800 #define WT_VERB_OVERFLOW 0x00001000 #define WT_VERB_READ 0x00002000 -#define WT_VERB_RECONCILE 0x00004000 -#define WT_VERB_RECOVERY 0x00008000 -#define WT_VERB_SALVAGE 0x00010000 -#define WT_VERB_SHARED_CACHE 0x00020000 -#define WT_VERB_SPLIT 0x00040000 -#define WT_VERB_TEMPORARY 0x00080000 -#define WT_VERB_TRANSACTION 0x00100000 -#define WT_VERB_VERIFY 0x00200000 -#define WT_VERB_VERSION 0x00400000 -#define WT_VERB_WRITE 0x00800000 +#define WT_VERB_REBALANCE 0x00004000 +#define WT_VERB_RECONCILE 0x00008000 +#define WT_VERB_RECOVERY 0x00010000 +#define WT_VERB_SALVAGE 0x00020000 +#define WT_VERB_SHARED_CACHE 0x00040000 +#define WT_VERB_SPLIT 0x00080000 +#define WT_VERB_TEMPORARY 0x00100000 +#define WT_VERB_TRANSACTION 0x00200000 +#define WT_VERB_VERIFY 0x00400000 +#define WT_VERB_VERSION 0x00800000 +#define WT_VERB_WRITE 0x01000000 #define WT_VISIBILITY_ERR 0x00000010 /* * flags section: END diff --git a/src/include/gcc.h b/src/include/gcc.h index 01e33792d73..6ccc0de3c03 100644 --- a/src/include/gcc.h +++ b/src/include/gcc.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * @@ -156,8 +156,7 @@ __wt_atomic_cas_ptr(void *vp, void *old, void *new) #if defined(x86_64) || defined(__x86_64__) /* Pause instruction to prevent excess processor bus usage */ -#define WT_PAUSE() __asm__ volatile("pause\n" ::: "memory") - +#define WT_PAUSE() __asm__ volatile("pause\n" ::: "memory") #define WT_FULL_BARRIER() do { \ __asm__ volatile ("mfence" ::: "memory"); \ } while (0) @@ -169,7 +168,7 @@ __wt_atomic_cas_ptr(void *vp, void *old, void *new) } while (0) #elif defined(i386) || defined(__i386__) -#define WT_PAUSE() __asm__ volatile("pause\n" ::: "memory") +#define WT_PAUSE() __asm__ volatile("pause\n" ::: "memory") #define WT_FULL_BARRIER() do { \ __asm__ volatile ("lock; addl $0, 0(%%esp)" ::: "memory"); \ } while (0) @@ -177,23 +176,58 @@ __wt_atomic_cas_ptr(void *vp, void *old, void *new) #define WT_WRITE_BARRIER() WT_FULL_BARRIER() #elif defined(__PPC64__) || defined(PPC64) +/* ori 0,0,0 is the PPC64 noop instruction */ #define WT_PAUSE() __asm__ volatile("ori 0,0,0" ::: "memory") -#define WT_FULL_BARRIER() do { +#define WT_FULL_BARRIER() do { \ __asm__ volatile ("sync" ::: "memory"); \ } while (0) -#define WT_READ_BARRIER() WT_FULL_BARRIER() -#define WT_WRITE_BARRIER() WT_FULL_BARRIER() + +/* TODO: ISA 2.07 Elemental Memory Barriers would be better, + specifically mbll, and mbss, but they are not supported by POWER 8 */ +#define WT_READ_BARRIER() do { \ + __asm__ volatile ("lwsync" ::: "memory"); \ +} while (0) +#define WT_WRITE_BARRIER() do { \ + __asm__ volatile ("lwsync" ::: "memory"); \ +} while (0) #elif defined(__aarch64__) #define WT_PAUSE() __asm__ volatile("yield" ::: "memory") #define WT_FULL_BARRIER() do { \ - __asm__ volatile ("dsb sy" ::: "memory"); \ + __asm__ volatile ("dsb sy" ::: "memory"); \ +} while (0) +#define WT_READ_BARRIER() do { \ + __asm__ volatile ("dsb ld" ::: "memory"); \ +} while (0) +#define WT_WRITE_BARRIER() do { \ + __asm__ volatile ("dsb st" ::: "memory"); \ +} while (0) + +#elif defined(__s390x__) +#define WT_PAUSE() __asm__ volatile("lr 0,0" ::: "memory") +#define WT_FULL_BARRIER() do { \ + __asm__ volatile ("bcr 15,0\n" ::: "memory"); \ } while (0) +#define WT_READ_BARRIER() WT_FULL_BARRIER() +#define WT_WRITE_BARRIER() WT_FULL_BARRIER() + +#elif defined(__sparc__) +#define WT_PAUSE() __asm__ volatile("rd %%ccr, %%g0" ::: "memory") + +#define WT_FULL_BARRIER() do { \ + __asm__ volatile ("membar #StoreLoad" ::: "memory"); \ +} while (0) + +/* + * On UltraSparc machines, TSO is used, and so there is no need for membar. + * READ_BARRIER = #LoadLoad, and WRITE_BARRIER = #StoreStore are noop. + */ #define WT_READ_BARRIER() do { \ - __asm__ volatile ("dsb ld" ::: "memory"); \ + __asm__ volatile ("" ::: "memory"); \ } while (0) + #define WT_WRITE_BARRIER() do { \ - __asm__ volatile ("dsb st" ::: "memory"); \ + __asm__ volatile ("" ::: "memory"); \ } while (0) #else diff --git a/src/include/hardware.h b/src/include/hardware.h index 1ab2c3d39c4..93ed8a868b6 100644 --- a/src/include/hardware.h +++ b/src/include/hardware.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/intpack.i b/src/include/intpack.i index a13ad05451d..b27afd24e6c 100644 --- a/src/include/intpack.i +++ b/src/include/intpack.i @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/lint.h b/src/include/lint.h index f288fb98683..f8b17022968 100644 --- a/src/include/lint.h +++ b/src/include/lint.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/log.h b/src/include/log.h index 521de567fc0..577f6a888a3 100644 --- a/src/include/log.h +++ b/src/include/log.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * @@ -160,9 +160,9 @@ struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_logslot { #define WT_SLOT_INIT_FLAGS 0 -#define WT_WITH_SLOT_LOCK(session, log, op) do { \ +#define WT_WITH_SLOT_LOCK(session, log, ret, op) do { \ WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_SLOT)); \ - WT_WITH_LOCK(session, \ + WT_WITH_LOCK(session, ret, \ &log->log_slot_lock, WT_SESSION_LOCKED_SLOT, op); \ } while (0) @@ -267,6 +267,11 @@ struct __wt_log_desc { }; /* + * Flags for __wt_txn_op_printlog. + */ +#define WT_TXN_PRINTLOG_HEX 0x0001 /* Add hex output */ + +/* * WT_LOG_REC_DESC -- * A descriptor for a log record type. */ diff --git a/src/include/log.i b/src/include/log.i index ff309c31265..fcdbc72c388 100644 --- a/src/include/log.i +++ b/src/include/log.i @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/lsm.h b/src/include/lsm.h index d15dab3aa45..7cb3ccc895d 100644 --- a/src/include/lsm.h +++ b/src/include/lsm.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/meta.h b/src/include/meta.h index 938101e9caa..e29ec4202dc 100644 --- a/src/include/meta.h +++ b/src/include/meta.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * @@ -43,9 +43,9 @@ * WT_WITH_TURTLE_LOCK -- * Acquire the turtle file lock, perform an operation, drop the lock. */ -#define WT_WITH_TURTLE_LOCK(session, op) do { \ +#define WT_WITH_TURTLE_LOCK(session, ret, op) do { \ WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_TURTLE));\ - WT_WITH_LOCK(session, \ + WT_WITH_LOCK(session, ret, \ &S2C(session)->turtle_lock, WT_SESSION_LOCKED_TURTLE, op); \ } while (0) diff --git a/src/include/misc.h b/src/include/misc.h index e542baec642..78997661851 100644 --- a/src/include/misc.h +++ b/src/include/misc.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * @@ -268,3 +268,6 @@ union __wt_rand_state { uint32_t w, z; } x; }; + +/* Shared array for converting to hex */ +extern const u_char __wt_hex[]; diff --git a/src/include/misc.i b/src/include/misc.i index 75068706b70..04376441340 100644 --- a/src/include/misc.i +++ b/src/include/misc.i @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/msvc.h b/src/include/msvc.h index 8f5aa9abde8..99260a44875 100644 --- a/src/include/msvc.h +++ b/src/include/msvc.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/mutex.h b/src/include/mutex.h index b67e5e610e8..f798bfb3ece 100644 --- a/src/include/mutex.h +++ b/src/include/mutex.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/mutex.i b/src/include/mutex.i index 7eb042dd79f..52250f84ab3 100644 --- a/src/include/mutex.i +++ b/src/include/mutex.i @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/os.h b/src/include/os.h index d135fd9eb1f..fbba7f05f88 100644 --- a/src/include/os.h +++ b/src/include/os.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/os_windows.h b/src/include/os_windows.h index de97143335f..65938ac9f17 100644 --- a/src/include/os_windows.h +++ b/src/include/os_windows.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/packing.i b/src/include/packing.i index 9be38251703..784a55ef2ae 100644 --- a/src/include/packing.i +++ b/src/include/packing.i @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * @@ -25,7 +25,8 @@ typedef struct { char type; } WT_PACK_VALUE; -#define WT_PACK_VALUE_INIT { { 0 }, 0, 0, 0 } +/* Default to size = 1 if there is no size prefix. */ +#define WT_PACK_VALUE_INIT { { 0 }, 1, 0, 0 } #define WT_DECL_PACK_VALUE(pv) WT_PACK_VALUE pv = WT_PACK_VALUE_INIT typedef struct { @@ -151,7 +152,14 @@ next: if (pack->cur == pack->end) switch (pv->type) { case 'S': + return (0); case 's': + if (pv->size < 1) + WT_RET_MSG(pack->session, EINVAL, + "Fixed length strings must be at least 1 byte " + "in format '%.*s'", + (int)(pack->end - pack->orig), pack->orig); + return (0); case 'x': return (0); case 't': @@ -266,9 +274,10 @@ __pack_size(WT_SESSION_IMPL *session, WT_PACK_VALUE *pv) return (s); case 's': case 'S': - if (pv->type == 's' || pv->havesize) + if (pv->type == 's' || pv->havesize) { s = pv->size; - else + WT_ASSERT(session, s != 0); + } else s = strlen(pv->u.s) + 1; return (s); case 'U': @@ -460,9 +469,10 @@ __unpack_read(WT_SESSION_IMPL *session, break; case 's': case 'S': - if (pv->type == 's' || pv->havesize) + if (pv->type == 's' || pv->havesize) { s = pv->size; - else + WT_ASSERT(session, s != 0); + } else s = strlen((const char *)*pp) + 1; if (s > 0) pv->u.s = (const char *)*pp; @@ -667,7 +677,6 @@ __wt_struct_unpackv(WT_SESSION_IMPL *session, if (fmt[0] != '\0' && fmt[1] == '\0') { pv.type = fmt[0]; - pv.size = 1; if ((ret = __unpack_read(session, &pv, &p, size)) == 0) WT_UNPACK_PUT(session, pv, ap); return (0); diff --git a/src/include/posix.h b/src/include/posix.h index 1aa629c98e7..2593c7b6797 100644 --- a/src/include/posix.h +++ b/src/include/posix.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/schema.h b/src/include/schema.h index 023fd398f1c..88a3a39f8b3 100644 --- a/src/include/schema.h +++ b/src/include/schema.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * @@ -82,9 +82,17 @@ struct __wt_table { * WT_WITH_LOCK -- * Acquire a lock, perform an operation, drop the lock. */ -#define WT_WITH_LOCK(session, lock, flag, op) do { \ +#define WT_WITH_LOCK(session, ret, lock, flag, op) do { \ + ret = 0; \ if (F_ISSET(session, (flag))) { \ op; \ + } else if (F_ISSET(session, WT_SESSION_LOCK_NO_WAIT)) { \ + if ((ret = __wt_spin_trylock(session, (lock))) == 0) { \ + F_SET(session, (flag)); \ + op; \ + F_CLR(session, (flag)); \ + __wt_spin_unlock(session, (lock)); \ + } \ } else { \ __wt_spin_lock(session, (lock)); \ F_SET(session, (flag)); \ @@ -98,16 +106,16 @@ struct __wt_table { * WT_WITH_CHECKPOINT_LOCK -- * Acquire the checkpoint lock, perform an operation, drop the lock. */ -#define WT_WITH_CHECKPOINT_LOCK(session, op) \ - WT_WITH_LOCK(session, \ +#define WT_WITH_CHECKPOINT_LOCK(session, ret, op) \ + WT_WITH_LOCK(session, ret, \ &S2C(session)->checkpoint_lock, WT_SESSION_LOCKED_CHECKPOINT, op) /* * WT_WITH_HANDLE_LIST_LOCK -- * Acquire the data handle list lock, perform an operation, drop the lock. */ -#define WT_WITH_HANDLE_LIST_LOCK(session, op) \ - WT_WITH_LOCK(session, \ +#define WT_WITH_HANDLE_LIST_LOCK(session, ret, op) \ + WT_WITH_LOCK(session, ret, \ &S2C(session)->dhandle_lock, WT_SESSION_LOCKED_HANDLE_LIST, op) /* * WT_WITH_SCHEMA_LOCK -- @@ -115,12 +123,12 @@ struct __wt_table { * Check that we are not already holding some other lock: the schema lock * must be taken first. */ -#define WT_WITH_SCHEMA_LOCK(session, op) do { \ +#define WT_WITH_SCHEMA_LOCK(session, ret, op) do { \ WT_ASSERT(session, \ F_ISSET(session, WT_SESSION_LOCKED_SCHEMA) || \ !F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST | \ WT_SESSION_NO_SCHEMA_LOCK | WT_SESSION_LOCKED_TABLE)); \ - WT_WITH_LOCK(session, \ + WT_WITH_LOCK(session, ret, \ &S2C(session)->schema_lock, WT_SESSION_LOCKED_SCHEMA, op); \ } while (0) @@ -128,11 +136,11 @@ struct __wt_table { * WT_WITH_TABLE_LOCK -- * Acquire the table lock, perform an operation, drop the lock. */ -#define WT_WITH_TABLE_LOCK(session, op) do { \ +#define WT_WITH_TABLE_LOCK(session, ret, op) do { \ WT_ASSERT(session, \ F_ISSET(session, WT_SESSION_LOCKED_TABLE) || \ !F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)); \ - WT_WITH_LOCK(session, \ + WT_WITH_LOCK(session, ret, \ &S2C(session)->table_lock, WT_SESSION_LOCKED_TABLE, op); \ } while (0) diff --git a/src/include/serial.i b/src/include/serial.i index ca22ce12d81..fa920de7e37 100644 --- a/src/include/serial.i +++ b/src/include/serial.i @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/session.h b/src/include/session.h index 5c3bcfb8ed0..5c3291230b4 100644 --- a/src/include/session.h +++ b/src/include/session.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * @@ -74,19 +74,22 @@ struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_session_impl { TAILQ_HEAD(__cursors, __wt_cursor) cursors; WT_CURSOR_BACKUP *bkp_cursor; /* Hot backup cursor */ - WT_COMPACT *compact; /* Compact state */ + + WT_COMPACT *compact; /* Compaction information */ + enum { WT_COMPACT_NONE=0, + WT_COMPACT_RUNNING, WT_COMPACT_SUCCESS } compact_state; /* * Lookaside table cursor, sweep and eviction worker threads only. */ WT_CURSOR *las_cursor; /* Lookaside table cursor */ - WT_DATA_HANDLE *meta_dhandle; /* Metadata file */ - void *meta_track; /* Metadata operation tracking */ - void *meta_track_next; /* Current position */ - void *meta_track_sub; /* Child transaction / save point */ - size_t meta_track_alloc; /* Currently allocated */ - int meta_track_nest; /* Nesting level of meta transaction */ + WT_CURSOR *meta_cursor; /* Metadata file */ + void *meta_track; /* Metadata operation tracking */ + void *meta_track_next; /* Current position */ + void *meta_track_sub; /* Child transaction / save point */ + size_t meta_track_alloc; /* Currently allocated */ + int meta_track_nest; /* Nesting level of meta transaction */ #define WT_META_TRACKING(session) (session->meta_track_next != NULL) /* @@ -134,8 +137,6 @@ struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_session_impl { void *reconcile; /* Reconciliation support */ int (*reconcile_cleanup)(WT_SESSION_IMPL *); - bool compaction; /* Compaction did some work */ - uint32_t flags; /* diff --git a/src/include/stat.h b/src/include/stat.h index dfe7ee5c6cd..51d2fa332e7 100644 --- a/src/include/stat.h +++ b/src/include/stat.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * @@ -139,8 +139,8 @@ __wt_stats_clear(void *stats_arg, int slot) */ #define WT_STAT_READ(stats, fld) \ __wt_stats_aggregate(stats, WT_STATS_FIELD_TO_SLOT(stats, fld)) -#define WT_STAT_WRITE(session, stats, fld) \ - ((stats)[WT_STATS_SLOT_ID(session)]->fld); +#define WT_STAT_WRITE(stats, fld, v) \ + (stats)->fld = (int64_t)(v) #define WT_STAT_DECRV(session, stats, fld, value) \ (stats)[WT_STATS_SLOT_ID(session)]->fld -= (int64_t)(value) diff --git a/src/include/txn.h b/src/include/txn.h index 936164fa9a7..1e82e2d982a 100644 --- a/src/include/txn.h +++ b/src/include/txn.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/txn.i b/src/include/txn.i index 1005d4a395d..46f2ff3e5f1 100644 --- a/src/include/txn.i +++ b/src/include/txn.i @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * @@ -185,9 +185,7 @@ __wt_txn_visible(WT_SESSION_IMPL *session, uint64_t id) if (id == WT_TXN_ABORTED) return (false); - /* - * Read-uncommitted transactions see all other changes. - */ + /* Read-uncommitted transactions see all other changes. */ if (txn->isolation == WT_ISO_READ_UNCOMMITTED) return (true); diff --git a/src/include/verify_build.h b/src/include/verify_build.h index 6a97def12be..477b9b7c134 100644 --- a/src/include/verify_build.h +++ b/src/include/verify_build.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index bdd8bb65910..676f95d9b05 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * @@ -566,20 +566,21 @@ struct __wt_cursor { */ const char *internal_uri; -#define WT_CURSTD_APPEND 0x0001 -#define WT_CURSTD_BULK 0x0002 -#define WT_CURSTD_DUMP_HEX 0x0004 -#define WT_CURSTD_DUMP_JSON 0x0008 -#define WT_CURSTD_DUMP_PRINT 0x0010 -#define WT_CURSTD_KEY_EXT 0x0020 /* Key points out of the tree. */ -#define WT_CURSTD_KEY_INT 0x0040 /* Key points into the tree. */ +#define WT_CURSTD_APPEND 0x00001 +#define WT_CURSTD_BULK 0x00002 +#define WT_CURSTD_DUMP_HEX 0x00004 +#define WT_CURSTD_DUMP_JSON 0x00008 +#define WT_CURSTD_DUMP_PRINT 0x00010 +#define WT_CURSTD_JOINED 0x00020 +#define WT_CURSTD_KEY_EXT 0x00040 /* Key points out of the tree. */ +#define WT_CURSTD_KEY_INT 0x00080 /* Key points into the tree. */ #define WT_CURSTD_KEY_SET (WT_CURSTD_KEY_EXT | WT_CURSTD_KEY_INT) -#define WT_CURSTD_JOINED 0x0080 -#define WT_CURSTD_OPEN 0x0100 -#define WT_CURSTD_OVERWRITE 0x0200 -#define WT_CURSTD_RAW 0x0400 -#define WT_CURSTD_VALUE_EXT 0x0800 /* Value points out of the tree. */ -#define WT_CURSTD_VALUE_INT 0x1000 /* Value points into the tree. */ +#define WT_CURSTD_META_INUSE 0x00100 +#define WT_CURSTD_OPEN 0x00200 +#define WT_CURSTD_OVERWRITE 0x00400 +#define WT_CURSTD_RAW 0x00800 +#define WT_CURSTD_VALUE_EXT 0x01000 /* Value points out of the tree. */ +#define WT_CURSTD_VALUE_INT 0x02000 /* Value points into the tree. */ #define WT_CURSTD_VALUE_SET (WT_CURSTD_VALUE_EXT | WT_CURSTD_VALUE_INT) uint32_t flags; #endif @@ -1236,6 +1237,9 @@ struct __wt_session { * @configstart{WT_SESSION.drop, see dist/api_data.py} * @config{force, return success if the object does not exist., a * boolean flag; default \c false.} + * @config{lock_wait, wait for locks\, if \c lock_wait=false\, fail if + * any required locks are not available immediately., a boolean flag; + * default \c true.} * @config{remove_files, should the underlying files be removed?., a * boolean flag; default \c true.} * @configend @@ -1329,6 +1333,19 @@ struct __wt_session { int __F(log_printf)(WT_SESSION *session, const char *fmt, ...); /*! + * Rebalance a table, see @ref rebalance. + * + * @snippet ex_all.c Rebalance a table + * + * @param session the session handle + * @param uri the current URI of the object, such as \c "table:mytable" + * @configempty{WT_SESSION.rebalance, see dist/api_data.py} + * @ebusy_errors + */ + int __F(rebalance)( + WT_SESSION *session, const char *uri, const char *config); + + /*! * Rename an object. * * @snippet ex_all.c Rename a table @@ -1920,9 +1937,10 @@ struct __wt_connection { * "block"\, \c "checkpoint"\, \c "compact"\, \c "evict"\, \c * "evictserver"\, \c "fileops"\, \c "log"\, \c "lsm"\, \c * "lsm_manager"\, \c "metadata"\, \c "mutex"\, \c "overflow"\, \c - * "read"\, \c "reconcile"\, \c "recovery"\, \c "salvage"\, \c - * "shared_cache"\, \c "split"\, \c "temporary"\, \c "transaction"\, \c - * "verify"\, \c "version"\, \c "write"; default empty.} + * "read"\, \c "rebalance"\, \c "reconcile"\, \c "recovery"\, \c + * "salvage"\, \c "shared_cache"\, \c "split"\, \c "temporary"\, \c + * "transaction"\, \c "verify"\, \c "version"\, \c "write"; default + * empty.} * @configend * @errors */ @@ -2405,9 +2423,9 @@ struct __wt_connection { * values chosen from the following options: \c "api"\, \c "block"\, \c * "checkpoint"\, \c "compact"\, \c "evict"\, \c "evictserver"\, \c "fileops"\, * \c "log"\, \c "lsm"\, \c "lsm_manager"\, \c "metadata"\, \c "mutex"\, \c - * "overflow"\, \c "read"\, \c "reconcile"\, \c "recovery"\, \c "salvage"\, \c - * "shared_cache"\, \c "split"\, \c "temporary"\, \c "transaction"\, \c - * "verify"\, \c "version"\, \c "write"; default empty.} + * "overflow"\, \c "read"\, \c "rebalance"\, \c "reconcile"\, \c "recovery"\, \c + * "salvage"\, \c "shared_cache"\, \c "split"\, \c "temporary"\, \c + * "transaction"\, \c "verify"\, \c "version"\, \c "write"; default empty.} * @config{write_through, Use \c FILE_FLAG_WRITE_THROUGH on Windows to write to * files. Ignored on non-Windows systems. Options are given as a list\, such * as <code>"write_through=[data]"</code>. Configuring \c write_through requires diff --git a/src/include/wiredtiger_ext.h b/src/include/wiredtiger_ext.h index 28fd8e18329..0db876b56f3 100644 --- a/src/include/wiredtiger_ext.h +++ b/src/include/wiredtiger_ext.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/wt_internal.h b/src/include/wt_internal.h index 0a1e143ce70..54b5dfd19f4 100644 --- a/src/include/wt_internal.h +++ b/src/include/wt_internal.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * |