summary | refs | log | tree | commit | diff
path: root/src/third_party
diff options
context:
space:
mode:
author: Michael Cahill <michael.cahill@mongodb.com> 2016-02-19 15:36:42 +1100
committer: Michael Cahill <michael.cahill@mongodb.com> 2016-02-19 15:36:47 +1100
commit: 70db6ed51f90f627570de9bf32ab8c5cd23886ca (patch)
tree: 1f5ae19d38fad8488657ae80ae9014b832ec858e /src/third_party
parent: da2441b59b742c077306be6515c999c33cd955a6 (diff)
download: mongo-70db6ed51f90f627570de9bf32ab8c5cd23886ca.tar.gz
Import wiredtiger-wiredtiger-2.7.0-675-g4f38287.tar.gz from wiredtiger branch mongodb-3.4
ref: cc96d99..4f38287

SERVER-22676 WiredTiger fails to open databases created by 3.0.0 or 3.0.1
WT-2280 Add CRC32 Optimized code for PPC64LE
WT-2295 WT_SESSION.create does a full-scan of the main table
WT-2346 Don't hold schema lock during checkpoint I/O
WT-2361 Column-store starting record number error
WT-2367 WT_CURSOR.next out-of-order returns failure
WT-2374 Read error on index file
WT-2375 Need tests for collators
WT-2382 Problem with custom collator for 'u' format with join cursor
WT-2387 Fix cursor random unit test on Windows
WT-2390 OS X build is broken
WT-2393 Unnecessary error handling labels.
WT-2396 Jenkins Spinlock GCC task Hung
WT-2397 Cursor traversal from end of the tree skips records.
WT-2399 Add test case that verifies cursor traversal
WT-2411 LSM drop hang
Diffstat (limited to 'src/third_party')
-rw-r--r-- src/third_party/wiredtiger/build_posix/Make.subdirs | 1
-rw-r--r-- src/third_party/wiredtiger/build_posix/configure.ac.in | 1
-rw-r--r-- src/third_party/wiredtiger/dist/filelist | 2
-rw-r--r-- src/third_party/wiredtiger/dist/flags.py | 1
-rw-r--r-- src/third_party/wiredtiger/dist/s_funcs.list | 2
-rwxr-xr-x src/third_party/wiredtiger/dist/s_longlines | 5
-rw-r--r-- src/third_party/wiredtiger/dist/s_string.ok | 3
-rwxr-xr-x src/third_party/wiredtiger/dist/s_style | 4
-rwxr-xr-x src/third_party/wiredtiger/dist/s_whitespace | 7
-rwxr-xr-x src/third_party/wiredtiger/dist/s_win | 6
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_curprev.c | 3
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_split.c | 60
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_sync.c | 14
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_walk.c | 153
-rw-r--r-- src/third_party/wiredtiger/src/btree/col_modify.c | 22
-rw-r--r-- src/third_party/wiredtiger/src/btree/col_srch.c | 34
-rw-r--r-- src/third_party/wiredtiger/src/btree/row_srch.c | 135
-rw-r--r-- src/third_party/wiredtiger/src/conn/conn_dhandle.c | 115
-rw-r--r-- src/third_party/wiredtiger/src/conn/conn_handle.c | 2
-rw-r--r-- src/third_party/wiredtiger/src/conn/conn_log.c | 2
-rw-r--r-- src/third_party/wiredtiger/src/cursor/cur_join.c | 168
-rw-r--r-- src/third_party/wiredtiger/src/cursor/cur_stat.c | 6
-rw-r--r-- src/third_party/wiredtiger/src/docs/checkpoint.dox | 34
-rw-r--r-- src/third_party/wiredtiger/src/docs/license.dox | 36
-rw-r--r-- src/third_party/wiredtiger/src/docs/spell.ok | 4
-rw-r--r-- src/third_party/wiredtiger/src/docs/upgrading.dox | 35
-rw-r--r-- src/third_party/wiredtiger/src/evict/evict_lru.c | 4
-rw-r--r-- src/third_party/wiredtiger/src/include/btree.i | 28
-rw-r--r-- src/third_party/wiredtiger/src/include/column.i | 30
-rw-r--r-- src/third_party/wiredtiger/src/include/connection.h | 1
-rw-r--r-- src/third_party/wiredtiger/src/include/cursor.h | 8
-rw-r--r-- src/third_party/wiredtiger/src/include/extern.h | 6
-rw-r--r-- src/third_party/wiredtiger/src/include/flags.h | 29
-rw-r--r-- src/third_party/wiredtiger/src/include/meta.h | 1
-rw-r--r-- src/third_party/wiredtiger/src/include/schema.h | 18
-rw-r--r-- src/third_party/wiredtiger/src/include/session.h | 5
-rw-r--r-- src/third_party/wiredtiger/src/lsm/lsm_work_unit.c | 21
-rw-r--r-- src/third_party/wiredtiger/src/meta/meta_apply.c | 20
-rw-r--r-- src/third_party/wiredtiger/src/meta/meta_track.c | 11
-rw-r--r-- src/third_party/wiredtiger/src/meta/meta_turtle.c | 25
-rw-r--r-- src/third_party/wiredtiger/src/packing/pack_impl.c | 57
-rw-r--r-- src/third_party/wiredtiger/src/reconcile/rec_write.c | 136
-rw-r--r-- src/third_party/wiredtiger/src/schema/schema_create.c | 61
-rw-r--r-- src/third_party/wiredtiger/src/schema/schema_worker.c | 17
-rw-r--r-- src/third_party/wiredtiger/src/session/session_api.c | 14
-rw-r--r-- src/third_party/wiredtiger/src/support/cksum.c | 19
-rw-r--r-- src/third_party/wiredtiger/src/support/power8/LICENSE.TXT | 476
-rw-r--r-- src/third_party/wiredtiger/src/support/power8/README.md | 208
-rw-r--r-- src/third_party/wiredtiger/src/support/power8/crc32.S | 741
-rw-r--r-- src/third_party/wiredtiger/src/support/power8/crc32_constants.h | 901
-rw-r--r-- src/third_party/wiredtiger/src/support/power8/crc32_wrapper.c | 66
-rw-r--r-- src/third_party/wiredtiger/src/support/power8/ppc-opcode.h | 23
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn_ckpt.c | 67
-rw-r--r-- src/third_party/wiredtiger/test/cursor_order/Makefile.am | 13
-rw-r--r-- src/third_party/wiredtiger/test/cursor_order/cursor_order.c | 303
-rw-r--r-- src/third_party/wiredtiger/test/cursor_order/cursor_order.h | 54
-rw-r--r-- src/third_party/wiredtiger/test/cursor_order/cursor_order_file.c | 130
-rw-r--r-- src/third_party/wiredtiger/test/cursor_order/cursor_order_ops.c | 364
-rw-r--r-- src/third_party/wiredtiger/test/format/config.h | 4
-rw-r--r-- src/third_party/wiredtiger/test/format/format.h | 2
-rw-r--r-- src/third_party/wiredtiger/test/format/t.c | 10
-rw-r--r-- src/third_party/wiredtiger/test/format/util.c | 2
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_backup05.py | 6
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_bulk02.py | 10
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_cursor_random.py | 2
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_index01.py | 4
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_schema02.py | 4
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_txn04.py | 17
68 files changed, 4076 insertions, 697 deletions
diff --git a/src/third_party/wiredtiger/build_posix/Make.subdirs b/src/third_party/wiredtiger/build_posix/Make.subdirs
index e1f8a05c613..bc4283a4876 100644
--- a/src/third_party/wiredtiger/build_posix/Make.subdirs
+++ b/src/third_party/wiredtiger/build_posix/Make.subdirs
@@ -26,6 +26,7 @@ lang/python PYTHON
# Make the tests
test/bloom
test/checkpoint
+test/cursor_order
test/fops
test/format
test/huge
diff --git a/src/third_party/wiredtiger/build_posix/configure.ac.in b/src/third_party/wiredtiger/build_posix/configure.ac.in
index 875c8b436a8..06d73e2fe12 100644
--- a/src/third_party/wiredtiger/build_posix/configure.ac.in
+++ b/src/third_party/wiredtiger/build_posix/configure.ac.in
@@ -32,6 +32,7 @@ AC_SUBST([LIBTOOL_DEPS])
AC_PROG_CC(cc gcc)
AC_PROG_CXX(c++ g++)
+AM_PROG_AS(as gas)
if test "$GCC" = "yes"; then
# The Solaris gcc compiler gets the additional -pthreads flag.
diff --git a/src/third_party/wiredtiger/dist/filelist b/src/third_party/wiredtiger/dist/filelist
index edd59435841..4ed7d7e3beb 100644
--- a/src/third_party/wiredtiger/dist/filelist
+++ b/src/third_party/wiredtiger/dist/filelist
@@ -163,6 +163,8 @@ src/support/hazard.c
src/support/hex.c
src/support/huffman.c
src/support/pow.c
+src/support/power8/crc32.S
+src/support/power8/crc32_wrapper.c
src/support/rand.c
src/support/scratch.c
src/support/stat.c
diff --git a/src/third_party/wiredtiger/dist/flags.py b/src/third_party/wiredtiger/dist/flags.py
index b97235b965a..d5784630ab8 100644
--- a/src/third_party/wiredtiger/dist/flags.py
+++ b/src/third_party/wiredtiger/dist/flags.py
@@ -114,6 +114,7 @@ flags = {
'SESSION_LOCK_NO_WAIT',
'SESSION_LOCKED_CHECKPOINT',
'SESSION_LOCKED_HANDLE_LIST',
+ 'SESSION_LOCKED_METADATA',
'SESSION_LOCKED_SCHEMA',
'SESSION_LOCKED_SLOT',
'SESSION_LOCKED_TABLE',
diff --git a/src/third_party/wiredtiger/dist/s_funcs.list b/src/third_party/wiredtiger/dist/s_funcs.list
index ed6cf43bb2f..8d32eecdfb7 100644
--- a/src/third_party/wiredtiger/dist/s_funcs.list
+++ b/src/third_party/wiredtiger/dist/s_funcs.list
@@ -1,4 +1,6 @@
# List of functions that aren't found by s_funcs, but that's OK.
+FUNC_END
+FUNC_START
WT_CURDUMP_PASS
__bit_ffs
__bit_nclr
diff --git a/src/third_party/wiredtiger/dist/s_longlines b/src/third_party/wiredtiger/dist/s_longlines
index decedb58f44..000f33d51d5 100755
--- a/src/third_party/wiredtiger/dist/s_longlines
+++ b/src/third_party/wiredtiger/dist/s_longlines
@@ -9,8 +9,9 @@ l=`(cd .. &&
find dist -name '*.py' &&
find src -name '*.in') |
sed -e '/dist\/stat_data\.py/d' \
- -e '/support\/stat\.c/d' \
- -e '/include\/extern\.h/d'`
+ -e '/include\/extern\.h/d' \
+ -e '/support\/power8/d' \
+ -e '/support\/stat\.c/d'`
for f in $l ; do
expand -t8 < ../$f | awk -- \
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok
index 19fa27cd719..7a8f3a9b0bd 100644
--- a/src/third_party/wiredtiger/dist/s_string.ok
+++ b/src/third_party/wiredtiger/dist/s_string.ok
@@ -189,6 +189,7 @@ MALLOC
MEM
MEMALIGN
MERCHANTABILITY
+METADATA
MONGODB
MSVC
MULTIBLOCK
@@ -686,6 +687,7 @@ jnr
jrx
json
kb
+kbits
keycmp
keyid
keyv
@@ -838,6 +840,7 @@ pos
posint
posix
postsize
+powerpc
pragmas
pre
prealloc
diff --git a/src/third_party/wiredtiger/dist/s_style b/src/third_party/wiredtiger/dist/s_style
index 44a5bdda741..78fb7a6eb03 100755
--- a/src/third_party/wiredtiger/dist/s_style
+++ b/src/third_party/wiredtiger/dist/s_style
@@ -18,7 +18,9 @@ if [ $# -ne 1 ]; then
find bench examples ext src test \
-name '*.[chisy]' -o -name '*.in' -o -name '*.dox' |
- sed -e '/Makefile.in/d' -e '/build_win\/wiredtiger_config.h/d' |
+ sed -e '/Makefile.in/d' \
+ -e '/build_win\/wiredtiger_config.h/d' \
+ -e '/support\/power8/d' |
xargs $xp -n 1 -I{} sh ./dist/s_style {}
else
# General style correction and cleanup for a single file
diff --git a/src/third_party/wiredtiger/dist/s_whitespace b/src/third_party/wiredtiger/dist/s_whitespace
index d13de4b5989..74820a4f0e9 100755
--- a/src/third_party/wiredtiger/dist/s_whitespace
+++ b/src/third_party/wiredtiger/dist/s_whitespace
@@ -36,10 +36,9 @@ for f in `find bench examples ext src test \
-name '*.[chi]' -o \
-name '*.dox' -o \
-name '*.in' -o \
- -name 'Makefile.am'`; do
- if expr "$f" : ".*/Makefile.in" > /dev/null; then
- continue
- fi
+ -name 'Makefile.am' |
+ sed -e '/Makefile.in/d' \
+ -e '/support\/power8/d'`; do
whitespace_and_empty_line $f
done
diff --git a/src/third_party/wiredtiger/dist/s_win b/src/third_party/wiredtiger/dist/s_win
index 1eb4702d517..0b7d5184037 100755
--- a/src/third_party/wiredtiger/dist/s_win
+++ b/src/third_party/wiredtiger/dist/s_win
@@ -44,7 +44,7 @@ win_filelist()
f='../build_win/filelist.win'
# Process the files for which there's a Windows-specific version, then
- # append Windows-only files. (There aren't yet any POSIX-only files.)
+ # append Windows-only files and discard POSIX-only files.
(sed \
-e 's;os_posix/os_dir.c;os_win/os_dir.c;' \
-e 's;os_posix/os_dlopen.c;os_win/os_dlopen.c;' \
@@ -71,7 +71,9 @@ win_filelist()
-e 's;os_posix/os_sleep.c;os_win/os_sleep.c;' \
-e 's;os_posix/os_thread.c;os_win/os_thread.c;' \
-e 's;os_posix/os_time.c;os_win/os_time.c;' \
- -e 's;os_posix/os_yield.c;os_win/os_yield.c;'
+ -e 's;os_posix/os_yield.c;os_win/os_yield.c;' \
+ -e '/src\/support\/power8\/crc32.S/d' \
+ -e '/src\/support\/power8\/crc32_wrapper.c/d'
echo 'src/os_win/os_snprintf.c'
echo 'src/os_win/os_vsnprintf.c') < filelist | sort > $t
cmp $t $f > /dev/null 2>&1 ||
diff --git a/src/third_party/wiredtiger/src/btree/bt_curprev.c b/src/third_party/wiredtiger/src/btree/bt_curprev.c
index a083ec4016e..7475c0f1312 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curprev.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curprev.c
@@ -51,7 +51,8 @@ restart:
if (cbt->btree->type == BTREE_ROW) {
key.data = WT_INSERT_KEY(current);
key.size = WT_INSERT_KEY_SIZE(current);
- WT_RET(__wt_search_insert(session, cbt, &key));
+ WT_RET(__wt_search_insert(
+ session, cbt, cbt->ins_head, &key));
} else
cbt->ins = __col_insert_search(cbt->ins_head,
cbt->ins_stack, cbt->next_stack,
diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c
index bd38451d5d1..3dea03316ce 100644
--- a/src/third_party/wiredtiger/src/btree/bt_split.c
+++ b/src/third_party/wiredtiger/src/btree/bt_split.c
@@ -1383,11 +1383,27 @@ __split_internal_should_split(WT_SESSION_IMPL *session, WT_REF *ref)
static int
__split_parent_climb(WT_SESSION_IMPL *session, WT_PAGE *page, bool page_hazard)
{
+ WT_BTREE *btree;
WT_DECL_RET;
WT_PAGE *parent;
WT_REF *ref;
bool parent_hazard;
+ btree = S2BT(session);
+
+ /*
+ * Disallow internal splits during the final pass of a checkpoint. Most
+ * splits are already disallowed during checkpoints, but an important
+ * exception is insert splits. The danger is an insert split creates a
+ * new chunk of the namespace, and then the internal split will move it
+ * to a different part of the tree where it will be written; in other
+ * words, in one part of the tree we'll skip the newly created insert
+ * split chunk, but we'll write it upon finding it in a different part
+ * of the tree.
+ */
+ if (btree->checkpointing != WT_CKPT_OFF)
+ return (__split_internal_unlock(session, page, page_hazard));
+
/*
* Page splits trickle up the tree, that is, as leaf pages grow large
* enough and are evicted, they'll split into their parent. And, as
@@ -1771,8 +1787,8 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
type, WT_INSERT_RECNO(moved_ins), 0, false, &right));
/*
- * The new page is dirty by definition, column-store splits update the
- * page-modify structure, so create it now.
+ * The new page is dirty by definition, plus column-store splits update
+ * the page-modify structure, so create it now.
*/
WT_ERR(__wt_page_modify_init(session, right));
__wt_page_modify_set(session, right);
@@ -1813,15 +1829,6 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
}
/*
- * We modified the page above, which will have set the first dirty
- * transaction to the last transaction current running. However, the
- * updates we installed may be older than that. Set the first dirty
- * transaction to an impossibly old value so this page is never skipped
- * in a checkpoint.
- */
- right->modify->first_dirty_txn = WT_TXN_FIRST;
-
- /*
* Calculate how much memory we're moving: figure out how deep the skip
* list stack is for the element we are moving, and the memory used by
* the item's list of updates.
@@ -1919,6 +1926,24 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
#endif
/*
+ * We perform insert splits concurrently with checkpoints, where the
+ * requirement is a checkpoint must include either the original page
+ * or both new pages. The page we're splitting is dirty, but that's
+ * insufficient: set the first dirty transaction to an impossibly old
+ * value so this page is not skipped by a checkpoint.
+ */
+ page->modify->first_dirty_txn = WT_TXN_FIRST;
+
+ /*
+ * We modified the page above, which will have set the first dirty
+ * transaction to the last transaction current running. However, the
+ * updates we installed may be older than that. Set the first dirty
+ * transaction to an impossibly old value so this page is never skipped
+ * in a checkpoint.
+ */
+ right->modify->first_dirty_txn = WT_TXN_FIRST;
+
+ /*
* Update the page accounting.
*
* XXX
@@ -1928,10 +1953,14 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
__wt_cache_page_inmem_incr(session, right, right_incr);
/*
- * Split into the parent. On successful return, the original page is no
- * longer locked, so we cannot safely look at it.
+ * The act of splitting into the parent releases the pages for eviction;
+ * ensure the page contents are consistent.
+ */
+ WT_WRITE_BARRIER();
+
+ /*
+ * Split into the parent.
*/
- page = NULL;
if ((ret = __split_parent(
session, ref, split_ref, 2, parent_incr, false, true)) == 0)
return (0);
@@ -1941,7 +1970,8 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
*
* Reset the split column-store page record.
*/
- page->modify->mod_split_recno = WT_RECNO_OOB;
+ if (type != WT_PAGE_ROW_LEAF)
+ page->modify->mod_split_recno = WT_RECNO_OOB;
/*
* Clear the allocated page's reference to the moved insert list element
diff --git a/src/third_party/wiredtiger/src/btree/bt_sync.c b/src/third_party/wiredtiger/src/btree/bt_sync.c
index 5cbd8d1e996..bbfb06c636f 100644
--- a/src/third_party/wiredtiger/src/btree/bt_sync.c
+++ b/src/third_party/wiredtiger/src/btree/bt_sync.c
@@ -105,13 +105,13 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
__wt_spin_lock(session, &btree->flush_lock);
/*
- * When internal pages are being reconciled by checkpoint their
- * child pages cannot disappear from underneath them or be split
- * into them, nor can underlying blocks be freed until the block
- * lists for the checkpoint are stable. Set the checkpointing
- * flag to block eviction of dirty pages until the checkpoint's
- * internal page pass is complete, then wait for any existing
- * eviction to complete.
+ * In the final checkpoint pass, child pages cannot be evicted
+ * from underneath internal pages nor can underlying blocks be
+ * freed until the checkpoint's block lists are stable. Also,
+ * we cannot split child pages into parents unless we know the
+ * final pass will write a consistent view of that namespace.
+ * Set the checkpointing flag to block such actions and wait for
+ * any problematic eviction or page splits to complete.
*/
WT_PUBLISH(btree->checkpointing, WT_CKPT_PREPARE);
diff --git a/src/third_party/wiredtiger/src/btree/bt_walk.c b/src/third_party/wiredtiger/src/btree/bt_walk.c
index d7785c689d9..55b11d7b2d1 100644
--- a/src/third_party/wiredtiger/src/btree/bt_walk.c
+++ b/src/third_party/wiredtiger/src/btree/bt_walk.c
@@ -89,11 +89,11 @@ __ref_is_leaf(WT_REF *ref)
}
/*
- * __page_ascend --
+ * __ref_ascend --
* Ascend the tree one level.
*/
-static void
-__page_ascend(WT_SESSION_IMPL *session,
+static inline void
+__ref_ascend(WT_SESSION_IMPL *session,
WT_REF **refp, WT_PAGE_INDEX **pindexp, uint32_t *slotp)
{
WT_REF *parent_ref, *ref;
@@ -163,12 +163,12 @@ __page_ascend(WT_SESSION_IMPL *session,
}
/*
- * __page_descend --
- * Descend the tree one level.
+ * __ref_descend_prev --
+ * Descend the tree one level, during a previous-cursor walk.
*/
-static void
-__page_descend(WT_SESSION_IMPL *session,
- WT_PAGE *page, WT_PAGE_INDEX **pindexp, uint32_t *slotp, bool prev)
+static inline void
+__ref_descend_prev(
+ WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE_INDEX **pindexp)
{
WT_PAGE_INDEX *pindex;
@@ -177,9 +177,6 @@ __page_descend(WT_SESSION_IMPL *session,
* we have a hazard pointer.
*/
for (;; __wt_yield()) {
- WT_INTL_INDEX_GET(session, page, pindex);
- *slotp = prev ? pindex->entries - 1 : 0;
-
/*
* There's a split race when a cursor moving backwards through
* the tree descends the tree. If we're splitting an internal
@@ -233,21 +230,41 @@ __page_descend(WT_SESSION_IMPL *session,
* being split and part of its namespace moved. We have the
* correct page and we don't have to move, all we have to do is
* wait until the split page's page index is updated.
- *
- * No test is necessary for a next-cursor movement because we
- * do right-hand splits on internal pages and the initial part
- * of the page's namespace won't change as part of a split.
- * Instead of testing the direction boolean, do the test the
- * previous cursor movement requires in all cases, even though
- * it will always succeed for a next-cursor movement.
*/
- if (pindex->index[*slotp]->home == page)
+ WT_INTL_INDEX_GET(session, ref->page, pindex);
+ if (pindex->index[pindex->entries - 1]->home == ref->page)
break;
}
*pindexp = pindex;
}
/*
+ * __ref_initial_descent_prev --
+ * Descend the tree one level, when setting up the initial cursor position
+ * for a previous-cursor walk.
+ */
+static inline bool
+__ref_initial_descent_prev(
+ WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE_INDEX **pindexp)
+{
+ WT_PAGE_INDEX *pindex;
+
+ /*
+ * We're passed a child page into which we're descending, and on which
+ * we have a hazard pointer.
+ *
+ * Acquire a page index for the child page and then confirm we haven't
+ * raced with a parent split.
+ */
+ WT_INTL_INDEX_GET(session, ref->page, pindex);
+ if (__wt_split_descent_race(session, ref, *pindexp))
+ return (false);
+
+ *pindexp = pindex;
+ return (true);
+}
+
+/*
* __tree_walk_internal --
* Move to the next/previous page in the tree.
*/
@@ -259,11 +276,12 @@ __tree_walk_internal(WT_SESSION_IMPL *session,
WT_DECL_RET;
WT_PAGE_INDEX *pindex;
WT_REF *couple, *couple_orig, *ref;
- bool empty_internal, prev, skip;
+ bool empty_internal, initial_descent, prev, skip;
uint32_t slot;
btree = S2BT(session);
- empty_internal = false;
+ pindex = NULL;
+ empty_internal = initial_descent = false;
/*
* Tree walks are special: they look inside page structures that splits
@@ -323,22 +341,30 @@ __tree_walk_internal(WT_SESSION_IMPL *session,
couple = couple_orig = ref = *refp;
*refp = NULL;
- /* If no page is active, begin a walk from the start of the tree. */
+ /* If no page is active, begin a walk from the start/end of the tree. */
if (ref == NULL) {
- ref = &btree->root;
+restart: /*
+ * We can reach here with a NULL or root reference; the release
+ * function handles them internally, don't complicate this code
+ * by calling them out.
+ */
+ WT_ERR(__wt_page_release(session, couple, flags));
+
+ couple = couple_orig = ref = &btree->root;
if (ref->page == NULL)
goto done;
+
+ initial_descent = true;
goto descend;
}
/*
- * If the active page was the root, we've reached the walk's end.
- * Release any hazard-pointer we're holding.
+ * If the active page was the root, we've reached the walk's end; we
+ * only get here if we've returned the root to our caller, so we're
+ * holding no hazard pointers.
*/
- if (__wt_ref_is_root(ref)) {
- WT_ERR(__wt_page_release(session, couple, flags));
+ if (__wt_ref_is_root(ref))
goto done;
- }
/* Figure out the current slot in the WT_REF array. */
__ref_index_slot(session, ref, &pindex, &slot);
@@ -352,7 +378,7 @@ __tree_walk_internal(WT_SESSION_IMPL *session,
while ((prev && slot == 0) ||
(!prev && slot == pindex->entries - 1)) {
/* Ascend to the parent. */
- __page_ascend(session, &ref, &pindex, &slot);
+ __ref_ascend(session, &ref, &pindex, &slot);
/*
* If we got all the way through an internal page and
@@ -521,16 +547,21 @@ __tree_walk_internal(WT_SESSION_IMPL *session,
ret = 0;
/*
+ * If a cursor is setting up at the end of the
+ * tree, we can't use our parent page's index,
+ * because it may have already split; restart
+ * the walk.
+ */
+ if (prev && initial_descent)
+ goto restart;
+
+ /*
* If a new walk that never coupled from the
* root to a new saved position in the tree,
* restart the walk.
*/
- if (couple == &btree->root) {
- ref = &btree->root;
- if (ref->page == NULL)
- goto done;
- goto descend;
- }
+ if (couple == &btree->root)
+ goto restart;
/*
* If restarting from some original position,
@@ -561,10 +592,56 @@ __tree_walk_internal(WT_SESSION_IMPL *session,
descend: couple = ref;
empty_internal = true;
- __page_descend(
- session, ref->page, &pindex, &slot, prev);
+ /*
+ * There's a split race when a cursor is setting
+ * up at the end of the tree or moving backwards
+ * through the tree and descending a level. When
+ * splitting an internal page into its parent,
+ * we move the WT_REF structures and update the
+ * parent's page index before updating the split
+ * page's page index, and it's not an atomic
+ * update. A thread can read the parent page's
+ * replacement page index, then read the split
+ * page's original index, or the parent page's
+ * original and the split page's replacement.
+ *
+ * This isn't a problem for a cursor setting up
+ * at the start of the tree or moving forwards
+ * through the tree because we do right-hand
+ * splits on internal pages and the initial part
+ * of the split page's namespace won't change as
+ * part of a split. A thread reading the parent
+ * page's and split page's indexes will move to
+ * the same slot no matter what order of indexes
+ * are read.
+ *
+ * Handle a cursor setting up at the end of the
+ * tree or moving backwards through the tree.
+ */
+ if (!prev) {
+ WT_INTL_INDEX_GET(
+ session, ref->page, pindex);
+ slot = 0;
+ } else if (initial_descent) {
+ if (!__ref_initial_descent_prev(
+ session, ref, &pindex))
+ goto restart;
+ slot = pindex->entries - 1;
+ } else {
+ __ref_descend_prev(
+ session, ref, &pindex);
+ slot = pindex->entries - 1;
+ }
} else {
/*
+ * At the lowest tree level (considering a leaf
+ * page), turn off the initial-descent state.
+ * Descent race tests are different when moving
+ * through the tree vs. the initial descent.
+ */
+ initial_descent = false;
+
+ /*
* Optionally skip leaf pages, the second half.
* We didn't have an on-page cell to figure out
* if it was a leaf page, we had to acquire the
@@ -605,7 +682,7 @@ __wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags)
/*
* __wt_tree_walk_count --
* Move to the next/previous page in the tree, tracking how many
- * references were visited to get there.
+ * references were visited to get there.
*/
int
__wt_tree_walk_count(WT_SESSION_IMPL *session,
diff --git a/src/third_party/wiredtiger/src/btree/col_modify.c b/src/third_party/wiredtiger/src/btree/col_modify.c
index 645d98d9c9b..fd60b12538a 100644
--- a/src/third_party/wiredtiger/src/btree/col_modify.c
+++ b/src/third_party/wiredtiger/src/btree/col_modify.c
@@ -25,6 +25,7 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
WT_INSERT_HEAD *ins_head, **ins_headp;
WT_ITEM _value;
WT_PAGE *page;
+ WT_PAGE_MODIFY *mod;
WT_UPDATE *old_upd, *upd;
size_t ins_size, upd_size;
u_int i, skipdepth;
@@ -60,6 +61,7 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
/* If we don't yet have a modify structure, we'll need one. */
WT_RET(__wt_page_modify_init(session, page));
+ mod = page->modify;
/*
* Delete, insert or update a column-store entry.
@@ -105,17 +107,17 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
/* Allocate the append/update list reference as necessary. */
if (append) {
WT_PAGE_ALLOC_AND_SWAP(session,
- page, page->modify->mod_append, ins_headp, 1);
- ins_headp = &page->modify->mod_append[0];
+ page, mod->mod_append, ins_headp, 1);
+ ins_headp = &mod->mod_append[0];
} else if (page->type == WT_PAGE_COL_FIX) {
WT_PAGE_ALLOC_AND_SWAP(session,
- page, page->modify->mod_update, ins_headp, 1);
- ins_headp = &page->modify->mod_update[0];
+ page, mod->mod_update, ins_headp, 1);
+ ins_headp = &mod->mod_update[0];
} else {
WT_PAGE_ALLOC_AND_SWAP(session,
- page, page->modify->mod_update, ins_headp,
+ page, mod->mod_update, ins_headp,
page->pg_var_entries);
- ins_headp = &page->modify->mod_update[cbt->slot];
+ ins_headp = &mod->mod_update[cbt->slot];
}
/* Allocate the WT_INSERT_HEAD structure as necessary. */
@@ -135,6 +137,14 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
cbt->ins_head = ins_head;
cbt->ins = ins;
+ /*
+ * Check for insert split and checkpoint races in column-store:
+ * it's easy (as opposed to in row-store) and a difficult bug to
+ * otherwise diagnose.
+ */
+ WT_ASSERT(session, mod->mod_split_recno == WT_RECNO_OOB ||
+ (recno != WT_RECNO_OOB && mod->mod_split_recno > recno));
+
if (upd_arg == NULL) {
WT_ERR(
__wt_update_alloc(session, value, &upd, &upd_size));
diff --git a/src/third_party/wiredtiger/src/btree/col_srch.c b/src/third_party/wiredtiger/src/btree/col_srch.c
index cb5a227495f..23eae75ec2b 100644
--- a/src/third_party/wiredtiger/src/btree/col_srch.c
+++ b/src/third_party/wiredtiger/src/btree/col_srch.c
@@ -77,6 +77,7 @@ __wt_col_search(WT_SESSION_IMPL *session,
int depth;
btree = S2BT(session);
+ current = NULL;
__cursor_pos_clear(cbt);
@@ -116,12 +117,19 @@ __wt_col_search(WT_SESSION_IMPL *session,
goto leaf_only;
}
-restart_root:
+ if (0) {
+restart: /*
+ * Discard the currently held page and restart the search from
+ * the root.
+ */
+ WT_RET(__wt_page_release(session, current, 0));
+ }
+
/* Search the internal pages of the tree. */
current = &btree->root;
for (depth = 2, pindex = NULL;; ++depth) {
parent_pindex = pindex;
-restart_page: page = current->page;
+ page = current->page;
if (page->type != WT_PAGE_COL_INT)
break;
@@ -137,12 +145,10 @@ restart_page: page = current->page;
* If on the last slot (the key is larger than any key
* on the page), check for an internal page split race.
*/
- if (parent_pindex != NULL &&
- __wt_split_intl_race(
- session, current->home, parent_pindex)) {
- WT_RET(__wt_page_release(session, current, 0));
- goto restart_root;
- }
+ if (__wt_split_descent_race(
+ session, current, parent_pindex))
+ goto restart;
+
goto descend;
}
@@ -178,8 +184,14 @@ descend: /*
/*
* Swap the current page for the child page. If the page splits
- * while we're retrieving it, restart the search in the current
- * page; otherwise return on error, the swap call ensures we're
+ * while we're retrieving it, restart the search at the root.
+ * We cannot restart in the "current" page; for example, if a
+ * thread is appending to the tree, the page it's waiting for
+ * did an insert-split into the parent, then the parent split
+ * into its parent, the name space we are searching for may have
+ * moved above the current page in the tree.
+ *
+ * On other error, simply return, the swap call ensures we're
* holding nothing on failure.
*/
if ((ret = __wt_page_swap(
@@ -188,7 +200,7 @@ descend: /*
continue;
}
if (ret == WT_RESTART)
- goto restart_page;
+ goto restart;
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/btree/row_srch.c b/src/third_party/wiredtiger/src/btree/row_srch.c
index 71564a7b3c5..9d68c8e0ce7 100644
--- a/src/third_party/wiredtiger/src/btree/row_srch.c
+++ b/src/third_party/wiredtiger/src/btree/row_srch.c
@@ -9,18 +9,17 @@
#include "wt_internal.h"
/*
- * __wt_search_insert_append --
+ * __search_insert_append --
* Fast append search of a row-store insert list, creating a skiplist stack
* as we go.
*/
static inline int
-__wt_search_insert_append(WT_SESSION_IMPL *session,
- WT_CURSOR_BTREE *cbt, WT_ITEM *srch_key, bool *donep)
+__search_insert_append(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
+ WT_INSERT_HEAD *ins_head, WT_ITEM *srch_key, bool *donep)
{
WT_BTREE *btree;
WT_COLLATOR *collator;
WT_INSERT *ins;
- WT_INSERT_HEAD *inshead;
WT_ITEM key;
int cmp, i;
@@ -28,8 +27,7 @@ __wt_search_insert_append(WT_SESSION_IMPL *session,
collator = btree->collator;
*donep = 0;
- inshead = cbt->ins_head;
- if ((ins = WT_SKIP_LAST(inshead)) == NULL)
+ if ((ins = WT_SKIP_LAST(ins_head)) == NULL)
return (0);
key.data = WT_INSERT_KEY(ins);
key.size = WT_INSERT_KEY_SIZE(ins);
@@ -48,12 +46,13 @@ __wt_search_insert_append(WT_SESSION_IMPL *session,
*/
for (i = WT_SKIP_MAXDEPTH - 1; i >= 0; i--) {
cbt->ins_stack[i] = (i == 0) ? &ins->next[0] :
- (inshead->tail[i] != NULL) ?
- &inshead->tail[i]->next[i] : &inshead->head[i];
+ (ins_head->tail[i] != NULL) ?
+ &ins_head->tail[i]->next[i] : &ins_head->head[i];
cbt->next_stack[i] = NULL;
}
cbt->compare = -cmp;
cbt->ins = ins;
+ cbt->ins_head = ins_head;
*donep = 1;
}
return (0);
@@ -64,20 +63,18 @@ __wt_search_insert_append(WT_SESSION_IMPL *session,
* Search a row-store insert list, creating a skiplist stack as we go.
*/
int
-__wt_search_insert(
- WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *srch_key)
+__wt_search_insert(WT_SESSION_IMPL *session,
+ WT_CURSOR_BTREE *cbt, WT_INSERT_HEAD *ins_head, WT_ITEM *srch_key)
{
WT_BTREE *btree;
WT_COLLATOR *collator;
WT_INSERT *ins, **insp, *last_ins;
- WT_INSERT_HEAD *inshead;
WT_ITEM key;
size_t match, skiphigh, skiplow;
int cmp, i;
btree = S2BT(session);
collator = btree->collator;
- inshead = cbt->ins_head;
cmp = 0; /* -Wuninitialized */
/*
@@ -86,7 +83,7 @@ __wt_search_insert(
*/
match = skiphigh = skiplow = 0;
ins = last_ins = NULL;
- for (i = WT_SKIP_MAXDEPTH - 1, insp = &inshead->head[i]; i >= 0;) {
+ for (i = WT_SKIP_MAXDEPTH - 1, insp = &ins_head->head[i]; i >= 0;) {
if ((ins = *insp) == NULL) {
cbt->next_stack[i] = NULL;
cbt->ins_stack[i--] = insp--;
@@ -128,6 +125,7 @@ __wt_search_insert(
*/
cbt->compare = -cmp;
cbt->ins = (ins != NULL) ? ins : last_ins;
+ cbt->ins_head = ins_head;
return (0);
}
@@ -212,6 +210,7 @@ __wt_row_search(WT_SESSION_IMPL *session,
WT_BTREE *btree;
WT_COLLATOR *collator;
WT_DECL_RET;
+ WT_INSERT_HEAD *ins_head;
WT_ITEM *item;
WT_PAGE *page;
WT_PAGE_INDEX *pindex, *parent_pindex;
@@ -276,12 +275,20 @@ __wt_row_search(WT_SESSION_IMPL *session,
goto leaf_only;
}
+ if (0) {
+restart: /*
+ * Discard the currently held page and restart the search from
+ * the root.
+ */
+ WT_RET(__wt_page_release(session, current, 0));
+ skiphigh = skiplow = 0;
+ }
+
/* Search the internal pages of the tree. */
-restart_root:
current = &btree->root;
for (depth = 2, pindex = NULL;; ++depth) {
parent_pindex = pindex;
-restart_page: page = current->page;
+ page = current->page;
if (page->type != WT_PAGE_ROW_INT)
break;
@@ -418,22 +425,21 @@ restart_page: page = current->page;
* page), check for an internal page split race.
*/
if (pindex->entries == base) {
-append: if (parent_pindex != NULL &&
- __wt_split_intl_race(
- session, current->home, parent_pindex)) {
- if ((ret = __wt_page_release(
- session, current, 0)) != 0)
- return (ret);
-
- skiplow = skiphigh = 0;
- goto restart_root;
- }
+append: if (__wt_split_descent_race(
+ session, current, parent_pindex))
+ goto restart;
}
descend: /*
* Swap the current page for the child page. If the page splits
- * while we're retrieving it, restart the search in the current
- * page; otherwise return on error, the swap call ensures we're
+ * while we're retrieving it, restart the search at the root.
+ * We cannot restart in the "current" page; for example, if a
+ * thread is appending to the tree, the page it's waiting for
+ * did an insert-split into the parent, then the parent split
+ * into its parent, the name space we are searching for may have
+ * moved above the current page in the tree.
+ *
+ * On other error, simply return, the swap call ensures we're
* holding nothing on failure.
*/
if ((ret = __wt_page_swap(
@@ -441,10 +447,8 @@ descend: /*
current = descent;
continue;
}
- if (ret == WT_RESTART) {
- skiphigh = skiplow = 0;
- goto restart_page;
- }
+ if (ret == WT_RESTART)
+ goto restart;
return (ret);
}
@@ -480,24 +484,18 @@ leaf_only:
cbt->slot = WT_ROW_SLOT(page, page->pg_row_d);
F_SET(cbt, WT_CBT_SEARCH_SMALLEST);
- cbt->ins_head = WT_ROW_INSERT_SMALLEST(page);
+ ins_head = WT_ROW_INSERT_SMALLEST(page);
} else {
cbt->slot = WT_ROW_SLOT(page,
page->pg_row_d + (page->pg_row_entries - 1));
- cbt->ins_head = WT_ROW_INSERT_SLOT(page, cbt->slot);
+ ins_head = WT_ROW_INSERT_SLOT(page, cbt->slot);
}
- WT_ERR(
- __wt_search_insert_append(session, cbt, srch_key, &done));
+ WT_ERR(__search_insert_append(
+ session, cbt, ins_head, srch_key, &done));
if (done)
return (0);
-
- /*
- * Don't leave the insert list head set, code external to the
- * search uses it.
- */
- cbt->ins_head = NULL;
}
/*
@@ -590,16 +588,16 @@ leaf_match: cbt->compare = 0;
cbt->slot = WT_ROW_SLOT(page, page->pg_row_d);
F_SET(cbt, WT_CBT_SEARCH_SMALLEST);
- cbt->ins_head = WT_ROW_INSERT_SMALLEST(page);
+ ins_head = WT_ROW_INSERT_SMALLEST(page);
} else {
cbt->compare = -1;
cbt->slot = WT_ROW_SLOT(page, page->pg_row_d + (base - 1));
- cbt->ins_head = WT_ROW_INSERT_SLOT(page, cbt->slot);
+ ins_head = WT_ROW_INSERT_SLOT(page, cbt->slot);
}
/* If there's no insert list, we're done. */
- if (WT_SKIP_FIRST(cbt->ins_head) == NULL)
+ if (WT_SKIP_FIRST(ins_head) == NULL)
return (0);
/*
@@ -607,12 +605,12 @@ leaf_match: cbt->compare = 0;
* catch cursors repeatedly inserting at a single point.
*/
if (insert) {
- WT_ERR(
- __wt_search_insert_append(session, cbt, srch_key, &done));
+ WT_ERR(__search_insert_append(
+ session, cbt, ins_head, srch_key, &done));
if (done)
return (0);
}
- WT_ERR(__wt_search_insert(session, cbt, srch_key));
+ WT_ERR(__wt_search_insert(session, cbt, ins_head, srch_key));
return (0);
@@ -661,19 +659,16 @@ __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
/*
* If the tree is new (and not empty), it might have a large insert
* list.
- */
- F_SET(cbt, WT_CBT_SEARCH_SMALLEST);
- if ((cbt->ins_head = WT_ROW_INSERT_SMALLEST(page)) == NULL)
- return (WT_NOTFOUND);
-
- /*
+ *
* Walk down the list until we find a level with at least 50 entries,
* that's where we'll start rolling random numbers. The value 50 is
* used to ignore levels with only a few entries, that is, levels which
* are potentially badly skewed.
*/
- for (ins_head = cbt->ins_head,
- level = WT_SKIP_MAXDEPTH - 1; level >= 0; --level) {
+ F_SET(cbt, WT_CBT_SEARCH_SMALLEST);
+ if ((ins_head = WT_ROW_INSERT_SMALLEST(page)) == NULL)
+ return (WT_NOTFOUND);
+ for (level = WT_SKIP_MAXDEPTH - 1; level >= 0; --level) {
start = &ins_head->head[level];
for (entries = 0, stop = start;
*stop != NULL; stop = &(*stop)->next[level])
@@ -768,6 +763,7 @@ __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
ins = ins->next[0];
cbt->ins = ins;
+ cbt->ins_head = ins_head;
cbt->compare = 0;
return (0);
@@ -787,11 +783,19 @@ __wt_row_random_descent(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
WT_REF *current, *descent;
btree = S2BT(session);
+ current = NULL;
__cursor_pos_clear(cbt);
-restart_root:
- /* Walk the internal pages of the tree. */
+ if (0) {
+restart: /*
+ * Discard the currently held page and restart the search from
+ * the root.
+ */
+ WT_RET(__wt_page_release(session, current, 0));
+ }
+
+ /* Search the internal pages of the tree. */
current = &btree->root;
for (;;) {
page = current->page;
@@ -803,22 +807,19 @@ restart_root:
__wt_random(&session->rnd) % pindex->entries];
/*
- * Swap the parent page for the child page; return on error,
- * the swap function ensures we're holding nothing on failure.
+ * Swap the current page for the child page. If the page splits
+ * while we're retrieving it, restart the search at the root.
+ *
+ * On other error, simply return, the swap call ensures we're
+ * holding nothing on failure.
*/
if ((ret = __wt_page_swap(
session, current, descent, WT_READ_RESTART_OK)) == 0) {
current = descent;
continue;
}
- /*
- * Restart is returned if we find a page that's been split; the
- * held page isn't discarded when restart is returned, discard
- * it and restart the search from the top of the tree.
- */
- if (ret == WT_RESTART &&
- (ret = __wt_page_release(session, current, 0)) == 0)
- goto restart_root;
+ if (ret == WT_RESTART)
+ goto restart;
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/conn/conn_dhandle.c b/src/third_party/wiredtiger/src/conn/conn_dhandle.c
index 60136a71b99..3bea24be508 100644
--- a/src/third_party/wiredtiger/src/conn/conn_dhandle.c
+++ b/src/third_party/wiredtiger/src/conn/conn_dhandle.c
@@ -368,24 +368,21 @@ __conn_btree_apply_internal(WT_SESSION_IMPL *session, WT_DATA_HANDLE *dhandle,
* sure it's referenced to stop other internal code dropping the handle
* (e.g in LSM when cleaning up obsolete chunks).
*/
- ret = __wt_session_get_btree(session,
- dhandle->name, dhandle->checkpoint, NULL, 0);
- if (ret == 0) {
- WT_SAVE_DHANDLE(session,
- ret = func(session, cfg));
- if (WT_META_TRACKING(session))
- WT_TRET(__wt_meta_track_handle_lock(session, false));
- else
- WT_TRET(__wt_session_release_btree(session));
- } else if (ret == EBUSY)
- ret = __wt_conn_btree_apply_single(session, dhandle->name,
- dhandle->checkpoint, func, cfg);
+ if ((ret = __wt_session_get_btree(session,
+ dhandle->name, dhandle->checkpoint, NULL, 0)) != 0)
+ return (ret == EBUSY ? 0 : ret);
+
+ WT_SAVE_DHANDLE(session, ret = func(session, cfg));
+ if (WT_META_TRACKING(session))
+ WT_TRET(__wt_meta_track_handle_lock(session, false));
+ else
+ WT_TRET(__wt_session_release_btree(session));
return (ret);
}
/*
* __wt_conn_btree_apply --
- * Apply a function to all open btree handles apart from the metadata.
+ * Apply a function to all open btree handles with the given URI.
*/
int
__wt_conn_btree_apply(WT_SESSION_IMPL *session,
@@ -430,98 +427,6 @@ __wt_conn_btree_apply(WT_SESSION_IMPL *session,
}
/*
- * __wt_conn_btree_apply_single_ckpt --
- * Decode any checkpoint information from the configuration string then
- * call btree apply single.
- */
-int
-__wt_conn_btree_apply_single_ckpt(WT_SESSION_IMPL *session,
- const char *uri,
- int (*func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[])
-{
- WT_CONFIG_ITEM cval;
- WT_DECL_RET;
- const char *checkpoint;
-
- checkpoint = NULL;
-
- /*
- * This function exists to handle checkpoint configuration. Callers
- * that never open a checkpoint call the underlying function directly.
- */
- WT_RET_NOTFOUND_OK(
- __wt_config_gets_def(session, cfg, "checkpoint", 0, &cval));
- if (cval.len != 0) {
- /*
- * The internal checkpoint name is special, find the last
- * unnamed checkpoint of the object.
- */
- if (WT_STRING_MATCH(WT_CHECKPOINT, cval.str, cval.len)) {
- WT_RET(__wt_meta_checkpoint_last_name(
- session, uri, &checkpoint));
- } else
- WT_RET(__wt_strndup(
- session, cval.str, cval.len, &checkpoint));
- }
-
- ret = __wt_conn_btree_apply_single(session, uri, checkpoint, func, cfg);
-
- __wt_free(session, checkpoint);
-
- return (ret);
-}
-
-/*
- * __wt_conn_btree_apply_single --
- * Apply a function to a single btree handle that couldn't be locked
- * (attempting to get the handle returned EBUSY).
- */
-int
-__wt_conn_btree_apply_single(WT_SESSION_IMPL *session,
- const char *uri, const char *checkpoint,
- int (*func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[])
-{
- WT_CONNECTION_IMPL *conn;
- WT_DATA_HANDLE *dhandle;
- WT_DECL_RET;
- uint64_t bucket, hash;
-
- conn = S2C(session);
-
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST));
-
- hash = __wt_hash_city64(uri, strlen(uri));
- bucket = hash % WT_HASH_ARRAY_SIZE;
- TAILQ_FOREACH(dhandle, &conn->dhhash[bucket], hashq)
- if (F_ISSET(dhandle, WT_DHANDLE_OPEN) &&
- !F_ISSET(dhandle, WT_DHANDLE_DEAD) &&
- (hash == dhandle->name_hash &&
- strcmp(uri, dhandle->name) == 0) &&
- ((dhandle->checkpoint == NULL && checkpoint == NULL) ||
- (dhandle->checkpoint != NULL && checkpoint != NULL &&
- strcmp(dhandle->checkpoint, checkpoint) == 0))) {
- /*
- * We're holding the handle list lock which locks out
- * handle open (which might change the state of the
- * underlying object). However, closing a handle
- * doesn't require the handle list lock, lock out
- * closing the handle and then confirm the handle is
- * still open.
- */
- __wt_spin_lock(session, &dhandle->close_lock);
- if (F_ISSET(dhandle, WT_DHANDLE_OPEN) &&
- !F_ISSET(dhandle, WT_DHANDLE_DEAD)) {
- WT_WITH_DHANDLE(session, dhandle,
- ret = func(session, cfg));
- }
- __wt_spin_unlock(session, &dhandle->close_lock);
- WT_RET(ret);
- }
-
- return (0);
-}
-
-/*
* __wt_conn_dhandle_close_all --
* Close all data handles handles with matching name (including all
* checkpoint handles).
diff --git a/src/third_party/wiredtiger/src/conn/conn_handle.c b/src/third_party/wiredtiger/src/conn/conn_handle.c
index 12b4e87e921..b33ec18dfca 100644
--- a/src/third_party/wiredtiger/src/conn/conn_handle.c
+++ b/src/third_party/wiredtiger/src/conn/conn_handle.c
@@ -56,6 +56,7 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn)
WT_RET(__wt_rwlock_alloc(session,
&conn->hot_backup_lock, "hot backup"));
WT_RET(__wt_spin_init(session, &conn->las_lock, "lookaside table"));
+ WT_RET(__wt_spin_init(session, &conn->metadata_lock, "metadata"));
WT_RET(__wt_spin_init(session, &conn->reconfig_lock, "reconfigure"));
WT_RET(__wt_spin_init(session, &conn->schema_lock, "schema"));
WT_RET(__wt_spin_init(session, &conn->table_lock, "table creation"));
@@ -143,6 +144,7 @@ __wt_connection_destroy(WT_CONNECTION_IMPL *conn)
__wt_spin_destroy(session, &conn->fh_lock);
WT_TRET(__wt_rwlock_destroy(session, &conn->hot_backup_lock));
__wt_spin_destroy(session, &conn->las_lock);
+ __wt_spin_destroy(session, &conn->metadata_lock);
__wt_spin_destroy(session, &conn->reconfig_lock);
__wt_spin_destroy(session, &conn->schema_lock);
__wt_spin_destroy(session, &conn->table_lock);
diff --git a/src/third_party/wiredtiger/src/conn/conn_log.c b/src/third_party/wiredtiger/src/conn/conn_log.c
index 60f46288072..fa3928a8539 100644
--- a/src/third_party/wiredtiger/src/conn/conn_log.c
+++ b/src/third_party/wiredtiger/src/conn/conn_log.c
@@ -633,7 +633,7 @@ restart:
if (slot->slot_start_lsn.l.offset !=
slot->slot_last_offset)
slot->slot_start_lsn.l.offset =
- slot->slot_last_offset;
+ (uint32_t)slot->slot_last_offset;
log->write_start_lsn = slot->slot_start_lsn;
log->write_lsn = slot->slot_end_lsn;
WT_ERR(__wt_cond_signal(
diff --git a/src/third_party/wiredtiger/src/cursor/cur_join.c b/src/third_party/wiredtiger/src/cursor/cur_join.c
index 2cbefa68c5e..797e6e5879a 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_join.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_join.c
@@ -53,7 +53,9 @@ __curjoin_entry_iter_init(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
iter->session = session;
iter->entry = entry;
iter->cursor = newcur;
- iter->advance = false;
+ iter->positioned = false;
+ iter->isequal = (entry->ends_next == 1 &&
+ WT_CURJOIN_END_RANGE(&entry->ends[0]) == WT_CURJOIN_END_EQ);
*iterp = iter;
if (0) {
@@ -72,18 +74,16 @@ static int
__curjoin_pack_recno(WT_SESSION_IMPL *session, uint64_t r, uint8_t *buf,
size_t bufsize, WT_ITEM *item)
{
- WT_DECL_RET;
WT_SESSION *wtsession;
size_t sz;
wtsession = (WT_SESSION *)session;
- WT_ERR(wiredtiger_struct_size(wtsession, &sz, "r", r));
+ WT_RET(wiredtiger_struct_size(wtsession, &sz, "r", r));
WT_ASSERT(session, sz < bufsize);
- WT_ERR(wiredtiger_struct_pack(wtsession, buf, bufsize, "r", r));
+ WT_RET(wiredtiger_struct_pack(wtsession, buf, bufsize, "r", r));
item->size = sz;
item->data = buf;
-
-err: return (ret);
+ return (0);
}
/*
@@ -97,14 +97,13 @@ __curjoin_entry_iter_next(WT_CURSOR_JOIN_ITER *iter, WT_ITEM *primkey,
{
WT_CURSOR *firstcg_cur;
WT_CURSOR_JOIN *cjoin;
- WT_DECL_RET;
WT_SESSION_IMPL *session;
uint64_t r;
- if (iter->advance)
- WT_ERR(iter->cursor->next(iter->cursor));
+ if (iter->positioned)
+ WT_RET(iter->cursor->next(iter->cursor));
else
- iter->advance = true;
+ iter->positioned = true;
session = iter->session;
cjoin = iter->cjoin;
@@ -119,7 +118,7 @@ __curjoin_entry_iter_next(WT_CURSOR_JOIN_ITER *iter, WT_ITEM *primkey,
firstcg_cur = ((WT_CURSOR_TABLE *)iter->cursor)->cg_cursors[0];
if (WT_CURSOR_RECNO(&cjoin->iface)) {
r = *(uint64_t *)firstcg_cur->key.data;
- WT_ERR(__curjoin_pack_recno(session, r, cjoin->recno_buf,
+ WT_RET(__curjoin_pack_recno(session, r, cjoin->recno_buf,
sizeof(cjoin->recno_buf), primkey));
*rp = r;
} else {
@@ -129,8 +128,7 @@ __curjoin_entry_iter_next(WT_CURSOR_JOIN_ITER *iter, WT_ITEM *primkey,
iter->curkey = primkey;
iter->entry->stats.actual_count++;
iter->entry->stats.accesses++;
-
-err: return (ret);
+ return (0);
}
/*
@@ -141,17 +139,14 @@ err: return (ret);
static int
__curjoin_entry_iter_reset(WT_CURSOR_JOIN_ITER *iter)
{
- WT_DECL_RET;
-
- if (iter->advance) {
- WT_ERR(iter->cursor->reset(iter->cursor));
- WT_ERR(__wt_cursor_dup_position(
+ if (iter->positioned) {
+ WT_RET(iter->cursor->reset(iter->cursor));
+ WT_RET(__wt_cursor_dup_position(
iter->cjoin->entries[0].ends[0].cursor, iter->cursor));
- iter->advance = false;
+ iter->positioned = false;
iter->entry->stats.actual_count = 0;
}
-
-err: return (ret);
+ return (0);
}
/*
@@ -162,7 +157,7 @@ err: return (ret);
static bool
__curjoin_entry_iter_ready(WT_CURSOR_JOIN_ITER *iter)
{
- return (iter->advance);
+ return (iter->positioned);
}
/*
@@ -255,18 +250,16 @@ __curjoin_init_bloom(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
WT_CURSOR_JOIN_ENDPOINT *end, *endmax;
WT_DECL_RET;
WT_DECL_ITEM(uribuf);
- WT_ITEM curkey, curvalue, *k;
+ WT_ITEM curkey, curvalue;
WT_TABLE *maintable;
const char *raw_cfg[] = { WT_CONFIG_BASE(
session, WT_SESSION_open_cursor), "raw", NULL };
const char *mainkey_str, *p;
- void *allocbuf;
size_t mainkey_len, size;
u_int i;
int cmp, skip;
c = NULL;
- allocbuf = NULL;
skip = 0;
if (entry->index != NULL) {
@@ -305,7 +298,7 @@ __curjoin_init_bloom(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
if ((end = &entry->ends[0]) < endmax &&
F_ISSET(end, WT_CURJOIN_END_GE)) {
WT_ERR(__wt_cursor_dup_position(end->cursor, c));
- if (end->flags == WT_CURJOIN_END_GE)
+ if (WT_CURJOIN_END_RANGE(end) == WT_CURJOIN_END_GE)
skip = 1;
}
collator = (entry->index == NULL) ? NULL : entry->index->collator;
@@ -313,18 +306,15 @@ __curjoin_init_bloom(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
WT_ERR(c->get_key(c, &curkey));
if (entry->index != NULL) {
cindex = (WT_CURSOR_INDEX *)c;
- if (cindex->index->extractor == NULL) {
- /*
- * Repack so it's comparable to the
- * reference endpoints.
- */
- k = &cindex->child->key;
- WT_ERR(__wt_struct_repack(session,
- cindex->child->key_format,
- entry->main->value_format, k, &curkey,
- &allocbuf));
- } else
- curkey = cindex->child->key;
+ /*
+ * Repack so it's comparable to the
+ * reference endpoints.
+ */
+ WT_ERR(__wt_struct_repack(session,
+ cindex->child->key_format,
+ (entry->repack_format != NULL ?
+ entry->repack_format : cindex->iface.key_format),
+ &cindex->child->key, &curkey));
}
for (end = &entry->ends[skip]; end < endmax; end++) {
WT_ERR(__wt_compare(session, collator, &curkey,
@@ -361,7 +351,6 @@ done:
err: if (c != NULL)
WT_TRET(c->close(c));
__wt_scr_free(session, &uribuf);
- __wt_free(session, allocbuf);
return (ret);
}
@@ -375,27 +364,23 @@ __curjoin_endpoint_init_key(WT_SESSION_IMPL *session,
{
WT_CURSOR *cursor;
WT_CURSOR_INDEX *cindex;
- WT_DECL_RET;
WT_ITEM *k;
uint64_t r;
- void *allocbuf;
- allocbuf = NULL;
if ((cursor = endpoint->cursor) != NULL) {
if (entry->index != NULL) {
/* Extract and save the index's logical key. */
cindex = (WT_CURSOR_INDEX *)endpoint->cursor;
- WT_ERR(__wt_struct_repack(session,
+ WT_RET(__wt_struct_repack(session,
cindex->child->key_format,
- cindex->iface.key_format,
- &cindex->child->key, &endpoint->key, &allocbuf));
- if (allocbuf != NULL)
- F_SET(endpoint, WT_CURJOIN_END_OWN_KEY);
+ (entry->repack_format != NULL ?
+ entry->repack_format : cindex->iface.key_format),
+ &cindex->child->key, &endpoint->key));
} else {
k = &((WT_CURSOR_TABLE *)cursor)->cg_cursors[0]->key;
if (WT_CURSOR_RECNO(cursor)) {
r = *(uint64_t *)k->data;
- WT_ERR(__curjoin_pack_recno(session, r,
+ WT_RET(__curjoin_pack_recno(session, r,
endpoint->recno_buf,
sizeof(endpoint->recno_buf),
&endpoint->key));
@@ -404,10 +389,7 @@ __curjoin_endpoint_init_key(WT_SESSION_IMPL *session,
endpoint->key = *k;
}
}
- if (0) {
-err: __wt_free(session, allocbuf);
- }
- return (ret);
+ return (0);
}
/*
@@ -520,35 +502,34 @@ __curjoin_entry_in_range(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry,
{
WT_COLLATOR *collator;
WT_CURSOR_JOIN_ENDPOINT *end, *endmax;
- WT_DECL_RET;
int cmp;
collator = (entry->index != NULL) ? entry->index->collator : NULL;
endmax = &entry->ends[entry->ends_next];
for (end = &entry->ends[skip_left ? 1 : 0]; end < endmax; end++) {
- WT_ERR(__wt_compare(session, collator, curkey, &end->key,
+ WT_RET(__wt_compare(session, collator, curkey, &end->key,
&cmp));
if (!F_ISSET(end, WT_CURJOIN_END_LT)) {
if (cmp < 0 ||
(cmp == 0 &&
!F_ISSET(end, WT_CURJOIN_END_EQ)) ||
(cmp > 0 && !F_ISSET(end, WT_CURJOIN_END_GT)))
- WT_ERR(WT_NOTFOUND);
+ WT_RET(WT_NOTFOUND);
} else {
if (cmp > 0 ||
(cmp == 0 &&
!F_ISSET(end, WT_CURJOIN_END_EQ)) ||
(cmp < 0 && !F_ISSET(end, WT_CURJOIN_END_LT)))
- WT_ERR(WT_NOTFOUND);
+ WT_RET(WT_NOTFOUND);
}
}
-err: return (ret);
+ return (0);
}
typedef struct {
WT_CURSOR iface;
WT_CURSOR_JOIN_ENTRY *entry;
- int ismember;
+ bool ismember;
} WT_CURJOIN_EXTRACTOR;
/*
@@ -584,8 +565,8 @@ __curjoin_extract_insert(WT_CURSOR *cursor) {
ret = __curjoin_entry_in_range(session, cextract->entry, &ikey, false);
if (ret == WT_NOTFOUND)
ret = 0;
- else
- cextract->ismember = 1;
+ else if (ret == 0)
+ cextract->ismember = true;
return (ret);
}
@@ -659,10 +640,11 @@ __curjoin_entry_member(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
v = *key;
if ((idx = entry->index) != NULL && idx->extractor != NULL) {
+ WT_CLEAR(extract_cursor);
extract_cursor.iface = iface;
extract_cursor.iface.session = &session->iface;
extract_cursor.iface.key_format = idx->exkey_format;
- extract_cursor.ismember = 0;
+ extract_cursor.ismember = false;
extract_cursor.entry = entry;
WT_ERR(idx->extractor->extract(idx->extractor,
&session->iface, key, &v, &extract_cursor.iface));
@@ -715,8 +697,15 @@ nextkey:
for (i = 0; i < cjoin->entries_next; i++) {
ret = __curjoin_entry_member(session, cjoin,
&cjoin->entries[i], skip_left);
- if (ret == WT_NOTFOUND)
+ if (ret == WT_NOTFOUND) {
+ /*
+ * If this is compare=eq on our outer iterator,
+ * and we've moved past it, we're done.
+ */
+ if (cjoin->iter->isequal && i == 0)
+ break;
goto nextkey;
+ }
skip_left = false;
WT_ERR(ret);
}
@@ -783,12 +772,10 @@ __curjoin_close(WT_CURSOR *cursor)
if (F_ISSET(entry, WT_CURJOIN_ENTRY_OWN_BLOOM))
WT_TRET(__wt_bloom_close(entry->bloom));
for (end = &entry->ends[0];
- end < &entry->ends[entry->ends_next]; end++) {
+ end < &entry->ends[entry->ends_next]; end++)
F_CLR(end->cursor, WT_CURSTD_JOINED);
- if (F_ISSET(end, WT_CURJOIN_END_OWN_KEY))
- __wt_free(session, end->key.data);
- }
__wt_free(session, entry->ends);
+ __wt_free(session, entry->repack_format);
}
if (cjoin->iter != NULL)
@@ -891,22 +878,22 @@ __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
WT_INDEX *idx, WT_CURSOR *ref_cursor, uint8_t flags, uint8_t range,
uint64_t count, uint32_t bloom_bit_count, uint32_t bloom_hash_count)
{
+ WT_CURSOR_INDEX *cindex;
+ WT_CURSOR_JOIN_ENDPOINT *end, *newend;
WT_CURSOR_JOIN_ENTRY *entry;
WT_DECL_RET;
- WT_CURSOR_JOIN_ENDPOINT *end, *newend;
bool hasins, needbloom, range_eq;
- u_int i, ins, nonbloom;
+ char *main_uri, *newformat;
const char *raw_cfg[] = { WT_CONFIG_BASE(
session, WT_SESSION_open_cursor), "raw", NULL };
- char *main_uri;
- size_t namesize, newsize;
+ size_t len, newsize;
+ u_int i, ins, nonbloom;
entry = NULL;
hasins = needbloom = false;
ins = 0; /* -Wuninitialized */
main_uri = NULL;
nonbloom = 0; /* -Wuninitialized */
- namesize = strlen(cjoin->table->name);
for (i = 0; i < cjoin->entries_next; i++) {
if (cjoin->entries[i].index == idx) {
@@ -982,13 +969,13 @@ __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
((range & WT_CURJOIN_END_GT) != 0 || range_eq)) ||
(F_ISSET(end, WT_CURJOIN_END_LT) &&
((range & WT_CURJOIN_END_LT) != 0 || range_eq)) ||
- (end->flags == WT_CURJOIN_END_EQ &&
+ (WT_CURJOIN_END_RANGE(end) == WT_CURJOIN_END_EQ &&
(range & (WT_CURJOIN_END_LT | WT_CURJOIN_END_GT))
!= 0))
WT_ERR_MSG(session, EINVAL,
"join has overlapping ranges");
if (range == WT_CURJOIN_END_EQ &&
- end->flags == WT_CURJOIN_END_EQ &&
+ WT_CURJOIN_END_RANGE(end) == WT_CURJOIN_END_EQ &&
!F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION))
WT_ERR_MSG(session, EINVAL,
"compare=eq can only be combined "
@@ -1026,15 +1013,40 @@ __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
F_SET(newend, range);
/* Open the main file with a projection of the indexed columns. */
- if (entry->main == NULL && entry->index != NULL) {
- namesize = strlen(cjoin->table->name);
- newsize = namesize + entry->index->colconf.len + 1;
+ if (entry->main == NULL && idx != NULL) {
+ newsize = strlen(cjoin->table->name) + idx->colconf.len + 1;
WT_ERR(__wt_calloc(session, 1, newsize, &main_uri));
snprintf(main_uri, newsize, "%s%.*s",
- cjoin->table->name, (int)entry->index->colconf.len,
- entry->index->colconf.str);
+ cjoin->table->name, (int)idx->colconf.len,
+ idx->colconf.str);
WT_ERR(__wt_open_cursor(session, main_uri,
(WT_CURSOR *)cjoin, raw_cfg, &entry->main));
+ if (idx->extractor == NULL) {
+ /*
+ * Add no-op padding so trailing 'u' formats are not
+ * transformed to 'U'. This matches what happens in
+ * the index. We don't do this when we have an
+ * extractor, extractors already use the padding
+ * byte trick.
+ */
+ len = strlen(entry->main->value_format) + 3;
+ WT_ERR(__wt_calloc(session, len, 1, &newformat));
+ snprintf(newformat, len, "%s0x",
+ entry->main->value_format);
+ __wt_free(session, entry->main->value_format);
+ entry->main->value_format = newformat;
+ }
+
+ /*
+ * When we are repacking index keys to remove the primary
+ * key, we never want to transform trailing 'u'. Use no-op
+ * padding to force this.
+ */
+ cindex = (WT_CURSOR_INDEX *)ref_cursor;
+ len = strlen(cindex->iface.key_format) + 3;
+ WT_ERR(__wt_calloc(session, len, 1, &entry->repack_format));
+ snprintf(entry->repack_format, len, "%s0x",
+ cindex->iface.key_format);
}
err: if (main_uri != NULL)
diff --git a/src/third_party/wiredtiger/src/cursor/cur_stat.c b/src/third_party/wiredtiger/src/cursor/cur_stat.c
index 00a6ade21c6..bb492c66ace 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_stat.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_stat.c
@@ -504,14 +504,13 @@ __curstat_join_init(WT_SESSION_IMPL *session,
WT_CURSOR *curjoin, const char *cfg[], WT_CURSOR_STAT *cst)
{
WT_CURSOR_JOIN *cjoin;
- WT_DECL_RET;
WT_UNUSED(cfg);
if (curjoin == NULL && cst->u.join_stats_group.join_cursor != NULL)
curjoin = &cst->u.join_stats_group.join_cursor->iface;
if (curjoin == NULL || !WT_PREFIX_MATCH(curjoin->uri, "join:"))
- WT_ERR_MSG(session, EINVAL,
+ WT_RET_MSG(session, EINVAL,
"join cursor must be used with statistics:join");
cjoin = (WT_CURSOR_JOIN *)curjoin;
memset(&cst->u.join_stats_group, 0, sizeof(WT_JOIN_STATS_GROUP));
@@ -522,8 +521,7 @@ __curstat_join_init(WT_SESSION_IMPL *session,
cst->stats_count = sizeof(WT_JOIN_STATS) / sizeof(int64_t);
cst->stats_desc = __curstat_join_desc;
cst->next_set = __curstat_join_next_set;
-
-err: return (ret);
+ return (0);
}
/*
diff --git a/src/third_party/wiredtiger/src/docs/checkpoint.dox b/src/third_party/wiredtiger/src/docs/checkpoint.dox
index 523c0887859..ec28fea13c3 100644
--- a/src/third_party/wiredtiger/src/docs/checkpoint.dox
+++ b/src/third_party/wiredtiger/src/docs/checkpoint.dox
@@ -23,11 +23,16 @@ All transactional updates committed before a checkpoint are made durable
by the checkpoint, therefore the frequency of checkpoints limits the
volume of data that may be lost due to application or system failure.
-When WiredTiger data sources are first opened, they are opened in the
-state of the most recent checkpoint taken on the file, in other words,
-updates after the most recent checkpoint will not appear in the data
-source. If no checkpoint is found when the data source is opened, the
-data source will appear empty.
+Data sources that are involved in an exclusive operation when the
+checkpoint starts, including bulk load, verify or salvage, will be skipped
+by the checkpoint. Operations requiring exclusive access may fail with
+an \c EBUSY error if attempted during a checkpoint.
+
+When data sources are first opened, they are opened in the state of the
+most recent checkpoint taken on the file, in other words, updates after the
+most recent checkpoint will not appear in the data source. If no
+checkpoint is found when the data source is opened, the data source will
+appear empty.
@section checkpoint_server Automatic checkpoints
@@ -54,15 +59,16 @@ checkpoint cursor is closed.
@section checkpoint_naming Checkpoint naming
-Additionally, checkpoints that do not include LSM trees may optionally
-be given names by the application. Checkpoints named by the application
-persist until explicitly discarded or the application creates a new
-checkpoint with the same name (which replaces the previous checkpoint
-of that name). If the previous checkpoint cannot be replaced, either
-because a cursor is reading from the previous checkpoint, or backups are
-in progress, the checkpoint will fail. Because named checkpoints
-persist until discarded or replaced, they can be used to periodically
-snapshot data for later use.
+Additionally, checkpoints that do not include LSM trees may optionally be
+given names by the application. Because named checkpoints persist until
+discarded or replaced, they can be used to periodically snapshot data for
+later use.
+
+Checkpoints named by the application persist until explicitly discarded or
+the application creates a new checkpoint with the same name (which replaces
+the previous checkpoint of that name). If the previous checkpoint cannot be
+replaced, either because a cursor is reading from the previous checkpoint,
+or backups are in progress, the checkpoint will fail.
Internal checkpoints (that is, checkpoints not named by the application)
use the reserved name "WiredTigerCheckpoint". Applications can open the
diff --git a/src/third_party/wiredtiger/src/docs/license.dox b/src/third_party/wiredtiger/src/docs/license.dox
index febced2c6af..d7814d04fd6 100644
--- a/src/third_party/wiredtiger/src/docs/license.dox
+++ b/src/third_party/wiredtiger/src/docs/license.dox
@@ -2,16 +2,16 @@
The complete WiredTiger software package is Open Source software: you
are welcome to modify and redistribute it under the terms of
-<a href="http://www.gnu.org/licenses/gpl-2.0-standalone.html">
-<b>version 2</b></a> or
-<a href="http://www.gnu.org/licenses/gpl-3.0-standalone.html">
-<b>version 3</b></a> of the
-<b>GNU General Public License</b></a>
+<a href="http://www.gnu.org/licenses/gpl-2.0-standalone.html">version 2</a>
+or
+<a href="http://www.gnu.org/licenses/gpl-3.0-standalone.html">version 3</a>
+of the
+<b>GNU General Public License</b>
as published by the Free Software Foundation. This program is
distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the
-<b>GNU General Public License</b></a> for details.
+<b>GNU General Public License</b> for details.
Additionally, portions of the WiredTiger distribution are distributed
under the terms of the
@@ -31,10 +31,10 @@ those described above, or for technical support for this software, please
contact MongoDB, Inc. at
<a mailto="info@wiredtiger.com">info@wiredtiger.com</a>.
-@section license_library 3rd party software included in the WiredTiger library
+@section license_library 3rd party software always included in the WiredTiger library
Every build of the WiredTiger library binary includes the following 3rd
-party software, distributed under their license terms. Redistribution
+party software, distributed under separate license terms. Redistribution
of the WiredTiger library should comply with these copyrights.
<table>
@@ -46,14 +46,26 @@ of the WiredTiger library should comply with these copyrights.
@row{\c src/support/hash_fnv.c, Authors, Public Domain}
</table>
+@section license_crc32-vpmsum 3rd party software optionally included in the WiredTiger library: PPC64
+
+PPC64 and PPC64LE builds of the WiredTiger library binary include additional
+3rd party software, distributed under separate license terms. Redistribution
+of the WiredTiger library PPC64 and PPC64LE builds should comply with these
+copyrights.
+
+<table>
+@hrow{Distribution Files, Copyright Holder, License}
+@row{\c src/support/power8/*, Anton Blanchard, <a href="http://opensource.org/licenses/Apache-2.0">Apache License\, Version 2.0</a> or the <a href="http://www.gnu.org/licenses/gpl-2.0-standalone.html">GNU General Public License\, version 2 or later</a>}
+</table>
+
@section license_leveldb 3rd party software optionally included in the WiredTiger library: LevelDB
If the \c --enable-leveldb configuration option is specified when
configuring the WiredTiger build, additional 3rd party software is
-included in the WiredTiger LevelDB library binary, distributed under
-their license terms. Redistribution of the WiredTiger library built
-with the \c --enable-leveldb configuration option should comply with
-these copyrights.
+included in the WiredTiger library binary, distributed under separate
+license terms. Redistribution of the WiredTiger library built with the
+\c --enable-leveldb configuration option should comply with these
+copyrights.
<table>
@hrow{Distribution Files, Copyright Holder, License}
diff --git a/src/third_party/wiredtiger/src/docs/spell.ok b/src/third_party/wiredtiger/src/docs/spell.ok
index 80597302cbb..ac71214f8b1 100644
--- a/src/third_party/wiredtiger/src/docs/spell.ok
+++ b/src/third_party/wiredtiger/src/docs/spell.ok
@@ -7,11 +7,13 @@ Atomicity
BLOBs
CFLAGS
CPPFLAGS
+CRC
Cheng
Christoph
Collet's
Coverity
Coverity's
+crc
DB's
DBTs
Datastore
@@ -64,6 +66,7 @@ NOTFOUND
NUMA
NoSQL
OPTYPE
+PPC
PRELOAD
README
Rebalance
@@ -78,6 +81,7 @@ Seward's
SiH
TXT
URIs
+vpmsum
WiredTiger
WiredTiger's
WiredTigerCheckpoint
diff --git a/src/third_party/wiredtiger/src/docs/upgrading.dox b/src/third_party/wiredtiger/src/docs/upgrading.dox
index e4d85003a1e..df0a22ba0fe 100644
--- a/src/third_party/wiredtiger/src/docs/upgrading.dox
+++ b/src/third_party/wiredtiger/src/docs/upgrading.dox
@@ -4,25 +4,24 @@
<dl>
<dt>Column-store bulk-load cursors</dt>
<dd>
-Historically, bulk-load of a column-store object ignored any key set in
-the cursor and automatically assigned each inserted row the next
-sequential record number for its key. In the 2.7.1 release, column-store
-objects match row-store behavior and require the cursor key be set
-before an insert. (This also allows allows sparse tables to be created
-in column-store objects, any skipped records are created as
-already-deleted rows.) To match the previous behavior, specify the
-\c append configuration string when opening the column-store bulk-load
-cursor; this causes the cursor's key to be ignored and each inserted row
-will be assigned the next record number.
+Historically, bulk-load of a column-store object ignored any key set in the
+cursor and automatically assigned each inserted row the next sequential
+record number for its key. In the 2.7.1 release, column-store objects match
+row-store behavior and require the cursor key be set before an insert.
+(This allows sparse tables to be created in column-store objects; any
+skipped records are created as already-deleted rows.) To match the previous
+behavior, specify the \c append configuration string when opening the
+column-store bulk-load cursor; this causes the cursor's key to be ignored
+and each inserted row will be assigned the next record number.
</dd>
<dt>Change to WT_SESSION::truncate with URI</dt>
<dd>
If using the WT_SESSION::truncate API with a file: URI for a full table
-truncate, underlying algorithmic changes result in some visible differences.
-This call can now return WT_ROLLBACK. Applications should be prepared to
-handle this error. This method no longer requires exclusive access to the
-table. Also the underlying disk space may not be immediately
+truncate, underlying algorithmic changes result in some visible
+differences. This call can now return WT_ROLLBACK. Applications should be
+prepared to handle this error. This method no longer requires exclusive
+access to the table. Also the underlying disk space may not be immediately
reclaimed when the call returns. The performance of this API may differ
from earlier releases.
</dd>
@@ -34,6 +33,14 @@ from the WiredTiger release; remaining compression engines include LZ4,
snappy and zlib.
</dd>
+<dt>Change to named checkpoints with bulk loads</dt>
+<dd>
+Previous versions of WiredTiger created empty named checkpoints in files
+being bulk-loaded. In this release, checkpoint skips files being
+bulk-loaded, so they do not get named checkpoints that complete during the
+bulk load.
+</dd>
+
</dl><hr>
@section version_270 Upgrading to Version 2.7.0
diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c
index 0536a06bc22..8ef7164dbc6 100644
--- a/src/third_party/wiredtiger/src/evict/evict_lru.c
+++ b/src/third_party/wiredtiger/src/evict/evict_lru.c
@@ -1209,7 +1209,7 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp)
uint64_t pages_walked;
uint32_t walk_flags;
int internal_pages, restarts;
- bool enough, modified, would_split;
+ bool enough, modified;
conn = S2C(session);
btree = S2BT(session);
@@ -1298,7 +1298,7 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp)
page->read_gen = __wt_cache_read_gen_new(session);
fast: /* If the page can't be evicted, give up. */
- if (!__wt_page_can_evict(session, ref, &would_split))
+ if (!__wt_page_can_evict(session, ref, NULL))
continue;
/*
diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i
index 94111397abd..6df7f87073f 100644
--- a/src/third_party/wiredtiger/src/include/btree.i
+++ b/src/third_party/wiredtiger/src/include/btree.i
@@ -1149,7 +1149,7 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp)
* parent frees the backing blocks for any no-longer-used overflow keys,
* which will corrupt the checkpoint's block management.
*/
- if (btree->checkpointing &&
+ if (btree->checkpointing != WT_CKPT_OFF &&
F_ISSET_ATOMIC(ref->home, WT_PAGE_OVERFLOW_KEYS))
return (false);
@@ -1294,19 +1294,19 @@ __wt_page_swap_func(
bool acquired;
/*
- * In rare cases when walking the tree, we try to swap to the same
- * page. Fast-path that to avoid thinking about error handling.
- */
- if (held == want)
- return (0);
-
- /*
* This function is here to simplify the error handling during hazard
* pointer coupling so we never leave a hazard pointer dangling. The
* assumption is we're holding a hazard pointer on "held", and want to
* acquire a hazard pointer on "want", releasing the hazard pointer on
* "held" when we're done.
+ *
+ * When walking the tree, we sometimes swap to the same page. Fast-path
+ * that to avoid thinking about error handling.
*/
+ if (held == want)
+ return (0);
+
+ /* Get the wanted page. */
ret = __wt_page_in_func(session, want, flags
#ifdef HAVE_DIAGNOSTIC
, file, line
@@ -1446,15 +1446,19 @@ __wt_btree_lsm_over_size(WT_SESSION_IMPL *session, uint64_t maxsize)
}
/*
- * __wt_split_intl_race --
+ * __wt_split_descent_race --
* Return if we raced with an internal page split when descending the tree.
*/
static inline bool
-__wt_split_intl_race(
- WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE_INDEX *saved_pindex)
+__wt_split_descent_race(
+ WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE_INDEX *saved_pindex)
{
WT_PAGE_INDEX *pindex;
+ /* No test when starting the descent (there's no home to check). */
+ if (__wt_ref_is_root(ref))
+ return (false);
+
/*
* A place to hang this comment...
*
@@ -1509,6 +1513,6 @@ __wt_split_intl_race(
* content the split page retains after the split, and we ignore this
* race.
*/
- WT_INTL_INDEX_GET(session, parent, pindex);
+ WT_INTL_INDEX_GET(session, ref->home, pindex);
return (pindex != saved_pindex);
}
diff --git a/src/third_party/wiredtiger/src/include/column.i b/src/third_party/wiredtiger/src/include/column.i
index 9f3e2101f6f..d64e68420a5 100644
--- a/src/third_party/wiredtiger/src/include/column.i
+++ b/src/third_party/wiredtiger/src/include/column.i
@@ -11,13 +11,13 @@
* Search a column-store insert list for the next larger record.
*/
static inline WT_INSERT *
-__col_insert_search_gt(WT_INSERT_HEAD *inshead, uint64_t recno)
+__col_insert_search_gt(WT_INSERT_HEAD *ins_head, uint64_t recno)
{
WT_INSERT *ins, **insp;
int i;
/* If there's no insert chain to search, we're done. */
- if ((ins = WT_SKIP_LAST(inshead)) == NULL)
+ if ((ins = WT_SKIP_LAST(ins_head)) == NULL)
return (NULL);
/* Fast path check for targets past the end of the skiplist. */
@@ -29,7 +29,7 @@ __col_insert_search_gt(WT_INSERT_HEAD *inshead, uint64_t recno)
* go as far as possible at each level before stepping down to the next.
*/
ins = NULL;
- for (i = WT_SKIP_MAXDEPTH - 1, insp = &inshead->head[i]; i >= 0;)
+ for (i = WT_SKIP_MAXDEPTH - 1, insp = &ins_head->head[i]; i >= 0;)
if (*insp != NULL && recno >= WT_INSERT_RECNO(*insp)) {
ins = *insp; /* GTE: keep going at this level */
insp = &(*insp)->next[i];
@@ -50,7 +50,7 @@ __col_insert_search_gt(WT_INSERT_HEAD *inshead, uint64_t recno)
* such a record exists before searching.
*/
if (ins == NULL)
- ins = WT_SKIP_FIRST(inshead);
+ ins = WT_SKIP_FIRST(ins_head);
while (recno >= WT_INSERT_RECNO(ins))
ins = WT_SKIP_NEXT(ins);
return (ins);
@@ -61,13 +61,13 @@ __col_insert_search_gt(WT_INSERT_HEAD *inshead, uint64_t recno)
* Search a column-store insert list for the next smaller record.
*/
static inline WT_INSERT *
-__col_insert_search_lt(WT_INSERT_HEAD *inshead, uint64_t recno)
+__col_insert_search_lt(WT_INSERT_HEAD *ins_head, uint64_t recno)
{
WT_INSERT *ins, **insp;
int i;
/* If there's no insert chain to search, we're done. */
- if ((ins = WT_SKIP_FIRST(inshead)) == NULL)
+ if ((ins = WT_SKIP_FIRST(ins_head)) == NULL)
return (NULL);
/* Fast path check for targets before the skiplist. */
@@ -78,7 +78,7 @@ __col_insert_search_lt(WT_INSERT_HEAD *inshead, uint64_t recno)
* The insert list is a skip list: start at the highest skip level, then
* go as far as possible at each level before stepping down to the next.
*/
- for (i = WT_SKIP_MAXDEPTH - 1, insp = &inshead->head[i]; i >= 0;)
+ for (i = WT_SKIP_MAXDEPTH - 1, insp = &ins_head->head[i]; i >= 0;)
if (*insp != NULL && recno > WT_INSERT_RECNO(*insp)) {
ins = *insp; /* GT: keep going at this level */
insp = &(*insp)->next[i];
@@ -95,14 +95,14 @@ __col_insert_search_lt(WT_INSERT_HEAD *inshead, uint64_t recno)
* Search a column-store insert list for an exact match.
*/
static inline WT_INSERT *
-__col_insert_search_match(WT_INSERT_HEAD *inshead, uint64_t recno)
+__col_insert_search_match(WT_INSERT_HEAD *ins_head, uint64_t recno)
{
WT_INSERT **insp, *ret_ins;
uint64_t ins_recno;
int cmp, i;
/* If there's no insert chain to search, we're done. */
- if ((ret_ins = WT_SKIP_LAST(inshead)) == NULL)
+ if ((ret_ins = WT_SKIP_LAST(ins_head)) == NULL)
return (NULL);
/* Fast path the check for values at the end of the skiplist. */
@@ -115,7 +115,7 @@ __col_insert_search_match(WT_INSERT_HEAD *inshead, uint64_t recno)
* The insert list is a skip list: start at the highest skip level, then
* go as far as possible at each level before stepping down to the next.
*/
- for (i = WT_SKIP_MAXDEPTH - 1, insp = &inshead->head[i]; i >= 0; ) {
+ for (i = WT_SKIP_MAXDEPTH - 1, insp = &ins_head->head[i]; i >= 0; ) {
if (*insp == NULL) {
--i;
--insp;
@@ -143,7 +143,7 @@ __col_insert_search_match(WT_INSERT_HEAD *inshead, uint64_t recno)
* Search a column-store insert list, creating a skiplist stack as we go.
*/
static inline WT_INSERT *
-__col_insert_search(WT_INSERT_HEAD *inshead,
+__col_insert_search(WT_INSERT_HEAD *ins_head,
WT_INSERT ***ins_stack, WT_INSERT **next_stack, uint64_t recno)
{
WT_INSERT **insp, *ret_ins;
@@ -151,15 +151,15 @@ __col_insert_search(WT_INSERT_HEAD *inshead,
int cmp, i;
/* If there's no insert chain to search, we're done. */
- if ((ret_ins = WT_SKIP_LAST(inshead)) == NULL)
+ if ((ret_ins = WT_SKIP_LAST(ins_head)) == NULL)
return (NULL);
/* Fast path appends. */
if (recno >= WT_INSERT_RECNO(ret_ins)) {
for (i = 0; i < WT_SKIP_MAXDEPTH; i++) {
ins_stack[i] = (i == 0) ? &ret_ins->next[0] :
- (inshead->tail[i] != NULL) ?
- &inshead->tail[i]->next[i] : &inshead->head[i];
+ (ins_head->tail[i] != NULL) ?
+ &ins_head->tail[i]->next[i] : &ins_head->head[i];
next_stack[i] = NULL;
}
return (ret_ins);
@@ -169,7 +169,7 @@ __col_insert_search(WT_INSERT_HEAD *inshead,
* The insert list is a skip list: start at the highest skip level, then
* go as far as possible at each level before stepping down to the next.
*/
- for (i = WT_SKIP_MAXDEPTH - 1, insp = &inshead->head[i]; i >= 0; ) {
+ for (i = WT_SKIP_MAXDEPTH - 1, insp = &ins_head->head[i]; i >= 0; ) {
if ((ret_ins = *insp) == NULL) {
next_stack[i] = NULL;
ins_stack[i--] = insp--;
diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h
index 88797e83ad6..b0edcef718b 100644
--- a/src/third_party/wiredtiger/src/include/connection.h
+++ b/src/third_party/wiredtiger/src/include/connection.h
@@ -175,6 +175,7 @@ struct __wt_connection_impl {
WT_SPINLOCK checkpoint_lock; /* Checkpoint spinlock */
WT_SPINLOCK dhandle_lock; /* Data handle list spinlock */
WT_SPINLOCK fh_lock; /* File handle queue spinlock */
+ WT_SPINLOCK metadata_lock; /* Metadata update spinlock */
WT_SPINLOCK reconfig_lock; /* Single thread reconfigure */
WT_SPINLOCK schema_lock; /* Schema operation spinlock */
WT_SPINLOCK table_lock; /* Table creation spinlock */
diff --git a/src/third_party/wiredtiger/src/include/cursor.h b/src/third_party/wiredtiger/src/include/cursor.h
index 7f7b5dceb79..f9bd20c8ba1 100644
--- a/src/third_party/wiredtiger/src/include/cursor.h
+++ b/src/third_party/wiredtiger/src/include/cursor.h
@@ -289,7 +289,8 @@ struct __wt_cursor_join_iter {
WT_CURSOR_JOIN_ENTRY *entry;
WT_CURSOR *cursor;
WT_ITEM *curkey;
- bool advance;
+ bool positioned;
+ bool isequal; /* advancing means we're done */
};
struct __wt_cursor_join_endpoint {
@@ -302,14 +303,17 @@ struct __wt_cursor_join_endpoint {
#define WT_CURJOIN_END_GT 0x04 /* include values > cursor */
#define WT_CURJOIN_END_GE (WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ)
#define WT_CURJOIN_END_LE (WT_CURJOIN_END_LT | WT_CURJOIN_END_EQ)
-#define WT_CURJOIN_END_OWN_KEY 0x08 /* must free key's data */
uint8_t flags; /* range for this endpoint */
};
+#define WT_CURJOIN_END_RANGE(endp) \
+ ((endp)->flags & \
+ (WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ | WT_CURJOIN_END_LT))
struct __wt_cursor_join_entry {
WT_INDEX *index;
WT_CURSOR *main; /* raw main table cursor */
WT_BLOOM *bloom; /* Bloom filter handle */
+ char *repack_format; /* target format for repack */
uint32_t bloom_bit_count; /* bits per item in bloom */
uint32_t bloom_hash_count; /* hash functions in bloom */
uint64_t count; /* approx number of matches */
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index 1999ff6b732..07b4adfe698 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -190,7 +190,7 @@ extern int __wt_row_insert_alloc(WT_SESSION_IMPL *session, WT_ITEM *key, u_int s
extern int __wt_update_alloc( WT_SESSION_IMPL *session, WT_ITEM *value, WT_UPDATE **updp, size_t *sizep);
extern WT_UPDATE *__wt_update_obsolete_check( WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd);
extern void __wt_update_obsolete_free( WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd);
-extern int __wt_search_insert( WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *srch_key);
+extern int __wt_search_insert(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_INSERT_HEAD *ins_head, WT_ITEM *srch_key);
extern int __wt_row_search(WT_SESSION_IMPL *session, WT_ITEM *srch_key, WT_REF *leaf, WT_CURSOR_BTREE *cbt, bool insert);
extern int __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt);
extern int __wt_row_random_descent(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt);
@@ -253,8 +253,6 @@ extern int __wt_conn_dhandle_find( WT_SESSION_IMPL *session, const char *uri, co
extern int __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, bool final, bool force);
extern int __wt_conn_btree_open( WT_SESSION_IMPL *session, const char *cfg[], uint32_t flags);
extern int __wt_conn_btree_apply(WT_SESSION_IMPL *session, bool apply_checkpoints, const char *uri, int (*func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[]);
-extern int __wt_conn_btree_apply_single_ckpt(WT_SESSION_IMPL *session, const char *uri, int (*func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[]);
-extern int __wt_conn_btree_apply_single(WT_SESSION_IMPL *session, const char *uri, const char *checkpoint, int (*func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[]);
extern int __wt_conn_dhandle_close_all( WT_SESSION_IMPL *session, const char *uri, bool force);
extern int __wt_conn_dhandle_discard_single( WT_SESSION_IMPL *session, bool final, bool force);
extern int __wt_conn_dhandle_discard(WT_SESSION_IMPL *session);
@@ -553,7 +551,7 @@ extern int __wt_struct_size(WT_SESSION_IMPL *session, size_t *sizep, const char
extern int __wt_struct_pack(WT_SESSION_IMPL *session, void *buffer, size_t size, const char *fmt, ...);
extern int __wt_struct_unpack(WT_SESSION_IMPL *session, const void *buffer, size_t size, const char *fmt, ...);
extern int __wt_struct_unpack_size(WT_SESSION_IMPL *session, const void *buffer, size_t size, const char *fmt, size_t *resultp);
-extern int __wt_struct_repack(WT_SESSION_IMPL *session, const char *infmt, const char *outfmt, const WT_ITEM *inbuf, WT_ITEM *outbuf, void **reallocp);
+extern int __wt_struct_repack(WT_SESSION_IMPL *session, const char *infmt, const char *outfmt, const WT_ITEM *inbuf, WT_ITEM *outbuf);
extern int __wt_ovfl_discard_add(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell);
extern void __wt_ovfl_discard_free(WT_SESSION_IMPL *session, WT_PAGE *page);
extern int __wt_ovfl_reuse_search(WT_SESSION_IMPL *session, WT_PAGE *page, uint8_t **addrp, size_t *addr_sizep, const void *value, size_t value_size);
diff --git a/src/third_party/wiredtiger/src/include/flags.h b/src/third_party/wiredtiger/src/include/flags.h
index 24fae4abccd..e610c07f432 100644
--- a/src/third_party/wiredtiger/src/include/flags.h
+++ b/src/third_party/wiredtiger/src/include/flags.h
@@ -55,20 +55,21 @@
#define WT_SESSION_INTERNAL 0x00000004
#define WT_SESSION_LOCKED_CHECKPOINT 0x00000008
#define WT_SESSION_LOCKED_HANDLE_LIST 0x00000010
-#define WT_SESSION_LOCKED_SCHEMA 0x00000020
-#define WT_SESSION_LOCKED_SLOT 0x00000040
-#define WT_SESSION_LOCKED_TABLE 0x00000080
-#define WT_SESSION_LOCKED_TURTLE 0x00000100
-#define WT_SESSION_LOCK_NO_WAIT 0x00000200
-#define WT_SESSION_LOGGING_INMEM 0x00000400
-#define WT_SESSION_LOOKASIDE_CURSOR 0x00000800
-#define WT_SESSION_NO_CACHE 0x00001000
-#define WT_SESSION_NO_DATA_HANDLES 0x00002000
-#define WT_SESSION_NO_EVICTION 0x00004000
-#define WT_SESSION_NO_LOGGING 0x00008000
-#define WT_SESSION_NO_SCHEMA_LOCK 0x00010000
-#define WT_SESSION_QUIET_CORRUPT_FILE 0x00020000
-#define WT_SESSION_SERVER_ASYNC 0x00040000
+#define WT_SESSION_LOCKED_METADATA 0x00000020
+#define WT_SESSION_LOCKED_SCHEMA 0x00000040
+#define WT_SESSION_LOCKED_SLOT 0x00000080
+#define WT_SESSION_LOCKED_TABLE 0x00000100
+#define WT_SESSION_LOCKED_TURTLE 0x00000200
+#define WT_SESSION_LOCK_NO_WAIT 0x00000400
+#define WT_SESSION_LOGGING_INMEM 0x00000800
+#define WT_SESSION_LOOKASIDE_CURSOR 0x00001000
+#define WT_SESSION_NO_CACHE 0x00002000
+#define WT_SESSION_NO_DATA_HANDLES 0x00004000
+#define WT_SESSION_NO_EVICTION 0x00008000
+#define WT_SESSION_NO_LOGGING 0x00010000
+#define WT_SESSION_NO_SCHEMA_LOCK 0x00020000
+#define WT_SESSION_QUIET_CORRUPT_FILE 0x00040000
+#define WT_SESSION_SERVER_ASYNC 0x00080000
#define WT_TXN_LOG_CKPT_CLEANUP 0x00000001
#define WT_TXN_LOG_CKPT_PREPARE 0x00000002
#define WT_TXN_LOG_CKPT_START 0x00000004
diff --git a/src/third_party/wiredtiger/src/include/meta.h b/src/third_party/wiredtiger/src/include/meta.h
index d61022c0c44..ac0f5fedac4 100644
--- a/src/third_party/wiredtiger/src/include/meta.h
+++ b/src/third_party/wiredtiger/src/include/meta.h
@@ -21,6 +21,7 @@
#define WT_METADATA_TURTLE_SET "WiredTiger.turtle.set" /* Turtle temp file */
#define WT_METADATA_URI "metadata:" /* Metadata alias */
+#define WT_METAFILE "WiredTiger.wt" /* Metadata table */
#define WT_METAFILE_URI "file:WiredTiger.wt" /* Metadata table URI */
#define WT_LAS_URI "file:WiredTigerLAS.wt" /* Lookaside table URI*/
diff --git a/src/third_party/wiredtiger/src/include/schema.h b/src/third_party/wiredtiger/src/include/schema.h
index a51030870c1..f93c596e2ca 100644
--- a/src/third_party/wiredtiger/src/include/schema.h
+++ b/src/third_party/wiredtiger/src/include/schema.h
@@ -133,6 +133,14 @@ struct __wt_table {
&S2C(session)->dhandle_lock, WT_SESSION_LOCKED_HANDLE_LIST, op)
/*
+ * WT_WITH_METADATA_LOCK --
+ * Acquire the metadata lock, perform an operation, drop the lock.
+ */
+#define WT_WITH_METADATA_LOCK(session, ret, op) \
+ WT_WITH_LOCK(session, ret, \
+ &S2C(session)->metadata_lock, WT_SESSION_LOCKED_METADATA, op)
+
+/*
* WT_WITH_SCHEMA_LOCK --
* Acquire the schema lock, perform an operation, drop the lock.
* Check that we are not already holding some other lock: the schema lock
@@ -166,6 +174,8 @@ struct __wt_table {
*/
#define WT_WITHOUT_LOCKS(session, op) do { \
WT_CONNECTION_IMPL *__conn = S2C(session); \
+ bool __checkpoint_locked = \
+ F_ISSET(session, WT_SESSION_LOCKED_CHECKPOINT); \
bool __handle_locked = \
F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST); \
bool __table_locked = \
@@ -184,7 +194,15 @@ struct __wt_table {
F_CLR(session, WT_SESSION_LOCKED_SCHEMA); \
__wt_spin_unlock(session, &__conn->schema_lock); \
} \
+ if (__checkpoint_locked) { \
+ F_CLR(session, WT_SESSION_LOCKED_CHECKPOINT); \
+ __wt_spin_unlock(session, &__conn->checkpoint_lock); \
+ } \
op; \
+ if (__checkpoint_locked) { \
+ __wt_spin_lock(session, &__conn->checkpoint_lock); \
+ F_SET(session, WT_SESSION_LOCKED_CHECKPOINT); \
+ } \
if (__schema_locked) { \
__wt_spin_lock(session, &__conn->schema_lock); \
F_SET(session, WT_SESSION_LOCKED_SCHEMA); \
diff --git a/src/third_party/wiredtiger/src/include/session.h b/src/third_party/wiredtiger/src/include/session.h
index 5c3291230b4..b3c475805a4 100644
--- a/src/third_party/wiredtiger/src/include/session.h
+++ b/src/third_party/wiredtiger/src/include/session.h
@@ -127,10 +127,7 @@ struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_session_impl {
int (*block_manager_cleanup)(WT_SESSION_IMPL *);
/* Checkpoint support */
- struct {
- WT_DATA_HANDLE *dhandle;
- const char *name;
- } *ckpt_handle; /* Handle list */
+ WT_DATA_HANDLE **ckpt_handle; /* Handle list */
u_int ckpt_handle_next; /* Next empty slot */
size_t ckpt_handle_allocated; /* Bytes allocated */
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
index d5d81df6785..099bde176f7 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
@@ -334,14 +334,27 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session,
/*
* Turn on metadata tracking to ensure the checkpoint gets the
* necessary handle locks.
+ *
+ * Ensure that we don't race with a running checkpoint: the checkpoint
+ * lock protects against us racing with an application checkpoint in
+ * this chunk. Don't wait for it, though: checkpoints can take a long
+ * time, and our checkpoint operation should be very quick.
*/
WT_ERR(__wt_meta_track_on(session));
- WT_WITH_SCHEMA_LOCK(session, ret,
- ret = __wt_schema_worker(
- session, chunk->uri, __wt_checkpoint, NULL, NULL, 0));
+ F_SET(session, WT_SESSION_LOCK_NO_WAIT);
+ WT_WITH_CHECKPOINT_LOCK(session, ret,
+ WT_WITH_SCHEMA_LOCK(session, ret,
+ ret = __wt_schema_worker(
+ session, chunk->uri, __wt_checkpoint, NULL, NULL, 0)));
WT_TRET(__wt_meta_track_off(session, false, ret != 0));
- if (ret != 0)
+ F_CLR(session, WT_SESSION_LOCK_NO_WAIT);
+ if (ret != 0) {
+ if (ret == EBUSY) {
+ ret = 0;
+ goto err;
+ }
WT_ERR_MSG(session, ret, "LSM checkpoint");
+ }
/* Now the file is written, get the chunk size. */
WT_ERR(__wt_lsm_tree_set_chunk_size(session, chunk));
diff --git a/src/third_party/wiredtiger/src/meta/meta_apply.c b/src/third_party/wiredtiger/src/meta/meta_apply.c
index 92766213b33..7722cd55fbd 100644
--- a/src/third_party/wiredtiger/src/meta/meta_apply.c
+++ b/src/third_party/wiredtiger/src/meta/meta_apply.c
@@ -37,17 +37,15 @@ __meta_btree_apply(WT_SESSION_IMPL *session, WT_CURSOR *cursor,
* dropping the handle (e.g in LSM when cleaning up obsolete
* chunks). Holding the metadata lock isn't enough.
*/
- ret = __wt_session_get_btree(session, uri, NULL, NULL, 0);
- if (ret == 0) {
- WT_SAVE_DHANDLE(session, ret = func(session, cfg));
- if (WT_META_TRACKING(session))
- WT_TRET(__wt_meta_track_handle_lock(
- session, false));
- else
- WT_TRET(__wt_session_release_btree(session));
- } else if (ret == EBUSY)
- ret = __wt_conn_btree_apply_single(
- session, uri, NULL, func, cfg);
+ if ((ret = __wt_session_get_btree(
+ session, uri, NULL, NULL, 0)) != 0)
+ return (ret == EBUSY ? 0 : ret);
+ WT_SAVE_DHANDLE(session, ret = func(session, cfg));
+ if (WT_META_TRACKING(session))
+ WT_TRET(__wt_meta_track_handle_lock(
+ session, false));
+ else
+ WT_TRET(__wt_session_release_btree(session));
WT_RET(ret);
}
WT_RET_NOTFOUND_OK(ret);
diff --git a/src/third_party/wiredtiger/src/meta/meta_track.c b/src/third_party/wiredtiger/src/meta/meta_track.c
index 1baab2deae1..a73b7e09d37 100644
--- a/src/third_party/wiredtiger/src/meta/meta_track.c
+++ b/src/third_party/wiredtiger/src/meta/meta_track.c
@@ -284,11 +284,12 @@ __wt_meta_track_off(WT_SESSION_IMPL *session, bool need_sync, bool unroll)
* should be included in the checkpoint.
*/
ckpt_session->txn.id = session->txn.id;
- F_SET(ckpt_session, WT_SESSION_LOCKED_SCHEMA);
- WT_WITH_DHANDLE(ckpt_session,
- WT_SESSION_META_DHANDLE(session),
- ret = __wt_checkpoint(ckpt_session, NULL));
- F_CLR(ckpt_session, WT_SESSION_LOCKED_SCHEMA);
+ F_SET(ckpt_session, WT_SESSION_LOCKED_METADATA);
+ WT_WITH_METADATA_LOCK(session, ret,
+ WT_WITH_DHANDLE(ckpt_session,
+ WT_SESSION_META_DHANDLE(session),
+ ret = __wt_checkpoint(ckpt_session, NULL)));
+ F_CLR(ckpt_session, WT_SESSION_LOCKED_METADATA);
ckpt_session->txn.id = WT_TXN_NONE;
WT_RET(ret);
WT_WITH_DHANDLE(session,
diff --git a/src/third_party/wiredtiger/src/meta/meta_turtle.c b/src/third_party/wiredtiger/src/meta/meta_turtle.c
index 7182bb0fe5f..3d27f0b5845 100644
--- a/src/third_party/wiredtiger/src/meta/meta_turtle.c
+++ b/src/third_party/wiredtiger/src/meta/meta_turtle.c
@@ -153,10 +153,11 @@ int
__wt_turtle_init(WT_SESSION_IMPL *session)
{
WT_DECL_RET;
- bool exist, exist_incr;
+ bool exist_backup, exist_incr, exist_turtle, load;
char *metaconf;
metaconf = NULL;
+ load = false;
/*
* Discard any turtle setup file left-over from previous runs. This
@@ -179,13 +180,29 @@ __wt_turtle_init(WT_SESSION_IMPL *session)
* done.
*/
WT_RET(__wt_exist(session, WT_INCREMENTAL_BACKUP, &exist_incr));
- WT_RET(__wt_exist(session, WT_METADATA_TURTLE, &exist));
- if (exist) {
+ WT_RET(__wt_exist(session, WT_METADATA_BACKUP, &exist_backup));
+ WT_RET(__wt_exist(session, WT_METADATA_TURTLE, &exist_turtle));
+ if (exist_turtle) {
if (exist_incr)
WT_RET_MSG(session, EINVAL,
"Incremental backup after running recovery "
"is not allowed.");
- } else {
+ /*
+ * If we have a backup file and metadata and turtle files,
+ * we want to recreate the metadata from the backup.
+ */
+ if (exist_backup) {
+ WT_RET(__wt_msg(session, "Both %s and %s exist. "
+ "Recreating metadata from backup.",
+ WT_METADATA_TURTLE, WT_METADATA_BACKUP));
+ WT_RET(__wt_remove_if_exists(session, WT_METAFILE));
+ WT_RET(__wt_remove_if_exists(
+ session, WT_METADATA_TURTLE));
+ load = true;
+ }
+ } else
+ load = true;
+ if (load) {
if (exist_incr)
F_SET(S2C(session), WT_CONN_WAS_BACKUP);
diff --git a/src/third_party/wiredtiger/src/packing/pack_impl.c b/src/third_party/wiredtiger/src/packing/pack_impl.c
index 0e3ed44ba6a..2bd850bfc9a 100644
--- a/src/third_party/wiredtiger/src/packing/pack_impl.c
+++ b/src/third_party/wiredtiger/src/packing/pack_impl.c
@@ -144,70 +144,43 @@ __wt_struct_unpack_size(WT_SESSION_IMPL *session,
*/
int
__wt_struct_repack(WT_SESSION_IMPL *session, const char *infmt,
- const char *outfmt, const WT_ITEM *inbuf, WT_ITEM *outbuf, void **reallocp)
+ const char *outfmt, const WT_ITEM *inbuf, WT_ITEM *outbuf)
{
WT_DECL_PACK_VALUE(pvin);
WT_DECL_PACK_VALUE(pvout);
WT_DECL_RET;
WT_PACK packin, packout;
const uint8_t *before, *end, *p;
- uint8_t *pout;
- size_t len;
const void *start;
start = NULL;
p = inbuf->data;
end = p + inbuf->size;
- /*
- * Handle this non-contiguous case: 'U' -> 'u' at the end of the buf.
- * The former case has the size embedded before the item, the latter
- * does not.
- */
- if ((len = strlen(outfmt)) > 1 && outfmt[len - 1] == 'u' &&
- strlen(infmt) > len && infmt[len - 1] == 'U') {
- WT_ERR(__wt_realloc(session, NULL, inbuf->size, reallocp));
- pout = *reallocp;
- } else
- pout = NULL;
-
- WT_ERR(__pack_init(session, &packout, outfmt));
- WT_ERR(__pack_init(session, &packin, infmt));
+ WT_RET(__pack_init(session, &packout, outfmt));
+ WT_RET(__pack_init(session, &packin, infmt));
/* Outfmt should complete before infmt */
while ((ret = __pack_next(&packout, &pvout)) == 0) {
if (p >= end)
- WT_ERR(EINVAL);
- WT_ERR(__pack_next(&packin, &pvin));
+ WT_RET(EINVAL);
+ if (pvout.type == 'x' && pvout.size == 0 && pvout.havesize)
+ continue;
+ WT_RET(__pack_next(&packin, &pvin));
before = p;
- WT_ERR(__unpack_read(session, &pvin, &p, (size_t)(end - p)));
- if (pvout.type != pvin.type) {
- if (pvout.type == 'u' && pvin.type == 'U') {
- /* Skip the prefixed size, we don't need it */
- WT_ERR(__wt_struct_unpack_size(session, before,
- (size_t)(end - before), "I", &len));
- before += len;
- } else
- WT_ERR(ENOTSUP);
- }
- if (pout != NULL) {
- memcpy(pout, before, WT_PTRDIFF(p, before));
- pout += p - before;
- } else if (start == NULL)
+ WT_RET(__unpack_read(session, &pvin, &p, (size_t)(end - p)));
+ if (pvout.type != pvin.type)
+ WT_RET(ENOTSUP);
+ if (start == NULL)
start = before;
}
- WT_ERR_NOTFOUND_OK(ret);
+ WT_RET_NOTFOUND_OK(ret);
/* Be paranoid - __pack_write should never overflow. */
WT_ASSERT(session, p <= end);
- if (pout != NULL) {
- outbuf->data = *reallocp;
- outbuf->size = WT_PTRDIFF(pout, *reallocp);
- } else {
- outbuf->data = start;
- outbuf->size = WT_PTRDIFF(p, start);
- }
+ outbuf->data = start;
+ outbuf->size = WT_PTRDIFF(p, start);
-err: return (ret);
+ return (0);
}
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index c25d7b5e493..f245ff5d921 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -363,6 +363,17 @@ __wt_reconcile(WT_SESSION_IMPL *session,
WT_ASSERT(session, __wt_page_is_modified(page));
/*
+ * Reconciliation locks the page for three reasons:
+ * Reconciliation reads the lists of page updates, obsolete updates
+ * cannot be discarded while reconciliation is in progress;
+ * The compaction process reads page modification information, which
+ * reconciliation modifies;
+ * In-memory splits: reconciliation of an internal page cannot handle
+ * a child page splitting during the reconciliation.
+ */
+ WT_RET(__wt_fair_lock(session, &page->page_lock));
+
+ /*
* Check that transaction time always moves forward for a given page.
* If this check fails, reconciliation can free something that a future
* reconciliation will need.
@@ -376,17 +387,6 @@ __wt_reconcile(WT_SESSION_IMPL *session,
session, ref, flags, salvage, &session->reconcile));
r = session->reconcile;
- /*
- * Reconciliation locks the page for three reasons:
- * Reconciliation reads the lists of page updates, obsolete updates
- * cannot be discarded while reconciliation is in progress;
- * The compaction process reads page modification information, which
- * reconciliation modifies;
- * In-memory splits: reconciliation of an internal page cannot handle
- * a child page splitting during the reconciliation.
- */
- WT_RET(__wt_fair_lock(session, &page->page_lock));
-
/* Reconcile the page. */
switch (page->type) {
case WT_PAGE_COL_FIX:
@@ -1313,7 +1313,7 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
} while (0)
typedef enum {
- WT_CHILD_IGNORE, /* Deleted child: ignore */
+ WT_CHILD_IGNORE, /* Ignored child */
WT_CHILD_MODIFIED, /* Modified child */
WT_CHILD_ORIGINAL, /* Original child */
WT_CHILD_PROXY /* Deleted child: proxy */
@@ -1450,16 +1450,15 @@ __rec_child_modify(WT_SESSION_IMPL *session,
/*
* This function is called when walking an internal page to decide how
- * to handle child pages referenced by the internal page, specifically
- * if the child page is to be merged into its parent.
+ * to handle child pages referenced by the internal page.
*
* Internal pages are reconciled for two reasons: first, when evicting
* an internal page, second by the checkpoint code when writing internal
- * pages. During eviction, the subtree is locked down so all pages
- * should be in the WT_REF_DISK or WT_REF_LOCKED state. During
- * checkpoint, any eviction that might affect our review of an internal
- * page is prohibited, however, as the subtree is not reserved for our
- * exclusive use, there are other page states that must be considered.
+ * pages. During eviction, all pages should be in the WT_REF_DISK or
+ * WT_REF_DELETED state. During checkpoint, eviction that might affect
+ * review of an internal page is prohibited; however, as the subtree is
+ * not reserved for our exclusive use, there are other page states that
+ * must be considered.
*/
for (;; __wt_yield())
switch (r->tested_ref_state = ref->state) {
@@ -1488,15 +1487,14 @@ __rec_child_modify(WT_SESSION_IMPL *session,
/*
* Locked.
*
- * If evicting, the evicted page's subtree, including
- * this child, was selected for eviction by us and the
- * state is stable until we reset it, it's an in-memory
- * state. This is the expected state for a child being
- * merged into a page (where the page was selected by
- * the eviction server for eviction).
+ * We should never be here during eviction, active child
+ * pages in an evicted page's subtree fail the eviction
+ * attempt.
*/
- if (F_ISSET(r, WT_EVICTING))
- goto in_memory;
+ if (F_ISSET(r, WT_EVICTING)) {
+ WT_ASSERT(session, !F_ISSET(r, WT_EVICTING));
+ return (EBUSY);
+ }
/*
* If called during checkpoint, the child is being
@@ -1514,24 +1512,21 @@ __rec_child_modify(WT_SESSION_IMPL *session,
/*
* In memory.
*
- * If evicting, the evicted page's subtree, including
- * this child, was selected for eviction by us and the
- * state is stable until we reset it, it's an in-memory
- * state. This is the expected state for a child being
- * merged into a page (where the page belongs to a file
- * being discarded from the cache during close).
+ * We should never be here during eviction, active child
+ * pages in an evicted page's subtree fail the eviction
+ * attempt.
*/
- if (F_ISSET(r, WT_EVICTING))
- goto in_memory;
+ if (F_ISSET(r, WT_EVICTING)) {
+ WT_ASSERT(session, !F_ISSET(r, WT_EVICTING));
+ return (EBUSY);
+ }
/*
* If called during checkpoint, acquire a hazard pointer
* so the child isn't evicted, it's an in-memory case.
*
- * This call cannot return split/restart, eviction of
- * pages that split into their parent is shutout during
- * checkpoint, all splits in process will have completed
- * before we walk any pages for checkpoint.
+ * This call cannot return split/restart, we have a lock
+ * on the parent which prevents a child page split.
*/
ret = __wt_page_in(session, ref,
WT_READ_CACHE | WT_READ_NO_EVICT |
@@ -1548,29 +1543,31 @@ __rec_child_modify(WT_SESSION_IMPL *session,
/*
* Being read, not modified by definition.
*
- * We should never be here during eviction, a child page
- * in this state within an evicted page's subtree would
- * have caused normally eviction to fail, and exclusive
- * eviction shouldn't ever see pages being read.
+ * We should never be here during eviction, active child
+ * pages in an evicted page's subtree fail the eviction
+ * attempt.
*/
- WT_ASSERT(session, !F_ISSET(r, WT_EVICTING));
+ if (F_ISSET(r, WT_EVICTING)) {
+ WT_ASSERT(session, !F_ISSET(r, WT_EVICTING));
+ return (EBUSY);
+ }
goto done;
case WT_REF_SPLIT:
/*
* The page was split out from under us.
*
- * We should never be here during eviction, a child page
- * in this state within an evicted page's subtree would
- * have caused eviction to fail.
+ * We should never be here during eviction, active child
+ * pages in an evicted page's subtree fail the eviction
+ * attempt.
*
* We should never be here during checkpoint, dirty page
* eviction is shutout during checkpoint, all splits in
* process will have completed before we walk any pages
* for checkpoint.
*/
- WT_ASSERT(session, ref->state != WT_REF_SPLIT);
- /* FALLTHROUGH */
+ WT_ASSERT(session, WT_REF_SPLIT != WT_REF_SPLIT);
+ return (EBUSY);
WT_ILLEGAL_VALUE(session);
}
@@ -1581,11 +1578,21 @@ in_memory:
* modify structure has been instantiated. If the modify structure
* exists and the page has actually been modified, set that state.
* If that's not the case, we would normally use the original cell's
- * disk address as our reference, but, if we're forced to instantiate
- * a deleted child page and it's never modified, we end up here with
- * a page that has a modify structure, no modifications, and no disk
- * address. Ignore those pages, they're not modified and there is no
- * reason to write the cell.
+ * disk address as our reference, however there are two special cases,
+ * both flagged by a missing block address.
+ *
+ * First, if forced to instantiate a deleted child page and it's never
+ * modified, we end up here with a page that has a modify structure, no
+ * modifications, and no disk address. Ignore those pages, they're not
+ * modified and there is no reason to write the cell.
+ *
+ * Second, insert splits are permitted during checkpoint. When doing the
+ * final checkpoint pass, we first walk the internal page's page-index
+ * and write out any dirty pages we find, then we write out the internal
+ * page in post-order traversal. If we found the split page in the first
+ * step, it will have an address; if we didn't find the split page in
+ * the first step, it won't have an address and we ignore it, it's not
+ * part of the checkpoint.
*/
mod = ref->page->modify;
if (mod != NULL && mod->rec_result != 0)
@@ -3808,7 +3815,7 @@ __rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
switch (state) {
case WT_CHILD_IGNORE:
- /* Deleted child we don't have to write. */
+ /* Ignored child. */
WT_CHILD_RELEASE_ERR(session, hazard, ref);
continue;
@@ -3977,7 +3984,7 @@ __rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
* record 100 moves to another page. When we reconcile
* the original page, we write record 98, then we don't
* see record 99 for whatever reason. If we've moved
- * record 1000, we don't know to write a deleted record
+ * record 100, we don't know to write a deleted record
* 99 on the page.)
*
* The record number recorded during the split is the
@@ -3999,8 +4006,6 @@ __rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
} else {
WT_RET(
__rec_txn_read(session, r, ins, NULL, NULL, &upd));
- if (upd == NULL)
- continue;
recno = WT_INSERT_RECNO(ins);
}
for (;;) {
@@ -4536,22 +4541,25 @@ compare: /*
* record 100 moves to another page. When we reconcile
* the original page, we write record 98, then we don't
* see record 99 for whatever reason. If we've moved
- * record 1000, we don't know to write a deleted record
+ * record 100, we don't know to write a deleted record
* 99 on the page.)
*
+ * Assert the recorded record number is past the end of
+ * the page.
+ *
* The record number recorded during the split is the
* first key on the split page, that is, one larger than
* the last key on this page, we have to decrement it.
*/
if ((n = page->modify->mod_split_recno) == WT_RECNO_OOB)
break;
+ WT_ASSERT(session, n >= src_recno);
n -= 1;
+
upd = NULL;
} else {
WT_ERR(
__rec_txn_read(session, r, ins, NULL, NULL, &upd));
- if (upd == NULL)
- continue;
n = WT_INSERT_RECNO(ins);
}
while (src_recno <= n) {
@@ -4734,10 +4742,10 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
switch (state) {
case WT_CHILD_IGNORE:
/*
- * Deleted child we don't have to write.
+ * Ignored child.
*
- * Overflow keys referencing discarded pages are no
- * longer useful, schedule them for discard. Don't
+ * Overflow keys referencing pages we're not writing are
+ * no longer useful, schedule them for discard. Don't
* worry about instantiation, internal page keys are
* always instantiated. Don't worry about reuse,
* reusing this key in this reconciliation is unlikely.
diff --git a/src/third_party/wiredtiger/src/schema/schema_create.c b/src/third_party/wiredtiger/src/schema/schema_create.c
index 25bbd496798..9b3b76b62de 100644
--- a/src/third_party/wiredtiger/src/schema/schema_create.c
+++ b/src/third_party/wiredtiger/src/schema/schema_create.c
@@ -275,15 +275,11 @@ __create_colgroup(WT_SESSION_IMPL *session,
WT_ERR(__wt_schema_create(session, source, sourceconf));
WT_ERR(__wt_config_collapse(session, cfg, &cgconf));
- if (exists) {
- if (strcmp(cgconf, origconf) != 0)
- WT_ERR_MSG(session, EINVAL,
- "%s: does not match existing configuration", name);
- goto err;
- }
- WT_ERR(__wt_metadata_insert(session, name, cgconf));
- WT_ERR(__wt_schema_open_colgroups(session, table));
+ if (!exists) {
+ WT_ERR(__wt_metadata_insert(session, name, cgconf));
+ WT_ERR(__wt_schema_open_colgroups(session, table));
+ }
err: __wt_free(session, cgconf);
__wt_free(session, sourceconf);
@@ -539,20 +535,17 @@ __create_index(WT_SESSION_IMPL *session,
cfg[1] = sourceconf;
cfg[2] = confbuf.data;
WT_ERR(__wt_config_collapse(session, cfg, &idxconf));
- if (exists) {
- if (strcmp(idxconf, origconf) != 0)
- WT_ERR_MSG(session, EINVAL,
- "%s: does not match existing configuration", name);
- goto err;
- }
- WT_ERR(__wt_metadata_insert(session, name, idxconf));
- /* Make sure that the configuration is valid. */
- WT_ERR(__wt_schema_open_index(
- session, table, idxname, strlen(idxname), &idx));
+ if (!exists) {
+ WT_ERR(__wt_metadata_insert(session, name, idxconf));
- /* If there is data in the table, fill the index. */
- WT_ERR(__fill_index(session, table, idx));
+ /* Make sure that the configuration is valid. */
+ WT_ERR(__wt_schema_open_index(
+ session, table, idxname, strlen(idxname), &idx));
+
+ /* If there is data in the table, fill the index. */
+ WT_ERR(__fill_index(session, table, idx));
+ }
err: __wt_free(session, idxconf);
__wt_free(session, origconf);
@@ -612,23 +605,21 @@ __create_table(WT_SESSION_IMPL *session,
WT_ERR_NOTFOUND_OK(ret);
WT_ERR(__wt_config_collapse(session, cfg, &tableconf));
- if (exists) {
- if (strcmp(tableconf, table->config) != 0)
- WT_ERR_MSG(session, EINVAL,
- "%s: does not match existing configuration", name);
- goto err;
- }
- WT_ERR(__wt_metadata_insert(session, name, tableconf));
- /* Attempt to open the table now to catch any errors. */
- WT_ERR(__wt_schema_get_table(
- session, tablename, strlen(tablename), true, &table));
+ if (!exists) {
+ WT_ERR(__wt_metadata_insert(session, name, tableconf));
+
+ /* Attempt to open the table now to catch any errors. */
+ WT_ERR(__wt_schema_get_table(
+ session, tablename, strlen(tablename), true, &table));
- if (ncolgroups == 0) {
- cgsize = strlen("colgroup:") + strlen(tablename) + 1;
- WT_ERR(__wt_calloc_def(session, cgsize, &cgname));
- snprintf(cgname, cgsize, "colgroup:%s", tablename);
- WT_ERR(__create_colgroup(session, cgname, exclusive, config));
+ if (ncolgroups == 0) {
+ cgsize = strlen("colgroup:") + strlen(tablename) + 1;
+ WT_ERR(__wt_calloc_def(session, cgsize, &cgname));
+ snprintf(cgname, cgsize, "colgroup:%s", tablename);
+ WT_ERR(__create_colgroup(
+ session, cgname, exclusive, config));
+ }
}
if (0) {
diff --git a/src/third_party/wiredtiger/src/schema/schema_worker.c b/src/third_party/wiredtiger/src/schema/schema_worker.c
index b5ee3bb7f7d..e60a7107786 100644
--- a/src/third_party/wiredtiger/src/schema/schema_worker.c
+++ b/src/third_party/wiredtiger/src/schema/schema_worker.c
@@ -55,18 +55,11 @@ __wt_schema_worker(WT_SESSION_IMPL *session,
WT_ERR(ret);
}
- if ((ret = __wt_session_get_btree_ckpt(
- session, uri, cfg, open_flags)) == 0) {
- WT_SAVE_DHANDLE(session,
- ret = file_func(session, cfg));
- WT_TRET(__wt_session_release_btree(session));
- } else if (ret == EBUSY) {
- WT_ASSERT(session, !FLD_ISSET(
- open_flags, WT_DHANDLE_EXCLUSIVE));
- WT_WITH_HANDLE_LIST_LOCK(session,
- ret = __wt_conn_btree_apply_single_ckpt(
- session, uri, file_func, cfg));
- }
+ WT_ERR(__wt_session_get_btree_ckpt(
+ session, uri, cfg, open_flags));
+ WT_SAVE_DHANDLE(session,
+ ret = file_func(session, cfg));
+ WT_TRET(__wt_session_release_btree(session));
WT_ERR(ret);
}
} else if (WT_PREFIX_MATCH(uri, "colgroup:")) {
diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c
index c03b5fdc044..5511674dc5e 100644
--- a/src/third_party/wiredtiger/src/session/session_api.c
+++ b/src/third_party/wiredtiger/src/session/session_api.c
@@ -597,9 +597,10 @@ __session_rename(WT_SESSION *wt_session,
WT_ERR(__wt_str_name_check(session, uri));
WT_ERR(__wt_str_name_check(session, newuri));
- WT_WITH_SCHEMA_LOCK(session, ret,
- WT_WITH_TABLE_LOCK(session, ret,
- ret = __wt_schema_rename(session, uri, newuri, cfg)));
+ WT_WITH_CHECKPOINT_LOCK(session, ret,
+ WT_WITH_SCHEMA_LOCK(session, ret,
+ WT_WITH_TABLE_LOCK(session, ret,
+ ret = __wt_schema_rename(session, uri, newuri, cfg))));
err: API_END_RET_NOTFOUND_MAP(session, ret);
}
@@ -646,9 +647,10 @@ __wt_session_drop(WT_SESSION_IMPL *session, const char *uri, const char *cfg[])
if (!lock_wait)
F_SET(session, WT_SESSION_LOCK_NO_WAIT);
- WT_WITH_SCHEMA_LOCK(session, ret,
- WT_WITH_TABLE_LOCK(session, ret,
- ret = __wt_schema_drop(session, uri, cfg)));
+ WT_WITH_CHECKPOINT_LOCK(session, ret,
+ WT_WITH_SCHEMA_LOCK(session, ret,
+ WT_WITH_TABLE_LOCK(session, ret,
+ ret = __wt_schema_drop(session, uri, cfg))));
if (!lock_wait)
F_CLR(session, WT_SESSION_LOCK_NO_WAIT);
diff --git a/src/third_party/wiredtiger/src/support/cksum.c b/src/third_party/wiredtiger/src/support/cksum.c
index c2982c40015..0b086753406 100644
--- a/src/third_party/wiredtiger/src/support/cksum.c
+++ b/src/third_party/wiredtiger/src/support/cksum.c
@@ -1260,6 +1260,23 @@ __wt_cksum_hw(const void *chunk, size_t len)
}
#endif
+#if defined(__powerpc64__)
+
+unsigned int crc32_vpmsum(unsigned int crc, const unsigned char *p,
+ unsigned long len);
+
+/*
+ * __wt_cksum_hw --
+ * Return a checksum for a chunk of memory, computed in hardware
+ * using 8 byte steps.
+ */
+static uint32_t
+__wt_cksum_hw(const void *chunk, size_t len)
+{
+ return crc32_vpmsum(0, chunk, len);
+}
+#endif
+
/*
* __wt_cksum --
* Return a checksum for a chunk of memory using the fastest method
@@ -1302,6 +1319,8 @@ __wt_cksum_init(void)
__wt_cksum_func = __wt_cksum_hw;
else
__wt_cksum_func = __wt_cksum_sw;
+#elif defined(__powerpc64__)
+ __wt_cksum_func = __wt_cksum_hw;
#else
__wt_cksum_func = __wt_cksum_sw;
#endif
diff --git a/src/third_party/wiredtiger/src/support/power8/LICENSE.TXT b/src/third_party/wiredtiger/src/support/power8/LICENSE.TXT
new file mode 100644
index 00000000000..2f4bb91f574
--- /dev/null
+++ b/src/third_party/wiredtiger/src/support/power8/LICENSE.TXT
@@ -0,0 +1,476 @@
+Copyright (C) 2015 Anton Blanchard <anton@au.ibm.com>, IBM
+
+crc32-vpmsum is free software; you can redistribute it and/or
+modify it under the terms of either:
+
+ a) the GNU General Public License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version., or
+ b) the Apache License, Version 2.0
+
+
+
+
+
+
+
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+
+
+
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
diff --git a/src/third_party/wiredtiger/src/support/power8/README.md b/src/third_party/wiredtiger/src/support/power8/README.md
new file mode 100644
index 00000000000..3e2976650cd
--- /dev/null
+++ b/src/third_party/wiredtiger/src/support/power8/README.md
@@ -0,0 +1,208 @@
+crc32-vpmsum
+============
+
+A set of examples for accelerating CRC32 calculations using the vector
+polynomial multiply sum (vpmsum) instructions introduced in POWER8. These
+instructions implement byte, halfword, word and doubleword carryless
+multiply/add.
+
+Performance
+-----------
+
+An implementation of slice-by-8, one of the fastest lookup table methods
+is included so we can compare performance against it. Testing 5000000
+iterations of a CRC of 32 kB of data (to keep it L1 cache contained):
+
+```
+# time slice_by_8_bench 32768 5000000
+122.220 seconds
+
+# time crc32_bench 32768 5000000
+2.937 seconds
+```
+
+The vpmsum accelerated CRC is just over 41x faster.
+
+This test was run on a 4.1 GHz POWER8, so the algorithm sustains about
+52 GiB/sec or 13.6 bytes/cycle. The theoretical limit is 16 bytes/cycle
+since we can execute a maximum of one vpmsum instruction per cycle.
+
+In another test, a version was added to the kernel and btrfs write
+performance was shown to be 3.8x faster. The test was done to a ramdisk
+to mitigate any I/O induced variability.
+
+Quick start
+-----------
+
+- Modify CRC and OPTIONS in the Makefile. There are examples for the two most
+ common crc32s.
+
+- Type make to create the constants (crc32_constants.h)
+
+- Import the code into your application (crc32.S crc32_wrapper.c
+ crc32_constants.h ppc-opcode.h) and call the CRC:
+
+```
+unsigned int crc32_vpmsum(unsigned int crc, unsigned char *p, unsigned long len);
+```
+
+CRC background
+--------------
+
+For a good background on CRCs, check out:
+
+http://www.ross.net/crc/download/crc_v3.txt
+
+A few key points:
+
+- A CRC is the remainder after dividing a message by the CRC polynomial,
+ ie M mod CRC_POLY
+- multiply/divide is carryless
+- add/subtract is an xor
+- n (where n is the order of the CRC) bits of zeroes are appended to the
+ end of the message.
+
+One more important piece of information - a CRC is a linear function, so:
+
+```
+ CRC(A xor B) = CRC(A) xor CRC(B)
+
+ CRC(A . B) = CRC(A) . CRC(B) (remember this is carryless multiply)
+```
+
+If we take 64bits of data, represented by two 32 bit chunks (AAAAAAAA
+and BBBBBBBB):
+
+```
+CRC(AAAAAAAABBBBBBBB)
+ = CRC(AAAAAAAA00000000 xor BBBBBBBB)
+ = CRC(AAAAAAAA00000000) xor CRC(BBBBBBBB)
+```
+
+If we operate on AAAAAAAA:
+
+```
+CRC(AAAAAAAA00000000)
+ = CRC(AAAAAAAA . 100000000)
+ = CRC(AAAAAAAA) . CRC(100000000)
+```
+
+And CRC(100000000) is a constant which we can pre-calculate:
+
+```
+CRC(100000000)
+ = 100000000 mod CRC_POLY
+ = 2^32 mod CRC_POLY
+```
+
+Finally we can add our modified AAAAAAAA to BBBBBBBB:
+
+```
+CRC(AAAAAAAABBBBBBBB)
+ = ((2^32 mod CRC_POLY) . CRC(AAAAAAAA)) xor CRC(BBBBBBBB)
+```
+
+In other words, with the right constants pre-calculated we can shift the
+input data around and we can also calculate the CRC in as many parallel
+chunks as we want.
+
+No matter how much shifting we do, the final result will be 64 bits of
+data (63 actually, because there is no carry into the top bit). To reduce
+it further we need another trick, and that is Barrett reduction:
+
+http://en.wikipedia.org/wiki/Barrett_reduction
+
+Barrett reduction is a method of calculating a mod n. The idea is to
+calculate q, the multiple of our polynomial that we need to subtract. By
+doing the computation 2x bits higher (ie 64 bits) and shifting the
+result back down 2x bits, we round down to the nearest multiple.
+
+```
+ k = 32
+ m = floor((4^k)/n) = floor((4^32)/n)
+ n = CRC polynomial (32 bit CRC)
+ a = 64 bits of data
+
+ q = floor(ma/(2^64))
+ result = a - qn
+```
+
+An example in the floating point domain makes it clearer how this works:
+
+```
+a mod n = a - floor(am) * n
+```
+
+Let's use it to calculate 22 mod 10:
+
+```
+ a = 22
+ n = 10
+ m = 1/n = 1/10 = 0.1
+
+22 mod 10
+ = 22 - floor(22*0.1) * 10
+ = 22 - 2 * 10
+ = 22 - 20
+ = 2
+```
+
+There is one more issue left - bit reflection. Some CRCs are defined to
+operate on the least significant bit first (eg CRC32c). Let's look at
+how this would get laid out in a register, and let's simplify it to just
+two bytes (vs a 16 byte VMX register):
+
+ [ 8..15 ] [ 0..7 ]
+
+Notice how the bits and bytes are out of order. Since we are doing
+multi word multiplication on these values we need them to both be
+in order.
+
+The simplest way to fix this is to reflect the bits in each byte:
+
+ [ 15..8 ] [ 7..0 ]
+
+However shuffling bits in a byte is expensive on most CPUs. It is
+however relatively cheap to shuffle bytes around. What if we load
+the bytes in reversed:
+
+ [ 0..7 ] [ 8..15 ]
+
+Now the bits and bytes are in order, except the least significant bit
+of the register is now on the left and the most significant bit is on the
+right. We operate as if the register is reflected, which normally we
+cannot do. The reason we get away with this is our multiplies are carryless
+and our addition and subtraction is xor, so our operations never create
+carries.
+
+The only trick is we have to shift the result of multiplies left one
+because the high bit of the multiply is always 0, and we want that high bit
+on the right not the left.
+
+Implementation
+--------------
+
+The vpmsum instructions on POWER8 have a 6 cycle latency and we can
+execute one every cycle. In light of this the main loop has 8 parallel
+streams which consume 8 x 16 B each iteration. At the completion of this
+loop we have taken 32 kB of data and reduced it to 8 x 16 B (128 B).
+
+The next step is to take this 128 B and reduce it to 8 B. At this stage
+we also add 32 bits of 0 to the end.
+
+We then apply Barrett reduction to get our CRC.
+
+Examples
+--------
+- barrett_reduction: An example of Barrett reduction
+
+- final_fold: Starting with 128 bits, add 32 bits of zeros and reduce it to
+ 64 bits, then apply Barrett reduction
+
+- final_fold2: A second method of reduction
+
+Acknowledgements
+----------------
+
+Thanks to Michael Gschwind, Jeff Derby, Lorena Pesantez and Stewart Smith
+for their ideas and assistance.
diff --git a/src/third_party/wiredtiger/src/support/power8/crc32.S b/src/third_party/wiredtiger/src/support/power8/crc32.S
new file mode 100644
index 00000000000..4bc1fad416d
--- /dev/null
+++ b/src/third_party/wiredtiger/src/support/power8/crc32.S
@@ -0,0 +1,741 @@
+#if defined(__powerpc64__)
+/*
+ * Calculate the checksum of data that is 16 byte aligned and a multiple of
+ * 16 bytes.
+ *
+ * The first step is to reduce it to 1024 bits. We do this in 8 parallel
+ * chunks in order to mask the latency of the vpmsum instructions. If we
+ * have more than 32 kB of data to checksum we repeat this step multiple
+ * times, passing in the previous 1024 bits.
+ *
+ * The next step is to reduce the 1024 bits to 64 bits. This step adds
+ * 32 bits of 0s to the end - this matches what a CRC does. We just
+ * calculate constants that land the data in this 32 bits.
+ *
+ * We then use fixed point Barrett reduction to compute a mod n over GF(2)
+ * for n = CRC using POWER8 instructions. We use x = 32.
+ *
+ * http://en.wikipedia.org/wiki/Barrett_reduction
+ *
+ * Copyright (C) 2015 Anton Blanchard <anton@au.ibm.com>, IBM
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <ppc-asm.h>
+#include "ppc-opcode.h"
+
+/*
+ * NOTE(review): presumably drops a "toc" register alias from the included
+ * headers so the @toc@ha/@toc@l relocation operators used below are not
+ * macro-expanded - confirm against ppc-asm.h.
+ */
+#undef toc
+
+/* Numeric fallbacks for the r1/r2 register names if no header defined them */
+#ifndef r1
+#define r1 1
+#endif
+
+#ifndef r2
+#define r2 2
+#endif
+
+ .section .rodata
+/* The tables in this section are read with lvx, so keep them 16 byte aligned */
+.balign 16
+
+.byteswap_constant:
+ /* byte reverse permute constant */
+ .octa 0x0F0E0D0C0B0A09080706050403020100
+
+/*
+ * Define __ASSEMBLY__ before pulling in the constants header so that its
+ * C-only parts (guarded by #ifndef __ASSEMBLY__) are skipped.
+ */
+#define __ASSEMBLY__
+#include "crc32_constants.h"
+
+ .text
+
+/*
+ * The input data is byte swapped after loading when the CPU endianness and
+ * the CRC bit order call for it: big endian with a reflected CRC, or little
+ * endian with a non-reflected CRC.
+ */
+#if defined(__BIG_ENDIAN__) && defined(REFLECT)
+#define BYTESWAP_DATA
+#elif defined(__LITTLE_ENDIAN__) && !defined(REFLECT)
+#define BYTESWAP_DATA
+#else
+#undef BYTESWAP_DATA
+#endif
+
+/* GPRs holding the constant byte offsets 16..112, used as lvx index operands */
+#define off16 r25
+#define off32 r26
+#define off48 r27
+#define off64 r28
+#define off80 r29
+#define off96 r30
+#define off112 r31
+
+/* Vector registers holding the folding/Barrett constants currently in use */
+#define const1 v25
+#define const2 v26
+
+/* Vector registers holding the permute control, masks and all-0/all-1 values */
+#define byteswap v27
+#define mask_32bit v28
+#define mask_64bit v29
+#define zeroes v30
+#define ones v31
+
+/* VPERM performs the byte swap when BYTESWAP_DATA is set, else expands to nothing */
+#ifdef BYTESWAP_DATA
+#define VPERM(A, B, C, D) vperm A, B, C, D
+#else
+#define VPERM(A, B, C, D)
+#endif
+
+/*
+ * unsigned int __crc32_vpmsum(unsigned int crc, void *p, unsigned long len)
+ *
+ * Fold len bytes at p into the running CRC value crc and return the result.
+ *
+ * In: r3 = crc (initial value), r4 = p, r5 = len
+ * Out: r3 = updated CRC; crc is returned unchanged when len is 0.
+ *
+ * p must be 16 byte aligned and len a multiple of 16 bytes (see the comment
+ * at the top of this file); all data is fetched with lvx in 16 byte units.
+ *
+ * Register usage: r25-r31 hold the constant offsets 16..112 and
+ * v20-v23/v25-v31 are used as working and constant registers. All of these
+ * are non-volatile in the 64-bit PowerPC ELF ABIs, so they are saved below
+ * the stack pointer on entry (this is a leaf function, no frame is created):
+ * GPRs at -56(r1)..-8(r1), vector registers at -240(r1)..-64(r1). Every
+ * return path goes through .Lout, which restores them.
+ */
+FUNC_START(__crc32_vpmsum)
+ std r31,-8(r1)
+ std r30,-16(r1)
+ std r29,-24(r1)
+ std r28,-32(r1)
+ std r27,-40(r1)
+ std r26,-48(r1)
+ std r25,-56(r1)
+
+ li off16,16
+ li off32,32
+ li off48,48
+ li off64,64
+ li off80,80
+ li off96,96
+ li off112,112
+ li r0,0
+
+ /*
+ * Save the non-volatile vector registers we are about to clobber
+ * (v24 is not used). r6 is only a scratch base here; it is
+ * recomputed before its next use.
+ */
+ addi r6,r1,-240
+ stvx v20,0,r6
+ stvx v21,off16,r6
+ stvx v22,off32,r6
+ stvx v23,off48,r6
+ stvx v25,off64,r6
+ stvx v26,off80,r6
+ stvx v27,off96,r6
+ stvx v28,off112,r6
+ addi r6,r6,128
+ stvx v29,0,r6
+ stvx v30,off16,r6
+ stvx v31,off32,r6
+
+ /* Keep the initial crc so it can be returned as is when len is 0 */
+ mr r10,r3
+
+ vxor zeroes,zeroes,zeroes
+ vspltisw ones,-1
+
+ vsldoi mask_32bit,zeroes,ones,4
+ vsldoi mask_64bit,zeroes,ones,8
+
+ /* Get the initial value into v8 */
+ vxor v8,v8,v8
+ MTVRD(v8, r3)
+#ifdef REFLECT
+ vsldoi v8,zeroes,v8,8 /* shift into bottom 32 bits */
+#else
+ vsldoi v8,v8,zeroes,4 /* shift into top 32 bits */
+#endif
+
+#ifdef BYTESWAP_DATA
+ addis r3,r2,.byteswap_constant@toc@ha
+ addi r3,r3,.byteswap_constant@toc@l
+
+ lvx byteswap,0,r3
+ addi r3,r3,16
+#endif
+
+ /* Less than 256 bytes is handled by the straight line code at .Lshort */
+ cmpdi r5,256
+ blt .Lshort
+
+ /* r6 = len rounded down to a multiple of 128 bytes */
+ rldicr r6,r5,0,56
+
+ /* Checksum in blocks of MAX_SIZE */
+1: lis r7,MAX_SIZE@h
+ ori r7,r7,MAX_SIZE@l
+ mr r9,r7
+ cmpd r6,r7
+ bgt 2f
+ mr r7,r6
+2: subf r6,r7,r6
+
+ /* our main loop does 128 bytes at a time */
+ srdi r7,r7,7
+
+ /*
+ * Work out the offset into the constants table to start at. Each
+ * constant is 16 bytes, and it is used against 128 bytes of input
+ * data - 128 / 16 = 8
+ */
+ sldi r8,r7,4
+ srdi r9,r9,3
+ subf r8,r8,r9
+
+ /* We reduce our final 128 bytes in a separate step */
+ addi r7,r7,-1
+ mtctr r7
+
+ addis r3,r2,.constants@toc@ha
+ addi r3,r3,.constants@toc@l
+
+ /* Find the start of our constants */
+ add r3,r3,r8
+
+ /* zero v0-v7 which will contain our checksums */
+ vxor v0,v0,v0
+ vxor v1,v1,v1
+ vxor v2,v2,v2
+ vxor v3,v3,v3
+ vxor v4,v4,v4
+ vxor v5,v5,v5
+ vxor v6,v6,v6
+ vxor v7,v7,v7
+
+ lvx const1,0,r3
+
+ /*
+ * If we are looping back to consume more data we use the values
+ * already in v16-v23.
+ */
+ cmpdi r0,1
+ beq 2f
+
+ /* First warm up pass */
+ lvx v16,0,r4
+ lvx v17,off16,r4
+ VPERM(v16,v16,v16,byteswap)
+ VPERM(v17,v17,v17,byteswap)
+ lvx v18,off32,r4
+ lvx v19,off48,r4
+ VPERM(v18,v18,v18,byteswap)
+ VPERM(v19,v19,v19,byteswap)
+ lvx v20,off64,r4
+ lvx v21,off80,r4
+ VPERM(v20,v20,v20,byteswap)
+ VPERM(v21,v21,v21,byteswap)
+ lvx v22,off96,r4
+ lvx v23,off112,r4
+ VPERM(v22,v22,v22,byteswap)
+ VPERM(v23,v23,v23,byteswap)
+ addi r4,r4,8*16
+
+ /* xor in initial value */
+ vxor v16,v16,v8
+
+2: bdz .Lfirst_warm_up_done
+
+ addi r3,r3,16
+ lvx const2,0,r3
+
+ /* Second warm up pass */
+ VPMSUMD(v8,v16,const1)
+ lvx v16,0,r4
+ VPERM(v16,v16,v16,byteswap)
+ ori r2,r2,0
+
+ VPMSUMD(v9,v17,const1)
+ lvx v17,off16,r4
+ VPERM(v17,v17,v17,byteswap)
+ ori r2,r2,0
+
+ VPMSUMD(v10,v18,const1)
+ lvx v18,off32,r4
+ VPERM(v18,v18,v18,byteswap)
+ ori r2,r2,0
+
+ VPMSUMD(v11,v19,const1)
+ lvx v19,off48,r4
+ VPERM(v19,v19,v19,byteswap)
+ ori r2,r2,0
+
+ VPMSUMD(v12,v20,const1)
+ lvx v20,off64,r4
+ VPERM(v20,v20,v20,byteswap)
+ ori r2,r2,0
+
+ VPMSUMD(v13,v21,const1)
+ lvx v21,off80,r4
+ VPERM(v21,v21,v21,byteswap)
+ ori r2,r2,0
+
+ VPMSUMD(v14,v22,const1)
+ lvx v22,off96,r4
+ VPERM(v22,v22,v22,byteswap)
+ ori r2,r2,0
+
+ VPMSUMD(v15,v23,const1)
+ lvx v23,off112,r4
+ VPERM(v23,v23,v23,byteswap)
+
+ addi r4,r4,8*16
+
+ bdz .Lfirst_cool_down
+
+ /*
+ * main loop. We modulo schedule it such that it takes three iterations
+ * to complete - first iteration load, second iteration vpmsum, third
+ * iteration xor.
+ */
+ .balign 16
+4: lvx const1,0,r3
+ addi r3,r3,16
+ ori r2,r2,0
+
+ vxor v0,v0,v8
+ VPMSUMD(v8,v16,const2)
+ lvx v16,0,r4
+ VPERM(v16,v16,v16,byteswap)
+ ori r2,r2,0
+
+ vxor v1,v1,v9
+ VPMSUMD(v9,v17,const2)
+ lvx v17,off16,r4
+ VPERM(v17,v17,v17,byteswap)
+ ori r2,r2,0
+
+ vxor v2,v2,v10
+ VPMSUMD(v10,v18,const2)
+ lvx v18,off32,r4
+ VPERM(v18,v18,v18,byteswap)
+ ori r2,r2,0
+
+ vxor v3,v3,v11
+ VPMSUMD(v11,v19,const2)
+ lvx v19,off48,r4
+ VPERM(v19,v19,v19,byteswap)
+ lvx const2,0,r3
+ ori r2,r2,0
+
+ vxor v4,v4,v12
+ VPMSUMD(v12,v20,const1)
+ lvx v20,off64,r4
+ VPERM(v20,v20,v20,byteswap)
+ ori r2,r2,0
+
+ vxor v5,v5,v13
+ VPMSUMD(v13,v21,const1)
+ lvx v21,off80,r4
+ VPERM(v21,v21,v21,byteswap)
+ ori r2,r2,0
+
+ vxor v6,v6,v14
+ VPMSUMD(v14,v22,const1)
+ lvx v22,off96,r4
+ VPERM(v22,v22,v22,byteswap)
+ ori r2,r2,0
+
+ vxor v7,v7,v15
+ VPMSUMD(v15,v23,const1)
+ lvx v23,off112,r4
+ VPERM(v23,v23,v23,byteswap)
+
+ addi r4,r4,8*16
+
+ bdnz 4b
+
+.Lfirst_cool_down:
+ /* First cool down pass */
+ lvx const1,0,r3
+ addi r3,r3,16
+
+ vxor v0,v0,v8
+ VPMSUMD(v8,v16,const1)
+ ori r2,r2,0
+
+ vxor v1,v1,v9
+ VPMSUMD(v9,v17,const1)
+ ori r2,r2,0
+
+ vxor v2,v2,v10
+ VPMSUMD(v10,v18,const1)
+ ori r2,r2,0
+
+ vxor v3,v3,v11
+ VPMSUMD(v11,v19,const1)
+ ori r2,r2,0
+
+ vxor v4,v4,v12
+ VPMSUMD(v12,v20,const1)
+ ori r2,r2,0
+
+ vxor v5,v5,v13
+ VPMSUMD(v13,v21,const1)
+ ori r2,r2,0
+
+ vxor v6,v6,v14
+ VPMSUMD(v14,v22,const1)
+ ori r2,r2,0
+
+ vxor v7,v7,v15
+ VPMSUMD(v15,v23,const1)
+ ori r2,r2,0
+
+.Lsecond_cool_down:
+ /* Second cool down pass */
+ vxor v0,v0,v8
+ vxor v1,v1,v9
+ vxor v2,v2,v10
+ vxor v3,v3,v11
+ vxor v4,v4,v12
+ vxor v5,v5,v13
+ vxor v6,v6,v14
+ vxor v7,v7,v15
+
+#ifdef REFLECT
+ /*
+ * vpmsumd produces a 96 bit result in the least significant bits
+ * of the register. Since we are bit reflected we have to shift it
+ * left 32 bits so it occupies the least significant bits in the
+ * bit reflected domain.
+ */
+ vsldoi v0,v0,zeroes,4
+ vsldoi v1,v1,zeroes,4
+ vsldoi v2,v2,zeroes,4
+ vsldoi v3,v3,zeroes,4
+ vsldoi v4,v4,zeroes,4
+ vsldoi v5,v5,zeroes,4
+ vsldoi v6,v6,zeroes,4
+ vsldoi v7,v7,zeroes,4
+#endif
+
+ /* xor with last 1024 bits */
+ lvx v8,0,r4
+ lvx v9,off16,r4
+ VPERM(v8,v8,v8,byteswap)
+ VPERM(v9,v9,v9,byteswap)
+ lvx v10,off32,r4
+ lvx v11,off48,r4
+ VPERM(v10,v10,v10,byteswap)
+ VPERM(v11,v11,v11,byteswap)
+ lvx v12,off64,r4
+ lvx v13,off80,r4
+ VPERM(v12,v12,v12,byteswap)
+ VPERM(v13,v13,v13,byteswap)
+ lvx v14,off96,r4
+ lvx v15,off112,r4
+ VPERM(v14,v14,v14,byteswap)
+ VPERM(v15,v15,v15,byteswap)
+
+ addi r4,r4,8*16
+
+ vxor v16,v0,v8
+ vxor v17,v1,v9
+ vxor v18,v2,v10
+ vxor v19,v3,v11
+ vxor v20,v4,v12
+ vxor v21,v5,v13
+ vxor v22,v6,v14
+ vxor v23,v7,v15
+
+ /*
+ * If data remains (r6 != 0) go around again. r0 = 1 tells the next
+ * pass to start from the values already in v16-v23, so the 128 bytes
+ * they represent are added back onto the remaining length.
+ */
+ li r0,1
+ cmpdi r6,0
+ addi r6,r6,128
+ bne 1b
+
+ /* Work out how many bytes we have left */
+ andi. r5,r5,127
+
+ /* Calculate where in the constant table we need to start */
+ subfic r6,r5,128
+ add r3,r3,r6
+
+ /* How many 16 byte chunks are in the tail */
+ srdi r7,r5,4
+ mtctr r7
+
+ /*
+ * Reduce the previously calculated 1024 bits to 64 bits, shifting
+ * 32 bits to include the trailing 32 bits of zeros
+ */
+ lvx v0,0,r3
+ lvx v1,off16,r3
+ lvx v2,off32,r3
+ lvx v3,off48,r3
+ lvx v4,off64,r3
+ lvx v5,off80,r3
+ lvx v6,off96,r3
+ lvx v7,off112,r3
+ addi r3,r3,8*16
+
+ VPMSUMW(v0,v16,v0)
+ VPMSUMW(v1,v17,v1)
+ VPMSUMW(v2,v18,v2)
+ VPMSUMW(v3,v19,v3)
+ VPMSUMW(v4,v20,v4)
+ VPMSUMW(v5,v21,v5)
+ VPMSUMW(v6,v22,v6)
+ VPMSUMW(v7,v23,v7)
+
+ /* Now reduce the tail (0 - 112 bytes) */
+ cmpdi r7,0
+ beq 1f
+
+ lvx v16,0,r4
+ lvx v17,0,r3
+ VPERM(v16,v16,v16,byteswap)
+ VPMSUMW(v16,v16,v17)
+ vxor v0,v0,v16
+ bdz 1f
+
+ lvx v16,off16,r4
+ lvx v17,off16,r3
+ VPERM(v16,v16,v16,byteswap)
+ VPMSUMW(v16,v16,v17)
+ vxor v0,v0,v16
+ bdz 1f
+
+ lvx v16,off32,r4
+ lvx v17,off32,r3
+ VPERM(v16,v16,v16,byteswap)
+ VPMSUMW(v16,v16,v17)
+ vxor v0,v0,v16
+ bdz 1f
+
+ lvx v16,off48,r4
+ lvx v17,off48,r3
+ VPERM(v16,v16,v16,byteswap)
+ VPMSUMW(v16,v16,v17)
+ vxor v0,v0,v16
+ bdz 1f
+
+ lvx v16,off64,r4
+ lvx v17,off64,r3
+ VPERM(v16,v16,v16,byteswap)
+ VPMSUMW(v16,v16,v17)
+ vxor v0,v0,v16
+ bdz 1f
+
+ lvx v16,off80,r4
+ lvx v17,off80,r3
+ VPERM(v16,v16,v16,byteswap)
+ VPMSUMW(v16,v16,v17)
+ vxor v0,v0,v16
+ bdz 1f
+
+ lvx v16,off96,r4
+ lvx v17,off96,r3
+ VPERM(v16,v16,v16,byteswap)
+ VPMSUMW(v16,v16,v17)
+ vxor v0,v0,v16
+
+ /* Now xor all the parallel chunks together */
+1: vxor v0,v0,v1
+ vxor v2,v2,v3
+ vxor v4,v4,v5
+ vxor v6,v6,v7
+
+ vxor v0,v0,v2
+ vxor v4,v4,v6
+
+ vxor v0,v0,v4
+
+.Lbarrett_reduction:
+ /* Barrett constants */
+ addis r3,r2,.barrett_constants@toc@ha
+ addi r3,r3,.barrett_constants@toc@l
+
+ lvx const1,0,r3
+ lvx const2,off16,r3
+
+ vsldoi v1,v0,v0,8
+ vxor v0,v0,v1 /* xor two 64 bit results together */
+
+#ifdef REFLECT
+ /* shift left one bit */
+ vspltisb v1,1
+ vsl v0,v0,v1
+#endif
+
+ vand v0,v0,mask_64bit
+
+#ifndef REFLECT
+ /*
+ * Now for the Barrett reduction algorithm. The idea is to calculate q,
+ * the multiple of our polynomial that we need to subtract. By
+ * doing the computation 2x bits higher (ie 64 bits) and shifting the
+ * result back down 2x bits, we round down to the nearest multiple.
+ */
+ VPMSUMD(v1,v0,const1) /* ma */
+ vsldoi v1,zeroes,v1,8 /* q = floor(ma/(2^64)) */
+ VPMSUMD(v1,v1,const2) /* qn */
+ vxor v0,v0,v1 /* a - qn, subtraction is xor in GF(2) */
+
+ /*
+ * Get the result into r3. We need to shift it left 8 bytes:
+ * V0 [ 0 1 2 X ]
+ * V0 [ 0 X 2 3 ]
+ */
+ vsldoi v0,v0,zeroes,8 /* shift result into top 64 bits */
+#else
+ /*
+ * The reflected version of Barrett reduction. Instead of bit
+ * reflecting our data (which is expensive to do), we bit reflect our
+ * constants and our algorithm, which means the intermediate data in
+ * our vector registers goes from 0-63 instead of 63-0. We can reflect
+ * the algorithm because we don't carry in mod 2 arithmetic.
+ */
+ vand v1,v0,mask_32bit /* bottom 32 bits of a */
+ VPMSUMD(v1,v1,const1) /* ma */
+ vand v1,v1,mask_32bit /* bottom 32bits of ma */
+ VPMSUMD(v1,v1,const2) /* qn */
+ vxor v0,v0,v1 /* a - qn, subtraction is xor in GF(2) */
+
+ /*
+ * Since we are bit reflected, the result (ie the low 32 bits) is in
+ * the high 32 bits. We just need to shift it left 4 bytes
+ * V0 [ 0 1 X 3 ]
+ * V0 [ 0 X 2 3 ]
+ */
+ vsldoi v0,v0,zeroes,4 /* shift result into top 64 bits */
+#endif
+
+ /* Get it into r3 */
+ MFVRD(r3, v0)
+
+ /*
+ * Common exit: r3 holds the return value. Every return path must come
+ * through here so the non-volatile registers are restored.
+ */
+.Lout:
+ /* Restore the vector registers first, the off* aliases are still live */
+ addi r6,r1,-240
+ lvx v20,0,r6
+ lvx v21,off16,r6
+ lvx v22,off32,r6
+ lvx v23,off48,r6
+ lvx v25,off64,r6
+ lvx v26,off80,r6
+ lvx v27,off96,r6
+ lvx v28,off112,r6
+ addi r6,r6,128
+ lvx v29,0,r6
+ lvx v30,off16,r6
+ lvx v31,off32,r6
+
+ /* Restore the non-volatile GPRs (this overwrites the off* aliases) */
+ ld r31,-8(r1)
+ ld r30,-16(r1)
+ ld r29,-24(r1)
+ ld r28,-32(r1)
+ ld r27,-40(r1)
+ ld r26,-48(r1)
+ ld r25,-56(r1)
+
+ blr
+
+ /*
+ * Reached from the first bdz when only one 128 byte group goes through
+ * vpmsumd in this block: multiply it and go straight to the final xor.
+ */
+.Lfirst_warm_up_done:
+ lvx const1,0,r3
+ addi r3,r3,16
+
+ VPMSUMD(v8,v16,const1)
+ VPMSUMD(v9,v17,const1)
+ VPMSUMD(v10,v18,const1)
+ VPMSUMD(v11,v19,const1)
+ VPMSUMD(v12,v20,const1)
+ VPMSUMD(v13,v21,const1)
+ VPMSUMD(v14,v22,const1)
+ VPMSUMD(v15,v23,const1)
+
+ b .Lsecond_cool_down
+
+ /*
+ * Short input (less than 256 bytes): multiply each 16 byte chunk by
+ * its own constant from .short_constants and xor the products together.
+ */
+.Lshort:
+ cmpdi r5,0
+ beq .Lzero
+
+ addis r3,r2,.short_constants@toc@ha
+ addi r3,r3,.short_constants@toc@l
+
+ /* Calculate where in the constant table we need to start */
+ subfic r6,r5,256
+ add r3,r3,r6
+
+ /* How many 16 byte chunks? */
+ srdi r7,r5,4
+ mtctr r7
+
+ /* v19 and v20 accumulate the odd and even numbered chunks */
+ vxor v19,v19,v19
+ vxor v20,v20,v20
+
+ lvx v0,0,r4
+ lvx v16,0,r3
+ VPERM(v0,v0,v16,byteswap)
+ vxor v0,v0,v8 /* xor in initial value */
+ VPMSUMW(v0,v0,v16)
+ bdz .Lv0
+
+ lvx v1,off16,r4
+ lvx v17,off16,r3
+ VPERM(v1,v1,v17,byteswap)
+ VPMSUMW(v1,v1,v17)
+ bdz .Lv1
+
+ lvx v2,off32,r4
+ lvx v16,off32,r3
+ VPERM(v2,v2,v16,byteswap)
+ VPMSUMW(v2,v2,v16)
+ bdz .Lv2
+
+ lvx v3,off48,r4
+ lvx v17,off48,r3
+ VPERM(v3,v3,v17,byteswap)
+ VPMSUMW(v3,v3,v17)
+ bdz .Lv3
+
+ lvx v4,off64,r4
+ lvx v16,off64,r3
+ VPERM(v4,v4,v16,byteswap)
+ VPMSUMW(v4,v4,v16)
+ bdz .Lv4
+
+ lvx v5,off80,r4
+ lvx v17,off80,r3
+ VPERM(v5,v5,v17,byteswap)
+ VPMSUMW(v5,v5,v17)
+ bdz .Lv5
+
+ lvx v6,off96,r4
+ lvx v16,off96,r3
+ VPERM(v6,v6,v16,byteswap)
+ VPMSUMW(v6,v6,v16)
+ bdz .Lv6
+
+ lvx v7,off112,r4
+ lvx v17,off112,r3
+ VPERM(v7,v7,v17,byteswap)
+ VPMSUMW(v7,v7,v17)
+ bdz .Lv7
+
+ addi r3,r3,128
+ addi r4,r4,128
+
+ lvx v8,0,r4
+ lvx v16,0,r3
+ VPERM(v8,v8,v16,byteswap)
+ VPMSUMW(v8,v8,v16)
+ bdz .Lv8
+
+ lvx v9,off16,r4
+ lvx v17,off16,r3
+ VPERM(v9,v9,v17,byteswap)
+ VPMSUMW(v9,v9,v17)
+ bdz .Lv9
+
+ lvx v10,off32,r4
+ lvx v16,off32,r3
+ VPERM(v10,v10,v16,byteswap)
+ VPMSUMW(v10,v10,v16)
+ bdz .Lv10
+
+ lvx v11,off48,r4
+ lvx v17,off48,r3
+ VPERM(v11,v11,v17,byteswap)
+ VPMSUMW(v11,v11,v17)
+ bdz .Lv11
+
+ lvx v12,off64,r4
+ lvx v16,off64,r3
+ VPERM(v12,v12,v16,byteswap)
+ VPMSUMW(v12,v12,v16)
+ bdz .Lv12
+
+ lvx v13,off80,r4
+ lvx v17,off80,r3
+ VPERM(v13,v13,v17,byteswap)
+ VPMSUMW(v13,v13,v17)
+ bdz .Lv13
+
+ lvx v14,off96,r4
+ lvx v16,off96,r3
+ VPERM(v14,v14,v16,byteswap)
+ VPMSUMW(v14,v14,v16)
+ bdz .Lv14
+
+ lvx v15,off112,r4
+ lvx v17,off112,r3
+ VPERM(v15,v15,v17,byteswap)
+ VPMSUMW(v15,v15,v17)
+
+ /* Fall through from the entry matching the last chunk that was loaded */
+.Lv15: vxor v19,v19,v15
+.Lv14: vxor v20,v20,v14
+.Lv13: vxor v19,v19,v13
+.Lv12: vxor v20,v20,v12
+.Lv11: vxor v19,v19,v11
+.Lv10: vxor v20,v20,v10
+.Lv9: vxor v19,v19,v9
+.Lv8: vxor v20,v20,v8
+.Lv7: vxor v19,v19,v7
+.Lv6: vxor v20,v20,v6
+.Lv5: vxor v19,v19,v5
+.Lv4: vxor v20,v20,v4
+.Lv3: vxor v19,v19,v3
+.Lv2: vxor v20,v20,v2
+.Lv1: vxor v19,v19,v1
+.Lv0: vxor v20,v20,v0
+
+ vxor v0,v19,v20
+
+ b .Lbarrett_reduction
+
+.Lzero:
+ /*
+ * Nothing to checksum: return the crc we were given. Leave through
+ * .Lout because the non-volatile registers were already overwritten.
+ */
+ mr r3,r10
+ b .Lout
+FUNC_END(__crc32_vpmsum)
+#endif
diff --git a/src/third_party/wiredtiger/src/support/power8/crc32_constants.h b/src/third_party/wiredtiger/src/support/power8/crc32_constants.h
new file mode 100644
index 00000000000..02c471d1c56
--- /dev/null
+++ b/src/third_party/wiredtiger/src/support/power8/crc32_constants.h
@@ -0,0 +1,901 @@
+#define CRC 0x1edc6f41 /* generator polynomial; 0x1edc6f41 is the CRC-32C (Castagnoli) value */
+#define CRC_XOR /* when defined, crc32_vpmsum() XORs the CRC with 0xffffffff on entry and again on exit */
+#define REFLECT /* when defined, crc32_wrapper.c builds the shift-right (bit-reflected) crc32_align() */
+
+#ifndef __ASSEMBLY__
+#ifdef CRC_TABLE /* table is only emitted for C files that define CRC_TABLE before including this header */
+static const unsigned int crc_table[] = { /* 256 entries; crc32_align() indexes it with a single byte (& 0xff) */
+ 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4,
+ 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb,
+ 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b,
+ 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24,
+ 0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b,
+ 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384,
+ 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54,
+ 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b,
+ 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a,
+ 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35,
+ 0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5,
+ 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa,
+ 0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45,
+ 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a,
+ 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a,
+ 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595,
+ 0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48,
+ 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957,
+ 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687,
+ 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198,
+ 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927,
+ 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38,
+ 0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8,
+ 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7,
+ 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096,
+ 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789,
+ 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859,
+ 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46,
+ 0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9,
+ 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6,
+ 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36,
+ 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829,
+ 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c,
+ 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93,
+ 0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043,
+ 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c,
+ 0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3,
+ 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc,
+ 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c,
+ 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033,
+ 0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652,
+ 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d,
+ 0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d,
+ 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982,
+ 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d,
+ 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622,
+ 0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2,
+ 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed,
+ 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530,
+ 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f,
+ 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff,
+ 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0,
+ 0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f,
+ 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540,
+ 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90,
+ 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f,
+ 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee,
+ 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1,
+ 0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321,
+ 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e,
+ 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81,
+ 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e,
+ 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e,
+ 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351,};
+
+#endif /* CRC_TABLE */
+#else
+#define MAX_SIZE 32768
+.constants:
+
+ /* Reduce 262144 bits (MAX_SIZE = 32768 bytes) to 1024 bits */
+ /* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */
+ .octa 0x00000000b6ca9e20000000009c37c408
+
+ /* x^260096 mod p(x)` << 1, x^260160 mod p(x)` << 1 */
+ .octa 0x00000000350249a800000001b51df26c
+
+ /* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */
+ .octa 0x00000001862dac54000000000724b9d0
+
+ /* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */
+ .octa 0x00000001d87fb48c00000001c00532fe
+
+ /* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */
+ .octa 0x00000001f39b699e00000000f05a9362
+
+ /* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */
+ .octa 0x0000000101da11b400000001e1007970
+
+ /* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */
+ .octa 0x00000001cab571e000000000a57366ee
+
+ /* x^253952 mod p(x)` << 1, x^254016 mod p(x)` << 1 */
+ .octa 0x00000000c7020cfe0000000192011284
+
+ /* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */
+ .octa 0x00000000cdaed1ae0000000162716d9a
+
+ /* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */
+ .octa 0x00000001e804effc00000000cd97ecde
+
+ /* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */
+ .octa 0x0000000077c3ea3a0000000058812bc0
+
+ /* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */
+ .octa 0x0000000068df31b40000000088b8c12e
+
+ /* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */
+ .octa 0x00000000b059b6c200000001230b234c
+
+ /* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */
+ .octa 0x0000000145fb8ed800000001120b416e
+
+ /* x^246784 mod p(x)` << 1, x^246848 mod p(x)` << 1 */
+ .octa 0x00000000cbc0916800000001974aecb0
+
+ /* x^245760 mod p(x)` << 1, x^245824 mod p(x)` << 1 */
+ .octa 0x000000005ceeedc2000000008ee3f226
+
+ /* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */
+ .octa 0x0000000047d74e8600000001089aba9a
+
+ /* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */
+ .octa 0x00000001407e9e220000000065113872
+
+ /* x^242688 mod p(x)` << 1, x^242752 mod p(x)` << 1 */
+ .octa 0x00000001da967bda000000005c07ec10
+
+ /* x^241664 mod p(x)` << 1, x^241728 mod p(x)` << 1 */
+ .octa 0x000000006c8983680000000187590924
+
+ /* x^240640 mod p(x)` << 1, x^240704 mod p(x)` << 1 */
+ .octa 0x00000000f2d14c9800000000e35da7c6
+
+ /* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */
+ .octa 0x00000001993c6ad4000000000415855a
+
+ /* x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */
+ .octa 0x000000014683d1ac0000000073617758
+
+ /* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */
+ .octa 0x00000001a7c93e6c0000000176021d28
+
+ /* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */
+ .octa 0x000000010211e90a00000001c358fd0a
+
+ /* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */
+ .octa 0x000000001119403e00000001ff7a2c18
+
+ /* x^234496 mod p(x)` << 1, x^234560 mod p(x)` << 1 */
+ .octa 0x000000001c3261aa00000000f2d9f7e4
+
+ /* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */
+ .octa 0x000000014e37a634000000016cf1f9c8
+
+ /* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */
+ .octa 0x0000000073786c0c000000010af9279a
+
+ /* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */
+ .octa 0x000000011dc037f80000000004f101e8
+
+ /* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */
+ .octa 0x0000000031433dfc0000000070bcf184
+
+ /* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */
+ .octa 0x000000009cde8348000000000a8de642
+
+ /* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */
+ .octa 0x0000000038d3c2a60000000062ea130c
+
+ /* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */
+ .octa 0x000000011b25f26000000001eb31cbb2
+
+ /* x^226304 mod p(x)` << 1, x^226368 mod p(x)` << 1 */
+ .octa 0x000000001629e6f00000000170783448
+
+ /* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */
+ .octa 0x0000000160838b4c00000001a684b4c6
+
+ /* x^224256 mod p(x)` << 1, x^224320 mod p(x)` << 1 */
+ .octa 0x000000007a44011c00000000253ca5b4
+
+ /* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */
+ .octa 0x00000000226f417a0000000057b4b1e2
+
+ /* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */
+ .octa 0x0000000045eb2eb400000000b6bd084c
+
+ /* x^221184 mod p(x)` << 1, x^221248 mod p(x)` << 1 */
+ .octa 0x000000014459d70c0000000123c2d592
+
+ /* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */
+ .octa 0x00000001d406ed8200000000159dafce
+
+ /* x^219136 mod p(x)` << 1, x^219200 mod p(x)` << 1 */
+ .octa 0x0000000160c8e1a80000000127e1a64e
+
+ /* x^218112 mod p(x)` << 1, x^218176 mod p(x)` << 1 */
+ .octa 0x0000000027ba80980000000056860754
+
+ /* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */
+ .octa 0x000000006d92d01800000001e661aae8
+
+ /* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */
+ .octa 0x000000012ed7e3f200000000f82c6166
+
+ /* x^215040 mod p(x)` << 1, x^215104 mod p(x)` << 1 */
+ .octa 0x000000002dc8778800000000c4f9c7ae
+
+ /* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */
+ .octa 0x0000000018240bb80000000074203d20
+
+ /* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */
+ .octa 0x000000001ad381580000000198173052
+
+ /* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */
+ .octa 0x00000001396b78f200000001ce8aba54
+
+ /* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */
+ .octa 0x000000011a68133400000001850d5d94
+
+ /* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */
+ .octa 0x000000012104732e00000001d609239c
+
+ /* x^208896 mod p(x)` << 1, x^208960 mod p(x)` << 1 */
+ .octa 0x00000000a140d90c000000001595f048
+
+ /* x^207872 mod p(x)` << 1, x^207936 mod p(x)` << 1 */
+ .octa 0x00000001b7215eda0000000042ccee08
+
+ /* x^206848 mod p(x)` << 1, x^206912 mod p(x)` << 1 */
+ .octa 0x00000001aaf1df3c000000010a389d74
+
+ /* x^205824 mod p(x)` << 1, x^205888 mod p(x)` << 1 */
+ .octa 0x0000000029d15b8a000000012a840da6
+
+ /* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */
+ .octa 0x00000000f1a96922000000001d181c0c
+
+ /* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */
+ .octa 0x00000001ac80d03c0000000068b7d1f6
+
+ /* x^202752 mod p(x)` << 1, x^202816 mod p(x)` << 1 */
+ .octa 0x000000000f11d56a000000005b0f14fc
+
+ /* x^201728 mod p(x)` << 1, x^201792 mod p(x)` << 1 */
+ .octa 0x00000001f1c022a20000000179e9e730
+
+ /* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */
+ .octa 0x0000000173d00ae200000001ce1368d6
+
+ /* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */
+ .octa 0x00000001d4ffe4ac0000000112c3a84c
+
+ /* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */
+ .octa 0x000000016edc5ae400000000de940fee
+
+ /* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */
+ .octa 0x00000001f1a0214000000000fe896b7e
+
+ /* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */
+ .octa 0x00000000ca0b28a000000001f797431c
+
+ /* x^195584 mod p(x)` << 1, x^195648 mod p(x)` << 1 */
+ .octa 0x00000001928e30a20000000053e989ba
+
+ /* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */
+ .octa 0x0000000097b1b002000000003920cd16
+
+ /* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */
+ .octa 0x00000000b15bf90600000001e6f579b8
+
+ /* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */
+ .octa 0x00000000411c5d52000000007493cb0a
+
+ /* x^191488 mod p(x)` << 1, x^191552 mod p(x)` << 1 */
+ .octa 0x00000001c36f330000000001bdd376d8
+
+ /* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */
+ .octa 0x00000001119227e0000000016badfee6
+
+ /* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */
+ .octa 0x00000000114d47020000000071de5c58
+
+ /* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */
+ .octa 0x00000000458b5b9800000000453f317c
+
+ /* x^187392 mod p(x)` << 1, x^187456 mod p(x)` << 1 */
+ .octa 0x000000012e31fb8e0000000121675cce
+
+ /* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */
+ .octa 0x000000005cf619d800000001f409ee92
+
+ /* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */
+ .octa 0x0000000063f4d8b200000000f36b9c88
+
+ /* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */
+ .octa 0x000000004138dc8a0000000036b398f4
+
+ /* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */
+ .octa 0x00000001d29ee8e000000001748f9adc
+
+ /* x^182272 mod p(x)` << 1, x^182336 mod p(x)` << 1 */
+ .octa 0x000000006a08ace800000001be94ec00
+
+ /* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */
+ .octa 0x0000000127d4201000000000b74370d6
+
+ /* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */
+ .octa 0x0000000019d76b6200000001174d0b98
+
+ /* x^179200 mod p(x)` << 1, x^179264 mod p(x)` << 1 */
+ .octa 0x00000001b1471f6e00000000befc06a4
+
+ /* x^178176 mod p(x)` << 1, x^178240 mod p(x)` << 1 */
+ .octa 0x00000001f64c19cc00000001ae125288
+
+ /* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */
+ .octa 0x00000000003c0ea00000000095c19b34
+
+ /* x^176128 mod p(x)` << 1, x^176192 mod p(x)` << 1 */
+ .octa 0x000000014d73abf600000001a78496f2
+
+ /* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */
+ .octa 0x00000001620eb84400000001ac5390a0
+
+ /* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */
+ .octa 0x0000000147655048000000002a80ed6e
+
+ /* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */
+ .octa 0x0000000067b5077e00000001fa9b0128
+
+ /* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */
+ .octa 0x0000000010ffe20600000001ea94929e
+
+ /* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */
+ .octa 0x000000000fee8f1e0000000125f4305c
+
+ /* x^169984 mod p(x)` << 1, x^170048 mod p(x)` << 1 */
+ .octa 0x00000001da26fbae00000001471e2002
+
+ /* x^168960 mod p(x)` << 1, x^169024 mod p(x)` << 1 */
+ .octa 0x00000001b3a8bd880000000132d2253a
+
+ /* x^167936 mod p(x)` << 1, x^168000 mod p(x)` << 1 */
+ .octa 0x00000000e8f3898e00000000f26b3592
+
+ /* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */
+ .octa 0x00000000b0d0d28c00000000bc8b67b0
+
+ /* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */
+ .octa 0x0000000030f2a798000000013a826ef2
+
+ /* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */
+ .octa 0x000000000fba10020000000081482c84
+
+ /* x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */
+ .octa 0x00000000bdb9bd7200000000e77307c2
+
+ /* x^162816 mod p(x)` << 1, x^162880 mod p(x)` << 1 */
+ .octa 0x0000000075d3bf5a00000000d4a07ec8
+
+ /* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */
+ .octa 0x00000000ef1f98a00000000017102100
+
+ /* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */
+ .octa 0x00000000689c760200000000db406486
+
+ /* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */
+ .octa 0x000000016d5fa5fe0000000192db7f88
+
+ /* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */
+ .octa 0x00000001d0d2b9ca000000018bf67b1e
+
+ /* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */
+ .octa 0x0000000041e7b470000000007c09163e
+
+ /* x^156672 mod p(x)` << 1, x^156736 mod p(x)` << 1 */
+ .octa 0x00000001cbb6495e000000000adac060
+
+ /* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */
+ .octa 0x000000010052a0b000000000bd8316ae
+
+ /* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */
+ .octa 0x00000001d8effb5c000000019f09ab54
+
+ /* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */
+ .octa 0x00000001d969853c0000000125155542
+
+ /* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */
+ .octa 0x00000000523ccce2000000018fdb5882
+
+ /* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */
+ .octa 0x000000001e2436bc00000000e794b3f4
+
+ /* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */
+ .octa 0x00000000ddd1c3a2000000016f9bb022
+
+ /* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */
+ .octa 0x0000000019fcfe3800000000290c9978
+
+ /* x^148480 mod p(x)` << 1, x^148544 mod p(x)` << 1 */
+ .octa 0x00000001ce95db640000000083c0f350
+
+ /* x^147456 mod p(x)` << 1, x^147520 mod p(x)` << 1 */
+ .octa 0x00000000af5828060000000173ea6628
+
+ /* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */
+ .octa 0x00000001006388f600000001c8b4e00a
+
+ /* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */
+ .octa 0x0000000179eca00a00000000de95d6aa
+
+ /* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */
+ .octa 0x0000000122410a6a000000010b7f7248
+
+ /* x^143360 mod p(x)` << 1, x^143424 mod p(x)` << 1 */
+ .octa 0x000000004288e87c00000001326e3a06
+
+ /* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */
+ .octa 0x000000016c5490da00000000bb62c2e6
+
+ /* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */
+ .octa 0x00000000d1c71f6e0000000156a4b2c2
+
+ /* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */
+ .octa 0x00000001b4ce08a6000000011dfe763a
+
+ /* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */
+ .octa 0x00000001466ba60c000000007bcca8e2
+
+ /* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */
+ .octa 0x00000001f6c488a40000000186118faa
+
+ /* x^137216 mod p(x)` << 1, x^137280 mod p(x)` << 1 */
+ .octa 0x000000013bfb06820000000111a65a88
+
+ /* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */
+ .octa 0x00000000690e9e54000000003565e1c4
+
+ /* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */
+ .octa 0x00000000281346b6000000012ed02a82
+
+ /* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */
+ .octa 0x000000015646402400000000c486ecfc
+
+ /* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */
+ .octa 0x000000016063a8dc0000000001b951b2
+
+ /* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */
+ .octa 0x0000000116a663620000000048143916
+
+ /* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */
+ .octa 0x000000017e8aa4d200000001dc2ae124
+
+ /* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */
+ .octa 0x00000001728eb10c00000001416c58d6
+
+ /* x^129024 mod p(x)` << 1, x^129088 mod p(x)` << 1 */
+ .octa 0x00000001b08fd7fa00000000a479744a
+
+ /* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */
+ .octa 0x00000001092a16e80000000096ca3a26
+
+ /* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */
+ .octa 0x00000000a505637c00000000ff223d4e
+
+ /* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */
+ .octa 0x00000000d94869b2000000010e84da42
+
+ /* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */
+ .octa 0x00000001c8b203ae00000001b61ba3d0
+
+ /* x^123904 mod p(x)` << 1, x^123968 mod p(x)` << 1 */
+ .octa 0x000000005704aea000000000680f2de8
+
+ /* x^122880 mod p(x)` << 1, x^122944 mod p(x)` << 1 */
+ .octa 0x000000012e295fa2000000008772a9a8
+
+ /* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */
+ .octa 0x000000011d0908bc0000000155f295bc
+
+ /* x^120832 mod p(x)` << 1, x^120896 mod p(x)` << 1 */
+ .octa 0x0000000193ed97ea00000000595f9282
+
+ /* x^119808 mod p(x)` << 1, x^119872 mod p(x)` << 1 */
+ .octa 0x000000013a0f1c520000000164b1c25a
+
+ /* x^118784 mod p(x)` << 1, x^118848 mod p(x)` << 1 */
+ .octa 0x000000010c2c40c000000000fbd67c50
+
+ /* x^117760 mod p(x)` << 1, x^117824 mod p(x)` << 1 */
+ .octa 0x00000000ff6fac3e0000000096076268
+
+ /* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */
+ .octa 0x000000017b3609c000000001d288e4cc
+
+ /* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */
+ .octa 0x0000000088c8c92200000001eaac1bdc
+
+ /* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */
+ .octa 0x00000001751baae600000001f1ea39e2
+
+ /* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */
+ .octa 0x000000010795297200000001eb6506fc
+
+ /* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */
+ .octa 0x0000000162b00abe000000010f806ffe
+
+ /* x^111616 mod p(x)` << 1, x^111680 mod p(x)` << 1 */
+ .octa 0x000000000d7b404c000000010408481e
+
+ /* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */
+ .octa 0x00000000763b13d40000000188260534
+
+ /* x^109568 mod p(x)` << 1, x^109632 mod p(x)` << 1 */
+ .octa 0x00000000f6dc22d80000000058fc73e0
+
+ /* x^108544 mod p(x)` << 1, x^108608 mod p(x)` << 1 */
+ .octa 0x000000007daae06000000000391c59b8
+
+ /* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */
+ .octa 0x000000013359ab7c000000018b638400
+
+ /* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */
+ .octa 0x000000008add438a000000011738f5c4
+
+ /* x^105472 mod p(x)` << 1, x^105536 mod p(x)` << 1 */
+ .octa 0x00000001edbefdea000000008cf7c6da
+
+ /* x^104448 mod p(x)` << 1, x^104512 mod p(x)` << 1 */
+ .octa 0x000000004104e0f800000001ef97fb16
+
+ /* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */
+ .octa 0x00000000b48a82220000000102130e20
+
+ /* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */
+ .octa 0x00000001bcb4684400000000db968898
+
+ /* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */
+ .octa 0x000000013293ce0a00000000b5047b5e
+
+ /* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */
+ .octa 0x00000001710d0844000000010b90fdb2
+
+ /* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */
+ .octa 0x0000000117907f6e000000004834a32e
+
+ /* x^98304 mod p(x)` << 1, x^98368 mod p(x)` << 1 */
+ .octa 0x0000000087ddf93e0000000059c8f2b0
+
+ /* x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */
+ .octa 0x000000005970e9b00000000122cec508
+
+ /* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */
+ .octa 0x0000000185b2b7d0000000000a330cda
+
+ /* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */
+ .octa 0x00000001dcee0efc000000014a47148c
+
+ /* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */
+ .octa 0x0000000030da27220000000042c61cb8
+
+ /* x^93184 mod p(x)` << 1, x^93248 mod p(x)` << 1 */
+ .octa 0x000000012f925a180000000012fe6960
+
+ /* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */
+ .octa 0x00000000dd2e357c00000000dbda2c20
+
+ /* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */
+ .octa 0x00000000071c80de000000011122410c
+
+ /* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */
+ .octa 0x000000011513140a00000000977b2070
+
+ /* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */
+ .octa 0x00000001df876e8e000000014050438e
+
+ /* x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */
+ .octa 0x000000015f81d6ce0000000147c840e8
+
+ /* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */
+ .octa 0x000000019dd94dbe00000001cc7c88ce
+
+ /* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */
+ .octa 0x00000001373d206e00000001476b35a4
+
+ /* x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 1 */
+ .octa 0x00000000668ccade000000013d52d508
+
+ /* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */
+ .octa 0x00000001b192d268000000008e4be32e
+
+ /* x^82944 mod p(x)` << 1, x^83008 mod p(x)` << 1 */
+ .octa 0x00000000e30f3a7800000000024120fe
+
+ /* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */
+ .octa 0x000000010ef1f7bc00000000ddecddb4
+
+ /* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */
+ .octa 0x00000001f5ac738000000000d4d403bc
+
+ /* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */
+ .octa 0x000000011822ea7000000001734b89aa
+
+ /* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */
+ .octa 0x00000000c3a33848000000010e7a58d6
+
+ /* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */
+ .octa 0x00000001bd151c2400000001f9f04e9c
+
+ /* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */
+ .octa 0x0000000056002d7600000000b692225e
+
+ /* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */
+ .octa 0x000000014657c4f4000000019b8d3f3e
+
+ /* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */
+ .octa 0x0000000113742d7c00000001a874f11e
+
+ /* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */
+ .octa 0x000000019c5920ba000000010d5a4254
+
+ /* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */
+ .octa 0x000000005216d2d600000000bbb2f5d6
+
+ /* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */
+ .octa 0x0000000136f5ad8a0000000179cc0e36
+
+ /* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */
+ .octa 0x000000018b07beb600000001dca1da4a
+
+ /* x^69632 mod p(x)` << 1, x^69696 mod p(x)` << 1 */
+ .octa 0x00000000db1e93b000000000feb1a192
+
+ /* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */
+ .octa 0x000000000b96fa3a00000000d1eeedd6
+
+ /* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */
+ .octa 0x00000001d9968af0000000008fad9bb4
+
+ /* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */
+ .octa 0x000000000e4a77a200000001884938e4
+
+ /* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */
+ .octa 0x00000000508c2ac800000001bc2e9bc0
+
+ /* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */
+ .octa 0x0000000021572a8000000001f9658a68
+
+ /* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */
+ .octa 0x00000001b859daf2000000001b9224fc
+
+ /* x^62464 mod p(x)` << 1, x^62528 mod p(x)` << 1 */
+ .octa 0x000000016f7884740000000055b2fb84
+
+ /* x^61440 mod p(x)` << 1, x^61504 mod p(x)` << 1 */
+ .octa 0x00000001b438810e000000018b090348
+
+ /* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */
+ .octa 0x0000000095ddc6f2000000011ccbd5ea
+
+ /* x^59392 mod p(x)` << 1, x^59456 mod p(x)` << 1 */
+ .octa 0x00000001d977c20c0000000007ae47f8
+
+ /* x^58368 mod p(x)` << 1, x^58432 mod p(x)` << 1 */
+ .octa 0x00000000ebedb99a0000000172acbec0
+
+ /* x^57344 mod p(x)` << 1, x^57408 mod p(x)` << 1 */
+ .octa 0x00000001df9e9e9200000001c6e3ff20
+
+ /* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */
+ .octa 0x00000001a4a3f95200000000e1b38744
+
+ /* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */
+ .octa 0x00000000e2f5122000000000791585b2
+
+ /* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */
+ .octa 0x000000004aa01f3e00000000ac53b894
+
+ /* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */
+ .octa 0x00000000b3e90a5800000001ed5f2cf4
+
+ /* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */
+ .octa 0x000000000c9ca2aa00000001df48b2e0
+
+ /* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */
+ .octa 0x000000015168231600000000049c1c62
+
+ /* x^50176 mod p(x)` << 1, x^50240 mod p(x)` << 1 */
+ .octa 0x0000000036fce78c000000017c460c12
+
+ /* x^49152 mod p(x)` << 1, x^49216 mod p(x)` << 1 */
+ .octa 0x000000009037dc10000000015be4da7e
+
+ /* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */
+ .octa 0x00000000d3298582000000010f38f668
+
+ /* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */
+ .octa 0x00000001b42e8ad60000000039f40a00
+
+ /* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */
+ .octa 0x00000000142a983800000000bd4c10c4
+
+ /* x^45056 mod p(x)` << 1, x^45120 mod p(x)` << 1 */
+ .octa 0x0000000109c7f1900000000042db1d98
+
+ /* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */
+ .octa 0x0000000056ff931000000001c905bae6
+
+ /* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */
+ .octa 0x00000001594513aa00000000069d40ea
+
+ /* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */
+ .octa 0x00000001e3b5b1e8000000008e4fbad0
+
+ /* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */
+ .octa 0x000000011dd5fc080000000047bedd46
+
+ /* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */
+ .octa 0x00000001675f0cc20000000026396bf8
+
+ /* x^38912 mod p(x)` << 1, x^38976 mod p(x)` << 1 */
+ .octa 0x00000000d1c8dd4400000000379beb92
+
+ /* x^37888 mod p(x)` << 1, x^37952 mod p(x)` << 1 */
+ .octa 0x0000000115ebd3d8000000000abae54a
+
+ /* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */
+ .octa 0x00000001ecbd0dac0000000007e6a128
+
+ /* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */
+ .octa 0x00000000cdf67af2000000000ade29d2
+
+ /* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */
+ .octa 0x000000004c01ff4c00000000f974c45c
+
+ /* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */
+ .octa 0x00000000f2d8657e00000000e77ac60a
+
+ /* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */
+ .octa 0x000000006bae74c40000000145895816
+
+ /* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */
+ .octa 0x0000000152af8aa00000000038e362be
+
+ /* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */
+ .octa 0x0000000004663802000000007f991a64
+
+ /* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */
+ .octa 0x00000001ab2f5afc00000000fa366d3a
+
+ /* x^28672 mod p(x)` << 1, x^28736 mod p(x)` << 1 */
+ .octa 0x0000000074a4ebd400000001a2bb34f0
+
+ /* x^27648 mod p(x)` << 1, x^27712 mod p(x)` << 1 */
+ .octa 0x00000001d7ab3a4c0000000028a9981e
+
+ /* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */
+ .octa 0x00000001a8da60c600000001dbc672be
+
+ /* x^25600 mod p(x)` << 1, x^25664 mod p(x)` << 1 */
+ .octa 0x000000013cf6382000000000b04d77f6
+
+ /* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */
+ .octa 0x00000000bec12e1e0000000124400d96
+
+ /* x^23552 mod p(x)` << 1, x^23616 mod p(x)` << 1 */
+ .octa 0x00000001c6368010000000014ca4b414
+
+ /* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */
+ .octa 0x00000001e6e78758000000012fe2c938
+
+ /* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */
+ .octa 0x000000008d7f2b3c00000001faed01e6
+
+ /* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */
+ .octa 0x000000016b4a156e000000007e80ecfe
+
+ /* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */
+ .octa 0x00000001c63cfeb60000000098daee94
+
+ /* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */
+ .octa 0x000000015f902670000000010a04edea
+
+ /* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */
+ .octa 0x00000001cd5de11e00000001c00b4524
+
+ /* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */
+ .octa 0x000000001acaec540000000170296550
+
+ /* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */
+ .octa 0x000000002bd0ca780000000181afaa48
+
+ /* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */
+ .octa 0x0000000032d63d5c0000000185a31ffa
+
+ /* x^13312 mod p(x)` << 1, x^13376 mod p(x)` << 1 */
+ .octa 0x000000001c6d4e4c000000002469f608
+
+ /* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */
+ .octa 0x0000000106a60b92000000006980102a
+
+ /* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */
+ .octa 0x00000000d3855e120000000111ea9ca8
+
+ /* x^10240 mod p(x)` << 1, x^10304 mod p(x)` << 1 */
+ .octa 0x00000000e312563600000001bd1d29ce
+
+ /* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */
+ .octa 0x000000009e8f7ea400000001b34b9580
+
+ /* x^8192 mod p(x)` << 1, x^8256 mod p(x)` << 1 */
+ .octa 0x00000001c82e562c000000003076054e
+
+ /* x^7168 mod p(x)` << 1, x^7232 mod p(x)` << 1 */
+ .octa 0x00000000ca9f09ce000000012a608ea4
+
+ /* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */
+ .octa 0x00000000c63764e600000000784d05fe
+
+ /* x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */
+ .octa 0x0000000168d2e49e000000016ef0d82a
+
+ /* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */
+ .octa 0x00000000e986c1480000000075bda454
+
+ /* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */
+ .octa 0x00000000cfb65894000000003dc0a1c4
+
+ /* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */
+ .octa 0x0000000111cadee400000000e9a5d8be
+
+ /* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */
+ .octa 0x0000000171fb63ce00000001609bc4b4
+
+.short_constants:
+
+ /* Reduce final 1024-2048 bits to 64 bits, shifting 32 bits to include the trailing 32 bits of zeros */
+ /* x^1952 mod p(x)`, x^1984 mod p(x)`, x^2016 mod p(x)`, x^2048 mod p(x)` */
+ .octa 0x7fec2963e5bf80485cf015c388e56f72
+
+ /* x^1824 mod p(x)`, x^1856 mod p(x)`, x^1888 mod p(x)`, x^1920 mod p(x)` */
+ .octa 0x38e888d4844752a9963a18920246e2e6
+
+ /* x^1696 mod p(x)`, x^1728 mod p(x)`, x^1760 mod p(x)`, x^1792 mod p(x)` */
+ .octa 0x42316c00730206ad419a441956993a31
+
+ /* x^1568 mod p(x)`, x^1600 mod p(x)`, x^1632 mod p(x)`, x^1664 mod p(x)` */
+ .octa 0x543d5c543e65ddf9924752ba2b830011
+
+ /* x^1440 mod p(x)`, x^1472 mod p(x)`, x^1504 mod p(x)`, x^1536 mod p(x)` */
+ .octa 0x78e87aaf56767c9255bd7f9518e4a304
+
+ /* x^1312 mod p(x)`, x^1344 mod p(x)`, x^1376 mod p(x)`, x^1408 mod p(x)` */
+ .octa 0x8f68fcec1903da7f6d76739fe0553f1e
+
+ /* x^1184 mod p(x)`, x^1216 mod p(x)`, x^1248 mod p(x)`, x^1280 mod p(x)` */
+ .octa 0x3f4840246791d588c133722b1fe0b5c3
+
+ /* x^1056 mod p(x)`, x^1088 mod p(x)`, x^1120 mod p(x)`, x^1152 mod p(x)` */
+ .octa 0x34c96751b04de25a64b67ee0e55ef1f3
+
+ /* x^928 mod p(x)`, x^960 mod p(x)`, x^992 mod p(x)`, x^1024 mod p(x)` */
+ .octa 0x156c8e180b4a395b069db049b8fdb1e7
+
+ /* x^800 mod p(x)`, x^832 mod p(x)`, x^864 mod p(x)`, x^896 mod p(x)` */
+ .octa 0xe0b99ccbe661f7bea11bfaf3c9e90b9e
+
+ /* x^672 mod p(x)`, x^704 mod p(x)`, x^736 mod p(x)`, x^768 mod p(x)` */
+ .octa 0x041d37768cd75659817cdc5119b29a35
+
+ /* x^544 mod p(x)`, x^576 mod p(x)`, x^608 mod p(x)`, x^640 mod p(x)` */
+ .octa 0x3a0777818cfaa9651ce9d94b36c41f1c
+
+ /* x^416 mod p(x)`, x^448 mod p(x)`, x^480 mod p(x)`, x^512 mod p(x)` */
+ .octa 0x0e148e8252377a554f256efcb82be955
+
+ /* x^288 mod p(x)`, x^320 mod p(x)`, x^352 mod p(x)`, x^384 mod p(x)` */
+ .octa 0x9c25531d19e65ddeec1631edb2dea967
+
+ /* x^160 mod p(x)`, x^192 mod p(x)`, x^224 mod p(x)`, x^256 mod p(x)` */
+ .octa 0x790606ff9957c0a65d27e147510ac59a
+
+ /* x^32 mod p(x)`, x^64 mod p(x)`, x^96 mod p(x)`, x^128 mod p(x)` */
+ .octa 0x82f63b786ea2d55ca66805eb18b8ea18
+
+
+.barrett_constants:
+ /* 33 bit reflected Barrett constant m - (4^32)/n */
+ .octa 0x000000000000000000000000dea713f1 /* x^64 div p(x)` */
+ /* 33 bit reflected Barrett constant n */
+ .octa 0x00000000000000000000000105ec76f1
+#endif
diff --git a/src/third_party/wiredtiger/src/support/power8/crc32_wrapper.c b/src/third_party/wiredtiger/src/support/power8/crc32_wrapper.c
new file mode 100644
index 00000000000..34ac4150338
--- /dev/null
+++ b/src/third_party/wiredtiger/src/support/power8/crc32_wrapper.c
@@ -0,0 +1,66 @@
+#if defined(__powerpc64__)
+#define CRC_TABLE
+#include "crc32_constants.h"
+
+#define VMX_ALIGN 16
+#define VMX_ALIGN_MASK (VMX_ALIGN-1)
+
+/*
+ * crc32_align --
+ *	Fold len bytes starting at p into crc one byte at a time using the
+ *	crc_table lookup table. With REFLECT defined the low byte of crc
+ *	selects the table entry and crc shifts right; otherwise the high byte
+ *	selects it and crc shifts left.
+ */
+#ifdef REFLECT
+static unsigned int crc32_align(unsigned int crc, unsigned char *p,
+				unsigned long len)
+{
+	unsigned long i;
+
+	for (i = 0; i < len; i++)
+		crc = (crc >> 8) ^ crc_table[(crc ^ p[i]) & 0xff];
+	return crc;
+}
+#else
+static unsigned int crc32_align(unsigned int crc, unsigned char *p,
+				unsigned long len)
+{
+	unsigned long i;
+
+	for (i = 0; i < len; i++)
+		crc = (crc << 8) ^ crc_table[((crc >> 24) ^ p[i]) & 0xff];
+	return crc;
+}
+#endif
+
+/*
+ * Bulk CRC routine, declared here and defined outside this file
+ * (presumably the vpmsum assembly in this directory -- confirm). It is
+ * only ever called below with a 16-byte aligned pointer and a length that
+ * is a multiple of 16.
+ */
+unsigned int __crc32_vpmsum(unsigned int crc, unsigned char *p,
+ unsigned long len);
+
+/*
+ * crc32_vpmsum --
+ * Update crc over the len bytes at p. Buffers shorter than
+ * VMX_ALIGN + VMX_ALIGN_MASK (31) bytes are handled entirely by the
+ * byte-wise crc32_align. Longer buffers are split in three: an unaligned
+ * head (byte-wise), a 16-byte aligned middle whose length is a multiple
+ * of 16 (__crc32_vpmsum), and a tail of fewer than 16 bytes (byte-wise).
+ * When CRC_XOR is defined the CRC is inverted on entry and on exit.
+ */
+unsigned int crc32_vpmsum(unsigned int crc, unsigned char *p,
+ unsigned long len)
+{
+ unsigned int prealign;
+ unsigned int tail;
+
+#ifdef CRC_XOR
+ crc ^= 0xffffffff;
+#endif
+
+ /* Too short to be worth aligning: do the whole buffer byte-wise. */
+ if (len < VMX_ALIGN + VMX_ALIGN_MASK) {
+ crc = crc32_align(crc, p, len);
+ goto out;
+ }
+
+ /* Consume the bytes in front of the next 16-byte boundary. */
+ if ((unsigned long)p & VMX_ALIGN_MASK) {
+ prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK);
+ crc = crc32_align(crc, p, prealign);
+ len -= prealign;
+ p += prealign;
+ }
+
+ /* Aligned middle: the length is rounded down to a multiple of 16. */
+ crc = __crc32_vpmsum(crc, p, len & ~VMX_ALIGN_MASK);
+
+ /* Whatever is left after the last full 16-byte chunk. */
+ tail = len & VMX_ALIGN_MASK;
+ if (tail) {
+ p += len & ~VMX_ALIGN_MASK;
+ crc = crc32_align(crc, p, tail);
+ }
+
+out:
+#ifdef CRC_XOR
+ crc ^= 0xffffffff;
+#endif
+
+ return crc;
+}
+#endif
diff --git a/src/third_party/wiredtiger/src/support/power8/ppc-opcode.h b/src/third_party/wiredtiger/src/support/power8/ppc-opcode.h
new file mode 100644
index 00000000000..b63feea60a0
--- /dev/null
+++ b/src/third_party/wiredtiger/src/support/power8/ppc-opcode.h
@@ -0,0 +1,23 @@
+/*
+ * Hand-assembled PowerPC instruction encodings: each instruction macro
+ * below emits a raw .long made of a base opcode OR'ed with register
+ * fields (presumably so the code builds with assemblers that lack these
+ * mnemonics -- confirm).
+ */
+#ifndef __OPCODES_H
+#define __OPCODES_H
+
+/* 5-bit register fields, placed at bit 16 (RA) and bit 11 (RB). */
+#define __PPC_RA(a) (((a) & 0x1f) << 16)
+#define __PPC_RB(b) (((b) & 0x1f) << 11)
+/*
+ * 6-bit register fields: the low five bits go in the same positions as
+ * the 5-bit forms (bit 16, 11 or 21) and the sixth bit (0x20) is moved to
+ * a low-order bit of the instruction word (bit 2, 1 or 0 respectively).
+ */
+#define __PPC_XA(a) ((((a) & 0x1f) << 16) | (((a) & 0x20) >> 3))
+#define __PPC_XB(b) ((((b) & 0x1f) << 11) | (((b) & 0x20) >> 4))
+#define __PPC_XS(s) ((((s) & 0x1f) << 21) | (((s) & 0x20) >> 5))
+#define __PPC_XT(s) __PPC_XS(s)
+/* Operand layouts: three 6-bit registers, or one 6-bit plus two 5-bit. */
+#define VSX_XX3(t, a, b) (__PPC_XT(t) | __PPC_XA(a) | __PPC_XB(b))
+#define VSX_XX1(s, a, b) (__PPC_XS(s) | __PPC_RA(a) | __PPC_RB(b))
+
+/* Base opcodes with every register field zero. */
+#define PPC_INST_VPMSUMW 0x10000488
+#define PPC_INST_VPMSUMD 0x100004c8
+#define PPC_INST_MFVSRD 0x7c000066
+#define PPC_INST_MTVSRD 0x7c000166
+
+/*
+ * Instruction emitters. MFVRD and MTVRD add 32 to the register number t
+ * before encoding it (presumably because vector registers occupy the upper
+ * half of the VSX register numbering -- confirm against the Power ISA).
+ */
+#define VPMSUMW(t, a, b) .long PPC_INST_VPMSUMW | VSX_XX3((t), a, b)
+#define VPMSUMD(t, a, b) .long PPC_INST_VPMSUMD | VSX_XX3((t), a, b)
+#define MFVRD(a, t) .long PPC_INST_MFVSRD | VSX_XX1((t)+32, a, 0)
+#define MTVRD(t, a) .long PPC_INST_MTVSRD | VSX_XX1((t)+32, a, 0)
+
+#endif
diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
index 6a2c1eef826..85102ae8cfe 100644
--- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c
+++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
@@ -179,14 +179,8 @@ __checkpoint_apply(WT_SESSION_IMPL *session, const char *cfg[],
/* If we have already locked the handles, apply the operation. */
for (i = 0; i < session->ckpt_handle_next; ++i) {
- if (session->ckpt_handle[i].dhandle != NULL)
- WT_WITH_DHANDLE(session,
- session->ckpt_handle[i].dhandle,
- ret = (*op)(session, cfg));
- else
- WT_WITH_HANDLE_LIST_LOCK(session,
- ret = __wt_conn_btree_apply_single(session,
- session->ckpt_handle[i].name, NULL, op, cfg));
+ WT_WITH_DHANDLE(session, session->ckpt_handle[i],
+ ret = (*op)(session, cfg));
WT_RET(ret);
}
@@ -257,15 +251,11 @@ __wt_checkpoint_list(WT_SESSION_IMPL *session, const char *cfg[])
name = session->dhandle->name;
session->dhandle = NULL;
- /* Record busy file names, we'll deal with them in the checkpoint. */
- if ((ret = __wt_session_get_btree(session, name, NULL, NULL, 0)) == 0)
- session->ckpt_handle[session->ckpt_handle_next++].dhandle =
- session->dhandle;
- else if (ret == EBUSY)
- ret = __wt_strdup(session, name,
- &session->ckpt_handle[session->ckpt_handle_next++].name);
+ if ((ret = __wt_session_get_btree(session, name, NULL, NULL, 0)) != 0)
+ return (ret == EBUSY ? 0 : ret);
- return (ret);
+ session->ckpt_handle[session->ckpt_handle_next++] = session->dhandle;
+ return (0);
}
/*
@@ -421,10 +411,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
if (F_ISSET(conn, WT_CONN_CKPT_SYNC))
WT_ERR(__checkpoint_apply(session, cfg, __wt_checkpoint_sync));
- /* Acquire the schema lock. */
- F_SET(session, WT_SESSION_LOCKED_SCHEMA);
- __wt_spin_lock(session, &conn->schema_lock);
-
+ /* Start the checkpoint for real. */
WT_ERR(__wt_meta_track_on(session));
tracking = true;
@@ -543,16 +530,25 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
* Recovery relies on the checkpoint LSN in the metadata only being
* updated by full checkpoints so only checkpoint the metadata for
* full or non-logged checkpoints.
+ *
+ * This is very similar to __wt_meta_track_off, ideally they would be
+ * merged.
*/
if (full || !logging) {
session->isolation = txn->isolation = WT_ISO_READ_UNCOMMITTED;
/* Disable metadata tracking during the metadata checkpoint. */
saved_meta_next = session->meta_track_next;
session->meta_track_next = NULL;
+ WT_WITH_METADATA_LOCK(session, ret,
+ WT_WITH_DHANDLE(session,
+ WT_SESSION_META_DHANDLE(session),
+ ret = __wt_checkpoint(session, cfg)));
+ session->meta_track_next = saved_meta_next;
+ WT_ERR(ret);
+
WT_WITH_DHANDLE(session,
WT_SESSION_META_DHANDLE(session),
- ret = __wt_checkpoint(session, cfg));
- session->meta_track_next = saved_meta_next;
+ ret = __wt_checkpoint_sync(session, NULL));
WT_ERR(ret);
WT_ERR(__checkpoint_verbose_track(session,
@@ -610,23 +606,13 @@ err: /*
WT_TXN_LOG_CKPT_STOP : WT_TXN_LOG_CKPT_CLEANUP, NULL));
}
- for (i = 0; i < session->ckpt_handle_next; ++i) {
- if (session->ckpt_handle[i].dhandle == NULL) {
- __wt_free(session, session->ckpt_handle[i].name);
- continue;
- }
- WT_WITH_DHANDLE(session, session->ckpt_handle[i].dhandle,
+ for (i = 0; i < session->ckpt_handle_next; ++i)
+ WT_WITH_DHANDLE(session, session->ckpt_handle[i],
WT_TRET(__wt_session_release_btree(session)));
- }
__wt_free(session, session->ckpt_handle);
session->ckpt_handle_allocated = session->ckpt_handle_next = 0;
- if (F_ISSET(session, WT_SESSION_LOCKED_SCHEMA)) {
- F_CLR(session, WT_SESSION_LOCKED_SCHEMA);
- __wt_spin_unlock(session, &conn->schema_lock);
- }
-
session->isolation = txn->isolation = saved_isolation;
return (ret);
}
@@ -1189,7 +1175,8 @@ __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
WT_ASSERT(session, session->dhandle->checkpoint == NULL);
/* Should be holding the schema lock. */
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
+ WT_ASSERT(session, !WT_IS_METADATA(session, session->dhandle) ||
+ F_ISSET(session, WT_SESSION_LOCKED_METADATA));
return (__checkpoint_worker(session, cfg, true, true));
}
@@ -1253,17 +1240,9 @@ __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final)
}
/*
- * We should already have the schema lock unless we're finishing a bulk
- * load -- the only other paths to closing files (sweep and LSM) have
- * already checked for read-only trees.
- */
- WT_ASSERT(session,
- final || bulk || F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
-
- /*
* Turn on metadata tracking if:
* - The session is not already doing metadata tracking.
- * - The file was bulk loaded.
+ * - The file was not bulk loaded.
* - The close is not during connection close.
*/
need_tracking = !WT_META_TRACKING(session) && !bulk && !final;
diff --git a/src/third_party/wiredtiger/test/cursor_order/Makefile.am b/src/third_party/wiredtiger/test/cursor_order/Makefile.am
new file mode 100644
index 00000000000..c0c0ed639bf
--- /dev/null
+++ b/src/third_party/wiredtiger/test/cursor_order/Makefile.am
@@ -0,0 +1,13 @@
+AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include \
+ -I$(top_srcdir)/test/utility
+
+noinst_PROGRAMS = cursor_order
+cursor_order_LDADD = $(top_builddir)/libwiredtiger.la
+
+cursor_order_SOURCES = cursor_order_file.c cursor_order_ops.c cursor_order.c
+cursor_order_LDFLAGS = -static
+
+TESTS = $(noinst_PROGRAMS)
+
+clean-local:
+ rm -rf WiredTiger* wt.* *.core __stats
diff --git a/src/third_party/wiredtiger/test/cursor_order/cursor_order.c b/src/third_party/wiredtiger/test/cursor_order/cursor_order.c
new file mode 100644
index 00000000000..14709a2e88e
--- /dev/null
+++ b/src/third_party/wiredtiger/test/cursor_order/cursor_order.c
@@ -0,0 +1,303 @@
+/*-
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "cursor_order.h"
+
+static char home[512]; /* Program working dir */
+static char *progname; /* Program name */
+static FILE *logfp; /* Log file */
+
+static int handle_error(WT_EVENT_HANDLER *, WT_SESSION *, int, const char *);
+static int handle_message(WT_EVENT_HANDLER *, WT_SESSION *, const char *);
+static void onint(int);
+static void shutdown(void);
+static int usage(void);
+static void wt_connect(SHARED_CONFIG *, char *);
+static void wt_shutdown(SHARED_CONFIG *);
+
+extern int __wt_optind;
+extern char *__wt_optarg;
+
+/*
+ * main --
+ *	Parse the command line into a SHARED_CONFIG, then for each requested
+ *	run wipe the working directory, open a WiredTiger connection, run the
+ *	threaded workload (ops_start) and shut the connection down again.
+ *	A run count of 0 loops forever. Returns 0 on success and EXIT_FAILURE
+ *	on a usage error or a failed workload.
+ */
+int
+main(int argc, char *argv[])
+{
+	SHARED_CONFIG _cfg, *cfg;
+	int ch, cnt, runs;
+	char *config_open, *working_dir;
+
+	if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL)
+		progname = argv[0];
+	else
+		++progname;
+
+	cfg = &_cfg;
+	config_open = NULL;
+	working_dir = NULL;
+	runs = 1;
+
+	/*
+	 * Explicitly initialize the shared configuration object before
+	 * parsing command line options.
+	 */
+	cfg->append_inserters = 1;
+	cfg->conn = NULL;
+	cfg->ftype = ROW;
+	cfg->key_range = 0;		/* Set for real when files are loaded */
+	cfg->max_nops = 1000000;
+	cfg->multiple_files = false;
+	cfg->nkeys = 1000;
+	cfg->reverse_scanners = 5;
+	cfg->reverse_scan_ops = 10;
+	cfg->thread_finish = false;
+	cfg->vary_nops = false;
+
+	while ((ch = __wt_getopt(
+	    progname, argc, argv, "C:Fk:h:l:n:R:r:t:vw:W:")) != EOF)
+		switch (ch) {
+		case 'C':			/* wiredtiger_open config */
+			config_open = __wt_optarg;
+			break;
+		case 'F':			/* multiple files */
+			cfg->multiple_files = true;
+			break;
+		case 'h':			/* working directory */
+			working_dir = __wt_optarg;
+			break;
+		case 'k':			/* rows */
+			cfg->nkeys = (uint64_t)atol(__wt_optarg);
+			break;
+		case 'l':			/* log */
+			if ((logfp = fopen(__wt_optarg, "w")) == NULL) {
+				fprintf(stderr,
+				    "%s: %s\n", __wt_optarg, strerror(errno));
+				return (EXIT_FAILURE);
+			}
+			break;
+		case 'n':			/* operations */
+			cfg->max_nops = (uint64_t)atol(__wt_optarg);
+			break;
+		case 'R':			/* reverse scan threads */
+			cfg->reverse_scanners = (uint64_t)atol(__wt_optarg);
+			break;
+		case 'r':			/* runs */
+			runs = atoi(__wt_optarg);
+			break;
+		case 't':			/* file type */
+			switch (__wt_optarg[0]) {
+			case 'f':
+				cfg->ftype = FIX;
+				break;
+			case 'r':
+				cfg->ftype = ROW;
+				break;
+			case 'v':
+				cfg->ftype = VAR;
+				break;
+			default:
+				return (usage());
+			}
+			break;
+		case 'v':			/* vary operation count */
+			cfg->vary_nops = true;
+			break;
+		case 'w':			/* keys visited per scan */
+			cfg->reverse_scan_ops = (uint64_t)atol(__wt_optarg);
+			break;
+		case 'W':			/* append threads */
+			cfg->append_inserters = (uint64_t)atol(__wt_optarg);
+			break;
+		default:
+			return (usage());
+		}
+
+	argc -= __wt_optind;
+	argv += __wt_optind;
+	if (argc != 0)
+		return (usage());
+
+	testutil_work_dir_from_path(home, 512, working_dir);
+
+	if (cfg->vary_nops && !cfg->multiple_files) {
+		fprintf(stderr,
+		    "Variable op counts only supported with multiple tables\n");
+		return (usage());
+	}
+
+	/* Clean up on signal. */
+	(void)signal(SIGINT, onint);
+
+	printf("%s: process %" PRIu64 "\n", progname, (uint64_t)getpid());
+	for (cnt = 1; runs == 0 || cnt <= runs; ++cnt) {
+		/* The casts must match the %u conversions. */
+		printf(" %d: %u reverse scanners, %u writers\n", cnt,
+		    (unsigned int)cfg->reverse_scanners,
+		    (unsigned int)cfg->append_inserters);
+
+		shutdown();			/* Clean up previous runs */
+
+		wt_connect(cfg, config_open);	/* WiredTiger connection */
+
+		if (ops_start(cfg))
+			return (EXIT_FAILURE);
+
+		wt_shutdown(cfg);		/* WiredTiger shut down */
+	}
+	return (0);
+}
+
+/*
+ * wt_connect --
+ *	Recreate the working directory and open a WiredTiger connection in
+ *	it, storing the handle in cfg->conn. config_open, when not NULL, is
+ *	appended to the built-in configuration string. Any failure is fatal.
+ */
+static void
+wt_connect(SHARED_CONFIG *cfg, char *config_open)
+{
+	static WT_EVENT_HANDLER event_handler = {
+		handle_error,
+		handle_message,
+		NULL,
+		NULL	/* Close handler. */
+	};
+	int len, ret;
+	char config[512];
+
+	testutil_clean_work_dir(home);
+	testutil_make_work_dir(home);
+
+	/*
+	 * The separating comma is only emitted together with the optional
+	 * user configuration, so the result never contains an empty ",,"
+	 * entry or a dangling trailing comma.
+	 */
+	len = snprintf(config, sizeof(config),
+	    "create,statistics=(all),error_prefix=\"%s\"%s%s",
+	    progname,
+	    config_open == NULL ? "" : ",",
+	    config_open == NULL ? "" : config_open);
+
+	/* A negative result is an output error, too large is truncation. */
+	if (len < 0 || (size_t)len >= sizeof(config))
+		testutil_die(EINVAL, "Config string too long");
+
+	if ((ret = wiredtiger_open(
+	    home, &event_handler, config, &cfg->conn)) != 0)
+		testutil_die(ret, "wiredtiger_open");
+}
+
+/*
+ * wt_shutdown --
+ *	Checkpoint the database through a freshly opened session, then close
+ *	the connection held in cfg->conn. Any failure is fatal.
+ */
+static void
+wt_shutdown(SHARED_CONFIG *cfg)
+{
+	WT_CONNECTION *connection;
+	WT_SESSION *ckpt_session;
+	int rc;
+
+	connection = cfg->conn;
+
+	rc = connection->open_session(connection, NULL, NULL, &ckpt_session);
+	if (rc != 0)
+		testutil_die(rc, "conn.session");
+
+	rc = ckpt_session->checkpoint(ckpt_session, NULL);
+	if (rc != 0)
+		testutil_die(rc, "session.checkpoint");
+
+	/* The session is not closed separately before the connection. */
+	rc = connection->close(connection, NULL);
+	if (rc != 0)
+		testutil_die(rc, "conn.close");
+}
+
+/*
+ * shutdown --
+ * Clean up from previous runs by handing the working directory (home)
+ * to testutil_clean_work_dir. Called at the start of every run in main
+ * and from the SIGINT handler.
+ */
+static void
+shutdown(void)
+{
+ testutil_clean_work_dir(home);
+}
+
+/*
+ * handle_error --
+ *	Error callback installed in the event handler passed to
+ *	wiredtiger_open: write the message to stderr. Returns 0, or -1 if
+ *	the write failed.
+ */
+static int
+handle_error(WT_EVENT_HANDLER *handler,
+    WT_SESSION *session, int error, const char *errmsg)
+{
+	(void)(handler);
+	(void)(session);
+	(void)(error);
+
+	if (fprintf(stderr, "%s\n", errmsg) < 0)
+		return (-1);
+	return (0);
+}
+
+/*
+ * handle_message --
+ *	Message callback installed in the event handler passed to
+ *	wiredtiger_open: write the message to the log file when one was
+ *	opened, otherwise to stdout. Returns 0, or -1 if the write failed.
+ */
+static int
+handle_message(WT_EVENT_HANDLER *handler,
+    WT_SESSION *session, const char *message)
+{
+	FILE *out;
+
+	(void)(handler);
+	(void)(session);
+
+	out = logfp != NULL ? logfp : stdout;
+	if (fprintf(out, "%s\n", message) < 0)
+		return (-1);
+	return (0);
+}
+
+/*
+ * onint --
+ * Interrupt signal handler (installed for SIGINT in main): remove the
+ * working directory, print a newline to stderr and exit with failure.
+ *
+ * NOTE(review): fprintf and exit are not on the POSIX list of
+ * async-signal-safe functions, and what testutil_clean_work_dir calls
+ * is not visible here -- acceptable for a test program, but confirm
+ * before reusing this pattern.
+ */
+static void
+onint(int signo)
+{
+ (void)(signo);
+
+ shutdown();
+
+ fprintf(stderr, "\n");
+ exit(EXIT_FAILURE);
+}
+
+/*
+ * usage --
+ *	Print the usage statement to stderr and return EXIT_FAILURE so that
+ *	callers can write "return (usage());". The options listed here are
+ *	kept in step with the __wt_getopt string in main.
+ */
+static int
+usage(void)
+{
+	fprintf(stderr,
+	    "usage: %s "
+	    "[-Fv] [-C wiredtiger-config] [-h home] [-k keys] [-l log]\n\t"
+	    "[-n ops] [-R reverse_scanners] [-r runs] [-t f|r|v] "
+	    "[-W append_inserters]\n\t"
+	    "[-w reverse_scan_ops]\n",
+	    progname);
+	fprintf(stderr, "%s",
+	    "\t-C specify wiredtiger_open configuration arguments\n"
+	    "\t-F create a file per thread\n"
+	    "\t-h specify the working (home) directory\n"
+	    "\t-k set number of keys to load\n"
+	    "\t-l specify a log file\n"
+	    "\t-n set number of operations each thread does\n"
+	    "\t-R set number of reverse scanner threads\n"
+	    "\t-r set number of runs (0 for continuous)\n"
+	    "\t-t set a file type (fix | row | var)\n"
+	    "\t-v do a different number of operations on different tables\n"
+	    "\t-w set number of items to walk in a reverse scan\n"
+	    "\t-W set number of threads doing append inserts\n");
+	return (EXIT_FAILURE);
+}
diff --git a/src/third_party/wiredtiger/test/cursor_order/cursor_order.h b/src/third_party/wiredtiger/test/cursor_order/cursor_order.h
new file mode 100644
index 00000000000..dd49fce124b
--- /dev/null
+++ b/src/third_party/wiredtiger/test/cursor_order/cursor_order.h
@@ -0,0 +1,54 @@
+/*-
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <signal.h>
+
+#include "test_util.i"
+
+/* printf-style template for object names; the argument is a file index. */
+#define FNAME "file:cursor_order.%03d" /* File name */
+
+/*
+ * File type. file_create uses key_format "S" for ROW and "r" for the other
+ * two, and additionally sets value_format=3t for FIX.
+ */
+typedef enum { FIX, ROW, VAR } __ftype; /* File type */
+
+/* Run configuration, shared by main and every worker thread. */
+typedef struct {
+ uint64_t append_inserters; /* Number of append threads */
+ WT_CONNECTION *conn; /* WiredTiger connection */
+ __ftype ftype; /* File type being tested */
+ uint64_t key_range; /* Current key range: set to nkeys by
+ load, then atomically incremented
+ for each appended key */
+ uint64_t max_nops; /* Operations per thread */
+ bool multiple_files; /* File per thread */
+ uint64_t nkeys; /* Keys to load */
+ uint64_t reverse_scanners; /* Number of scan threads */
+ uint64_t reverse_scan_ops; /* Keys to visit per scan */
+ bool thread_finish; /* Signal to finish run: set by the
+ first thread to complete */
+ bool vary_nops; /* Vary the operation count per file
+ instead of using max_nops */
+
+} SHARED_CONFIG;
+
+/* Create the named object and bulk-load the initial keys into it. */
+void load(SHARED_CONFIG *, const char *);
+/* Run the threaded workload; failures are fatal inside the call. */
+int ops_start(SHARED_CONFIG *);
+/* Run WT_SESSION.verify on the named object. */
+void verify(SHARED_CONFIG *, const char *);
diff --git a/src/third_party/wiredtiger/test/cursor_order/cursor_order_file.c b/src/third_party/wiredtiger/test/cursor_order/cursor_order_file.c
new file mode 100644
index 00000000000..e5dd76fa1a1
--- /dev/null
+++ b/src/third_party/wiredtiger/test/cursor_order/cursor_order_file.c
@@ -0,0 +1,130 @@
+/*-
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "cursor_order.h"
+
+/*
+ * file_create --
+ *	Create the object called name with a key format chosen from
+ *	cfg->ftype ("S" for row stores, "r" otherwise) and, for fixed-length
+ *	column stores, a 3-bit value format. An object that already exists
+ *	is not an error; every other failure is fatal.
+ */
+static void
+file_create(SHARED_CONFIG *cfg, const char *name)
+{
+	WT_CONNECTION *conn;
+	WT_SESSION *session;
+	int len, ret;
+	char config[128];
+
+	conn = cfg->conn;
+
+	if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
+		testutil_die(ret, "conn.session");
+
+	/*
+	 * Build the configuration in a single call so that truncation can be
+	 * detected, and only emit a separating comma when the optional value
+	 * format follows it.
+	 */
+	len = snprintf(config, sizeof(config),
+	    "key_format=%s,"
+	    "internal_page_max=%d,"
+	    "split_deepen_min_child=200,"
+	    "leaf_page_max=%d%s",
+	    cfg->ftype == ROW ? "S" : "r", 16 * 1024, 128 * 1024,
+	    cfg->ftype == FIX ? ",value_format=3t" : "");
+	if (len < 0 || (size_t)len >= sizeof(config))
+		testutil_die(EINVAL, "Config string too long");
+
+	if ((ret = session->create(session, name, config)) != 0)
+		if (ret != EEXIST)
+			testutil_die(ret, "session.create");
+
+	if ((ret = session->close(session, NULL)) != 0)
+		testutil_die(ret, "session.close");
+}
+
+/*
+ * load --
+ *	Create the object called name and bulk-load keys 1..cfg->nkeys into
+ *	it, then set cfg->key_range to the number of keys loaded and take a
+ *	checkpoint. Every failure is fatal.
+ */
+void
+load(SHARED_CONFIG *cfg, const char *name)
+{
+	WT_CONNECTION *conn;
+	WT_CURSOR *cursor;
+	WT_ITEM *value, _value;
+	WT_SESSION *session;
+	char keybuf[64], valuebuf[64];
+	int64_t keyno;
+	int ret;
+
+	conn = cfg->conn;
+
+	file_create(cfg, name);
+
+	if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
+		testutil_die(ret, "conn.session");
+
+	if ((ret =
+	    session->open_cursor(session, name, NULL, "bulk", &cursor)) != 0)
+		testutil_die(ret, "cursor.open");
+
+	value = &_value;
+	for (keyno = 1; keyno <= (int64_t)cfg->nkeys; ++keyno) {
+		if (cfg->ftype == ROW) {
+			snprintf(keybuf, sizeof(keybuf), "%016u", (u_int)keyno);
+			/* String keys are passed as a plain char pointer. */
+			cursor->set_key(cursor, keybuf);
+		} else
+			/*
+			 * Record numbers travel through varargs as 64-bit
+			 * values (the scan code reads them back into a
+			 * uint64_t), so the argument must be 64 bits wide.
+			 */
+			cursor->set_key(cursor, (uint64_t)keyno);
+		value->data = valuebuf;
+		if (cfg->ftype == FIX)
+			cursor->set_value(cursor, 0x01);
+		else {
+			value->size = (uint32_t)snprintf(
+			    valuebuf, sizeof(valuebuf), "%37u", (u_int)keyno);
+			cursor->set_value(cursor, value);
+		}
+		if ((ret = cursor->insert(cursor)) != 0)
+			testutil_die(ret, "cursor.insert");
+	}
+
+	/* Setup the starting key range for the workload phase. */
+	cfg->key_range = cfg->nkeys;
+
+	/* A failed close or checkpoint would invalidate the run. */
+	if ((ret = cursor->close(cursor)) != 0)
+		testutil_die(ret, "cursor.close");
+	if ((ret = session->checkpoint(session, NULL)) != 0)
+		testutil_die(ret, "session.checkpoint");
+
+	if ((ret = session->close(session, NULL)) != 0)
+		testutil_die(ret, "session.close");
+}
+
+/*
+ * verify --
+ *	Run WT_SESSION.verify on the object called name using a session
+ *	opened for the purpose. Every failure is fatal.
+ */
+void
+verify(SHARED_CONFIG *cfg, const char *name)
+{
+	WT_CONNECTION *conn;
+	WT_SESSION *session;
+	int ret;
+
+	conn = cfg->conn;
+
+	if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
+		testutil_die(ret, "conn.session");
+
+	/* Name the call that actually failed in the error message. */
+	if ((ret = session->verify(session, name, NULL)) != 0)
+		testutil_die(ret, "session.verify");
+
+	if ((ret = session->close(session, NULL)) != 0)
+		testutil_die(ret, "session.close");
+}
diff --git a/src/third_party/wiredtiger/test/cursor_order/cursor_order_ops.c b/src/third_party/wiredtiger/test/cursor_order/cursor_order_ops.c
new file mode 100644
index 00000000000..9077f500594
--- /dev/null
+++ b/src/third_party/wiredtiger/test/cursor_order/cursor_order_ops.c
@@ -0,0 +1,364 @@
+/*-
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "cursor_order.h"
+
+static void *append_insert(void *);
+static void print_stats(SHARED_CONFIG *);
+static void *reverse_scan(void *);
+
+/* Per-thread state: one slot per scanner or writer thread. */
+typedef struct {
+ char *name; /* object name; may alias slot 0's buffer
+ when a single file is shared */
+ uint64_t nops; /* Thread op count */
+
+ WT_RAND_STATE rnd; /* RNG, seeded when the thread starts */
+
+ int append_insert; /* cursor.insert calls made */
+ int reverse_scans; /* cursor.prev sequences completed */
+ SHARED_CONFIG *cfg; /* Shared run configuration */
+} INFO;
+
+/* Array of per-thread slots, allocated and freed by ops_start. */
+static INFO *run_info;
+
+/*
+ * ops_start --
+ *	Run one workload: allocate a slot per thread, create and load the
+ *	file(s), start the append and reverse-scan threads, wait for them,
+ *	then verify the files and print statistics. Slots
+ *	[0, append_inserters) belong to writers and the remaining slots to
+ *	reverse scanners. Returns 0; every failure is fatal inside the call.
+ */
+int
+ops_start(SHARED_CONFIG *cfg)
+{
+	struct timeval start, stop;
+	double seconds;
+	pthread_t *tids;
+	uint64_t i, name_index, nthreads, offset, total_nops;
+	int ret;
+	void *thread_ret;
+
+	tids = NULL;			/* Keep GCC 4.1 happy. */
+	total_nops = 0;
+	nthreads = cfg->reverse_scanners + cfg->append_inserters;
+
+	/*
+	 * Slot 0 must be a writer: its name is shared by every other slot in
+	 * single-file mode and the scanner set-up takes a remainder modulo
+	 * the writer count in multi-file mode.
+	 */
+	if (cfg->append_inserters == 0)
+		testutil_die(EINVAL,
+		    "at least one append inserter thread is required");
+
+	/* Create per-thread structures. */
+	if ((run_info = calloc((size_t)nthreads, sizeof(*run_info))) == NULL)
+		testutil_die(errno, "calloc");
+
+	if ((tids = calloc((size_t)nthreads, sizeof(*tids))) == NULL)
+		testutil_die(errno, "calloc");
+
+	/* Create the files and load the initial records. */
+	for (i = 0; i < cfg->append_inserters; ++i) {
+		run_info[i].cfg = cfg;
+		if (i == 0 || cfg->multiple_files) {
+			if ((run_info[i].name = malloc(64)) == NULL)
+				testutil_die(errno, "malloc");
+			snprintf(run_info[i].name, 64, FNAME, (int)i);
+
+			/* Vary by orders of magnitude */
+			if (cfg->vary_nops)
+				run_info[i].nops =
+				    WT_MAX(1000, cfg->max_nops >> i);
+			load(cfg, run_info[i].name);
+		} else
+			run_info[i].name = run_info[0].name;
+
+		/* Setup op count if not varying ops. */
+		if (run_info[i].nops == 0)
+			run_info[i].nops = cfg->max_nops;
+		total_nops += run_info[i].nops;
+	}
+
+	/* Setup the reverse scanner configurations */
+	for (i = 0; i < cfg->reverse_scanners; ++i) {
+		offset = i + cfg->append_inserters;
+		run_info[offset].cfg = cfg;
+		if (cfg->multiple_files) {
+			if ((run_info[offset].name = malloc(64)) == NULL)
+				testutil_die(errno, "malloc");
+			/* Have reverse scans read from tables with writes. */
+			name_index = i % cfg->append_inserters;
+			snprintf(
+			    run_info[offset].name, 64, FNAME, (int)name_index);
+
+			/* Vary by orders of magnitude */
+			if (cfg->vary_nops)
+				run_info[offset].nops =
+				    WT_MAX(1000, cfg->max_nops >> name_index);
+		} else
+			run_info[offset].name = run_info[0].name;
+
+		/* Setup op count if not varying ops. */
+		if (run_info[offset].nops == 0)
+			run_info[offset].nops = cfg->max_nops;
+		total_nops += run_info[offset].nops;
+	}
+
+	(void)gettimeofday(&start, NULL);
+
+	/*
+	 * Create threads. Each thread is handed the index of the slot that
+	 * was configured for its role above: writers first, then scanners.
+	 */
+	for (i = 0; i < cfg->append_inserters; ++i)
+		if ((ret = pthread_create(
+		    &tids[i], NULL, append_insert, (void *)(uintptr_t)i)) != 0)
+			testutil_die(ret, "pthread_create");
+	for (; i < nthreads; ++i)
+		if ((ret = pthread_create(
+		    &tids[i], NULL, reverse_scan, (void *)(uintptr_t)i)) != 0)
+			testutil_die(ret, "pthread_create");
+
+	/* Wait for the threads. */
+	for (i = 0; i < nthreads; ++i)
+		(void)pthread_join(tids[i], &thread_ret);
+
+	(void)gettimeofday(&stop, NULL);
+	seconds = (stop.tv_sec - start.tv_sec) +
+	    (stop.tv_usec - start.tv_usec) * 1e-6;
+
+	/*
+	 * total_nops is already the sum over every thread, so it is not
+	 * scaled by the thread count again. It is the configured operation
+	 * count: threads stop early once the first one finishes.
+	 */
+	fprintf(stderr, "timer: %.2lf seconds (%d ops/second)\n",
+	    seconds, seconds > 0 ? (int)(total_nops / seconds) : 0);
+
+	/* Verify the files. */
+	for (i = 0; i < nthreads; ++i) {
+		verify(cfg, run_info[i].name);
+		if (!cfg->multiple_files)
+			break;
+	}
+
+	/* Output run statistics. */
+	print_stats(cfg);
+
+	/* Free allocated memory; only slot 0 owns a name in one-file mode. */
+	for (i = 0; i < nthreads; ++i) {
+		free(run_info[i].name);
+		if (!cfg->multiple_files)
+			break;
+	}
+
+	free(run_info);
+	free(tids);
+
+	return (0);
+}
+
+/*
+ * reverse_scan_op --
+ *	Reset the cursor and step backwards through at most
+ *	cfg->reverse_scan_ops keys. The first key returned must not be below
+ *	the key range sampled at the start of the scan (less one key per
+ *	append thread), and each later key must be strictly smaller than the
+ *	one before it. Either violation is fatal.
+ */
+static inline void
+reverse_scan_op(
+    SHARED_CONFIG *cfg, WT_SESSION *session, WT_CURSOR *cursor, INFO *s)
+{
+	uint64_t current, floor_key, last_seen, step;
+	int rc;
+	char *key_text;
+
+	WT_UNUSED(session);
+	WT_UNUSED(s);
+
+	/* Initialized only to keep GCC 4.1 quiet. */
+	current = 0;
+	last_seen = 0;
+
+	/* Every scan starts from an unpositioned cursor. */
+	cursor->reset(cursor);
+
+	/* Sample the lowest key the first step may return. */
+	floor_key = cfg->key_range - cfg->append_inserters;
+
+	for (step = 0; step < cfg->reverse_scan_ops; step++) {
+		rc = cursor->prev(cursor);
+		if (rc == WT_NOTFOUND)
+			break;
+		if (rc != 0)
+			testutil_die(rc, "cursor.prev");
+
+		/* Row-store keys are decimal strings, others are numbers. */
+		if (cfg->ftype == ROW) {
+			cursor->get_key(cursor, &key_text);
+			current = (uint64_t)atol(key_text);
+		} else
+			cursor->get_key(cursor, (uint64_t*)&current);
+
+		if (step == 0) {
+			if (current < floor_key)
+				testutil_die(rc,
+				    "cursor scan start range wrong first prev %"
+				    PRIu64 " initial range: %" PRIu64,
+				    current, floor_key);
+		} else if (current >= last_seen)
+			testutil_die(rc,
+			    "cursor scan out of order this: %" PRIu64
+			    " prev: %" PRIu64,
+			    current, last_seen);
+		last_seen = current;
+	}
+}
+
+/*
+ * reverse_scan --
+ * Reader thread start function. arg carries the index of this thread's
+ * slot in run_info. The thread opens its own snapshot-isolation session
+ * and a cursor on the slot's object, then runs reverse_scan_op until it
+ * has done s->nops scans or another thread has set cfg->thread_finish.
+ * Always returns NULL; failures are fatal.
+ */
+static void *
+reverse_scan(void *arg)
+{
+ INFO *s;
+ SHARED_CONFIG *cfg;
+ WT_CURSOR *cursor;
+ WT_SESSION *session;
+ int id, ret;
+ char tid[128];
+ uint64_t i;
+
+ id = (int)(uintptr_t)arg;
+ s = &run_info[id];
+ cfg = s->cfg;
+ __wt_thread_id(tid, sizeof(tid));
+ /* The RNG is seeded here but not used by the scan code in this file. */
+ __wt_random_init(&s->rnd);
+
+ printf(" reverse scan thread %2d starting: tid: %s, file: %s\n",
+ id, tid, s->name);
+
+ __wt_yield(); /* Get all the threads created. */
+
+ if ((ret = cfg->conn->open_session(
+ cfg->conn, NULL, "isolation=snapshot", &session)) != 0)
+ testutil_die(ret, "conn.open_session");
+ if ((ret = session->open_cursor(
+ session, s->name, NULL, NULL, &cursor)) != 0)
+ testutil_die(ret, "session.open_cursor");
+ /* Count each completed scan and yield between scans. */
+ for (i = 0; i < s->nops && !cfg->thread_finish;
+ ++i, ++s->reverse_scans, __wt_yield())
+ reverse_scan_op(cfg, session, cursor, s);
+ /* The cursor is not closed separately before the session. */
+ if ((ret = session->close(session, NULL)) != 0)
+ testutil_die(ret, "session.close");
+
+ printf(" reverse scan thread %2d stopping: tid: %s, file: %s\n",
+ id, tid, s->name);
+
+ /* Notify all other threads to finish once the first thread is done */
+ cfg->thread_finish = true;
+
+ return (NULL);
+}
+
+/*
+ * append_insert_op --
+ *	Write operation: atomically claim the next key number from
+ *	cfg->key_range and insert it through cursor, counting the attempt in
+ *	s->append_insert. A failed insert is fatal.
+ */
+static inline void
+append_insert_op(
+    SHARED_CONFIG *cfg, WT_SESSION *session, WT_CURSOR *cursor, INFO *s)
+{
+	WT_ITEM *value, _value;
+	uint64_t keyno;
+	int ret;
+	char keybuf[64], valuebuf[64];
+
+	WT_UNUSED(session);
+
+	value = &_value;
+
+	keyno = __wt_atomic_add64(&cfg->key_range, 1);
+	if (cfg->ftype == ROW) {
+		snprintf(keybuf, sizeof(keybuf), "%016u", (u_int)keyno);
+		/* String keys are passed as a plain char pointer. */
+		cursor->set_key(cursor, keybuf);
+	} else
+		/*
+		 * Record numbers travel through varargs as 64-bit values
+		 * (the scan code reads them back into a uint64_t), so the
+		 * argument must not be narrowed to 32 bits.
+		 */
+		cursor->set_key(cursor, keyno);
+
+	++s->append_insert;
+	value->data = valuebuf;
+	if (cfg->ftype == FIX)
+		cursor->set_value(cursor, 0x10);
+	else {
+		value->size = (uint32_t)snprintf(
+		    valuebuf, sizeof(valuebuf), "XXX %37u", (u_int)keyno);
+		cursor->set_value(cursor, value);
+	}
+	if ((ret = cursor->insert(cursor)) != 0)
+		testutil_die(ret, "cursor.insert");
+}
+
+/*
+ * append_insert --
+ * Writer thread start function. arg carries the index of this thread's
+ * slot in run_info. The thread opens its own snapshot-isolation session
+ * and a cursor on the slot's object, then runs append_insert_op until it
+ * has done s->nops inserts or another thread has set cfg->thread_finish.
+ * Always returns NULL; failures are fatal.
+ */
+static void *
+append_insert(void *arg)
+{
+ INFO *s;
+ SHARED_CONFIG *cfg;
+ WT_CURSOR *cursor;
+ WT_SESSION *session;
+ uint64_t i;
+ int id, ret;
+ char tid[128];
+
+ id = (int)(uintptr_t)arg;
+ s = &run_info[id];
+ cfg = s->cfg;
+ __wt_thread_id(tid, sizeof(tid));
+ /* The RNG is seeded here but not used by the insert code in this file. */
+ __wt_random_init(&s->rnd);
+
+ printf("write thread %2d starting: tid: %s, file: %s\n",
+ id, tid, s->name);
+
+ __wt_yield(); /* Get all the threads created. */
+
+ if ((ret = cfg->conn->open_session(
+ cfg->conn, NULL, "isolation=snapshot", &session)) != 0)
+ testutil_die(ret, "conn.open_session");
+ if ((ret = session->open_cursor(
+ session, s->name, NULL, NULL, &cursor)) != 0)
+ testutil_die(ret, "session.open_cursor");
+ /* Yield after every insert; the insert count is kept by the op. */
+ for (i = 0; i < s->nops && !cfg->thread_finish; ++i, __wt_yield())
+ append_insert_op(cfg, session, cursor, s);
+ /* The cursor is not closed separately before the session. */
+ if ((ret = session->close(session, NULL)) != 0)
+ testutil_die(ret, "session.close");
+
+ printf("write thread %2d stopping: tid: %s, file: %s\n",
+ id, tid, s->name);
+
+ /* Notify all other threads to finish once the first thread is done */
+ cfg->thread_finish = true;
+
+ return (NULL);
+}
+
+/*
+ * print_stats --
+ *	Print one line per thread slot with the number of reverse scans and
+ *	append inserts recorded in it.
+ */
+static void
+print_stats(SHARED_CONFIG *cfg)
+{
+	uint64_t nthreads, slot;
+
+	nthreads = cfg->reverse_scanners + cfg->append_inserters;
+	for (slot = 0; slot < nthreads; ++slot)
+		printf("%3d: reverse scans %6d, append inserts %6d\n",
+		    (int)slot, (int)run_info[slot].reverse_scans,
+		    (int)run_info[slot].append_insert);
+}
diff --git a/src/third_party/wiredtiger/test/format/config.h b/src/third_party/wiredtiger/test/format/config.h
index d8b11b005d4..a17614bc044 100644
--- a/src/third_party/wiredtiger/test/format/config.h
+++ b/src/third_party/wiredtiger/test/format/config.h
@@ -246,6 +246,10 @@ static CONFIG c[] = {
"minimum gain before prefix compression is used",
0x0, 0, 8, 256, &g.c_prefix_compression_min, NULL },
+ { "quiet",
+ "quiet run (same as -q)",
+ C_IGNORE|C_BOOL, 0, 0, 0, &g.c_quiet, NULL },
+
{ "repeat_data_pct",
"percent duplicate values in row- or var-length column-stores",
0x0, 0, 90, 90, &g.c_repeat_data_pct, NULL },
diff --git a/src/third_party/wiredtiger/test/format/format.h b/src/third_party/wiredtiger/test/format/format.h
index 41c9de3dd30..03da1a84c9c 100644
--- a/src/third_party/wiredtiger/test/format/format.h
+++ b/src/third_party/wiredtiger/test/format/format.h
@@ -142,7 +142,6 @@ typedef struct {
FILE *logfp; /* Log file */
int replay; /* Replaying a run. */
- int track; /* Track progress */
int workers_finished; /* Operations completed */
pthread_rwlock_t backup_lock; /* Hot backup running */
@@ -210,6 +209,7 @@ typedef struct {
uint32_t c_merge_max;
uint32_t c_mmap;
uint32_t c_ops;
+ uint32_t c_quiet;
uint32_t c_prefix_compression;
uint32_t c_prefix_compression_min;
uint32_t c_repeat_data_pct;
diff --git a/src/third_party/wiredtiger/test/format/t.c b/src/third_party/wiredtiger/test/format/t.c
index ccbc0442e4a..0c0485c8bfe 100644
--- a/src/third_party/wiredtiger/test/format/t.c
+++ b/src/third_party/wiredtiger/test/format/t.c
@@ -64,7 +64,7 @@ main(int argc, char *argv[])
#endif
/* Track progress unless we're re-directing output to a file. */
- g.track = isatty(1) ? 1 : 0;
+ g.c_quiet = isatty(1) ? 0 : 1;
/* Set values from the command line. */
home = NULL;
@@ -99,7 +99,7 @@ main(int argc, char *argv[])
g.logging = LOG_OPS;
break;
case 'q': /* Quiet */
- g.track = 0;
+ g.c_quiet = 1;
break;
case 'r': /* Replay a run */
g.replay = 1;
@@ -259,7 +259,7 @@ main(int argc, char *argv[])
wts_salvage();
/* Overwrite the progress line with a completion line. */
- if (g.track)
+ if (!g.c_quiet)
printf("\r%78s\r", " ");
printf("%4d: %s, %s (%.0f seconds)\n",
g.run_cnt, g.c_data_source,
@@ -322,8 +322,8 @@ die(int e, const char *fmt, ...)
(void)pthread_rwlock_wrlock(&g.death_lock);
/* Try and turn off tracking so it doesn't obscure the error message. */
- if (g.track) {
- g.track = 0;
+ if (!g.c_quiet) {
+ g.c_quiet = 1;
fprintf(stderr, "\n");
}
if (fmt != NULL) { /* Death message. */
diff --git a/src/third_party/wiredtiger/test/format/util.c b/src/third_party/wiredtiger/test/format/util.c
index 2b6b9d67fc3..82a6de97ab6 100644
--- a/src/third_party/wiredtiger/test/format/util.c
+++ b/src/third_party/wiredtiger/test/format/util.c
@@ -236,7 +236,7 @@ track(const char *tag, uint64_t cnt, TINFO *tinfo)
int len;
char msg[128];
- if (!g.track || tag == NULL)
+ if (g.c_quiet || tag == NULL)
return;
if (tinfo == NULL && cnt == 0)
diff --git a/src/third_party/wiredtiger/test/suite/test_backup05.py b/src/third_party/wiredtiger/test/suite/test_backup05.py
index 8b176d0f7d7..8ffeb6752df 100644
--- a/src/third_party/wiredtiger/test/suite/test_backup05.py
+++ b/src/third_party/wiredtiger/test/suite/test_backup05.py
@@ -71,7 +71,7 @@ class test_backup05(wttest.WiredTigerTestCase, suite_subprocess):
session.verify(self.uri)
conn.close()
- def test_backup(self):
+ def backup(self):
'''Check manual fsyncLock backup strategy'''
# Here's the strategy:
@@ -95,5 +95,9 @@ class test_backup05(wttest.WiredTigerTestCase, suite_subprocess):
else:
self.session.verify(self.uri)
+ def test_backup(self):
+ with self.expectedStdoutPattern('Recreating metadata'):
+ self.backup()
+
if __name__ == '__main__':
wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_bulk02.py b/src/third_party/wiredtiger/test/suite/test_bulk02.py
index eeca6a56967..fe8118209f2 100644
--- a/src/third_party/wiredtiger/test/suite/test_bulk02.py
+++ b/src/third_party/wiredtiger/test/suite/test_bulk02.py
@@ -49,8 +49,7 @@ class test_bulkload_checkpoint(wttest.WiredTigerTestCase, suite_subprocess):
scenarios = number_scenarios(multiply_scenarios('.', types, ckpt_type))
- # Bulk-load handles return EBUSY to the checkpoint code, causing the
- # checkpoint call to find a handle anyway, and create fake checkpoint.
+ # Bulk-load handles are skipped by checkpoints.
# Named and unnamed checkpoint versions.
def test_bulkload_checkpoint(self):
# Open a bulk cursor and insert a few records.
@@ -72,11 +71,8 @@ class test_bulkload_checkpoint(wttest.WiredTigerTestCase, suite_subprocess):
# In the case of named checkpoints, verify the bulk-loaded file has no
# such checkpoint: opening a checkpoint cursor on it must fail.
if self.ckpt_type == 'named':
- cursor = self.session.open_cursor(
- self.uri, None, 'checkpoint=myckpt')
- self.assertEquals(cursor.next(), wiredtiger.WT_NOTFOUND)
- cursor.close()
-
+ self.assertRaises(wiredtiger.WiredTigerError,
+ lambda: self.session.open_cursor(self.uri, None, 'checkpoint=myckpt'))
# test_bulkload_backup
# Test bulk-load with hot-backup.
diff --git a/src/third_party/wiredtiger/test/suite/test_cursor_random.py b/src/third_party/wiredtiger/test/suite/test_cursor_random.py
index 2cef62b218a..1fd30d93c11 100644
--- a/src/third_party/wiredtiger/test/suite/test_cursor_random.py
+++ b/src/third_party/wiredtiger/test/suite/test_cursor_random.py
@@ -137,7 +137,7 @@ class test_cursor_random_column(wttest.WiredTigerTestCase):
def test_cursor_random_column(self):
self.session.create(self.uri, 'key_format=r,value_format=S')
- msg = '/Operation not supported/'
+ msg = '/next_random .* not supported/'
self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda:
self.session.open_cursor(self.uri, None, "next_random=true"), msg)
diff --git a/src/third_party/wiredtiger/test/suite/test_index01.py b/src/third_party/wiredtiger/test/suite/test_index01.py
index bebeb191ef0..5dfa5506277 100644
--- a/src/third_party/wiredtiger/test/suite/test_index01.py
+++ b/src/third_party/wiredtiger/test/suite/test_index01.py
@@ -226,10 +226,6 @@ class test_index01(wttest.WiredTigerTestCase):
self.assertRaises(wiredtiger.WiredTigerError,
lambda: self.session.create(self.index[0],
'columns=(dept),exclusive'))
- # non-exclusive create with differing configuration
- self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
- lambda: self.session.create(self.index[0],
- 'columns=(salary)'), '/does not match existing configuration/')
self.drop_table()
if __name__ == '__main__':
diff --git a/src/third_party/wiredtiger/test/suite/test_schema02.py b/src/third_party/wiredtiger/test/suite/test_schema02.py
index 6895e947efe..b404261c066 100644
--- a/src/third_party/wiredtiger/test/suite/test_schema02.py
+++ b/src/third_party/wiredtiger/test/suite/test_schema02.py
@@ -103,10 +103,6 @@ class test_schema02(wttest.WiredTigerTestCase):
self.expect_failure_colgroup("main:c1", "columns=(S1,i2),exclusive",
"")
- # exists with different config
- self.expect_failure_colgroup("main:c1", "columns=(S1,i4)",
- "/does not match existing configuration/")
-
# colgroup not declared in initial create
self.expect_failure_colgroup("main:c3", "columns=(S3,i4)",
"/Column group 'c3' not found in"
diff --git a/src/third_party/wiredtiger/test/suite/test_txn04.py b/src/third_party/wiredtiger/test/suite/test_txn04.py
index de49c5fe235..bbd6ce8c4e2 100644
--- a/src/third_party/wiredtiger/test/suite/test_txn04.py
+++ b/src/third_party/wiredtiger/test/suite/test_txn04.py
@@ -121,17 +121,14 @@ class test_txn04(wttest.WiredTigerTestCase, suite_subprocess):
cmd += self.backup_dir
self.runWt(cmd.split())
- self.exception='false'
backup_conn_params = 'log=(enabled,file_max=%s)' % self.logmax
backup_conn = self.wiredtiger_open(self.backup_dir, backup_conn_params)
try:
self.check(backup_conn.open_session(), None, committed)
- except:
- self.exception='true'
finally:
backup_conn.close()
- def test_ops(self):
+ def ops(self):
self.session.create(self.uri, self.create_params)
c = self.session.open_cursor(self.uri, None, 'overwrite')
# Set up the table with entries for 1-5.
@@ -149,7 +146,6 @@ class test_txn04(wttest.WiredTigerTestCase, suite_subprocess):
# The runWt command closes our connection and sessions so
# we need to reopen them here.
self.hot_backup(None, committed)
- self.assertEqual(True, self.exception == 'false')
c = self.session.open_cursor(self.uri, None, 'overwrite')
c.set_value(1)
# Then do the given modification.
@@ -192,14 +188,13 @@ class test_txn04(wttest.WiredTigerTestCase, suite_subprocess):
# Check the state after each commit/rollback.
self.check_all(current, committed)
- # Backup the target we modified. We expect that running
- # recovery now will generate an exception if we committed.
+ # Backup the target we modified and verify the data.
# print 'Call hot_backup with ' + self.uri
self.hot_backup(self.uri, committed)
- if txn == 'commit':
- self.assertEqual(True, self.exception == 'true')
- else:
- self.assertEqual(True, self.exception == 'false')
+
+ def test_ops(self):
+ with self.expectedStdoutPattern('Recreating metadata'):
+ self.ops()
if __name__ == '__main__':
wttest.run()