summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/third_party/wiredtiger/README12
-rw-r--r--src/third_party/wiredtiger/dist/api_data.py10
-rw-r--r--src/third_party/wiredtiger/dist/docs.py45
-rw-r--r--src/third_party/wiredtiger/dist/docs_data.py79
-rwxr-xr-xsrc/third_party/wiredtiger/dist/s_docs21
-rw-r--r--src/third_party/wiredtiger/dist/s_funcs.list1
-rw-r--r--src/third_party/wiredtiger/dist/s_string.ok5
-rw-r--r--src/third_party/wiredtiger/dist/stat_data.py23
-rw-r--r--src/third_party/wiredtiger/examples/c/ex_all.c6
-rw-r--r--src/third_party/wiredtiger/examples/c/ex_backup_block.c25
-rwxr-xr-xsrc/third_party/wiredtiger/examples/python/ex_access.py2
-rw-r--r--src/third_party/wiredtiger/import.data2
-rwxr-xr-xsrc/third_party/wiredtiger/lang/python/wiredtiger/packing.py5
-rw-r--r--src/third_party/wiredtiger/src/block/block_ckpt.c8
-rw-r--r--src/third_party/wiredtiger/src/block/block_open.c7
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_handle.c45
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_huffman.c47
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_import.c66
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_page.c6
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_vrfy.c2
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c38
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_walk.c6
-rw-r--r--src/third_party/wiredtiger/src/btree/row_key.c16
-rw-r--r--src/third_party/wiredtiger/src/btree/row_modify.c24
-rw-r--r--src/third_party/wiredtiger/src/config/config_def.c45
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_api.c2
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_backup.c75
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_backup_incr.c53
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_ds.c59
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_hs.c250
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_json.c3
-rw-r--r--src/third_party/wiredtiger/src/docs/Doxyfile5
-rw-r--r--src/third_party/wiredtiger/src/docs/arch-block.dox9
-rw-r--r--src/third_party/wiredtiger/src/docs/arch-cache.dox13
-rw-r--r--src/third_party/wiredtiger/src/docs/arch-column.dox23
-rw-r--r--src/third_party/wiredtiger/src/docs/arch-cursor.dox15
-rw-r--r--src/third_party/wiredtiger/src/docs/arch-data-file.dox17
-rw-r--r--src/third_party/wiredtiger/src/docs/arch-dhandle.dox17
-rw-r--r--src/third_party/wiredtiger/src/docs/arch-eviction.dox15
-rw-r--r--src/third_party/wiredtiger/src/docs/arch-fs-os.dox10
-rw-r--r--src/third_party/wiredtiger/src/docs/arch-hs.dox8
-rw-r--r--src/third_party/wiredtiger/src/docs/arch-index.dox132
-rw-r--r--src/third_party/wiredtiger/src/docs/arch-log-file.dox7
-rw-r--r--src/third_party/wiredtiger/src/docs/arch-logging.dox8
-rw-r--r--src/third_party/wiredtiger/src/docs/arch-metadata.dox19
-rw-r--r--src/third_party/wiredtiger/src/docs/arch-python.dox18
-rw-r--r--src/third_party/wiredtiger/src/docs/arch-row.dox12
-rw-r--r--src/third_party/wiredtiger/src/docs/arch-schema-ops.dox206
-rw-r--r--src/third_party/wiredtiger/src/docs/arch-schema.dox205
-rw-r--r--src/third_party/wiredtiger/src/docs/arch-snapshot.dox9
-rw-r--r--src/third_party/wiredtiger/src/docs/arch-transaction.dox8
-rw-r--r--src/third_party/wiredtiger/src/docs/backup.dox9
-rw-r--r--src/third_party/wiredtiger/src/docs/custom-data-sources.dox6
-rw-r--r--src/third_party/wiredtiger/src/docs/data-sources.dox7
-rw-r--r--src/third_party/wiredtiger/src/docs/file-formats.dox12
-rw-r--r--src/third_party/wiredtiger/src/docs/huffman.dox9
-rw-r--r--src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/wt_diagram.cmapx21
-rw-r--r--src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/wt_diagram.pngbin67012 -> 66162 bytes
-rw-r--r--src/third_party/wiredtiger/src/docs/spell.ok3
-rw-r--r--src/third_party/wiredtiger/src/docs/style/wiredtiger.css44
-rwxr-xr-xsrc/third_party/wiredtiger/src/docs/tools/doxfilter.py48
-rw-r--r--src/third_party/wiredtiger/src/docs/tune-page-size-and-comp.dox11
-rw-r--r--src/third_party/wiredtiger/src/history/hs_cursor.c3
-rw-r--r--src/third_party/wiredtiger/src/history/hs_rec.c112
-rw-r--r--src/third_party/wiredtiger/src/history/hs_verify.c16
-rw-r--r--src/third_party/wiredtiger/src/include/block.h2
-rw-r--r--src/third_party/wiredtiger/src/include/btmem.h21
-rw-r--r--src/third_party/wiredtiger/src/include/btree.h2
-rw-r--r--src/third_party/wiredtiger/src/include/btree_inline.h7
-rw-r--r--src/third_party/wiredtiger/src/include/cell_inline.h5
-rw-r--r--src/third_party/wiredtiger/src/include/ctype_inline.h10
-rw-r--r--src/third_party/wiredtiger/src/include/cursor.h41
-rw-r--r--src/third_party/wiredtiger/src/include/cursor_inline.h7
-rw-r--r--src/third_party/wiredtiger/src/include/extern.h19
-rw-r--r--src/third_party/wiredtiger/src/include/gcc.h9
-rw-r--r--src/third_party/wiredtiger/src/include/lsm.h2
-rw-r--r--src/third_party/wiredtiger/src/include/meta.h3
-rw-r--r--src/third_party/wiredtiger/src/include/session.h51
-rw-r--r--src/third_party/wiredtiger/src/include/stat.h23
-rw-r--r--src/third_party/wiredtiger/src/include/wiredtiger.in371
-rw-r--r--src/third_party/wiredtiger/src/include/wt_internal.h2
-rw-r--r--src/third_party/wiredtiger/src/meta/meta_ckpt.c59
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_row.c9
-rw-r--r--src/third_party/wiredtiger/src/schema/schema_create.c58
-rw-r--r--src/third_party/wiredtiger/src/support/stat.c97
-rw-r--r--src/third_party/wiredtiger/src/txn/txn.c43
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_ckpt.c1
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_recover.c14
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c15
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_timestamp.c6
-rw-r--r--src/third_party/wiredtiger/test/csuite/incr_backup/main.c50
-rw-r--r--src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c10
-rwxr-xr-xsrc/third_party/wiredtiger/test/evergreen.yml49
-rwxr-xr-xsrc/third_party/wiredtiger/test/evergreen/import_compatibility_test.sh151
-rw-r--r--src/third_party/wiredtiger/test/format/CONFIG.stress1
-rw-r--r--src/third_party/wiredtiger/test/format/backup.c41
-rw-r--r--src/third_party/wiredtiger/test/format/config.h3
-rw-r--r--src/third_party/wiredtiger/test/format/config_compat.c2
-rw-r--r--src/third_party/wiredtiger/test/format/config_compat.sed1
-rw-r--r--src/third_party/wiredtiger/test/format/failure_configs/CONFIG.WT-56371
-rw-r--r--src/third_party/wiredtiger/test/format/failure_configs/CONFIG.WT-67251
-rw-r--r--src/third_party/wiredtiger/test/format/failure_configs/CONFIG.WT-67271
-rw-r--r--src/third_party/wiredtiger/test/format/format.h2
-rwxr-xr-xsrc/third_party/wiredtiger/test/format/format.sh4
-rw-r--r--src/third_party/wiredtiger/test/format/wts.c2
-rw-r--r--src/third_party/wiredtiger/test/suite/test_backup11.py11
-rw-r--r--src/third_party/wiredtiger/test/suite/test_backup12.py1
-rw-r--r--src/third_party/wiredtiger/test/suite/test_backup13.py26
-rw-r--r--src/third_party/wiredtiger/test/suite/test_backup14.py4
-rw-r--r--src/third_party/wiredtiger/test/suite/test_backup15.py4
-rw-r--r--src/third_party/wiredtiger/test/suite/test_backup16.py1
-rw-r--r--src/third_party/wiredtiger/test/suite/test_backup17.py165
-rw-r--r--src/third_party/wiredtiger/test/suite/test_backup18.py136
-rw-r--r--src/third_party/wiredtiger/test/suite/test_backup19.py290
-rw-r--r--src/third_party/wiredtiger/test/suite/test_base02.py2
-rwxr-xr-xsrc/third_party/wiredtiger/test/suite/test_base05.py2
-rw-r--r--src/third_party/wiredtiger/test/suite/test_huffman01.py24
-rw-r--r--src/third_party/wiredtiger/test/suite/test_huffman02.py13
-rw-r--r--src/third_party/wiredtiger/test/suite/test_import09.py190
-rwxr-xr-xsrc/third_party/wiredtiger/test/suite/test_jsondump02.py3
-rw-r--r--src/third_party/wiredtiger/test/suite/test_timestamp20.py11
-rw-r--r--src/third_party/wiredtiger/test/suite/test_txn25.py86
-rwxr-xr-xsrc/third_party/wiredtiger/test/suite/test_util01.py19
-rw-r--r--src/third_party/wiredtiger/test/wt_hang_analyzer/wt_hang_analyzer.py603
124 files changed, 3917 insertions, 915 deletions
diff --git a/src/third_party/wiredtiger/README b/src/third_party/wiredtiger/README
index ca559776c70..b70dd32fcf4 100644
--- a/src/third_party/wiredtiger/README
+++ b/src/third_party/wiredtiger/README
@@ -4,26 +4,26 @@ This is version 10.0.0 of WiredTiger.
WiredTiger release packages and documentation can be found at:
- http://source.wiredtiger.com/
+ https://source.wiredtiger.com/
The documentation for this specific release can be found at:
- http://source.wiredtiger.com/10.0.0/index.html
+ https://source.wiredtiger.com/10.0.0/index.html
The WiredTiger source code can be found at:
- http://github.com/wiredtiger/wiredtiger
+ https://github.com/wiredtiger/wiredtiger
WiredTiger uses JIRA for issue management:
- http://jira.mongodb.org/browse/WT
+ https://jira.mongodb.org/browse/WT
Please do not report issues through GitHub.
WiredTiger licensing information can be found at:
- http://source.wiredtiger.com/license.html
+ https://source.wiredtiger.com/license.html
For general questions and discussion, there's a WiredTiger group:
- http://groups.google.com/group/wiredtiger-users
+ https://groups.google.com/group/wiredtiger-users
diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py
index 0df17bcfb1f..14132297f13 100644
--- a/src/third_party/wiredtiger/dist/api_data.py
+++ b/src/third_party/wiredtiger/dist/api_data.py
@@ -267,10 +267,6 @@ file_config = format_meta + file_runtime_config + [
Config('format', 'btree', r'''
the file format''',
choices=['btree']),
- Config('huffman_key', 'none', r'''
- configure Huffman encoding for keys. Permitted values are
- \c "none", \c "english", \c "utf8<file>" or \c "utf16<file>".
- See @ref huffman for more information'''),
Config('huffman_value', 'none', r'''
configure Huffman encoding for values. Permitted values are
\c "none", \c "english", \c "utf8<file>" or \c "utf16<file>".
@@ -1281,6 +1277,12 @@ methods = {
configure the cursor for block incremental backup usage. These formats
are only compatible with the backup data source; see @ref backup''',
type='category', subconfig=[
+ Config('consolidate', 'false', r'''
+ causes block incremental backup information to be consolidated if adjacent
+ granularity blocks are modified. If false, information will be returned in
+ granularity sized blocks only. This must be set on the primary backup cursor and it
+ applies to all files for this backup''',
+ type='boolean'),
Config('enabled', 'false', r'''
whether to configure this backup as the starting point for a subsequent
incremental backup''',
diff --git a/src/third_party/wiredtiger/dist/docs.py b/src/third_party/wiredtiger/dist/docs.py
new file mode 100644
index 00000000000..1b26c8090d3
--- /dev/null
+++ b/src/third_party/wiredtiger/dist/docs.py
@@ -0,0 +1,45 @@
+# Read and verify the documentation data to make sure path names are valid.
+
+import os, sys
+import docs_data
+
+def check_sort(got, msg, keyfunc=None):
+ if keyfunc:
+ expect = sorted(got, key=keyfunc)
+ else:
+ expect = sorted(got)
+ if got != expect:
+ print(msg)
+ print(' got: ' + str(got))
+ print(' expect: ' + str(expect))
+
+# An include filename will be sorted first.
+def inc_first(f):
+ if '/include/' in f:
+ return '_' + f
+ else:
+ return f
+
+top_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+
+pages = docs_data.arch_doc_pages
+all_names = [ page.doxygen_name for page in pages]
+check_sort(all_names, 'arch_doc_pages must be sorted by name.')
+
+for page in pages:
+ name = page.doxygen_name
+ check_sort(page.data_structures, name + ': data structures must be sorted.')
+ for partial in page.files:
+ fullpath = os.path.join(top_dir, partial)
+ if not os.path.exists(fullpath):
+ print(name + ': ' + partial + ': does not exist')
+ elif os.path.isdir(fullpath):
+ if fullpath[-1:] != '/':
+ print(name + ': ' + partial + ': is a directory, must end in /')
+ else:
+ if fullpath[-1:] == '/':
+ print(name + ': ' + partial + ': not a directory, cannot end in /')
+ check_sort(page.files,
+ name + ': sources must be sorted, with include files first.', inc_first)
+
+sys.exit(0)
diff --git a/src/third_party/wiredtiger/dist/docs_data.py b/src/third_party/wiredtiger/dist/docs_data.py
new file mode 100644
index 00000000000..a1301c87057
--- /dev/null
+++ b/src/third_party/wiredtiger/dist/docs_data.py
@@ -0,0 +1,79 @@
+# Create entries used by our doxygen filter to expand the arch_page
+# macros in the documentation.
+
+class ArchDocPage:
+ def __init__(self, doxygen_name, data_structures, files):
+ self.doxygen_name = doxygen_name
+ self.data_structures = data_structures
+ self.files = files
+
+##########################################
+# List of all architecture subsections
+##########################################
+arch_doc_pages = [
+ ArchDocPage('arch-block',
+ ['WT_BLOCK', 'WT_BLOCK_CKPT', 'WT_BLOCK_DESC', 'WT_BLOCK_HEADER',
+ 'WT_BM', 'WT_EXTLIST'],
+ ['src/include/block.h', 'src/include/block_inline.h',
+ 'src/block/']),
+ ArchDocPage('arch-cache',
+ ['WT_CACHE', 'WT_CACHE_POOL'],
+ ['src/include/cache.h', 'src/include/cache_inline.h']),
+ ArchDocPage('arch-column',
+ ['WT_BTREE'],
+ ['src/include/btree.h']),
+ ArchDocPage('arch-cursor',
+ ['WT_CURSOR', 'WT_CURSOR_BACKUP', 'WT_CURSOR_BTREE', 'WT_CURSOR_BULK',
+ 'WT_CURSOR_DATA_SOURCE', 'WT_CURSOR_DUMP', 'WT_CURSOR_INDEX',
+ 'WT_CURSOR_LOG', 'WT_CURSOR_METADATA', 'WT_CURSOR_STAT'],
+ ['src/include/cursor.h', 'src/include/cursor_inline.h',
+ 'src/cursor/']),
+ ArchDocPage('arch-data-file',
+ ['WT_CELL'],
+ ['src/include/block.h', 'src/include/btmem.h',
+ 'src/include/cell.h', 'src/include/cell_inline.h',
+ 'src/reconcile/rec_col.c', 'src/reconcile/rec_row.c']),
+ ArchDocPage('arch-dhandle',
+ ['WT_BTREE', 'WT_DHANDLE'],
+ ['src/include/btree.h', 'src/include/dhandle.h',
+ 'src/conn/conn_dhandle.c', 'src/session/session_dhandle.c']),
+ ArchDocPage('arch-eviction',
+ ['WT_EVICT_ENTRY', 'WT_EVICT_QUEUE'],
+ ['src/include/cache.h',
+ 'src/evict/']),
+ ArchDocPage('arch-fs-os',
+ ['WT_FILE_SYSTEM'],
+ ['src/include/os.h', 'src/include/os_fhandle_inline.h',
+ 'src/include/os_fs_inline.h', 'src/include/os_fstream_inline.h',
+ 'src/include/os_windows.h',
+ 'src/os_posix/', 'src/os_win/']),
+ ArchDocPage('arch-hs',
+ ['WT_CURSOR_HS'],
+ ['src/history/']),
+ ArchDocPage('arch-log-file',
+ ['WT_LOGSLOT', 'WT_LOG_RECORD', 'WT_LSN'],
+ ['src/include/log.h', 'src/log/']),
+ ArchDocPage('arch-logging',
+ ['WT_CURSOR_LOG', 'WT_LOG', 'WT_LOGSLOT', 'WT_LOG_RECORD', 'WT_LSN'],
+ ['src/include/log.h', 'src/include/log_inline.h', 'src/log/']),
+ ArchDocPage('arch-metadata',
+ [],
+ ['src/include/meta.h', 'src/meta/']),
+ ArchDocPage('arch-python',
+ [],
+ ['lang/python/']),
+ ArchDocPage('arch-row',
+ ['WT_BTREE'],
+ ['src/include/btree.h']),
+ ArchDocPage('arch-schema',
+ ['WT_COLGROUP', 'WT_INDEX', 'WT_LSM_TREE', 'WT_TABLE'],
+ ['src/include/intpack_inline.h', 'src/include/packing_inline.h',
+ 'src/include/schema.h',
+ 'src/lsm/', 'src/packing/', 'src/schema/']),
+ ArchDocPage('arch-snapshot',
+ ['WT_TXN'],
+ ['src/include/txn.h']),
+ ArchDocPage('arch-transaction',
+ ['WT_TXN', 'WT_TXN_GLOBAL', 'WT_TXN_OP', 'WT_TXN_SHARED'],
+ ['src/include/txn.h', 'src/include/txn_inline.h', 'src/txn/']),
+]
diff --git a/src/third_party/wiredtiger/dist/s_docs b/src/third_party/wiredtiger/dist/s_docs
index 2bb134d0244..6cb43b3c49e 100755
--- a/src/third_party/wiredtiger/dist/s_docs
+++ b/src/third_party/wiredtiger/dist/s_docs
@@ -96,7 +96,8 @@ spellchk()
type aspell > /dev/null 2>&1 || return
(cd ../src/docs &&
- cat *.dox |
+ # Separate quoted newlines "line\nline" so "nline" is not reported.
+ sed -e 's/\("[^"]*\)\\n\([^"]*"\)/\1 \2/' *.dox | \
aspell --encoding=iso-8859-1 --lang=en_US --personal=./spell.ok list) |
sort -u > $t
test -s $t && {
@@ -124,6 +125,18 @@ valid_build()
done
}
+check_docs_data()
+{
+ python docs.py > $t
+ test -s $t && {
+ echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
+ echo 'Documentation data errors in docs_data.py'
+ sed -e 's/^/ /' < $t
+ echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
+ e=1
+ }
+}
+
build()
{
# Build from scratch on demand.
@@ -153,7 +166,7 @@ EOF
# Any cmapx files that are generated by plantuml need to be referred to
CMAPX=`find ../src/docs/ -type f -name "*.cmapx" 2>/dev/null`
if [ "$CMAPX" != '' ]; then
- cd ../docs
+ (cd ../docs
for f in $CMAPX; do
b=`echo $f | sed -e 's:.*/::' -e 's/.cmapx$//'`
for html in `grep -l $b.png *.html`; do
@@ -165,6 +178,7 @@ EOF
mv $html.NEW $html
done
done
+ )
fi
# Fixup the man pages generated by Doxygen. We want the command line
@@ -225,6 +239,9 @@ wtperf_config
spellchk
structurechk
+# Check the docs data input file.
+check_docs_data
+
# Build the documentation.
build $clean
diff --git a/src/third_party/wiredtiger/dist/s_funcs.list b/src/third_party/wiredtiger/dist/s_funcs.list
index fe19e596bf4..0b7db52d26c 100644
--- a/src/third_party/wiredtiger/dist/s_funcs.list
+++ b/src/third_party/wiredtiger/dist/s_funcs.list
@@ -13,6 +13,7 @@ __wt_bulk_insert_fix
__wt_bulk_insert_row
__wt_bulk_insert_var
__wt_config_getone
+__wt_curhs_open
__wt_cursor_get_raw_value
__wt_debug_addr
__wt_debug_addr_print
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok
index 58090bf42be..70e3738e022 100644
--- a/src/third_party/wiredtiger/dist/s_string.ok
+++ b/src/third_party/wiredtiger/dist/s_string.ok
@@ -346,9 +346,11 @@ Redistributions
Refactor
Resize
RocksDB
+Runtime
SIMD
SLIST
SLVG
+SMT
SOURCE's
SPINLOCK
SQL
@@ -633,6 +635,7 @@ curconfig
curdump
curextract
curfile
+curhs
curindex
curjoin
curlog
@@ -894,6 +897,8 @@ io
ip
isalnum
isalpha
+isascii
+isb
iscntrl
isdigit
isgraph
diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py
index 6d66318f99c..807516d8eeb 100644
--- a/src/third_party/wiredtiger/dist/stat_data.py
+++ b/src/third_party/wiredtiger/dist/stat_data.py
@@ -284,8 +284,8 @@ connection_stats = [
CacheStat('cache_hs_insert', 'history store table insert calls'),
CacheStat('cache_hs_insert_restart', 'history store table insert calls that returned restart'),
CacheStat('cache_hs_key_truncate', 'history store table truncation to remove an update'),
- CacheStat('cache_hs_key_truncate_mix_ts', 'history store table truncation to remove range of updates due to mixed timestamps'),
CacheStat('cache_hs_key_truncate_onpage_removal', 'history store table truncation to remove range of updates due to key being removed from the data page during reconciliation'),
+ CacheStat('cache_hs_key_truncate_non_ts', 'history store table truncation to remove range of updates due to non timestamped update on data page'),
CacheStat('cache_hs_key_truncate_rts', 'history store table truncation by rollback to stable to remove an update'),
CacheStat('cache_hs_key_truncate_rts_unstable', 'history store table truncation by rollback to stable to remove an unstable update'),
CacheStat('cache_hs_ondisk', 'history store table on-disk size', 'no_clear,no_scale,size'),
@@ -766,7 +766,20 @@ dsrc_stats = [
CacheStat('cache_eviction_walks_gave_up_no_targets', 'eviction walks gave up because they saw too many pages and found no candidates'),
CacheStat('cache_eviction_walks_gave_up_ratio', 'eviction walks gave up because they saw too many pages and found too few candidates'),
CacheStat('cache_eviction_walks_stopped', 'eviction walks gave up because they restarted their walk twice'),
+ CacheStat('cache_hs_insert', 'history store table insert calls'),
+ CacheStat('cache_hs_insert_restart', 'history store table insert calls that returned restart'),
+ CacheStat('cache_hs_key_truncate', 'history store table truncation to remove an update'),
+ CacheStat('cache_hs_key_truncate_onpage_removal', 'history store table truncation to remove range of updates due to key being removed from the data page during reconciliation'),
+ CacheStat('cache_hs_key_truncate_non_ts', 'history store table truncation to remove range of updates due to non timestamped update on data page'),
+ CacheStat('cache_hs_key_truncate_rts', 'history store table truncation by rollback to stable to remove an update'),
+ CacheStat('cache_hs_key_truncate_rts_unstable', 'history store table truncation by rollback to stable to remove an unstable update'),
+ CacheStat('cache_hs_order_fixup_insert', 'history store table out-of-order updates that were fixed up during insertion'),
+ CacheStat('cache_hs_order_fixup_move', 'history store table out-of-order updates that were fixed up by moving existing records'),
+ CacheStat('cache_hs_order_lose_durable_timestamp', 'history store table out-of-order resolved updates that lose their durable timestamp'),
CacheStat('cache_hs_read', 'history store table reads'),
+ CacheStat('cache_hs_read_miss', 'history store table reads missed'),
+ CacheStat('cache_hs_read_squash', 'history store table reads requiring squashed modifies'),
+ CacheStat('cache_hs_write_squash', 'history store table writes requiring squashed modifies'),
CacheStat('cache_inmem_split', 'in-memory page splits'),
CacheStat('cache_inmem_splittable', 'in-memory page passed criteria to be split'),
CacheStat('cache_pages_requested', 'pages requested from the cache'),
@@ -826,10 +839,12 @@ dsrc_stats = [
CursorStat('cursor_modify_bytes', 'modify key and value bytes affected', 'size'),
CursorStat('cursor_modify_bytes_touch', 'modify value bytes modified', 'size'),
CursorStat('cursor_next', 'next calls'),
+ CursorStat('cursor_next_hs_tombstone', 'cursor next calls that skip due to a globally visible history store tombstone'),
CursorStat('cursor_next_skip_ge_100', 'cursor next calls that skip greater than or equal to 100 entries'),
CursorStat('cursor_next_skip_lt_100', 'cursor next calls that skip less than 100 entries'),
CursorStat('cursor_next_skip_total', 'Total number of entries skipped by cursor next calls'),
CursorStat('cursor_prev', 'prev calls'),
+ CursorStat('cursor_prev_hs_tombstone', 'cursor prev calls that skip due to a globally visible history store tombstone'),
CursorStat('cursor_prev_skip_ge_100', 'cursor prev calls that skip greater than or equal to 100 entries'),
CursorStat('cursor_prev_skip_lt_100', 'cursor prev calls that skip less than 100 entries'),
CursorStat('cursor_prev_skip_total', 'Total number of entries skipped by cursor prev calls'),
@@ -922,6 +937,12 @@ dsrc_stats = [
# Transaction statistics
##########################################
TxnStat('txn_read_race_prepare_update', 'race to read prepared update retry'),
+ TxnStat('txn_rts_hs_removed', 'rollback to stable updates removed from history store'),
+ TxnStat('txn_rts_hs_restore_tombstones', 'rollback to stable restored tombstones from history store'),
+ TxnStat('txn_rts_hs_stop_older_than_newer_start', 'rollback to stable hs records with stop timestamps older than newer records'),
+ TxnStat('txn_rts_keys_removed', 'rollback to stable keys removed'),
+ TxnStat('txn_rts_keys_restored', 'rollback to stable keys restored'),
+ TxnStat('txn_rts_sweep_hs_keys', 'rollback to stable sweeping history store keys'),
TxnStat('txn_update_conflict', 'update conflicts'),
]
diff --git a/src/third_party/wiredtiger/examples/c/ex_all.c b/src/third_party/wiredtiger/examples/c/ex_all.c
index a6c6c5f8c43..e63ca868d64 100644
--- a/src/third_party/wiredtiger/examples/c/ex_all.c
+++ b/src/third_party/wiredtiger/examples/c/ex_all.c
@@ -686,12 +686,6 @@ session_ops(WT_SESSION *session)
error_check(session->compact(session, "table:mytable", NULL));
/*! [Compact a table] */
-#ifdef MIGHT_NOT_RUN
- /*! [Import a file] */
- error_check(session->import(session, "file:import", NULL));
-/*! [Import a file] */
-#endif
-
error_check(
session->create(session, "table:old", "key_format=r,value_format=S,cache_resident=true"));
/*! [Rename a table] */
diff --git a/src/third_party/wiredtiger/examples/c/ex_backup_block.c b/src/third_party/wiredtiger/examples/c/ex_backup_block.c
index be21c76051d..01d0431fd10 100644
--- a/src/third_party/wiredtiger/examples/c/ex_backup_block.c
+++ b/src/third_party/wiredtiger/examples/c/ex_backup_block.c
@@ -327,13 +327,23 @@ take_incr_backup(WT_SESSION *session, int i)
size_t alloc, count, rdsize, tmp_sz;
int j, ret, rfd, wfd;
char buf[1024], h[256], *tmp;
- const char *filename;
+ const char *filename, *idstr;
bool first;
tmp = NULL;
tmp_sz = 0;
+ /*! [Query existing IDs] */
+ error_check(session->open_cursor(session, "backup:query_id", NULL, NULL, &backup_cur));
+ while ((ret = backup_cur->next(backup_cur)) == 0) {
+ error_check(backup_cur->get_key(backup_cur, &idstr));
+ printf("Existing incremental ID string: %s\n", idstr);
+ }
+ error_check(backup_cur->close(backup_cur));
+ /*! [Query existing IDs] */
+
/* Open the backup data source for incremental backup. */
- (void)snprintf(buf, sizeof(buf), "incremental=(src_id=\"ID%d\",this_id=\"ID%d\")", i - 1, i);
+ (void)snprintf(buf, sizeof(buf), "incremental=(src_id=\"ID%d\",this_id=\"ID%d\"%s)", i - 1, i,
+ i % 2 == 0 ? "" : ",consolidate=true");
error_check(session->open_cursor(session, "backup:", NULL, buf, &backup_cur));
rfd = wfd = -1;
count = 0;
@@ -454,7 +464,7 @@ main(int argc, char *argv[])
WT_CURSOR *backup_cur;
WT_SESSION *session;
int i, j, ret;
- char cmd_buf[256];
+ char cmd_buf[256], *idstr;
(void)argc; /* Unused variable */
(void)testutil_set_progname(argv);
@@ -507,6 +517,15 @@ main(int argc, char *argv[])
error_check(wt_conn->close(wt_conn, NULL));
error_check(wiredtiger_open(home, NULL, CONN_CONFIG, &wt_conn));
error_check(wt_conn->open_session(wt_conn, NULL, NULL, &session));
+
+ printf("Verify query after reopen\n");
+ error_check(session->open_cursor(session, "backup:query_id", NULL, NULL, &backup_cur));
+ while ((ret = backup_cur->next(backup_cur)) == 0) {
+ error_check(backup_cur->get_key(backup_cur, &idstr));
+ printf("Existing incremental ID string: %s\n", idstr);
+ }
+ error_check(backup_cur->close(backup_cur));
+
/*
* We should have an entry for i-1 and i-2. Use the older one.
*/
diff --git a/src/third_party/wiredtiger/examples/python/ex_access.py b/src/third_party/wiredtiger/examples/python/ex_access.py
index 2b765b8141f..c043a419a73 100755
--- a/src/third_party/wiredtiger/examples/python/ex_access.py
+++ b/src/third_party/wiredtiger/examples/python/ex_access.py
@@ -27,6 +27,7 @@
# OTHER DEALINGS IN THE SOFTWARE.
#
+#! [python simple example]
import os
from wiredtiger import wiredtiger_open
@@ -53,3 +54,4 @@ for key, value in cursor:
print('Got record: %s : %s' % (key, value))
conn.close()
+#! [python simple example]
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 4c20441ef39..dc5f7bf2935 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -2,5 +2,5 @@
"vendor": "wiredtiger",
"github": "wiredtiger/wiredtiger.git",
"branch": "mongodb-4.4",
- "commit": "d6d4b1cc014252aa4c99db8fddbbc9c5ddc3044f"
+ "commit": "1ba1d9bab805851532495a05f4e2421c8cf30a61"
}
diff --git a/src/third_party/wiredtiger/lang/python/wiredtiger/packing.py b/src/third_party/wiredtiger/lang/python/wiredtiger/packing.py
index 83821f2657f..543326e022f 100755
--- a/src/third_party/wiredtiger/lang/python/wiredtiger/packing.py
+++ b/src/third_party/wiredtiger/lang/python/wiredtiger/packing.py
@@ -174,7 +174,10 @@ def pack(fmt, *values):
if _is_string(val) and f in 'Ss':
result += str(val[:l]).encode()
else:
- result += val[:l]
+ if type(val) is bytes:
+ result += val[:l]
+ else:
+ result += val[:l].encode()
if f == 'S' and not havesize:
result += x00
elif size > l and havesize:
diff --git a/src/third_party/wiredtiger/src/block/block_ckpt.c b/src/third_party/wiredtiger/src/block/block_ckpt.c
index 7199a6b4c24..cdabd131e40 100644
--- a/src/third_party/wiredtiger/src/block/block_ckpt.c
+++ b/src/third_party/wiredtiger/src/block/block_ckpt.c
@@ -397,7 +397,8 @@ __ckpt_add_blkmod_entry(
* Add the checkpoint's allocated blocks to all valid incremental backup source identifiers.
*/
static int
-__ckpt_add_blk_mods_alloc(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, WT_BLOCK_CKPT *ci)
+__ckpt_add_blk_mods_alloc(
+ WT_SESSION_IMPL *session, WT_CKPT *ckptbase, WT_BLOCK_CKPT *ci, WT_BLOCK *block)
{
WT_BLOCK_MODS *blk_mod;
WT_CKPT *ckpt;
@@ -417,10 +418,13 @@ __ckpt_add_blk_mods_alloc(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, WT_BLOCK_
if (!F_ISSET(blk_mod, WT_BLOCK_MODS_VALID))
continue;
+ if (block->created_during_backup)
+ WT_RET(__ckpt_add_blkmod_entry(session, blk_mod, 0, block->allocsize));
WT_EXT_FOREACH (ext, ci->alloc.off) {
WT_RET(__ckpt_add_blkmod_entry(session, blk_mod, ext->off, ext->size));
}
}
+ block->created_during_backup = false;
return (0);
}
@@ -595,7 +599,7 @@ __ckpt_process(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckptbase)
* Record the checkpoint's allocated blocks. Do so before skipping any processing and before
* possibly merging in blocks from any previous checkpoint.
*/
- WT_ERR(__ckpt_add_blk_mods_alloc(session, ckptbase, ci));
+ WT_ERR(__ckpt_add_blk_mods_alloc(session, ckptbase, ci, block));
/* Skip the additional processing if we aren't deleting checkpoints. */
if (!deleting)
diff --git a/src/third_party/wiredtiger/src/block/block_open.c b/src/third_party/wiredtiger/src/block/block_open.c
index 20568ce0941..d97f3a86f7d 100644
--- a/src/third_party/wiredtiger/src/block/block_open.c
+++ b/src/third_party/wiredtiger/src/block/block_open.c
@@ -207,6 +207,13 @@ __wt_block_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[
/* Set the file's size. */
WT_ERR(__wt_filesize(session, block->fh, &block->size));
+ /*
+ * If we're opening a file and it only contains a header and we're doing incremental backup
+ * indicate this so that the first checkpoint is sure to set all the bits as dirty to cover the
+ * header so that the header gets copied.
+ */
+ if (block->size == allocsize && F_ISSET(conn, WT_CONN_INCR_BACKUP))
+ block->created_during_backup = true;
/* Initialize the live checkpoint's lock. */
WT_ERR(__wt_spin_init(session, &block->live_lock, "block manager"));
diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c
index 42139969802..60cde6ce8b5 100644
--- a/src/third_party/wiredtiger/src/btree/bt_handle.c
+++ b/src/third_party/wiredtiger/src/btree/bt_handle.c
@@ -522,16 +522,45 @@ __btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt)
btree->checkpoint_gen = __wt_gen(session, WT_GEN_CHECKPOINT); /* Checkpoint generation */
/*
- * In the regular case, we'll be initializing to the connection-wide base write generation since
- * this is the largest of all btree write generations from the previous run. This has the nice
- * property of ensuring that the range of write generations used by consecutive runs do not
- * overlap which aids with debugging.
+ * The first time we open a btree, we'll be initializing the write gen to the connection-wide
+ * base write generation since this is the largest of all btree write generations from the
+ * previous run. This has the nice property of ensuring that the range of write generations used
+ * by consecutive runs do not overlap which aids with debugging.
*
- * In the import case, the btree write generation from the last run may actually be ahead of the
- * connection-wide base write generation. In that case, we should initialize our write gen just
- * ahead of our btree specific write generation.
+ * If we're reopening a btree or importing a new one to a running system, the btree write
+ * generation from the last run may actually be ahead of the connection-wide base write
+ * generation. In that case, we should initialize our write gen just ahead of our btree specific
+ * write generation.
+ *
+ * The runtime write generation is important since it's going to determine what we're going to
+ * use as the base write generation (and thus what pages to wipe transaction ids from). The idea
+ * is that we want to initialize it once the first time we open the btree during a run and then
+ * for every subsequent open, we want to reuse it. This so that we're still able to read
+ * transaction ids from the previous time a btree was open in the same run.
+ *
+ * FIXME-WT-6819: When we begin discarding dhandles more aggressively, we need to check that
+ * updates aren't having their transaction ids wiped after reopening the dhandle. The runtime
+ * write generation is relevant here since it should remain static across the entire run.
+ */
+ btree->write_gen = WT_MAX(ckpt->write_gen + 1, conn->base_write_gen);
+ WT_ASSERT(session, ckpt->write_gen >= ckpt->run_write_gen);
+
+ /* If this is the first time opening the tree this run. */
+ if (F_ISSET(session, WT_SESSION_IMPORT) || ckpt->run_write_gen < conn->base_write_gen)
+ btree->base_write_gen = btree->run_write_gen = btree->write_gen;
+ else
+ btree->base_write_gen = btree->run_write_gen = ckpt->run_write_gen;
+
+ /*
+ * We've just overwritten the runtime write generation based off the fact that know that we're
+ * importing and therefore, the checkpoint data's runtime write generation is meaningless. We
+ * need to ensure that the underlying dhandle doesn't get discarded without being included in a
+ * subsequent checkpoint including the new overwritten runtime write generation. Otherwise,
+ * we'll reopen, won't know that we're in the import case and will incorrectly use the old
+ * system's runtime write generation.
*/
- btree->write_gen = btree->base_write_gen = WT_MAX(ckpt->write_gen + 1, conn->base_write_gen);
+ if (F_ISSET(session, WT_SESSION_IMPORT))
+ btree->modified = true;
return (0);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_huffman.c b/src/third_party/wiredtiger/src/btree/bt_huffman.c
index 8496339d666..13e4952ce33 100644
--- a/src/third_party/wiredtiger/src/btree/bt_huffman.c
+++ b/src/third_party/wiredtiger/src/btree/bt_huffman.c
@@ -197,7 +197,7 @@ __wt_btree_huffman_open(WT_SESSION_IMPL *session)
{
struct __wt_huffman_table *table;
WT_BTREE *btree;
- WT_CONFIG_ITEM key_conf, value_conf;
+ WT_CONFIG_ITEM value_conf;
WT_DECL_RET;
u_int entries, numbytes;
const char **cfg;
@@ -205,11 +205,9 @@ __wt_btree_huffman_open(WT_SESSION_IMPL *session)
btree = S2BT(session);
cfg = btree->dhandle->cfg;
- WT_RET(__wt_config_gets_none(session, cfg, "huffman_key", &key_conf));
- WT_RET(__huffman_confchk(session, &key_conf));
WT_RET(__wt_config_gets_none(session, cfg, "huffman_value", &value_conf));
WT_RET(__huffman_confchk(session, &value_conf));
- if (key_conf.len == 0 && value_conf.len == 0)
+ if (value_conf.len == 0)
return (0);
switch (btree->type) { /* Check file type compatibility. */
@@ -217,43 +215,10 @@ __wt_btree_huffman_open(WT_SESSION_IMPL *session)
WT_RET_MSG(session, EINVAL, "fixed-size column-store files may not be Huffman encoded");
/* NOTREACHED */
case BTREE_COL_VAR:
- if (key_conf.len != 0)
- WT_RET_MSG(session, EINVAL,
- "the keys of variable-length column-store files may not be Huffman encoded");
- break;
case BTREE_ROW:
break;
}
- if (key_conf.len == 0) {
- ;
- } else if (strncmp(key_conf.str, "english", key_conf.len) == 0) {
- struct __wt_huffman_table copy[WT_ELEMENTS(__wt_huffman_nytenglish)];
-
- memcpy(copy, __wt_huffman_nytenglish, sizeof(__wt_huffman_nytenglish));
- WT_RET(__wt_huffman_open(
- session, copy, WT_ELEMENTS(__wt_huffman_nytenglish), 1, &btree->huffman_key));
-
- /* Check for a shared key/value table. */
- if (value_conf.len != 0 && strncmp(value_conf.str, "english", value_conf.len) == 0) {
- btree->huffman_value = btree->huffman_key;
- return (0);
- }
- } else {
- WT_RET(__wt_huffman_read(session, &key_conf, &table, &entries, &numbytes));
- ret = __wt_huffman_open(session, table, entries, numbytes, &btree->huffman_key);
- __wt_free(session, table);
- if (ret != 0)
- return (ret);
-
- /* Check for a shared key/value table. */
- if (value_conf.len != 0 && key_conf.len == value_conf.len &&
- memcmp(key_conf.str, value_conf.str, key_conf.len) == 0) {
- btree->huffman_value = btree->huffman_key;
- return (0);
- }
- }
-
if (value_conf.len == 0) {
;
} else if (strncmp(value_conf.str, "english", value_conf.len) == 0) {
@@ -375,14 +340,6 @@ __wt_btree_huffman_close(WT_SESSION_IMPL *session)
btree = S2BT(session);
- if (btree->huffman_key != NULL) {
- /* Key and data may use the same table, only close it once. */
- if (btree->huffman_value == btree->huffman_key)
- btree->huffman_value = NULL;
-
- __wt_huffman_close(session, btree->huffman_key);
- btree->huffman_key = NULL;
- }
if (btree->huffman_value != NULL) {
__wt_huffman_close(session, btree->huffman_value);
btree->huffman_value = NULL;
diff --git a/src/third_party/wiredtiger/src/btree/bt_import.c b/src/third_party/wiredtiger/src/btree/bt_import.c
index 39d0dda368a..e779d90fc66 100644
--- a/src/third_party/wiredtiger/src/btree/bt_import.c
+++ b/src/third_party/wiredtiger/src/btree/bt_import.c
@@ -13,7 +13,7 @@
* Import a WiredTiger file into the database and reconstruct its metadata.
*/
int
-__wt_import_repair(WT_SESSION_IMPL *session, const char *uri, char **fileconfp)
+__wt_import_repair(WT_SESSION_IMPL *session, const char *uri, char **configp)
{
WT_BM *bm;
WT_CKPT *ckpt, *ckptbase;
@@ -24,13 +24,12 @@ __wt_import_repair(WT_SESSION_IMPL *session, const char *uri, char **fileconfp)
WT_DECL_RET;
WT_KEYED_ENCRYPTOR *kencryptor;
uint32_t allocsize;
- char *checkpoint_list, *fileconf, *metadata, fileid[64];
- const char *filecfg[] = {
- WT_CONFIG_BASE(session, file_meta), NULL, NULL, NULL, NULL, NULL, NULL};
+ char *checkpoint_list, *config, *config_tmp, *metadata, fileid[64];
+ const char *cfg[] = {WT_CONFIG_BASE(session, file_meta), NULL, NULL, NULL, NULL, NULL, NULL};
const char *filename;
ckptbase = NULL;
- checkpoint_list = fileconf = metadata = NULL;
+ checkpoint_list = config = config_tmp = metadata = NULL;
WT_ERR(__wt_scr_alloc(session, 0, &a));
WT_ERR(__wt_scr_alloc(session, 0, &b));
@@ -45,13 +44,11 @@ __wt_import_repair(WT_SESSION_IMPL *session, const char *uri, char **fileconfp)
* size, but 512B allows us to read the descriptor block and that's all we care about.
*/
F_SET(session, WT_SESSION_IMPORT_REPAIR);
- WT_ERR(__wt_block_manager_open(session, filename, filecfg, false, true, 512, &bm));
+ WT_ERR(__wt_block_manager_open(session, filename, cfg, false, true, 512, &bm));
ret = bm->checkpoint_last(bm, session, &metadata, &checkpoint_list, checkpoint);
WT_TRET(bm->close(bm, session));
F_CLR(session, WT_SESSION_IMPORT_REPAIR);
WT_ERR(ret);
- __wt_verbose(session, WT_VERB_CHECKPOINT, "import metadata: %s", metadata);
- __wt_verbose(session, WT_VERB_CHECKPOINT, "import checkpoint-list: %s", checkpoint_list);
/*
* The metadata may have been encrypted, in which case it's also hexadecimal encoded. The
@@ -59,7 +56,7 @@ __wt_import_repair(WT_SESSION_IMPL *session, const char *uri, char **fileconfp)
* diagnosis.
*/
WT_ERR(__wt_config_getones(session, metadata, "block_metadata_encrypted", &v));
- WT_ERR(__wt_btree_config_encryptor(session, filecfg, &kencryptor));
+ WT_ERR(__wt_btree_config_encryptor(session, cfg, &kencryptor));
if ((kencryptor == NULL && v.val != 0) || (kencryptor != NULL && v.val == 0))
WT_ERR_MSG(session, EINVAL,
"%s: loaded object's encryption configuration doesn't match the database's encryption "
@@ -99,20 +96,18 @@ __wt_import_repair(WT_SESSION_IMPL *session, const char *uri, char **fileconfp)
* Strip out the checkpoint LSN, an imported file isn't associated with any log files. Assign a
* unique file ID.
*/
- filecfg[1] = a->data;
- filecfg[2] = checkpoint_list;
- filecfg[3] = "checkpoint_backup_info=";
- filecfg[4] = "checkpoint_lsn=";
+ cfg[1] = a->data;
+ cfg[2] = checkpoint_list;
+ cfg[3] = "checkpoint_backup_info=";
+ cfg[4] = "checkpoint_lsn=";
WT_WITH_SCHEMA_LOCK(session,
ret = __wt_snprintf(fileid, sizeof(fileid), "id=%" PRIu32, ++S2C(session)->next_file_id));
WT_ERR(ret);
- filecfg[5] = fileid;
- WT_ERR(__wt_config_collapse(session, filecfg, &fileconf));
- WT_ERR(__wt_metadata_insert(session, uri, fileconf));
- __wt_verbose(session, WT_VERB_CHECKPOINT, "import configuration: %s/%s", uri, fileconf);
+ cfg[5] = fileid;
+ WT_ERR(__wt_config_collapse(session, cfg, &config_tmp));
/* Now that we've retrieved the configuration, let's get the real allocation size. */
- WT_ERR(__wt_config_getones(session, fileconf, "allocation_size", &v));
+ WT_ERR(__wt_config_getones(session, config_tmp, "allocation_size", &v));
allocsize = (uint32_t)v.val;
/*
@@ -120,32 +115,21 @@ __wt_import_repair(WT_SESSION_IMPL *session, const char *uri, char **fileconfp)
* size. When we did this earlier, we were able to read the descriptor block properly but the
* checkpoint's byte representation was wrong because it was using the wrong allocation size.
*/
- WT_ERR(__wt_block_manager_open(session, filename, filecfg, false, true, allocsize, &bm));
+ WT_ERR(__wt_block_manager_open(session, filename, cfg, false, true, allocsize, &bm));
+ __wt_free(session, checkpoint_list);
+ __wt_free(session, metadata);
ret = bm->checkpoint_last(bm, session, &metadata, &checkpoint_list, checkpoint);
WT_TRET(bm->close(bm, session));
/*
- * The just inserted metadata was correct as of immediately before the final checkpoint, but
- * it's not quite right. The block manager returned the corrected final checkpoint, put it all
- * together.
+ * The metadata was correct as of immediately before the final checkpoint, but it's not quite
+ * right. The block manager returned the corrected final checkpoint, put it all together.
*
* Get the checkpoint information from the file's metadata as an array of WT_CKPT structures.
- *
- * XXX There's a problem here. If a file is imported from our future (leaf pages with unstable
- * entries that have write-generations ahead of the current database's base write generation),
- * we'll read the values and treat them as stable. A restart will fix this: when we added the
- * imported file to our metadata, the write generation in the imported file's checkpoints
- * updated our database's maximum write generation, and so a restart will have a maximum
- * generation newer than the imported file's write generation. An alternative solution is to add
- * a "base write generation" value to the imported file's metadata, and use that value instead
- * of the connection's base write generation when deciding what page items should be read. Since
- * all future writes to the imported file would be ahead of that write generation, it would have
- * the effect we want.
- *
* Update the last checkpoint with the corrected information. Update the file's metadata with
* the new checkpoint information.
*/
- WT_ERR(__wt_meta_ckptlist_get(session, uri, false, &ckptbase));
+ WT_ERR(__wt_meta_ckptlist_get_from_config(session, false, &ckptbase, config_tmp));
WT_CKPT_FOREACH (ckptbase, ckpt)
if (ckpt->name == NULL || (ckpt + 1)->name == NULL)
break;
@@ -153,20 +137,20 @@ __wt_import_repair(WT_SESSION_IMPL *session, const char *uri, char **fileconfp)
WT_ERR_MSG(session, EINVAL, "no checkpoint information available to import");
F_SET(ckpt, WT_CKPT_UPDATE);
WT_ERR(__wt_buf_set(session, &ckpt->raw, checkpoint->data, checkpoint->size));
- WT_ERR(__wt_meta_ckptlist_set(session, uri, ckptbase, NULL));
-
- WT_ASSERT(session, fileconfp != NULL);
- *fileconfp = fileconf;
+ WT_ERR(__wt_meta_ckptlist_update_config(session, ckptbase, config_tmp, &config));
+ __wt_verbose(session, WT_VERB_CHECKPOINT, "import metadata: %s", config);
+ *configp = config;
err:
F_CLR(session, WT_SESSION_IMPORT_REPAIR);
__wt_meta_ckptlist_free(session, &ckptbase);
+ __wt_free(session, checkpoint_list);
if (ret != 0)
- __wt_free(session, fileconf);
+ __wt_free(session, config);
+ __wt_free(session, config_tmp);
__wt_free(session, metadata);
- __wt_free(session, checkpoint_list);
__wt_scr_free(session, &a);
__wt_scr_free(session, &b);
diff --git a/src/third_party/wiredtiger/src/btree/bt_page.c b/src/third_party/wiredtiger/src/btree/bt_page.c
index 5a9cc9cea41..75855d8a465 100644
--- a/src/third_party/wiredtiger/src/btree/bt_page.c
+++ b/src/third_party/wiredtiger/src/btree/bt_page.c
@@ -563,10 +563,10 @@ __inmem_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page)
continue;
case WT_CELL_KEY:
/*
- * Simple keys without compression (not Huffman encoded or prefix compressed), can be
- * directly referenced on the page to avoid repeatedly unpacking their cells.
+ * Simple keys without prefix compression can be directly referenced on the page to
+ * avoid repeatedly unpacking their cells.
*/
- if (!btree->huffman_key && unpack.prefix == 0)
+ if (unpack.prefix == 0)
__wt_row_leaf_key_set(page, rip, &unpack);
else
__wt_row_leaf_key_set_cell(page, rip, unpack.cell);
diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy.c b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
index 660652a8ab2..13abe891dd8 100644
--- a/src/third_party/wiredtiger/src/btree/bt_vrfy.c
+++ b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
@@ -283,7 +283,7 @@ __wt_verify(WT_SESSION_IMPL *session, const char *cfg[])
if (ret == 0 && (ckpt + 1)->name == NULL && !skip_hs) {
/* Open a history store cursor. */
WT_ERR(__wt_hs_cursor_open(session));
- WT_TRET(__wt_history_store_verify_one(session));
+ WT_TRET(__wt_hs_verify_one(session));
WT_TRET(__wt_hs_cursor_close(session));
/*
* We cannot error out here. If we got an error verifying the history store, we need
diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c b/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c
index d8d6b4e899d..22782c35041 100644
--- a/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c
+++ b/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c
@@ -452,7 +452,6 @@ __verify_dsk_row_leaf(
WT_DECL_RET;
WT_ITEM *last;
enum { FIRST, WAS_KEY, WAS_VALUE } last_cell_type;
- void *huffman;
size_t prefix;
uint32_t cell_num, cell_type, i, key_cnt, last_cell_num;
uint8_t *end;
@@ -460,7 +459,6 @@ __verify_dsk_row_leaf(
btree = S2BT(session);
bm = btree->bm;
unpack = &_unpack;
- huffman = dsk->type == WT_PAGE_ROW_INT ? NULL : btree->huffman_key;
WT_ERR(__wt_scr_alloc(session, 0, &current));
WT_ERR(__wt_scr_alloc(session, 0, &last_pfx));
@@ -575,37 +573,15 @@ __verify_dsk_row_leaf(
cell_num, tag, prefix, last->size);
/*
- * If Huffman decoding required, unpack the cell to build the key, then resolve the prefix.
- * Else, we can do it faster internally because we don't have to shuffle memory around as
- * much.
+ * Get the cell's data/length and make sure we have enough buffer space.
*/
- if (huffman != NULL) {
- WT_ERR(__wt_dsk_cell_data_ref(session, dsk->type, unpack, current));
+ WT_ERR(__wt_buf_init(session, current, prefix + unpack->size));
- /*
- * If there's a prefix, make sure there's enough buffer space, then shift the decoded
- * data past the prefix and copy the prefix into place. Take care with the pointers:
- * current->data may be pointing inside the buffer.
- */
- if (prefix != 0) {
- WT_ERR(__wt_buf_grow(session, current, prefix + current->size));
- memmove((uint8_t *)current->mem + prefix, current->data, current->size);
- memcpy(current->mem, last->data, prefix);
- current->data = current->mem;
- current->size += prefix;
- }
- } else {
- /*
- * Get the cell's data/length and make sure we have enough buffer space.
- */
- WT_ERR(__wt_buf_init(session, current, prefix + unpack->size));
-
- /* Copy the prefix then the data into place. */
- if (prefix != 0)
- memcpy(current->mem, last->data, prefix);
- memcpy((uint8_t *)current->mem + prefix, unpack->data, unpack->size);
- current->size = prefix + unpack->size;
- }
+ /* Copy the prefix then the data into place. */
+ if (prefix != 0)
+ memcpy(current->mem, last->data, prefix);
+ memcpy((uint8_t *)current->mem + prefix, unpack->data, unpack->size);
+ current->size = prefix + unpack->size;
key_compare:
/*
diff --git a/src/third_party/wiredtiger/src/btree/bt_walk.c b/src/third_party/wiredtiger/src/btree/bt_walk.c
index 9494d7ddc69..8e2f2173bbe 100644
--- a/src/third_party/wiredtiger/src/btree/bt_walk.c
+++ b/src/third_party/wiredtiger/src/btree/bt_walk.c
@@ -359,8 +359,10 @@ restart:
* done.
*/
if (__wt_ref_is_root(ref)) {
- if (!LF_ISSET(WT_READ_SKIP_INTL))
+ if (!LF_ISSET(WT_READ_SKIP_INTL)) {
*refp = ref;
+ WT_ASSERT(session, ref != ref_orig);
+ }
goto done;
}
@@ -392,6 +394,7 @@ restart:
/* Success, "couple" released. */
couple = NULL;
*refp = ref;
+ WT_ASSERT(session, ref != ref_orig);
goto done;
}
@@ -464,6 +467,7 @@ descend:
/* Return leaf pages to our caller. */
if (F_ISSET(ref, WT_REF_FLAG_LEAF)) {
*refp = ref;
+ WT_ASSERT(session, ref != ref_orig);
goto done;
}
diff --git a/src/third_party/wiredtiger/src/btree/row_key.c b/src/third_party/wiredtiger/src/btree/row_key.c
index b9113df8f44..132059c28cb 100644
--- a/src/third_party/wiredtiger/src/btree/row_key.c
+++ b/src/third_party/wiredtiger/src/btree/row_key.c
@@ -345,20 +345,8 @@ switch_and_jump:
}
}
if (direction == FORWARD) {
- /*
- * Get a reference to the current key's bytes. Usually we want bytes from the page,
- * fast-path that case.
- */
- if (btree->huffman_key == NULL) {
- p = unpack->data;
- size = unpack->size;
- } else {
- if (tmp == NULL)
- WT_ERR(__wt_scr_alloc(session, 0, &tmp));
- WT_ERR(__wt_dsk_cell_data_ref(session, WT_PAGE_ROW_LEAF, unpack, tmp));
- p = tmp->data;
- size = tmp->size;
- }
+ p = unpack->data;
+ size = unpack->size;
/*
* Grow the buffer as necessary as well as ensure data has been copied into local buffer
diff --git a/src/third_party/wiredtiger/src/btree/row_modify.c b/src/third_party/wiredtiger/src/btree/row_modify.c
index 3b10493780f..652c59c71a0 100644
--- a/src/third_party/wiredtiger/src/btree/row_modify.c
+++ b/src/third_party/wiredtiger/src/btree/row_modify.c
@@ -113,6 +113,18 @@ __wt_row_modify(WT_CURSOR_BTREE *cbt, const WT_ITEM *key, const WT_ITEM *value,
/* Avoid WT_CURSOR.update data copy. */
__wt_upd_value_assign(cbt->modify_update, upd);
} else {
+ /*
+ * We only update history store records in two cases:
+ * 1) Delete the record with a tombstone with WT_TS_NONE.
+ * 2) Update the record's stop time point if the prepared update written to the data
+ * store is committed.
+ */
+ WT_ASSERT(session,
+ !WT_IS_HS(S2BT(session)) ||
+ (upd_arg->type == WT_UPDATE_TOMBSTONE && upd_arg->start_ts == WT_TS_NONE &&
+ upd_arg->next == NULL) ||
+ (upd_arg->type == WT_UPDATE_TOMBSTONE && upd_arg->next != NULL &&
+ upd_arg->next->type == WT_UPDATE_STANDARD && upd_arg->next->next == NULL));
upd_size = __wt_update_list_memsize(upd);
/* If there are existing updates, append them after the new updates. */
@@ -176,8 +188,18 @@ __wt_row_modify(WT_CURSOR_BTREE *cbt, const WT_ITEM *key, const WT_ITEM *value,
/* Avoid WT_CURSOR.update data copy. */
__wt_upd_value_assign(cbt->modify_update, upd);
- } else
+ } else {
+ /*
+ * We either insert a tombstone with a standard update or only a standard update to the
+ * history store if we write a prepared update to the data store.
+ */
+ WT_ASSERT(session,
+ !WT_IS_HS(S2BT(session)) ||
+ (upd_arg->type == WT_UPDATE_TOMBSTONE && upd_arg->next != NULL &&
+ upd_arg->next->type == WT_UPDATE_STANDARD && upd_arg->next->next == NULL) ||
+ (upd_arg->type == WT_UPDATE_STANDARD && upd_arg->next == NULL));
upd_size = __wt_update_list_memsize(upd);
+ }
ins->upd = upd;
ins_size += upd_size;
diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c
index 69a33d7ac6a..7ec7b27170c 100644
--- a/src/third_party/wiredtiger/src/config/config_def.c
+++ b/src/third_party/wiredtiger/src/config/config_def.c
@@ -251,7 +251,7 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_create[] = {
{"encryption", "category", NULL, NULL, confchk_WT_SESSION_create_encryption_subconfigs, 2},
{"exclusive", "boolean", NULL, NULL, NULL, 0}, {"extractor", "string", NULL, NULL, NULL, 0},
{"format", "string", NULL, "choices=[\"btree\"]", NULL, 0},
- {"huffman_key", "string", NULL, NULL, NULL, 0}, {"huffman_value", "string", NULL, NULL, NULL, 0},
+ {"huffman_value", "string", NULL, NULL, NULL, 0},
{"ignore_in_memory_cache_size", "boolean", NULL, NULL, NULL, 0},
{"immutable", "boolean", NULL, NULL, NULL, 0},
{"import", "category", NULL, NULL, confchk_WT_SESSION_create_import_subconfigs, 3},
@@ -298,8 +298,8 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_log_flush[] = {
{NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_WT_SESSION_open_cursor_incremental_subconfigs[] = {
- {"enabled", "boolean", NULL, NULL, NULL, 0}, {"file", "string", NULL, NULL, NULL, 0},
- {"force_stop", "boolean", NULL, NULL, NULL, 0},
+ {"consolidate", "boolean", NULL, NULL, NULL, 0}, {"enabled", "boolean", NULL, NULL, NULL, 0},
+ {"file", "string", NULL, NULL, NULL, 0}, {"force_stop", "boolean", NULL, NULL, NULL, 0},
{"granularity", "int", NULL, "min=4KB,max=2GB", NULL, 0},
{"src_id", "string", NULL, NULL, NULL, 0}, {"this_id", "string", NULL, NULL, NULL, 0},
{NULL, NULL, NULL, NULL, NULL, 0}};
@@ -309,7 +309,7 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_open_cursor[] = {
{"checkpoint", "string", NULL, NULL, NULL, 0},
{"checkpoint_wait", "boolean", NULL, NULL, NULL, 0},
{"dump", "string", NULL, "choices=[\"hex\",\"json\",\"pretty\",\"print\"]", NULL, 0},
- {"incremental", "category", NULL, NULL, confchk_WT_SESSION_open_cursor_incremental_subconfigs, 6},
+ {"incremental", "category", NULL, NULL, confchk_WT_SESSION_open_cursor_incremental_subconfigs, 7},
{"next_random", "boolean", NULL, NULL, NULL, 0},
{"next_random_sample_size", "string", NULL, NULL, NULL, 0},
{"overwrite", "boolean", NULL, NULL, NULL, 0}, {"raw", "boolean", NULL, NULL, NULL, 0},
@@ -380,7 +380,7 @@ static const WT_CONFIG_CHECK confchk_file_config[] = {
{"dictionary", "int", NULL, "min=0", NULL, 0},
{"encryption", "category", NULL, NULL, confchk_WT_SESSION_create_encryption_subconfigs, 2},
{"format", "string", NULL, "choices=[\"btree\"]", NULL, 0},
- {"huffman_key", "string", NULL, NULL, NULL, 0}, {"huffman_value", "string", NULL, NULL, NULL, 0},
+ {"huffman_value", "string", NULL, NULL, NULL, 0},
{"ignore_in_memory_cache_size", "boolean", NULL, NULL, NULL, 0},
{"internal_item_max", "int", NULL, "min=0", NULL, 0},
{"internal_key_max", "int", NULL, "min=0", NULL, 0},
@@ -419,8 +419,7 @@ static const WT_CONFIG_CHECK confchk_file_meta[] = {
{"dictionary", "int", NULL, "min=0", NULL, 0},
{"encryption", "category", NULL, NULL, confchk_WT_SESSION_create_encryption_subconfigs, 2},
{"format", "string", NULL, "choices=[\"btree\"]", NULL, 0},
- {"huffman_key", "string", NULL, NULL, NULL, 0}, {"huffman_value", "string", NULL, NULL, NULL, 0},
- {"id", "string", NULL, NULL, NULL, 0},
+ {"huffman_value", "string", NULL, NULL, NULL, 0}, {"id", "string", NULL, NULL, NULL, 0},
{"ignore_in_memory_cache_size", "boolean", NULL, NULL, NULL, 0},
{"internal_item_max", "int", NULL, "min=0", NULL, 0},
{"internal_key_max", "int", NULL, "min=0", NULL, 0},
@@ -466,7 +465,7 @@ static const WT_CONFIG_CHECK confchk_lsm_meta[] = {
{"columns", "list", NULL, NULL, NULL, 0}, {"dictionary", "int", NULL, "min=0", NULL, 0},
{"encryption", "category", NULL, NULL, confchk_WT_SESSION_create_encryption_subconfigs, 2},
{"format", "string", NULL, "choices=[\"btree\"]", NULL, 0},
- {"huffman_key", "string", NULL, NULL, NULL, 0}, {"huffman_value", "string", NULL, NULL, NULL, 0},
+ {"huffman_value", "string", NULL, NULL, NULL, 0},
{"ignore_in_memory_cache_size", "boolean", NULL, NULL, NULL, 0},
{"internal_item_max", "int", NULL, "min=0", NULL, 0},
{"internal_key_max", "int", NULL, "min=0", NULL, 0},
@@ -897,7 +896,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"read_timestamp=none),block_allocation=best,block_compressor=,"
"cache_resident=false,checksum=uncompressed,colgroups=,collator=,"
"columns=,dictionary=0,encryption=(keyid=,name=),exclusive=false,"
- "extractor=,format=btree,huffman_key=,huffman_value=,"
+ "extractor=,format=btree,huffman_value=,"
"ignore_in_memory_cache_size=false,immutable=false,"
"import=(enabled=false,file_metadata=,repair=false),"
"internal_item_max=0,internal_key_max=0,"
@@ -912,7 +911,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"prefix_compression=false,prefix_compression_min=4,source=,"
"split_deepen_min_child=0,split_deepen_per_child=0,split_pct=90,"
"type=file,value_format=u",
- confchk_WT_SESSION_create, 45},
+ confchk_WT_SESSION_create, 44},
{"WT_SESSION.drop",
"checkpoint_wait=true,force=false,lock_wait=true,"
"remove_files=true",
@@ -926,11 +925,11 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
{"WT_SESSION.log_printf", "", NULL, 0},
{"WT_SESSION.open_cursor",
"append=false,bulk=false,checkpoint=,checkpoint_wait=true,dump=,"
- "incremental=(enabled=false,file=,force_stop=false,"
- "granularity=16MB,src_id=,this_id=),next_random=false,"
- "next_random_sample_size=0,overwrite=true,raw=false,"
- "read_once=false,readonly=false,skip_sort_check=false,statistics="
- ",target=",
+ "incremental=(consolidate=false,enabled=false,file=,"
+ "force_stop=false,granularity=16MB,src_id=,this_id=),"
+ "next_random=false,next_random_sample_size=0,overwrite=true,"
+ "raw=false,read_once=false,readonly=false,skip_sort_check=false,"
+ "statistics=,target=",
confchk_WT_SESSION_open_cursor, 15},
{"WT_SESSION.prepare_transaction", "prepare_timestamp=", confchk_WT_SESSION_prepare_transaction,
1},
@@ -961,8 +960,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"assert=(commit_timestamp=none,durable_timestamp=none,"
"read_timestamp=none),block_allocation=best,block_compressor=,"
"cache_resident=false,checksum=uncompressed,collator=,columns=,"
- "dictionary=0,encryption=(keyid=,name=),format=btree,huffman_key="
- ",huffman_value=,ignore_in_memory_cache_size=false,"
+ "dictionary=0,encryption=(keyid=,name=),format=btree,"
+ "huffman_value=,ignore_in_memory_cache_size=false,"
"internal_item_max=0,internal_key_max=0,"
"internal_key_truncate=true,internal_page_max=4KB,key_format=u,"
"key_gap=10,leaf_item_max=0,leaf_key_max=0,leaf_page_max=32KB,"
@@ -971,15 +970,15 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"prefix_compression=false,prefix_compression_min=4,"
"split_deepen_min_child=0,split_deepen_per_child=0,split_pct=90,"
"value_format=u",
- confchk_file_config, 37},
+ confchk_file_config, 36},
{"file.meta",
"access_pattern_hint=none,allocation_size=4KB,app_metadata=,"
"assert=(commit_timestamp=none,durable_timestamp=none,"
"read_timestamp=none),block_allocation=best,block_compressor=,"
"cache_resident=false,checkpoint=,checkpoint_backup_info=,"
"checkpoint_lsn=,checksum=uncompressed,collator=,columns=,"
- "dictionary=0,encryption=(keyid=,name=),format=btree,huffman_key="
- ",huffman_value=,id=,ignore_in_memory_cache_size=false,"
+ "dictionary=0,encryption=(keyid=,name=),format=btree,"
+ "huffman_value=,id=,ignore_in_memory_cache_size=false,"
"internal_item_max=0,internal_key_max=0,"
"internal_key_truncate=true,internal_page_max=4KB,key_format=u,"
"key_gap=10,leaf_item_max=0,leaf_key_max=0,leaf_page_max=32KB,"
@@ -988,7 +987,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"prefix_compression=false,prefix_compression_min=4,"
"split_deepen_min_child=0,split_deepen_per_child=0,split_pct=90,"
"value_format=u,version=(major=0,minor=0)",
- confchk_file_meta, 42},
+ confchk_file_meta, 41},
{"index.meta",
"app_metadata=,collator=,columns=,extractor=,immutable=false,"
"index_key_columns=,key_format=u,source=,type=file,value_format=u",
@@ -999,7 +998,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"read_timestamp=none),block_allocation=best,block_compressor=,"
"cache_resident=false,checksum=uncompressed,chunks=,collator=,"
"columns=,dictionary=0,encryption=(keyid=,name=),format=btree,"
- "huffman_key=,huffman_value=,ignore_in_memory_cache_size=false,"
+ "huffman_value=,ignore_in_memory_cache_size=false,"
"internal_item_max=0,internal_key_max=0,"
"internal_key_truncate=true,internal_page_max=4KB,key_format=u,"
"key_gap=10,last=,leaf_item_max=0,leaf_key_max=0,"
@@ -1013,7 +1012,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"prefix_compression=false,prefix_compression_min=4,"
"split_deepen_min_child=0,split_deepen_per_child=0,split_pct=90,"
"value_format=u",
- confchk_lsm_meta, 41},
+ confchk_lsm_meta, 40},
{"table.meta",
"app_metadata=,colgroups=,collator=,columns=,key_format=u,"
"value_format=u",
diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c
index bc2ad006ab6..9b5e2394b61 100644
--- a/src/third_party/wiredtiger/src/conn/conn_api.c
+++ b/src/third_party/wiredtiger/src/conn/conn_api.c
@@ -2762,7 +2762,7 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, const char *c
*/
if (verify_meta) {
WT_ERR(__wt_open_internal_session(conn, "verify hs", false, 0, &verify_session));
- ret = __wt_history_store_verify(verify_session);
+ ret = __wt_hs_verify(verify_session);
WT_TRET(__wt_session_close_internal(verify_session));
WT_ERR(ret);
}
diff --git a/src/third_party/wiredtiger/src/cursor/cur_backup.c b/src/third_party/wiredtiger/src/cursor/cur_backup.c
index 77b0cf94268..a32b5ddf974 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_backup.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_backup.c
@@ -195,6 +195,7 @@ __curbackup_close(WT_CURSOR *cursor)
WT_CURSOR_BACKUP *cb;
WT_DECL_RET;
WT_SESSION_IMPL *session;
+ const char *cfg[3] = {NULL, NULL, NULL};
cb = (WT_CURSOR_BACKUP *)cursor;
CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, close, NULL);
@@ -204,6 +205,14 @@ err:
__wt_verbose(
session, WT_VERB_BACKUP, "%s", "Releasing resources from forced stop incremental");
__wt_backup_destroy(session);
+ /*
+ * We need to force a checkpoint to the metadata to make the force stop durable. Without it,
+ * the backup information could reappear if we crash and restart.
+ */
+ cfg[0] = WT_CONFIG_BASE(session, WT_SESSION_checkpoint);
+ cfg[1] = "force=true";
+ WT_WITH_DHANDLE(session, WT_SESSION_META_DHANDLE(session),
+ WT_WITH_METADATA_LOCK(session, ret = __wt_checkpoint(session, cfg)));
}
/*
@@ -274,6 +283,15 @@ __wt_curbackup_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *other,
if (othercb != NULL)
WT_CURSOR_BACKUP_CHECK_STOP(othercb);
+ /* Special backup cursor to query incremental IDs. */
+ if (strcmp(uri, "backup:query_id") == 0) {
+ /* Top level cursor code does not allow a URI and cursor. We don't need to check here. */
+ WT_ASSERT(session, othercb == NULL);
+ if (!F_ISSET(S2C(session), WT_CONN_INCR_BACKUP))
+ WT_RET_MSG(session, EINVAL, "Incremental backup is not configured");
+ F_SET(cb, WT_CURBACKUP_QUERYID);
+ }
+
/*
* Start the backup and fill in the cursor's list. Acquire the schema lock, we need a consistent
* view when creating a copy.
@@ -431,7 +449,7 @@ __backup_config(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb, const char *cfg[
WT_DECL_ITEM(tmp);
WT_DECL_RET;
const char *uri;
- bool incremental_config, is_dup, log_config, target_list;
+ bool consolidate, incremental_config, is_dup, log_config, target_list;
*foundp = *incr_only = *log_only = false;
@@ -457,6 +475,19 @@ __backup_config(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb, const char *cfg[
}
/*
+ * Consolidation can be on a per incremental basis or a per-file duplicate cursor basis.
+ */
+ WT_RET(__wt_config_gets(session, cfg, "incremental.consolidate", &cval));
+ consolidate = F_MASK(cb, WT_CURBACKUP_CONSOLIDATE);
+ if (cval.val) {
+ if (is_dup)
+ WT_RET_MSG(session, EINVAL,
+ "Incremental consolidation can only be specified on a primary backup cursor");
+ F_SET(cb, WT_CURBACKUP_CONSOLIDATE);
+ incremental_config = true;
+ }
+
+ /*
* Specifying an incremental file means we're opening a duplicate backup cursor.
*/
WT_RET(__wt_config_gets(session, cfg, "incremental.file", &cval));
@@ -575,13 +606,36 @@ __backup_config(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb, const char *cfg[
F_SET(cb, WT_CURBACKUP_INCR);
}
err:
- if (ret != 0 && cb->incr_src != NULL)
+ if (ret != 0 && cb->incr_src != NULL) {
F_CLR(cb->incr_src, WT_BLKINCR_INUSE);
+ F_CLR(cb, WT_CURBACKUP_CONSOLIDATE);
+ F_SET(cb, consolidate);
+ }
__wt_scr_free(session, &tmp);
return (ret);
}
/*
+ * __backup_query_setup --
+ * Setup the names to return with a backup query cursor.
+ */
+static int
+__backup_query_setup(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb)
+{
+ WT_BLKINCR *blkincr;
+ u_int i;
+
+ for (i = 0; i < WT_BLKINCR_MAX; ++i) {
+ blkincr = &S2C(session)->incr_backups[i];
+ /* If it isn't valid, skip it. */
+ if (!F_ISSET(blkincr, WT_BLKINCR_VALID))
+ continue;
+ WT_RET(__backup_list_append(session, cb, blkincr->id_str));
+ }
+ return (0);
+}
+
+/*
* __backup_start --
* Start a backup.
*/
@@ -622,7 +676,7 @@ __backup_start(
* set a flag and we're done. Actions will be performed on cursor close.
*/
WT_RET_NOTFOUND_OK(__wt_config_gets(session, cfg, "incremental.force_stop", &cval));
- if (cval.val) {
+ if (!F_ISSET(cb, WT_CURBACKUP_QUERYID) && cval.val) {
/*
* If we're force stopping incremental backup, set the flag. The resources involved in
* incremental backup will be released on cursor close and that is the only expected usage
@@ -652,7 +706,16 @@ __backup_start(
/* We're the lock holder, we own cleanup. */
F_SET(cb, WT_CURBACKUP_LOCKER);
-
+ /*
+ * If we are a query backup cursor there are no configuration settings and it will set up
+ * its own list of strings to return. We don't have to do any of the other processing. A
+ * query creates a list to return but does not create the backup file. After appending the
+ * list of IDs we are done.
+ */
+ if (F_ISSET(cb, WT_CURBACKUP_QUERYID)) {
+ ret = __backup_query_setup(session, cb);
+ goto query_done;
+ }
/*
* Create a temporary backup file. This must be opened before generating the list of targets
* in backup_config. This file will later be renamed to the correct name depending on
@@ -719,6 +782,7 @@ __backup_start(
WT_ERR(__backup_list_append(session, cb, WT_WIREDTIGER));
}
+query_done:
err:
/* Close the hot backup file. */
if (srcfs != NULL)
@@ -726,7 +790,8 @@ err:
/*
* Sync and rename the temp file into place.
*/
- if (ret == 0)
+ WT_TRET(__wt_fs_exist(session, WT_BACKUP_TMP, &exist));
+ if (ret == 0 && exist)
ret = __wt_sync_and_rename(session, &cb->bfs, WT_BACKUP_TMP, dest);
if (ret == 0) {
WT_WITH_HOTBACKUP_WRITE_LOCK(session, conn->hot_backup_list = cb->list);
diff --git a/src/third_party/wiredtiger/src/cursor/cur_backup_incr.c b/src/third_party/wiredtiger/src/cursor/cur_backup_incr.c
index c71676b2082..63ee8c0eb2c 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_backup_incr.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_backup_incr.c
@@ -111,8 +111,10 @@ __curbackup_incr_next(WT_CURSOR *cursor)
WT_DECL_RET;
WT_SESSION_IMPL *session;
wt_off_t size;
+ uint64_t start_bitoff, total_len;
uint32_t raw;
const char *file;
+ bool found;
cb = (WT_CURSOR_BACKUP *)cursor;
btree = cb->incr_cursor == NULL ? NULL : CUR2BT(cb->incr_cursor);
@@ -144,18 +146,7 @@ __curbackup_incr_next(WT_CURSOR *cursor)
F_SET(cb, WT_CURBACKUP_INCR_INIT);
__wt_cursor_set_key(cursor, 0, size, WT_BACKUP_FILE);
} else {
- if (F_ISSET(cb, WT_CURBACKUP_INCR_INIT)) {
- /* Look for the next chunk that had modifications. */
- while (cb->bit_offset < cb->nbits)
- if (__bit_test(cb->bitstring.mem, cb->bit_offset))
- break;
- else
- ++cb->bit_offset;
-
- /* We either have this object's incremental information or we're done. */
- if (cb->bit_offset >= cb->nbits)
- WT_ERR(WT_NOTFOUND);
- } else {
+ if (!F_ISSET(cb, WT_CURBACKUP_INCR_INIT)) {
/*
* We don't have this object's incremental information, and it's not a full file copy.
* Get a list of the block modifications for the file. The block modifications are from
@@ -186,8 +177,37 @@ __curbackup_incr_next(WT_CURSOR *cursor)
WT_ERR(WT_NOTFOUND);
}
}
- __wt_cursor_set_key(cursor, cb->offset + cb->granularity * cb->bit_offset++,
- cb->granularity, WT_BACKUP_RANGE);
+ /* We have initialized incremental information. */
+ start_bitoff = cb->bit_offset;
+ total_len = cb->granularity;
+ found = false;
+ /* The bit offset can be less than or equal to but never greater than the number of bits. */
+ WT_ASSERT(session, cb->bit_offset <= cb->nbits);
+ /* Look for the next chunk that had modifications. */
+ while (cb->bit_offset < cb->nbits)
+ if (__bit_test(cb->bitstring.mem, cb->bit_offset)) {
+ found = true;
+ /*
+ * Care must be taken to leave the bit_offset field set to the next offset bit so
+ * that the next call is set to the correct offset.
+ */
+ start_bitoff = cb->bit_offset++;
+ if (F_ISSET(cb, WT_CURBACKUP_CONSOLIDATE)) {
+ while (
+ cb->bit_offset < cb->nbits && __bit_test(cb->bitstring.mem, cb->bit_offset++))
+ total_len += cb->granularity;
+ }
+ break;
+ } else
+ ++cb->bit_offset;
+
+ /* We either have this object's incremental information or we're done. */
+ if (!found)
+ WT_ERR(WT_NOTFOUND);
+ WT_ASSERT(session, cb->granularity != 0);
+ WT_ASSERT(session, total_len != 0);
+ __wt_cursor_set_key(
+ cursor, cb->offset + cb->granularity * start_bitoff, total_len, WT_BACKUP_RANGE);
}
done:
@@ -249,6 +269,11 @@ __wt_curbackup_open_incr(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *o
cb->incr_file, other_cb->incr_src->id_str);
F_SET(cb, WT_CURBACKUP_FORCE_FULL);
}
+ if (F_ISSET(other_cb, WT_CURBACKUP_CONSOLIDATE))
+ F_SET(cb, WT_CURBACKUP_CONSOLIDATE);
+ else
+ F_CLR(cb, WT_CURBACKUP_CONSOLIDATE);
+
/*
* Set up the incremental backup information, if we are not forcing a full file copy. We need an
* open cursor on the file. Open the backup checkpoint, confirming it exists.
diff --git a/src/third_party/wiredtiger/src/cursor/cur_ds.c b/src/third_party/wiredtiger/src/cursor/cur_ds.c
index 581caa37413..6bb3b3e1e73 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_ds.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_ds.c
@@ -9,34 +9,6 @@
#include "wt_internal.h"
/*
- * __curds_txn_enter --
- * Do transactional initialization when starting an operation.
- */
-static int
-__curds_txn_enter(WT_SESSION_IMPL *session, bool update)
-{
- /* Check if we need to start an autocommit transaction. */
- if (update)
- WT_RET(__wt_txn_autocommit_check(session));
-
- session->ncursors++; /* XXX */
- __wt_txn_cursor_op(session);
-
- return (0);
-}
-
-/*
- * __curds_txn_leave --
- * Do transactional cleanup when ending an operation.
- */
-static void
-__curds_txn_leave(WT_SESSION_IMPL *session)
-{
- if (--session->ncursors == 0) /* XXX */
- __wt_txn_read_last(session);
-}
-
-/*
* __curds_key_set --
* Set the key for the data-source.
*/
@@ -183,14 +155,10 @@ __curds_next(WT_CURSOR *cursor)
WT_STAT_CONN_INCR(session, cursor_next);
WT_STAT_DATA_INCR(session, cursor_next);
- WT_ERR(__curds_txn_enter(session, false));
-
F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
ret = __curds_cursor_resolve(cursor, source->next(source));
err:
- __curds_txn_leave(session);
-
API_END_RET(session, ret);
}
@@ -212,13 +180,10 @@ __curds_prev(WT_CURSOR *cursor)
WT_STAT_CONN_INCR(session, cursor_prev);
WT_STAT_DATA_INCR(session, cursor_prev);
- WT_ERR(__curds_txn_enter(session, false));
-
F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
ret = __curds_cursor_resolve(cursor, source->prev(source));
err:
- __curds_txn_leave(session);
API_END_RET(session, ret);
}
@@ -266,14 +231,10 @@ __curds_search(WT_CURSOR *cursor)
WT_STAT_CONN_INCR(session, cursor_search);
WT_STAT_DATA_INCR(session, cursor_search);
- WT_ERR(__curds_txn_enter(session, false));
-
WT_ERR(__curds_key_set(cursor));
ret = __curds_cursor_resolve(cursor, source->search(source));
err:
- __curds_txn_leave(session);
-
API_END_RET(session, ret);
}
@@ -295,14 +256,10 @@ __curds_search_near(WT_CURSOR *cursor, int *exact)
WT_STAT_CONN_INCR(session, cursor_search_near);
WT_STAT_DATA_INCR(session, cursor_search_near);
- WT_ERR(__curds_txn_enter(session, false));
-
WT_ERR(__curds_key_set(cursor));
ret = __curds_cursor_resolve(cursor, source->search_near(source, exact));
err:
- __curds_txn_leave(session);
-
API_END_RET(session, ret);
}
@@ -321,8 +278,6 @@ __curds_insert(WT_CURSOR *cursor)
CURSOR_UPDATE_API_CALL(cursor, session, insert);
- WT_ERR(__curds_txn_enter(session, true));
-
WT_STAT_CONN_INCR(session, cursor_insert);
WT_STAT_DATA_INCR(session, cursor_insert);
WT_STAT_DATA_INCRV(session, cursor_insert_bytes, cursor->key.size + cursor->value.size);
@@ -333,8 +288,6 @@ __curds_insert(WT_CURSOR *cursor)
ret = __curds_cursor_resolve(cursor, source->insert(source));
err:
- __curds_txn_leave(session);
-
CURSOR_UPDATE_API_END(session, ret);
return (ret);
}
@@ -359,15 +312,11 @@ __curds_update(WT_CURSOR *cursor)
WT_STAT_CONN_INCRV(session, cursor_update_bytes, cursor->value.size);
WT_STAT_DATA_INCRV(session, cursor_update_bytes, cursor->value.size);
- WT_ERR(__curds_txn_enter(session, true));
-
WT_ERR(__curds_key_set(cursor));
WT_ERR(__curds_value_set(cursor));
ret = __curds_cursor_resolve(cursor, source->update(source));
err:
- __curds_txn_leave(session);
-
CURSOR_UPDATE_API_END(session, ret);
return (ret);
}
@@ -392,14 +341,10 @@ __curds_remove(WT_CURSOR *cursor)
WT_STAT_CONN_INCRV(session, cursor_remove_bytes, cursor->key.size);
WT_STAT_DATA_INCRV(session, cursor_remove_bytes, cursor->key.size);
- WT_ERR(__curds_txn_enter(session, true));
-
WT_ERR(__curds_key_set(cursor));
ret = __curds_cursor_resolve(cursor, source->remove(source));
err:
- __curds_txn_leave(session);
-
CURSOR_UPDATE_API_END(session, ret);
return (ret);
}
@@ -422,14 +367,10 @@ __curds_reserve(WT_CURSOR *cursor)
WT_STAT_CONN_INCR(session, cursor_reserve);
WT_STAT_DATA_INCR(session, cursor_reserve);
- WT_ERR(__curds_txn_enter(session, true));
-
WT_ERR(__curds_key_set(cursor));
ret = __curds_cursor_resolve(cursor, source->reserve(source));
err:
- __curds_txn_leave(session);
-
CURSOR_UPDATE_API_END(session, ret);
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/cursor/cur_hs.c b/src/third_party/wiredtiger/src/cursor/cur_hs.c
index ba2799e2127..500b9208b98 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_hs.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_hs.c
@@ -141,3 +141,253 @@ __wt_hs_cursor_search_near(WT_SESSION_IMPL *session, WT_CURSOR *cursor, int *exa
session, WT_ISO_READ_UNCOMMITTED, ret = cursor->search_near(cursor, exactp));
return (ret);
}
+
+/*
+ * __curhs_close --
+ * WT_CURSOR->close method for the hs cursor type.
+ */
+static int
+__curhs_close(WT_CURSOR *cursor)
+{
+ WT_CURSOR *file_cursor;
+ WT_CURSOR_HS *hs_cursor;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ hs_cursor = (WT_CURSOR_HS *)cursor;
+ file_cursor = hs_cursor->file_cursor;
+ CURSOR_API_CALL_PREPARE_ALLOWED(
+ cursor, session, close, file_cursor == NULL ? NULL : CUR2BT(file_cursor));
+err:
+ if (file_cursor != NULL)
+ WT_TRET(file_cursor->close(file_cursor));
+ __wt_cursor_close(cursor);
+
+ API_END_RET(session, ret);
+}
+
+/*
+ * __curhs_reset --
+ * Reset a history store cursor.
+ */
+static int
+__curhs_reset(WT_CURSOR *cursor)
+{
+ WT_CURSOR *file_cursor;
+ WT_CURSOR_HS *hs_cursor;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ hs_cursor = (WT_CURSOR_HS *)cursor;
+ file_cursor = hs_cursor->file_cursor;
+ CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, reset, CUR2BT(file_cursor));
+
+ ret = file_cursor->reset(file_cursor);
+ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ WT_TIME_WINDOW_INIT(&hs_cursor->time_window);
+
+err:
+ API_END_RET(session, ret);
+}
+
+/*
+ * __curhs_set_key --
+ * WT_CURSOR->set_key method for the hs cursor type.
+ */
+static void
+__curhs_set_key(WT_CURSOR *cursor, ...)
+{
+ WT_CURSOR *file_cursor;
+ WT_CURSOR_HS *hs_cursor;
+ va_list ap;
+
+ hs_cursor = (WT_CURSOR_HS *)cursor;
+ file_cursor = hs_cursor->file_cursor;
+
+ va_start(ap, cursor);
+ file_cursor->set_key(file_cursor, va_arg(ap, uint32_t), va_arg(ap, WT_ITEM *),
+ va_arg(ap, wt_timestamp_t), va_arg(ap, uint64_t));
+ va_end(ap);
+}
+
+/*
+ * __curhs_get_key --
+ * WT_CURSOR->get_key method for the hs cursor type.
+ */
+static int
+__curhs_get_key(WT_CURSOR *cursor, ...)
+{
+ WT_CURSOR *file_cursor;
+ WT_CURSOR_HS *hs_cursor;
+ WT_DECL_RET;
+ va_list ap;
+
+ hs_cursor = (WT_CURSOR_HS *)cursor;
+ file_cursor = hs_cursor->file_cursor;
+
+ va_start(ap, cursor);
+ ret = file_cursor->get_key(file_cursor, va_arg(ap, uint32_t *), va_arg(ap, WT_ITEM **),
+ va_arg(ap, wt_timestamp_t *), va_arg(ap, uint64_t *));
+ va_end(ap);
+
+ return (ret);
+}
+
+/*
+ * __curhs_get_value --
+ * WT_CURSOR->get_value method for the hs cursor type.
+ */
+static int
+__curhs_get_value(WT_CURSOR *cursor, ...)
+{
+ WT_CURSOR *file_cursor;
+ WT_CURSOR_HS *hs_cursor;
+ WT_DECL_RET;
+ va_list ap;
+
+ hs_cursor = (WT_CURSOR_HS *)cursor;
+ file_cursor = hs_cursor->file_cursor;
+
+ va_start(ap, cursor);
+ ret = file_cursor->get_value(file_cursor, va_arg(ap, wt_timestamp_t *),
+ va_arg(ap, wt_timestamp_t *), va_arg(ap, uint64_t *), va_arg(ap, WT_ITEM **));
+ va_end(ap);
+
+ return (ret);
+}
+
+/*
+ * __curhs_set_value --
+ * WT_CURSOR->set_value method for the hs cursor type.
+ */
+static void
+__curhs_set_value(WT_CURSOR *cursor, ...)
+{
+ WT_CURSOR *file_cursor;
+ WT_CURSOR_HS *hs_cursor;
+ va_list ap;
+
+ hs_cursor = (WT_CURSOR_HS *)cursor;
+ file_cursor = hs_cursor->file_cursor;
+ va_start(ap, cursor);
+ hs_cursor->time_window = *va_arg(ap, WT_TIME_WINDOW *);
+
+ file_cursor->set_value(file_cursor, va_arg(ap, wt_timestamp_t), va_arg(ap, wt_timestamp_t),
+ va_arg(ap, uint64_t), va_arg(ap, WT_ITEM *));
+ va_end(ap);
+}
+
+/*
+ * __curhs_insert --
+ * WT_CURSOR->insert method for the hs cursor type.
+ */
+static int
+__curhs_insert(WT_CURSOR *cursor)
+{
+ WT_CURSOR *file_cursor;
+ WT_CURSOR_BTREE *cbt;
+ WT_CURSOR_HS *hs_cursor;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ WT_UPDATE *hs_tombstone, *hs_upd;
+
+ hs_cursor = (WT_CURSOR_HS *)cursor;
+ file_cursor = hs_cursor->file_cursor;
+ cbt = (WT_CURSOR_BTREE *)file_cursor;
+ hs_tombstone = hs_upd = NULL;
+
+ CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, insert, CUR2BT(file_cursor));
+
+ /* Allocate a tombstone only when there is a valid stop time point. */
+ if (WT_TIME_WINDOW_HAS_STOP(&hs_cursor->time_window)) {
+ /*
+ * Insert a delete record to represent stop time point for the actual record to be inserted.
+ * Set the stop time point as the commit time point of the history store delete record.
+ */
+ WT_ERR(__wt_upd_alloc_tombstone(session, &hs_tombstone, NULL));
+ hs_tombstone->start_ts = hs_cursor->time_window.stop_ts;
+ hs_tombstone->durable_ts = hs_cursor->time_window.durable_stop_ts;
+ hs_tombstone->txnid = hs_cursor->time_window.stop_txn;
+ }
+
+ /*
+ * Append to the delete record, the actual record to be inserted into the history store. Set the
+ * current update start time point as the commit time point to the history store record.
+ */
+ WT_ERR(__wt_upd_alloc(session, &file_cursor->value, WT_UPDATE_STANDARD, &hs_upd, NULL));
+ hs_upd->start_ts = hs_cursor->time_window.start_ts;
+ hs_upd->durable_ts = hs_cursor->time_window.durable_start_ts;
+ hs_upd->txnid = hs_cursor->time_window.start_txn;
+
+ /* Insert the standard update as next update if there is a tombstone. */
+ if (hs_tombstone != NULL) {
+ hs_tombstone->next = hs_upd;
+ hs_upd = hs_tombstone;
+ hs_tombstone = NULL;
+ }
+
+ /* Search the page and insert the updates. */
+ WT_WITH_PAGE_INDEX(session, ret = __wt_hs_row_search(cbt, &file_cursor->key, true));
+ WT_ERR(ret);
+ WT_ERR(__wt_hs_modify(cbt, hs_upd));
+
+ if (0) {
+err:
+ __wt_free(session, hs_tombstone);
+ __wt_free(session, hs_upd);
+ }
+ API_END_RET(session, ret);
+}
+
+/*
+ * __wt_curhs_open --
+ * Initialize a history store cursor.
+ */
+int
+__wt_curhs_open(WT_SESSION_IMPL *session, WT_CURSOR *owner, WT_CURSOR **cursorp)
+{
+ WT_CURSOR_STATIC_INIT(iface, __curhs_get_key, /* get-key */
+ __curhs_get_value, /* get-value */
+ __curhs_set_key, /* set-key */
+ __curhs_set_value, /* set-value */
+ __wt_cursor_compare_notsup, /* compare */
+ __wt_cursor_equals_notsup, /* equals */
+ __wt_cursor_notsup, /* next */
+ __wt_cursor_notsup, /* prev */
+ __curhs_reset, /* reset */
+ __wt_cursor_notsup, /* search */
+ __wt_cursor_search_near_notsup, /* search-near */
+ __curhs_insert, /* insert */
+ __wt_cursor_modify_value_format_notsup, /* modify */
+ __wt_cursor_notsup, /* update */
+ __wt_cursor_notsup, /* remove */
+ __wt_cursor_notsup, /* reserve */
+ __wt_cursor_reconfigure_notsup, /* reconfigure */
+ __wt_cursor_notsup, /* cache */
+ __wt_cursor_reopen_notsup, /* reopen */
+ __curhs_close); /* close */
+ WT_CURSOR *cursor;
+ WT_CURSOR_HS *hs_cursor;
+ WT_DECL_RET;
+
+ WT_RET(__wt_calloc_one(session, &hs_cursor));
+ cursor = (WT_CURSOR *)hs_cursor;
+ *cursor = iface;
+ cursor->session = (WT_SESSION *)session;
+ cursor->key_format = WT_HS_KEY_FORMAT;
+ cursor->value_format = WT_HS_VALUE_FORMAT;
+
+ /* Open the file cursor for operations on the regular history store. */
+ WT_ERR(__hs_cursor_open_int(session, &hs_cursor->file_cursor));
+
+ WT_ERR(__wt_cursor_init(cursor, WT_HS_URI, owner, NULL, cursorp));
+
+ WT_TIME_WINDOW_INIT(&hs_cursor->time_window);
+
+ if (0) {
+err:
+ WT_TRET(__curhs_close(cursor));
+ *cursorp = NULL;
+ }
+ return (ret);
+}
diff --git a/src/third_party/wiredtiger/src/cursor/cur_json.c b/src/third_party/wiredtiger/src/cursor/cur_json.c
index 89e5ecba4d3..ef24017b02d 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_json.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_json.c
@@ -315,7 +315,8 @@ __wt_json_unpack_char(u_char ch, u_char *buf, size_t bufsz, bool force_unicode)
u_char abbrev;
if (!force_unicode) {
- if (__wt_isprint(ch) && ch != '\\' && ch != '"') {
+ /* We treat all non-ASCII characters as non-printable. */
+ if (__wt_isascii(ch) && __wt_isprint(ch) && ch != '\\' && ch != '"') {
if (bufsz >= 1)
*buf = ch;
return (1);
diff --git a/src/third_party/wiredtiger/src/docs/Doxyfile b/src/third_party/wiredtiger/src/docs/Doxyfile
index 378b1fd5f50..7a855b2f94d 100644
--- a/src/third_party/wiredtiger/src/docs/Doxyfile
+++ b/src/third_party/wiredtiger/src/docs/Doxyfile
@@ -206,6 +206,8 @@ TAB_SIZE = 8
# You can put \n's in the value part of an alias to insert newlines.
ALIASES = \
+ arch_page_table{2}="<div class="arch_head"><table><tr><th rowspan=2 style=\"width:10%;\">@htmlonly <div><a href=\"arch-index.html\"><img class=\"arch_thumbnail\" src=\"wt_diagram.png\" style=\"background-image: url(wt_diagram.png)\"></a></div>@endhtmlonly</th><th style=\"width:44%\">Data Structures</th><th style=\"width:45%\">Source Location</th></tr><tr><td><code>\1</code></td><td><code>\2</code></td></tr></table></div>" \
+ arch_page_top{2}="@page \1 \2 (Architecture Guide)" \
config{3}=" @row{<tt>\1</tt>,\2,\3}" \
configempty{2}="@param config configuration string, see @ref config_strings. No values currently permitted." \
configend=" </table>" \
@@ -794,7 +796,8 @@ EXCLUDE_SYMBOLS = __F \
EXAMPLE_PATH = ../../examples/c \
../../ext/compressors/nop \
../../ext/encryptors/nop \
- ../../ext/encryptors/rotn
+ ../../ext/encryptors/rotn \
+ ../../examples/python
# If the value of the EXAMPLE_PATH tag contains directories, you can use the
# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
diff --git a/src/third_party/wiredtiger/src/docs/arch-block.dox b/src/third_party/wiredtiger/src/docs/arch-block.dox
new file mode 100644
index 00000000000..7c8fdf5d72b
--- /dev/null
+++ b/src/third_party/wiredtiger/src/docs/arch-block.dox
@@ -0,0 +1,9 @@
+/*! @arch_page arch-block Block Manager
+
+The Block Manager manages the reading and writing of disk blocks
+in WiredTiger. It does compression and encryption when these
+are configured.
+
+The state of the block manager is represented by the \c WT_BM structure.
+Individual blocks being tracked are in \c WT_BLOCK structures.
+*/
diff --git a/src/third_party/wiredtiger/src/docs/arch-cache.dox b/src/third_party/wiredtiger/src/docs/arch-cache.dox
new file mode 100644
index 00000000000..94888260cfe
--- /dev/null
+++ b/src/third_party/wiredtiger/src/docs/arch-cache.dox
@@ -0,0 +1,13 @@
+/*! @arch_page arch-cache Cache
+
+Cache in WiredTiger is represented by the various shared data structures
+that make up in-memory Btrees and subordinate data structures.
+
+Memory used to read in and write out the on-disk representations of Btrees
+is not cached, it only exists temporarily during the I/O operation and
+while the data is transferred to or from the on-disk format.
+
+Internally, the current cache state is represented by the WT_CACHE structure,
+which contains various counters that drive statistics and information
+used for eviction.
+*/
diff --git a/src/third_party/wiredtiger/src/docs/arch-column.dox b/src/third_party/wiredtiger/src/docs/arch-column.dox
new file mode 100644
index 00000000000..4b3b399349a
--- /dev/null
+++ b/src/third_party/wiredtiger/src/docs/arch-column.dox
@@ -0,0 +1,23 @@
+/*! @arch_page arch-column Column Store
+
+Column Stores are Btrees stored in WiredTiger that have as their
+key a record id, that is, a 64 bit unsigned integer. Thus, they implement
+a specialized version of a Btree, where the key is a predictable length.
+
+A particular kind of column store is the fixed length column store.
+As its name implies, the value is fixed length, and furthermore the
+value is restricted to between 1 and 8 bits in length. The bit length is specified
+when the column store is created. The fixed length column store
+has specialized use cases like bitmaps.
+
+The more general case is the variable length column store which allows
+for values that have any length, and may have arbitrary types, including
+aggregates of various types.
+
+Internally, row stores and both kinds of column stores all use a common
+\c WT_BTREE structure. Column stores are distinguished in that
+<code>WT_BTREE->type == BTREE_COL_VAR</code> for variable length column stores
+and <code>WT_BTREE->type == BTREE_COL_FIX</code> for fixed length column stores.
+Internal functions that navigate, access and manipulate Btrees have
+code sprinkled throughout that is conditional on <code>WT_BTREE->type</code>.
+*/
diff --git a/src/third_party/wiredtiger/src/docs/arch-cursor.dox b/src/third_party/wiredtiger/src/docs/arch-cursor.dox
new file mode 100644
index 00000000000..60e47c5a8ad
--- /dev/null
+++ b/src/third_party/wiredtiger/src/docs/arch-cursor.dox
@@ -0,0 +1,15 @@
+/*! @arch_page arch-cursor Cursor
+
+Cursors are used in WiredTiger to get and modify data.
+A caller of WiredTiger uses WT_SESSION::open_cursor to create
+a WT_CURSOR. Methods on the WT_CURSOR can then be used to
+position, iterate, get, and set data.
+
+Depending on the <code>uri</code> used when creating a cursor, the cursor will
+be internally implemented as one of the many cursor structures that include
+WT_CURSOR_BTREE, WT_CURSOR_BACKUP, WT_CURSOR_INDEX, WT_CURSOR_LOG,
+WT_CURSOR_METADATA, WT_CURSOR_STAT. Each of these structures starts
+with the common \c %WT_CURSOR structure, which contains all
+of the data and method pointers that make up the public part of the API.
+
+*/
diff --git a/src/third_party/wiredtiger/src/docs/arch-data-file.dox b/src/third_party/wiredtiger/src/docs/arch-data-file.dox
new file mode 100644
index 00000000000..834329c981f
--- /dev/null
+++ b/src/third_party/wiredtiger/src/docs/arch-data-file.dox
@@ -0,0 +1,17 @@
+/*! @arch_page arch-data-file Data File Format
+
+The format of the WiredTiger data file is given by structures
+in \c block.h , defining the overall structure of the file and
+its blocks. The \c WT_BLOCK_DESC starts the file. Following that,
+individual pages appear, each with a \c WT_PAGE_HEADER defined in \c btmem.h
+and a \c WT_BLOCK_HEADER defined in \c block.h . Individual cells then
+sequentially appear as defined in \c cell.h . Each cell encodes
+a key or value. Ordering is important, values are all associated with
+the key that precedes them. Multiple values may be present, which
+can represent multiple versions. Extremely large values may be represented
+as a reference to another page.
+
+The exact encoding is rather complex, and beyond what can be described here.
+The encoding strikes a balance between data that can be compacted efficiently
+in time and space, extensibility, and compatibility with previous versions.
+*/
diff --git a/src/third_party/wiredtiger/src/docs/arch-dhandle.dox b/src/third_party/wiredtiger/src/docs/arch-dhandle.dox
new file mode 100644
index 00000000000..118285ec014
--- /dev/null
+++ b/src/third_party/wiredtiger/src/docs/arch-dhandle.dox
@@ -0,0 +1,17 @@
+/*! @arch_page arch-dhandle Data Handles and Btrees
+
+An internal structure called a Data Handle (dhandle) is used to represent and
+access Btrees and other data sources in WiredTiger. A dhandle is created
+when a table is accessed for the first time. It is kept in a global list
+and is shared across the sessions. When a dhandle is not needed anymore
+and has been idle for a while, it is closed and destroyed, releasing all the
+resources associated with it.
+
+A Btree is one kind of dhandle. It embodies both the on-disk and in-memory
+representations of the Btree.
+
+@subpage arch-dhandle-lifecycle
+
+This section describes how dhandles are created, and how and when they are destroyed.
+
+*/
diff --git a/src/third_party/wiredtiger/src/docs/arch-eviction.dox b/src/third_party/wiredtiger/src/docs/arch-eviction.dox
new file mode 100644
index 00000000000..5c877e6322f
--- /dev/null
+++ b/src/third_party/wiredtiger/src/docs/arch-eviction.dox
@@ -0,0 +1,15 @@
+/*! @arch_page arch-eviction Eviction
+
+Eviction represents the process of removing old data from the cache,
+writing it to disk if it is dirty. The general strategy uses a dedicated
+set of eviction threads that are tasked with identifying candidate data.
+If the data needs to be written, it is reconciled (converting the
+in-memory format to on-disk format), and then written. Clean memory
+can be freed if needed.
+
+Eviction cannot be triggered directly by API calls, but happens as a result
+of enough data being dirtied.
+
+Eviction is managed using \c WT_EVICT_QUEUE structures,
+each of which contains a list of \c WT_EVICT_ENTRY structures.
+*/
diff --git a/src/third_party/wiredtiger/src/docs/arch-fs-os.dox b/src/third_party/wiredtiger/src/docs/arch-fs-os.dox
new file mode 100644
index 00000000000..cedade13248
--- /dev/null
+++ b/src/third_party/wiredtiger/src/docs/arch-fs-os.dox
@@ -0,0 +1,10 @@
+/*! @arch_page arch-fs-os File System and Operating System Interface
+
+Internally, a layer of abstraction is above all operating system calls,
+allowing main line WiredTiger code to make a call to a single set of interfaces.
+There are currently OS specific APIs for POSIX and Windows.
+
+Additionally, a program can use WT_CONNECTION::set_file_system to register
+a set of functions that will be called for each file system operation.
+WiredTiger will then call these functions at the appropriate time.
+*/
diff --git a/src/third_party/wiredtiger/src/docs/arch-hs.dox b/src/third_party/wiredtiger/src/docs/arch-hs.dox
new file mode 100644
index 00000000000..6ed50c5fbf6
--- /dev/null
+++ b/src/third_party/wiredtiger/src/docs/arch-hs.dox
@@ -0,0 +1,8 @@
+/*! @arch_page arch-hs History Store
+
+The History Store in WiredTiger tracks old (all but the latest committed)
+versions of records. By having these records in storage separate from
+the current version, they can be used to service long running transactions,
+and be evicted as necessary, without interfering with activity that uses
+the most recent committed versions.
+*/
diff --git a/src/third_party/wiredtiger/src/docs/arch-index.dox b/src/third_party/wiredtiger/src/docs/arch-index.dox
index f5430ccb7e7..21af5d1f62f 100644
--- a/src/third_party/wiredtiger/src/docs/arch-index.dox
+++ b/src/third_party/wiredtiger/src/docs/arch-index.dox
@@ -5,6 +5,7 @@ use the API interface to direct WiredTiger's operations (see @ref basic_api).
Here is an overview of the software components in WiredTiger and how they are organized.
An arrow indicates the "from" component uses "to" component.
+<div class="arch_diagram">
@plantuml_start{wt_diagram.png}
@startuml{wt_diagram.png}
@@ -27,46 +28,50 @@ skinparam fileShadowing false
' with most nesting within the "engine" rectangle.
together {
- rectangle "Python API" as python_api
+ rectangle "[[arch-python.html Python API]]" as python_api
' "storage" displays as an oval.
storage " C/C++ \n applications " as application
- rectangle "wt Utility" as utility
+ rectangle "[[command_line.html wt Utility]]" as utility
}
' Trailing spaces for this label puts the text to the left.
rectangle "**WiredTiger Engine** " as wt_engine {
' Leading and trailing spaces make a wide rectangle.
together {
+ ' Putting two invisible file boxes on either side centers the middle box.
file "____" as SPACE_api
- rectangle " C API " as c_api
+ rectangle " [[modules.html C API]] " as c_api
file "____" as SPACE_api2
+ ' Influence the ordering of the invisible boxes using (hidden) arrows.
+ SPACE_api -[hidden]right-> c_api
+ c_api -[hidden]right-> SPACE_api2
}
- rectangle "[[#component-schema Schema]]" as schema
- rectangle "Cursor" as cursor
- rectangle "Transactions" as txn
- rectangle "Metadata" as meta
- rectangle "[[#component-dhandle dhandle/\n Btree]]" as btree
- rectangle " Row\n storage" as row
- rectangle " Column\n storage" as column
- rectangle "History\n Store" as history
- rectangle "Snapshots" as snapshot
- rectangle "Cache" as cache
- rectangle "Eviction" as evict
+ rectangle "[[arch-schema.html Schema]]" as schema
+ rectangle "[[arch-cursor.html Cursor]]" as cursor
+ rectangle "[[arch-transaction.html Transactions]]" as txn
+ rectangle "[[arch-metadata.html Metadata]]" as meta
+ rectangle "[[arch-dhandle.html dhandle/\nBtree]]" as btree
+ rectangle "[[arch-row.html Row\nStorage]]" as row
+ rectangle "[[arch-column.html Column\nStorage]]" as column
+ rectangle "[[arch-hs.html History\nStore]]" as history
+ rectangle "[[arch-snapshot.html Snapshots]]" as snapshot
+ rectangle "[[arch-cache.html Cache]]" as cache
+ rectangle "[[arch-eviction.html Eviction]]" as evict
together {
- rectangle " Block\n Manager" as block
+ rectangle "[[arch-block.html Block\nManager]]" as block
file "__________" as SPACE_log
- rectangle "Logging" as log
+ rectangle "[[arch-logging.html Logging]]" as log
file "___" as SPACE_log2
}
- rectangle " File System & OS \n interface" as os
+ rectangle " [[arch-fs-os.html File System & OS interface]] " as os
}
together {
- database "Database\n Files" as wt_file
- database " Log \n Files" as log_file
+ database "[[arch-data-file.html Database\nFiles]]" as wt_file
+ database " [[arch-log-file.html Log\nFiles]]" as log_file
}
-' Influence the ordering at the top using (hidden) directed labels
+' Influence the ordering at the top using (hidden) arrows.
python_api -[hidden]right-> application
application -[hidden]right-> utility
@@ -77,8 +82,6 @@ utility -down-> c_api
c_api -down-> schema
c_api -down-> cursor
c_api -down-> txn
-SPACE_api -[hidden]right-> c_api
-c_api -[hidden]right-> SPACE_api2
schema -down-> meta
schema -down-> btree
@@ -116,27 +119,86 @@ wt_file -[hidden]right-> log_file
@enduml
@plantuml_end
+</div>
We go into some detail for some of the internal components.
-@subpage arch-glossary
+@subpage arch-block
-WiredTiger assigns specific meanings to certain words. Here we decode them.
+The Block Manager manages the reading and writing of disk blocks.
+
+@subpage arch-cache
+
+Cache is represented by the various shared data structures that
+make up in-memory Btrees and subordinate data structures.
+
+@subpage arch-column
+
+Column Stores are Btrees that have as their key a record id.
+
+@subpage arch-cursor
+
+Cursors are used to get and modify data.
+
+@subpage arch-data-file
+
+The format of the data file is given by structures in \c block.h .
+
+@subpage arch-dhandle
+
+An internal structure called a Data Handle (dhandle) is used to represent and
+access Btrees and other data sources in WiredTiger.
+
+@subpage arch-eviction
+
+Eviction represents the process of removing old data from the cache,
+writing it to disk if it is dirty.
+
+@subpage arch-fs-os
+
+A layer of abstraction is above all operating system calls and
+a set of functions can be registered to be called for each file system
+operation.
+
+@subpage arch-hs
+
+The History Store tracks old versions of records.
+
+@subpage arch-logging
+
+WiredTiger writes all changes into a write-ahead log when configured.
+
+@subpage arch-log-file
+
+The format of a log file is defined in \c log.h .
+
+@subpage arch-metadata
+
+Metadata is stored as <code>uri, config</code> K/V pairs in a designated table.
+
+@subpage arch-python
+
+WiredTiger has a Python API that is useful for scripting and experimentation.
+
+@subpage arch-row
+
+Row Stores are Btrees that have a variable size key and data.
-\anchor component-schema
@subpage arch-schema
-Most applications begin to make use of WiredTiger by creating a table (or other
-data object) to store their data in. Create is one of several schema operations
-available in WiredTiger.
+A schema defines the format of the application data in WiredTiger.
+
+@subpage arch-snapshot
+
+Snapshots are implemented by storing transaction ids committed before
+the transaction started.
-\anchor component-dhandle
-@subpage arch-dhandle-lifecycle
+@subpage arch-transaction
-An internal structure called Data Handle (dhandle) is used to represent and
-access a table in WiredTiger. A dhandle gets created when a table is accessed
-for the first time. It is kept in a global list and is shared across the
-sessions. When a dhandle is not needed anymore and has been idle for a while,
-it is closed and destroyed, releasing all the resources associated with it.
+Transactions provide a powerful abstraction for multiple threads to operate on data concurrently.
+
+@subpage arch-glossary
+
+WiredTiger assigns specific meanings to certain words. Here we decode them.
*/
diff --git a/src/third_party/wiredtiger/src/docs/arch-log-file.dox b/src/third_party/wiredtiger/src/docs/arch-log-file.dox
new file mode 100644
index 00000000000..a684e023d76
--- /dev/null
+++ b/src/third_party/wiredtiger/src/docs/arch-log-file.dox
@@ -0,0 +1,7 @@
+/*! @arch_page arch-log-file Log File Format
+
+The format of a WiredTiger log file is defined in \c log.h .
+Each file begins with a fixed length header, followed by a set of
+variable length log records. Though the records may have varying
+length, they all begin on boundaries aligned to 128 bytes.
+*/
diff --git a/src/third_party/wiredtiger/src/docs/arch-logging.dox b/src/third_party/wiredtiger/src/docs/arch-logging.dox
new file mode 100644
index 00000000000..41c96f3ad78
--- /dev/null
+++ b/src/third_party/wiredtiger/src/docs/arch-logging.dox
@@ -0,0 +1,8 @@
+/*! @arch_page arch-logging Logging
+
+WiredTiger writes all changes into a write-ahead log when configured.
+Log files are shared among all writers, so changes made to
+different tables, by different threads, are interleaved.
+
+Internally, the current logging state is represented by the WT_LOG structure.
+*/
diff --git a/src/third_party/wiredtiger/src/docs/arch-metadata.dox b/src/third_party/wiredtiger/src/docs/arch-metadata.dox
new file mode 100644
index 00000000000..d709dc78810
--- /dev/null
+++ b/src/third_party/wiredtiger/src/docs/arch-metadata.dox
@@ -0,0 +1,19 @@
+/*! @arch_page arch-metadata Metadata
+
+Metadata in WiredTiger is stored as a table in the \c "WiredTiger.wt" file.
+The table's key is a \c uri string, and the value is its complete
+configuration string. The configuration itself is a list of key/value
+pairs in string form. The configuration's keys are dependent on the
+type of \c uri. Thus, a metadata entry with a \c uri key beginning with
+\c "table:" will be a configuration string
+having configuration entries like \c key_format and \c value_format to
+describe the data encoding for the uri. A metadata key beginning with
+\c "file:" will have a different set of configuration entries associated
+with it.
+
+A caller of WiredTiger can use WT_SESSION::open_cursor with a \c uri equal to
+\c "meta:" to read the metadata. Using this interface, metadata can only
+be queried, not changed. Changes to the metadata are effected by API calls
+such as WT_SESSION::create, WT_SESSION::drop, and WT_SESSION::rename.
+
+*/
diff --git a/src/third_party/wiredtiger/src/docs/arch-python.dox b/src/third_party/wiredtiger/src/docs/arch-python.dox
new file mode 100644
index 00000000000..c70cef8968a
--- /dev/null
+++ b/src/third_party/wiredtiger/src/docs/arch-python.dox
@@ -0,0 +1,18 @@
+/*! @arch_page arch-python Python API
+
+WiredTiger includes a Python API, which is useful for scripting and
+experimentation.
+
+The following simple example shows some highlights of the API:
+
+@snippet ex_access.py python simple example
+
+The API is implemented using SWIG. SWIG imports the \c \<wiredtiger.h\> file,
+and from it creates the classes and C support files needed to allow
+Python to import the WiredTiger library.
+
+Not every facility that is present in the C API is available in Python.
+In particular, setting up custom collators, extractors, compressors,
+encryptors and file system implementations must all be done within
+the C framework.
+*/
diff --git a/src/third_party/wiredtiger/src/docs/arch-row.dox b/src/third_party/wiredtiger/src/docs/arch-row.dox
new file mode 100644
index 00000000000..ae3ffe22589
--- /dev/null
+++ b/src/third_party/wiredtiger/src/docs/arch-row.dox
@@ -0,0 +1,12 @@
+/*! @arch_page arch-row Row Store
+
+Row Stores are Btrees stored in WiredTiger that do not have a record id
+as key. Thus, they implement a generalized version of a Btree, where
+the key and data can be arbitrary length.
+
+Internally, a row store and column store both use a common \c WT_BTREE
+structure. The fundamental difference is that
+<code>WT_BTREE->type == BTREE_ROW</code> for row stores.
+Internal functions that navigate, access and manipulate Btrees have
+code sprinkled throughout that is conditional on <code>WT_BTREE->type</code>.
+*/
diff --git a/src/third_party/wiredtiger/src/docs/arch-schema-ops.dox b/src/third_party/wiredtiger/src/docs/arch-schema-ops.dox
new file mode 100644
index 00000000000..8428208c6d9
--- /dev/null
+++ b/src/third_party/wiredtiger/src/docs/arch-schema-ops.dox
@@ -0,0 +1,206 @@
+/*! @page arch-schema-ops Schema Operations
+
+A @ref arch-schema defines the format of the application data in WiredTiger.
+This section details the internals of various schema operations.
+
+Schema operations cause an update to the metadata and are performed under a
+schema lock to avoid concurrent operations on the same object. The following
+sequence of steps define a generic schema operation:
+
+@plantuml_start{schema_generic.png}
+@startuml{schema_generic.png}
+:A schema operation;
+partition with-schema-lock {
+ :perform operation on underlying data-object;
+ :update metadata-file;
+ :checkpoint and sync metadata;
+}
+stop
+@enduml
+@plantuml_end
+
+@section schema_create Schema Create
+
+The create schema operation is responsible for creating the underlying data
+object on the filesystem with the right parameters and then creating an entry
+for this new object into the metadata. The sequence of operations involved in a
+create for various schema types are as follows:
+
+@plantuml_start{schema_create.png}
+@startuml{schema_create.png}
+:WT_SESSION->create(.,name,.)
+(__session_create());
+
+partition session-API-call {
+ :API session init with NULL dhandle;
+ :exit if PANIC flag set;
+ :exit if invalid configuration;
+}
+
+:validate "name" and if passed "type" config parameter;
+note right
+  "name" parameter is called "uri" internally.
+ "type" is usually not passed and generally
+ implied from the uri.
+end note
+
+partition with-schema-lock {
+ partition with-table-lock {
+ :turn on meta tracking;
+ :check uri}
+
+ split
+ :uri matches "file:"
+ ("file" is the underlying
+ type for all the objects);
+ split again
+ :uri matches "colgroup:";
+ :__create_colgroup();
+ split again
+ :uri matches "table:";
+ :__create_table();
+ split again
+ :uri matches "lsm:";
+ :__wt_lsm_tree_create();
+ split again
+ :uri matches "index:";
+ :__create_index();
+ split again
+ :matches a named data-source;
+ :__create_data_source();
+ end split
+
+ partition __create_file() {
+ :exit if file exists;
+ :validate allocation size;
+ :block manager creates the file:
+ 1.create file using __wt_open()
+ 2.write an initial descriptor to file
+ 3.fsync and close the file handle;
+ if (metadata-file?) then (yes)
+ else (no)
+ :update metadata with file
+ configuration and version;
+ endif
+ :check if file setup correctly by
+ getting btree handle with
+ WT_DHANDLE_EXCLUSIVE set;
+ if (metatracking on?) then (yes)
+ :track locked handle*;
+ else (no)
+ :release btree -
+ sync and close;
+ endif
+ }
+
+ partition turn-off-meta-tracking {
+ if (errors?) then (yes)
+ :unroll operations;
+ else (no)
+ if (logging?) then (yes)
+ :sync log;
+ else (no)
+ endif
+ :checkpoint and sync;
+ endif
+ :apply post-commit ops:
+ release tracked (handle) btree* -
+ sync and close;
+ note right
+ if meta tracking is on, this btree
+ was being tracked as locked. As part
+      of turning off meta tracking, we sync
+ and close this btree
+ end note
+ }
+ }
+}
+
+:API-end;
+
+stop
+@enduml
+@plantuml_end
+
+@section schema_rename Schema Rename
+
+The rename schema operation is responsible for renaming the underlying data
+object on the filesystem and updating the metadata accordingly. The sequence of
+operations involved in a rename for various schema types is as follows:
+
+@plantuml_start{schema_rename.png}
+@startuml{schema_rename.png}
+:WT_SESSION->rename(old-uri, new-uri, .)
+(__session_rename());
+:session-API-call;
+
+partition with-checkpoint-lock {
+ partition with-schema-lock {
+ partition with-table-write-lock {
+ :validate new uri-type to match the old type;
+ :turn on meta tracking;
+ :check uri}
+
+ split
+ :uri matches "file:"
+ ("file" is the underlying
+ type for all the objects);
+ split again
+ :uri matches "lsm:";
+ :__wt_lsm_tree_rename();
+ split again
+ :matches a named data-source;
+ :WT_DATA_SOURCE::rename();
+ split again
+ :uri matches "table:";
+ partition __rename_table() {
+ :rename colgroups and indices represented by the table:
+ 1. extract names from the uri
+ 2. create new uri with existing types and configuration
+ 3. recursive call the rename operation on individual
+ colgroup and index with the old and the new uri
+ 4. remove old entry for colgroups and indices from
+ the metadata table and add the new ones;
+ :close and remove table handle from the session;
+ :remove old table entry from the metadata table
+ and add a new one;
+ }
+ end split
+
+ partition __rename_file() {
+ :fail if backup cursor open and schema operations will conflict;
+ :close btree handles in the file;
+ :fail if file with the old name doesn't exist or with the new
+ name exists;
+ :remove old file entries and add new in the metadata;
+ :rename the underlying file;
+ if (meta-tracking?) then (yes)
+ :track filesystem op;
+ else (no)
+ endif
+ }
+
+ :bump schema generation number to ignore stale data;
+
+ partition turn-off-meta-tracking {
+ if (errors?) then (yes)
+ :unroll operations;
+ else (no)
+ if (logging?) then (yes)
+ :sync log;
+ else (no)
+ endif
+ :checkpoint and sync;
+ endif
+ }
+ }
+ }
+}
+
+:API-end;
+
+stop
+@enduml
+@plantuml_end
+
+*/
diff --git a/src/third_party/wiredtiger/src/docs/arch-schema.dox b/src/third_party/wiredtiger/src/docs/arch-schema.dox
index f6ead3ac824..6494da597a2 100644
--- a/src/third_party/wiredtiger/src/docs/arch-schema.dox
+++ b/src/third_party/wiredtiger/src/docs/arch-schema.dox
@@ -1,208 +1,11 @@
-/*! @page arch-schema Schema Operations
+/*! @arch_page arch-schema Schema
A schema defines the format of the application data in WiredTiger. WiredTiger
supports various types of schemas (See @ref schema for more
-information), operated upon through a WT_SESSION reference. This section details
-the internals of these various schema operations.
+information), operated upon through a WT_SESSION reference.
-Schema operations cause an update to the metadata and are performed under a
-schema lock to avoid concurrent operations on the same object. The following
-sequence of steps define a generic schema operation:
+@subpage arch-schema-ops
-@plantuml_start{schema_generic.png}
-@startuml{schema_generic.png}
-:A schema operation;
-partition with-schema-lock {
- :perform operation on underlying data-object;
- :update metadata-file;
- :checkpoint and sync metadata;
-}
-stop
-@enduml
-@plantuml_end
-
-@section schema_create Schema Create
-
-The create schema operation is responsible for creating the underlying data
-object on the filesystem with the right parameters and then creating an entry
-for this new object into the metadata. The sequence of operations involved in a
-create for various schema types are as follows:
-
-@plantuml_start{schema_create.png}
-@startuml{schema_create.png}
-:WT_SESSION->create(.,name,.)
-(__session_create());
-
-partition session-API-call {
- :API session init with NULL dhandle;
- :exit if PANIC flag set;
- :exit if invalid configuration;
-}
-
-:validate "name" and if passed "type" config parameter;
-note right
- "name" parameter is called as "uri" internally.
- "type" is usually not passed and generally
- implied from the uri.
-end note
-
-partition with-schema-lock {
- partition with-table-lock {
- :turn on meta tracking;
- :check uri}
-
- split
- :uri matches "file:"
- ("file" is the underlying
- type for all the objects);
- split again
- :uri matches "colgroup:";
- :__create_colgroup();
- split again
- :uri matches "table:";
- :__create_table();
- split again
- :uri matches "lsm:";
- :__wt_lsm_tree_create();
- split again
- :uri matches "index:";
- :__create_index();
- split again
- :matches a named data-source;
- :__create_data_source();
- end split
-
- partition __create_file() {
- :exit if file exists;
- :validate allocation size;
- :block manager creates the file:
- 1.create file using __wt_open()
- 2.write an initial descriptor to file
- 3.fsync and close the file handle;
- if (metadata-file?) then (yes)
- else (no)
- :update metadata with file
- configuration and version;
- endif
- :check if file setup correctly by
- getting btree handle with
- WT_DHANDLE_EXCLUSIVE set;
- if (metatracking on?) then (yes)
- :track locked handle*;
- else (no)
- :release btree -
- sync and close;
- endif
- }
-
- partition turn-off-meta-tracking {
- if (errors?) then (yes)
- :unroll operations;
- else (no)
- if (logging?) then (yes)
- :sync log;
- else (no)
- endif
- :checkpoint and sync;
- endif
- :apply post-commit ops:
- release tracked (handle) btree* -
- sync and close;
- note right
- if meta tracking is on, this btree
- was being tracked as locked. As part
- of tuning off meta tracking, we sync
- and close this btree
- end note
- }
- }
-}
-
-:API-end;
-
-stop
-@enduml
-@plantuml_end
-
-@section schema_rename Schema Rename
-
-The rename schema operation is responsible for renaming the underlying data
-object on the filesystem and updating the metadata accordingly. The sequence of
-operations involved in a rename for various schema types are as follows:
-
-@plantuml_start{schema_rename.png}
-@startuml{schema_rename.png}
-:WT_SESSION->rename(old-uri, new-uri, .)
-(__session_rename());
-:session-API-call;
-
-partition with-checkpoint-lock {
- partition with-schema-lock {
- partition with-table-write-lock {
- :validate new uri-type to match the old type;
- :turn on meta tracking;
- :check uri}
-
- split
- :uri matches "file:"
- ("file" is the underlying
- type for all the objects);
- split again
- :uri matches "lsm:";
- :__wt_lsm_tree_rename();
- split again
- :matches a named data-source;
- :WT_DATA_SOURCE::rename();
- split again
- :uri matches "table:";
- partition __rename_table() {
- :rename colgroups and indices represented by the table:
- 1. extract names from the uri
- 2. create new uri with existing types and configuration
- 3. recursive call the rename operation on individual
- colgroup and index with the old and the new uri
- 4. remove old entry for colgroups and indices from
- the metadata table and add the new ones;
- :close and remove table handle from the session;
- :remove old table entry from the metadata table
- and add a new one;
- }
- end split
-
- partition __rename_file() {
- :fail if backup cursor open and schema operations will conflict;
- :close btree handles in the file;
- :fail if file with the old name doesn't exist or with the new
- name exists;
- :remove old file entries and add new in the metadata;
- :rename the underlying file;
- if (meta-tracking?) then (yes)
- :track filesystem op;
- else (no)
- endif
- }
-
- :bump schema generation number to ignore stale data;
-
- partition turn-off-meta-tracking {
- if (errors?) then (yes)
- :unroll operations;
- else (no)
- if (logging?) then (yes)
- :sync log;
- else (no)
- endif
- :checkpoint and sync;
- endif
- }
- }
- }
-}
-
-:API-end;
-
-stop
-@enduml
-@plantuml_end
+This section details the internals of various API operations that operate on schemas, such as WT_SESSION::create and WT_SESSION::rename.
*/
diff --git a/src/third_party/wiredtiger/src/docs/arch-snapshot.dox b/src/third_party/wiredtiger/src/docs/arch-snapshot.dox
new file mode 100644
index 00000000000..8f8142d1f33
--- /dev/null
+++ b/src/third_party/wiredtiger/src/docs/arch-snapshot.dox
@@ -0,0 +1,9 @@
+/*! @arch_page arch-snapshot Snapshot
+
+When the snapshot isolation level is used in WiredTiger, the transaction
+will see versions of records committed before the transaction started.
+
+Snapshots are implemented by storing the set of transaction ids committed
+before the transaction started in the \c WT_TXN structure. Transaction
+ids older than a certain point need not be stored.
+*/
diff --git a/src/third_party/wiredtiger/src/docs/arch-transaction.dox b/src/third_party/wiredtiger/src/docs/arch-transaction.dox
new file mode 100644
index 00000000000..bc3c4e59722
--- /dev/null
+++ b/src/third_party/wiredtiger/src/docs/arch-transaction.dox
@@ -0,0 +1,8 @@
+/*! @arch_page arch-transaction Transactions
+
+Transactions provide a powerful abstraction for multiple threads to operate on data concurrently.
+A caller of WiredTiger uses @ref transactions within the API to start and stop transactions within
+a session (thread of control).
+
+Internally, the current transaction state is represented by the WT_TXN structure.
+*/
diff --git a/src/third_party/wiredtiger/src/docs/backup.dox b/src/third_party/wiredtiger/src/docs/backup.dox
index 01223283e60..82979f16423 100644
--- a/src/third_party/wiredtiger/src/docs/backup.dox
+++ b/src/third_party/wiredtiger/src/docs/backup.dox
@@ -175,6 +175,15 @@ An example of opening the backup data source for block-based incremental backup:
@snippet ex_all.c incremental block backup
+The URI \c backup:query_id can be used to return existing block incremental identifier
+strings. It operates like a backup cursor but will return the identifier strings
+as the keys of the cursor. There are no values. As with all backup cursors, there
+can only be one backup cursor of any type open at a time.
+
+An example of opening the backup data source to query incremental identifiers:
+
+@snippet ex_backup_block.c Query existing IDs
+
@section backup_incremental Log-based Incremental backup
Once a backup has been done, it can be rolled forward incrementally by
diff --git a/src/third_party/wiredtiger/src/docs/custom-data-sources.dox b/src/third_party/wiredtiger/src/docs/custom-data-sources.dox
index 340f25997a5..00eca5fcf38 100644
--- a/src/third_party/wiredtiger/src/docs/custom-data-sources.dox
+++ b/src/third_party/wiredtiger/src/docs/custom-data-sources.dox
@@ -233,4 +233,10 @@ responsibility of the implementation to protect any shared data. For
example, object operations such as WT_DATA_SOURCE::drop might not be
permitted while there are open cursors for the WT_DATA_SOURCE object.
+@section custom_data_source_transactions Transactions
+
+WiredTiger does not start implicit transactions for calls to the
+WT_DATA_SOURCE methods. It is the responsibility of the custom data source
+implementation to support transaction semantics.
+
*/
diff --git a/src/third_party/wiredtiger/src/docs/data-sources.dox b/src/third_party/wiredtiger/src/docs/data-sources.dox
index f14886eccf9..6b55f6f24ec 100644
--- a/src/third_party/wiredtiger/src/docs/data-sources.dox
+++ b/src/third_party/wiredtiger/src/docs/data-sources.dox
@@ -22,8 +22,9 @@ Some administrative tasks can be accomplished using the following special
cursor types that give access to data managed by WiredTiger:
<table>
@hrow{URI, Type, Notes}
-@row{<tt>backup:</tt>,
- backup cursor,
+@row{<tt>backup:[query_id]</tt>,
+ backup cursor (optionally only returning block incremental ids if
+ <code>query_id</code> is appended),
key=<code>string</code>\, see @ref backup for details}
@row{<code>log:</code>,
log cursor,
@@ -34,7 +35,7 @@ cursor types that give access to data managed by WiredTiger:
see @ref cursor_log for details}
@row{<tt>metadata:[create]</tt>,
metadata cursor (optionally only returning configuration strings for
- WT_SESSION::create if <code>create</code> is appended,
+ WT_SESSION::create if <code>create</code> is appended),
key=<code>string</code>\, value=<code>string</code>\,<br>
see @ref metadata for details}
@row{<tt>statistics:[\<data source URI\>]</tt>,
diff --git a/src/third_party/wiredtiger/src/docs/file-formats.dox b/src/third_party/wiredtiger/src/docs/file-formats.dox
index 9e66293365a..10433703835 100644
--- a/src/third_party/wiredtiger/src/docs/file-formats.dox
+++ b/src/third_party/wiredtiger/src/docs/file-formats.dox
@@ -96,13 +96,11 @@ pages to disk.
Dictionary compression is disabled by default.
- Huffman encoding reduces the size requirement of both the in-memory
-and on-disk objects by compressing individual key/value items, and can
-be separately configured either or both keys and values. The cost is
-additional CPU and memory use when searching the in-memory tree (if keys
-are encoded), and additional CPU and memory use when returning values
-from the in-memory tree and when writing pages to disk. Note the
-additional CPU cost of Huffman encoding can be high, and should be
-considered. (See @subpage_single huffman for details.)
+and on-disk objects by compressing individual value items. The cost is
+additional CPU and memory use when returning values from the in-memory
+tree and when writing pages to disk. Note the additional CPU cost of
+Huffman encoding can be high, and should be considered. (See
+@subpage_single huffman for details.)
Huffman encoding is disabled by default.
diff --git a/src/third_party/wiredtiger/src/docs/huffman.dox b/src/third_party/wiredtiger/src/docs/huffman.dox
index 41e4be5ced3..e2912394d1f 100644
--- a/src/third_party/wiredtiger/src/docs/huffman.dox
+++ b/src/third_party/wiredtiger/src/docs/huffman.dox
@@ -1,18 +1,13 @@
/*! @page huffman Huffman Encoding
-Keys in row-stores and variable-length values in either row- or
-column-stores can be compressed with Huffman encoding.
+Variable-length values in either row- or column-stores can be
+compressed with Huffman encoding.
Huffman compression is maintained in memory as well as on disk, and can
increase the amount of usable data the cache can hold as well as
decrease the size of the data on disk. The additional CPU cost of
Huffman coding can be high, and should be considered.
-To configure Huffman encoding for the key in a row-store, specify \c
-huffman_key=english, \c huffman_key=utf8<file> or \c
-huffman_key=utf16<file> in the configuration passed to \c
-WT_SESSION::create.
-
To configure Huffman encoding for a variable-length value in either a
row-store or a column-store, specify \c huffman_value=english, \c
huffman_value=utf8<file> or \c huffman_value=utf16<file> in the
diff --git a/src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/wt_diagram.cmapx b/src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/wt_diagram.cmapx
index 411d3905b8e..0400be53354 100644
--- a/src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/wt_diagram.cmapx
+++ b/src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/wt_diagram.cmapx
@@ -1,4 +1,21 @@
<map id="wt_diagram_map" name="wt_diagram_map">
-<area shape="rect" id="id1" href="#component-schema" title="#component-schema" alt="" coords="98,224,151,241"/>
-<area shape="rect" id="id2" href="#component-dhandle" title="#component-dhandle" alt="" coords="181,320,244,353"/>
+<area shape="rect" id="id1" href="modules.html" title="modules.html" alt="" coords="248,128,284,145"/>
+<area shape="rect" id="id2" href="arch-cache.html" title="arch-cache.html" alt="" coords="198,546,240,563"/>
+<area shape="rect" id="id3" href="arch-cursor.html" title="arch-cursor.html" alt="" coords="206,224,253,241"/>
+<area shape="rect" id="id4" href="arch-eviction.html" title="arch-eviction.html" alt="" coords="295,546,348,563"/>
+<area shape="rect" id="id5" href="arch-schema.html" title="arch-schema.html" alt="" coords="98,224,151,241"/>
+<area shape="rect" id="id6" href="arch-logging.html" title="arch-logging.html" alt="" coords="388,651,444,667"/>
+<area shape="rect" id="id7" href="command_line.html" title="command_line.html" alt="" coords="374,24,433,40"/>
+<area shape="rect" id="id8" href="arch-log-file.html" title="arch-log-file.html" alt="" coords="307,865,338,898"/>
+<area shape="rect" id="id9" href="arch-metadata.html" title="arch-metadata.html" alt="" coords="25,329,88,345"/>
+<area shape="rect" id="id10" href="arch-snapshot.html" title="arch-snapshot.html" alt="" coords="381,442,452,458"/>
+<area shape="rect" id="id11" href="arch-python.html" title="arch-python.html" alt="" coords="84,24,157,40"/>
+<area shape="rect" id="id12" href="arch-transaction.html" title="arch-transaction.html" alt="" coords="340,329,428,345"/>
+<area shape="rect" id="id13" href="arch-hs.html" title="arch-hs.html" alt="" coords="93,642,141,675"/>
+<area shape="rect" id="id14" href="arch-row.html" title="arch-row.html" alt="" coords="163,433,216,466"/>
+<area shape="rect" id="id15" href="arch-column.html" title="arch-column.html" alt="" coords="272,433,326,466"/>
+<area shape="rect" id="id16" href="arch-block.html" title="arch-block.html" alt="" coords="196,642,256,675"/>
+<area shape="rect" id="id17" href="arch-dhandle.html" title="arch-dhandle.html" alt="" coords="181,320,244,353"/>
+<area shape="rect" id="id18" href="arch-data-file.html" title="arch-data-file.html" alt="" coords="181,865,244,898"/>
+<area shape="rect" id="id19" href="arch-fs-os.html" title="arch-fs-os.html" alt="" coords="177,755,354,772"/>
</map>
diff --git a/src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/wt_diagram.png b/src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/wt_diagram.png
index 57480c2156f..99ae107fa95 100644
--- a/src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/wt_diagram.png
+++ b/src/third_party/wiredtiger/src/docs/images/plantuml_gen_img/wt_diagram.png
Binary files differ
diff --git a/src/third_party/wiredtiger/src/docs/spell.ok b/src/third_party/wiredtiger/src/docs/spell.ok
index b153ba12668..be45196ff97 100644
--- a/src/third_party/wiredtiger/src/docs/spell.ok
+++ b/src/third_party/wiredtiger/src/docs/spell.ok
@@ -142,6 +142,7 @@ bool
boolean
booleans
br
+btmem
btree
btrees
bufs
@@ -238,6 +239,7 @@ eq
erlang
errno
exe
+extensibility
fadvise
failchk
fallocate
@@ -264,6 +266,7 @@ fnv
forw
fput
freelist
+fs
fsync
ftruncate
fvisibility
diff --git a/src/third_party/wiredtiger/src/docs/style/wiredtiger.css b/src/third_party/wiredtiger/src/docs/style/wiredtiger.css
index 984e525f42f..9757f79b7d8 100644
--- a/src/third_party/wiredtiger/src/docs/style/wiredtiger.css
+++ b/src/third_party/wiredtiger/src/docs/style/wiredtiger.css
@@ -26,6 +26,50 @@ img {
height: auto;
}
+div.arch_diagram {
+ display: block;
+ background: #FFFFFF;
+ background: radial-gradient(#FFDDAA,#FFFFFF);
+}
+
+/* match just one image */
+img[src="wt_diagram.png"] {
+ display: block;
+ margin-left: auto;
+ margin-right: auto;
+}
+
+.arch_head table {
+ background-color: #FFDDAA;
+ border-color: #624D32;
+ /*border: solid 1px;*/
+ border-spacing: 5px;
+}
+
+.arch_head td {
+ border-color: #FFFFFF;
+ /*border: solid 1px;*/
+ vertical-align: top;
+ text-align: left;
+}
+
+.arch_head th {
+ border-color: #624D32;
+ vertical-align: top;
+ text-align: left;
+}
+
+.arch_thumbnail {
+ padding: 0px 0px 0px 0px;
+ border: 3px solid #fff;
+ outline: 1px solid #000;
+ margin: 3px;
+ width: 70px;
+ height: 70px;
+ display: block;
+ text-align: center;
+}
+
.tablist {
width: 800px;
}
diff --git a/src/third_party/wiredtiger/src/docs/tools/doxfilter.py b/src/third_party/wiredtiger/src/docs/tools/doxfilter.py
index efe5c3ca80a..42f47426578 100755
--- a/src/third_party/wiredtiger/src/docs/tools/doxfilter.py
+++ b/src/third_party/wiredtiger/src/docs/tools/doxfilter.py
@@ -32,7 +32,19 @@
# (i.e., from "/*!" to "/**"), because the latter are configured to not
# search for brief descriptions at the beginning of pages.
-import re, sys
+import os, re, sys
+
+# We want to import the docs_data.py page from the dist directory.
+# First get our (src/doc/tools) directory.
+doc_tools_dir = os.path.dirname(os.path.realpath(__file__))
+top_dir = os.path.dirname(os.path.dirname(os.path.dirname(doc_tools_dir)))
+dist_dir = os.path.join(top_dir, 'dist')
+sys.path.insert(1, dist_dir)
+import docs_data
+
+arch_doc_lookup = {}
+for page in docs_data.arch_doc_pages:
+ arch_doc_lookup[page.doxygen_name] = page
progname = 'doxfilter.py'
linenum = 0
@@ -42,8 +54,40 @@ def err(arg):
sys.stderr.write(filename + ':' + str(linenum) + ': ERROR: ' + arg + '\n')
sys.exit(1)
+# Convert @arch_page to @arch_page_expanded, adding in information
+# from docs_data.py.
+def process_arch(source):
+ result = ''
+ mpage_content = []
+ arch_page_pat = re.compile(r'^(.*)@arch_page *([^ ]*) *(.*)')
+ for line in source.split('\n'):
+ m = re.search(arch_page_pat, line)
+ if line.count('@arch_page') > 0 and not m:
+ err('@arch_page incorrect syntax, need identifier and title')
+ if m:
+ groups = m.groups()
+ prefix = groups[0]
+ doxy_name = groups[1]
+ title = groups[2]
+
+ page_info = arch_doc_lookup[doxy_name]
+ data_structures_str = '<code>' + '<br>'.join(page_info.data_structures) + '</code>'
+ files_str = '<code>' + '<br>'.join(page_info.files) + '</code>'
+ result += prefix + '@arch_page_top{' + \
+ doxy_name + ',' + \
+ title + '}\n'
+ result += '@arch_page_table{' + \
+ data_structures_str + ',' + \
+ files_str + '}\n'
+ else:
+ result += line + '\n'
+ return result
+
def process(source):
- return source.replace(r'/*!', r'/**')
+ source = source.replace(r'/*!', r'/**')
+ if '@arch_page' in source:
+ source = process_arch(source)
+ return source
if __name__ == '__main__':
for f in sys.argv[1:]:
diff --git a/src/third_party/wiredtiger/src/docs/tune-page-size-and-comp.dox b/src/third_party/wiredtiger/src/docs/tune-page-size-and-comp.dox
index 9659d389bf0..b39a9de2594 100644
--- a/src/third_party/wiredtiger/src/docs/tune-page-size-and-comp.dox
+++ b/src/third_party/wiredtiger/src/docs/tune-page-size-and-comp.dox
@@ -356,20 +356,19 @@ such a configuration string is as follows:
- Huffman
\n
-Reduces the size requirement by compressing individual key/value items, and
-can be separately configured either or both keys and values. The additional
-CPU cost of Huffman encoding can be high, and should be considered. (See @ref
-huffman for details.)
+Reduces the size requirement by compressing individual value items, and
+can be configured for values. The additional CPU cost of Huffman encoding
+can be high, and should be considered. (See @ref huffman for details.)
\n\n
Huffman configuration:
\n
-Specified using the \c huffman_key and \c huffman_value configuration
+Specified using the \c huffman_value configuration
options to WT_SESSION::create. These options can take values of "english"
(to use a built-in English language frequency table), "utf8<file>" or
"utf16<file>" (to use a custom UTF-8 or UTF-16 symbol frequency table file).
An example of such a configuration string is as follows:
<pre>
-"key_format=S,value_format=S,huffman_key=english,huffman_value=english"
+"key_format=S,value_format=S,huffman_value=english"
</pre>
- Block Compression
diff --git a/src/third_party/wiredtiger/src/history/hs_cursor.c b/src/third_party/wiredtiger/src/history/hs_cursor.c
index aa3f4aea5cb..3923b8b2bdf 100644
--- a/src/third_party/wiredtiger/src/history/hs_cursor.c
+++ b/src/third_party/wiredtiger/src/history/hs_cursor.c
@@ -266,6 +266,7 @@ __wt_hs_find_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_forma
*/
if (__wt_txn_tw_stop_visible_all(session, &hs_cbt->upd_value->tw)) {
WT_STAT_CONN_INCR(session, cursor_prev_hs_tombstone);
+ WT_STAT_DATA_INCR(session, cursor_prev_hs_tombstone);
continue;
}
/*
@@ -382,6 +383,7 @@ __wt_hs_find_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_forma
mod_upd = NULL;
}
WT_STAT_CONN_INCR(session, cache_hs_read_squash);
+ WT_STAT_DATA_INCR(session, cache_hs_read_squash);
}
/*
@@ -417,6 +419,7 @@ err:
if (upd == NULL) {
ret = WT_NOTFOUND;
WT_STAT_CONN_INCR(session, cache_hs_read_miss);
+ WT_STAT_DATA_INCR(session, cache_hs_read_miss);
} else {
WT_STAT_CONN_INCR(session, cache_hs_read);
WT_STAT_DATA_INCR(session, cache_hs_read);
diff --git a/src/third_party/wiredtiger/src/history/hs_rec.c b/src/third_party/wiredtiger/src/history/hs_rec.c
index 08856b361a2..cc067d4007c 100644
--- a/src/third_party/wiredtiger/src/history/hs_rec.c
+++ b/src/third_party/wiredtiger/src/history/hs_rec.c
@@ -25,12 +25,12 @@ static int __hs_fixup_out_of_order_from_pos(WT_SESSION_IMPL *session, WT_CURSOR
const WT_ITEM *srch_key);
/*
- * __hs_insert_updates_verbose --
+ * __hs_verbose_cache_stats --
* Display a verbose message once per checkpoint with details about the cache state when
* performing a history store table write.
*/
static void
-__hs_insert_updates_verbose(WT_SESSION_IMPL *session, WT_BTREE *btree)
+__hs_verbose_cache_stats(WT_SESSION_IMPL *session, WT_BTREE *btree)
{
WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
@@ -132,6 +132,7 @@ __hs_insert_record_with_btree_int(WT_SESSION_IMPL *session, WT_CURSOR *cursor, u
* incrementing the history store insert statistic by one.
*/
WT_STAT_CONN_INCR(session, cache_hs_insert);
+ WT_STAT_DATA_INCR(session, cache_hs_insert);
err:
if (ret != 0) {
@@ -159,10 +160,21 @@ __hs_insert_record_with_btree(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BT
const WT_ITEM *key, const uint8_t type, const WT_ITEM *hs_value,
WT_HS_TIME_POINT *start_time_point, WT_HS_TIME_POINT *stop_time_point)
{
+#ifdef HAVE_DIAGNOSTIC
+ WT_CURSOR_BTREE *hs_cbt;
+#endif
WT_DECL_ITEM(hs_key);
WT_DECL_ITEM(srch_key);
+#ifdef HAVE_DIAGNOSTIC
+ WT_DECL_ITEM(existing_val);
+#endif
WT_DECL_RET;
wt_timestamp_t hs_start_ts;
+#ifdef HAVE_DIAGNOSTIC
+ wt_timestamp_t durable_timestamp_diag;
+ wt_timestamp_t hs_stop_durable_ts_diag;
+ uint64_t upd_type_full_diag;
+#endif
uint64_t counter, hs_counter;
uint32_t hs_btree_id;
int cmp;
@@ -173,6 +185,12 @@ __hs_insert_record_with_btree(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BT
WT_ERR(__wt_scr_alloc(session, 0, &hs_key));
WT_ERR(__wt_scr_alloc(session, 0, &srch_key));
+#ifdef HAVE_DIAGNOSTIC
+ /* Allocate buffer for the existing history store value for the same key. */
+ WT_ERR(__wt_scr_alloc(session, 0, &existing_val));
+ hs_cbt = (WT_CURSOR_BTREE *)cursor;
+#endif
+
/*
* The session should be pointing at the history store btree since this is the one that we'll be
* inserting into. The btree parameter that we're passing in should is the btree that the
@@ -204,7 +222,6 @@ __hs_insert_record_with_btree(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BT
true);
if (ret == 0) {
WT_ERR(cursor->get_key(cursor, &hs_btree_id, hs_key, &hs_start_ts, &hs_counter));
-
/*
* Check the whether the existing record is also from the same timestamp.
*
@@ -213,8 +230,23 @@ __hs_insert_record_with_btree(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BT
*/
if (hs_btree_id == btree->id && start_time_point->ts == hs_start_ts) {
WT_ERR(__wt_compare(session, NULL, hs_key, key, &cmp));
+
+#ifdef HAVE_DIAGNOSTIC
+ if (cmp == 0) {
+ WT_ERR(cursor->get_value(cursor, &hs_stop_durable_ts_diag, &durable_timestamp_diag,
+ &upd_type_full_diag, existing_val));
+ WT_ERR(__wt_compare(session, NULL, existing_val, hs_value, &cmp));
+ if (cmp == 0)
+ WT_ASSERT(session,
+ start_time_point->txnid == WT_TXN_NONE ||
+ start_time_point->txnid != hs_cbt->upd_value->tw.start_txn ||
+ start_time_point->ts != hs_cbt->upd_value->tw.start_ts);
+ counter = hs_counter + 1;
+ }
+#else
if (cmp == 0)
counter = hs_counter + 1;
+#endif
}
}
@@ -229,11 +261,30 @@ __hs_insert_record_with_btree(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BT
session, cursor, btree, key, start_time_point->ts, &counter, srch_key));
}
+#ifdef HAVE_DIAGNOSTIC
+ /*
+ * We may have fixed out of order keys. Make sure that we haven't accidentally added a duplicate
+ * of the key we are about to insert.
+ */
+ if (F_ISSET(cursor, WT_CURSTD_KEY_SET)) {
+ WT_ERR(cursor->get_key(cursor, &hs_btree_id, hs_key, &hs_start_ts, &hs_counter));
+ if (hs_btree_id == btree->id && start_time_point->ts == hs_start_ts &&
+ hs_counter == counter) {
+ WT_ERR(__wt_compare(session, NULL, hs_key, key, &cmp));
+ WT_ASSERT(session, cmp != 0);
+ }
+ }
+#endif
/* The tree structure can change while we try to insert the mod list, retry if that happens. */
while ((ret = __hs_insert_record_with_btree_int(session, cursor, btree->id, key, type, hs_value,
- start_time_point, stop_time_point, counter)) == WT_RESTART)
+ start_time_point, stop_time_point, counter)) == WT_RESTART) {
WT_STAT_CONN_INCR(session, cache_hs_insert_restart);
+ WT_STAT_DATA_INCR(session, cache_hs_insert_restart);
+ }
err:
+#ifdef HAVE_DIAGNOSTIC
+ __wt_scr_free(session, &existing_val);
+#endif
__wt_scr_free(session, &hs_key);
__wt_scr_free(session, &srch_key);
/* We did a row search, release the cursor so that the page doesn't continue being held. */
@@ -434,6 +485,7 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
if (upd->start_ts != upd->durable_ts) {
WT_ASSERT(session, min_insert_ts < upd->durable_ts);
WT_STAT_CONN_INCR(session, cache_hs_order_lose_durable_timestamp);
+ WT_STAT_DATA_INCR(session, cache_hs_order_lose_durable_timestamp);
}
__wt_verbose(session, WT_VERB_TIMESTAMP,
"fixing out-of-order updates during insertion; start_ts=%s, durable_start_ts=%s, "
@@ -443,6 +495,7 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
__wt_timestamp_to_string(min_insert_ts, ts_string[2]));
upd->start_ts = upd->durable_ts = min_insert_ts;
WT_STAT_CONN_INCR(session, cache_hs_order_fixup_insert);
+ WT_STAT_DATA_INCR(session, cache_hs_order_fixup_insert);
} else if (upd->start_ts != WT_TS_NONE)
/*
* Don't reset to WT_TS_NONE as we don't want to clear the timestamps for updates
@@ -516,12 +569,14 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
!F_ISSET(first_non_ts_upd, WT_UPDATE_CLEARED_HS)) {
/* We can only delete history store entries that have timestamps. */
WT_ERR(__wt_hs_delete_key_from_ts(session, btree->id, key, 1, true));
- WT_STAT_CONN_INCR(session, cache_hs_key_truncate_mix_ts);
+ WT_STAT_CONN_INCR(session, cache_hs_key_truncate_non_ts);
+ WT_STAT_DATA_INCR(session, cache_hs_key_truncate_non_ts);
F_SET(first_non_ts_upd, WT_UPDATE_CLEARED_HS);
} else if (first_non_ts_upd != NULL && !F_ISSET(first_non_ts_upd, WT_UPDATE_CLEARED_HS) &&
(list->ins == NULL || ts_updates_in_hs)) {
WT_ERR(__wt_hs_delete_key_from_ts(session, btree->id, key, 1, true));
- WT_STAT_CONN_INCR(session, cache_hs_key_truncate_mix_ts);
+ WT_STAT_CONN_INCR(session, cache_hs_key_truncate_non_ts);
+ WT_STAT_DATA_INCR(session, cache_hs_key_truncate_non_ts);
F_SET(first_non_ts_upd, WT_UPDATE_CLEARED_HS);
}
@@ -618,6 +673,31 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
continue;
}
+ /* We should never write a prepared update to the history store. */
+ WT_ASSERT(session,
+ upd->prepare_state != WT_PREPARE_INPROGRESS &&
+ upd->prepare_state != WT_PREPARE_LOCKED);
+
+ /*
+ * Ensure all the updates inserted to the history store are committed.
+ *
+ * Sometimes the application and the checkpoint threads will fall behind the eviction
+ * threads, and they may choose an invisible update to write to the data store if the
+ * update was previously selected by a failed eviction pass. Also the eviction may run
+ * without a snapshot if the checkpoint is running concurrently. In those cases, check
+ * whether the history transaction is committed or not against the global transaction
+ * list. We expect the transaction is committed before the check. However, though very
+ * rare, it is possible that the check may race with transaction commit and in this case
+ * we may fail to catch the failure.
+ */
+#ifdef HAVE_DIAGNOSTIC
+ if (!F_ISSET(session->txn, WT_TXN_HAS_SNAPSHOT) ||
+ !__txn_visible_id(session, list->onpage_upd->txnid))
+ WT_ASSERT(session, !__wt_txn_active(session, upd->txnid));
+ else
+ WT_ASSERT(session, __txn_visible_id(session, upd->txnid));
+#endif
+
/*
* Calculate reverse modify and clear the history store records with timestamps when
* inserting the first update.
@@ -642,12 +722,15 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
++insert_cnt;
if (squashed) {
WT_STAT_CONN_INCR(session, cache_hs_write_squash);
+ WT_STAT_DATA_INCR(session, cache_hs_write_squash);
squashed = false;
}
}
- if (modifies.size > 0)
+ if (modifies.size > 0) {
WT_STAT_CONN_INCR(session, cache_hs_write_squash);
+ WT_STAT_DATA_INCR(session, cache_hs_write_squash);
+ }
}
WT_ERR(__wt_block_manager_named_size(session, WT_HS_FILE, &hs_size));
@@ -660,7 +743,7 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
err:
if (ret == 0 && insert_cnt > 0)
- __hs_insert_updates_verbose(session, btree);
+ __hs_verbose_cache_stats(session, btree);
__wt_scr_free(session, &key);
/* modify_value is allocated in __wt_modify_pack. Free it if it is allocated. */
@@ -748,8 +831,11 @@ __wt_hs_delete_key_from_ts(
WT_ASSERT(session, !F_ISSET(session, WT_SESSION_NO_DATA_HANDLES));
/* The tree structure can change while we try to insert the mod list, retry if that happens. */
- while ((ret = __hs_delete_key_from_ts_int(session, btree_id, key, ts, reinsert)) == WT_RESTART)
+ while (
+ (ret = __hs_delete_key_from_ts_int(session, btree_id, key, ts, reinsert)) == WT_RESTART) {
WT_STAT_CONN_INCR(session, cache_hs_insert_restart);
+ WT_STAT_DATA_INCR(session, cache_hs_insert_restart);
+ }
return (ret);
}
@@ -847,6 +933,7 @@ __hs_fixup_out_of_order_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor,
*/
if (__wt_txn_tw_stop_visible_all(session, &hs_cbt->upd_value->tw)) {
WT_STAT_CONN_INCR(session, cursor_next_hs_tombstone);
+ WT_STAT_DATA_INCR(session, cursor_next_hs_tombstone);
continue;
}
/*
@@ -873,8 +960,10 @@ __hs_fixup_out_of_order_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor,
* will be clobbered by our fix-up process. Keep track of how often this is happening.
*/
if (hs_cbt->upd_value->tw.start_ts != hs_cbt->upd_value->tw.durable_start_ts ||
- hs_cbt->upd_value->tw.stop_ts != hs_cbt->upd_value->tw.durable_stop_ts)
+ hs_cbt->upd_value->tw.stop_ts != hs_cbt->upd_value->tw.durable_stop_ts) {
WT_STAT_CONN_INCR(session, cache_hs_order_lose_durable_timestamp);
+ WT_STAT_DATA_INCR(session, cache_hs_order_lose_durable_timestamp);
+ }
__wt_verbose(session, WT_VERB_TIMESTAMP,
"fixing existing out-of-order updates by moving them; start_ts=%s, durable_start_ts=%s, "
@@ -918,6 +1007,7 @@ __hs_fixup_out_of_order_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor,
WT_ERR(ret);
tombstone = NULL;
WT_STAT_CONN_INCR(session, cache_hs_order_fixup_move);
+ WT_STAT_DATA_INCR(session, cache_hs_order_fixup_move);
}
if (ret == WT_NOTFOUND)
ret = 0;
@@ -1006,6 +1096,7 @@ __hs_delete_key_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, uint32_
*/
if (__wt_txn_tw_stop_visible_all(session, &hs_cbt->upd_value->tw)) {
WT_STAT_CONN_INCR(session, cursor_next_hs_tombstone);
+ WT_STAT_DATA_INCR(session, cursor_next_hs_tombstone);
continue;
}
@@ -1059,6 +1150,7 @@ __hs_delete_key_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, uint32_
}
upd = NULL;
WT_STAT_CONN_INCR(session, cache_hs_key_truncate);
+ WT_STAT_DATA_INCR(session, cache_hs_key_truncate);
}
if (ret == WT_NOTFOUND)
ret = 0;
diff --git a/src/third_party/wiredtiger/src/history/hs_verify.c b/src/third_party/wiredtiger/src/history/hs_verify.c
index d31c0c51afd..d5b0100dd95 100644
--- a/src/third_party/wiredtiger/src/history/hs_verify.c
+++ b/src/third_party/wiredtiger/src/history/hs_verify.c
@@ -9,13 +9,13 @@
#include "wt_internal.h"
/*
- * __verify_history_store_id --
+ * __hs_verify_id --
* Verify the history store for a single btree. Given a cursor to the tree, walk all history
* store keys. This function assumes any caller has already opened a cursor to the history
* store.
*/
static int
-__verify_history_store_id(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *ds_cbt, uint32_t this_btree_id)
+__hs_verify_id(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *ds_cbt, uint32_t this_btree_id)
{
WT_CURSOR *hs_cursor;
WT_CURSOR_BTREE *hs_cbt;
@@ -104,12 +104,12 @@ err:
}
/*
- * __wt_history_store_verify_one --
+ * __wt_hs_verify_one --
* Verify the history store for the btree that is set up in this session. This must be called
* when we are known to have exclusive access to the btree.
*/
int
-__wt_history_store_verify_one(WT_SESSION_IMPL *session)
+__wt_hs_verify_one(WT_SESSION_IMPL *session)
{
WT_CURSOR *hs_cursor;
WT_CURSOR_BTREE ds_cbt;
@@ -141,19 +141,19 @@ __wt_history_store_verify_one(WT_SESSION_IMPL *session)
if (ret == 0) {
__wt_btcur_init(session, &ds_cbt);
__wt_btcur_open(&ds_cbt);
- ret = __verify_history_store_id(session, &ds_cbt, btree_id);
+ ret = __hs_verify_id(session, &ds_cbt, btree_id);
WT_TRET(__wt_btcur_close(&ds_cbt, false));
}
return (ret == WT_NOTFOUND ? 0 : ret);
}
/*
- * __wt_history_store_verify --
+ * __wt_hs_verify --
* Verify the history store. There can't be an entry in the history store without having the
* latest value for the respective key in the data store.
*/
int
-__wt_history_store_verify(WT_SESSION_IMPL *session)
+__wt_hs_verify(WT_SESSION_IMPL *session)
{
WT_CURSOR *ds_cursor, *hs_cursor;
WT_DECL_ITEM(buf);
@@ -198,7 +198,7 @@ __wt_history_store_verify(WT_SESSION_IMPL *session)
}
WT_ERR(__wt_open_cursor(session, uri_data, NULL, NULL, &ds_cursor));
F_SET(ds_cursor, WT_CURSOR_RAW_OK);
- ret = __verify_history_store_id(session, (WT_CURSOR_BTREE *)ds_cursor, btree_id);
+ ret = __hs_verify_id(session, (WT_CURSOR_BTREE *)ds_cursor, btree_id);
if (ret == WT_NOTFOUND)
stop = true;
WT_TRET(ds_cursor->close(ds_cursor));
diff --git a/src/third_party/wiredtiger/src/include/block.h b/src/third_party/wiredtiger/src/include/block.h
index cd34baec52c..f35d09f05f3 100644
--- a/src/third_party/wiredtiger/src/include/block.h
+++ b/src/third_party/wiredtiger/src/include/block.h
@@ -231,6 +231,8 @@ struct __wt_block {
wt_off_t extend_size; /* File extended size */
wt_off_t extend_len; /* File extend chunk size */
+ bool created_during_backup; /* Created during incremental backup */
+
/* Configuration information, set when the file is opened. */
uint32_t allocfirst; /* Allocation is first-fit */
uint32_t allocsize; /* Allocation size */
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index ca789921b1d..3cc12d78e74 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -235,11 +235,13 @@ struct __wt_ovfl_reuse {
#else
#define WT_HS_COMPRESSOR "none"
#endif
-#define WT_HS_CONFIG \
- "key_format=" WT_UNCHECKED_STRING(IuQQ) ",value_format=" WT_UNCHECKED_STRING( \
- QQQu) ",block_compressor=" WT_HS_COMPRESSOR \
- ",leaf_value_max=64MB" \
- ",prefix_compression=false"
+#define WT_HS_KEY_FORMAT WT_UNCHECKED_STRING(IuQQ)
+#define WT_HS_VALUE_FORMAT WT_UNCHECKED_STRING(QQQu)
+#define WT_HS_CONFIG \
+ "key_format=" WT_HS_KEY_FORMAT ",value_format=" WT_HS_VALUE_FORMAT \
+ ",block_compressor=" WT_HS_COMPRESSOR \
+ ",leaf_value_max=64MB" \
+ ",prefix_compression=false"
/*
* WT_PAGE_MODIFY --
@@ -1015,11 +1017,10 @@ struct __wt_col {
/*
* WT_IKEY --
- * Instantiated key: row-store keys are usually prefix compressed and
- * sometimes Huffman encoded or overflow objects. Normally, a row-store
- * page in-memory key points to the on-page WT_CELL, but in some cases,
- * we instantiate the key in memory, in which case the row-store page
- * in-memory key points to a WT_IKEY structure.
+ * Instantiated key: row-store keys are usually prefix compressed or overflow objects.
+ * Normally, a row-store page in-memory key points to the on-page WT_CELL, but in some
+ * cases, we instantiate the key in memory, in which case the row-store page in-memory
+ * key points to a WT_IKEY structure.
*/
struct __wt_ikey {
uint32_t size; /* Key length */
diff --git a/src/third_party/wiredtiger/src/include/btree.h b/src/third_party/wiredtiger/src/include/btree.h
index 238cb7e6fd1..4af6ea90f67 100644
--- a/src/third_party/wiredtiger/src/include/btree.h
+++ b/src/third_party/wiredtiger/src/include/btree.h
@@ -112,7 +112,6 @@ struct __wt_btree {
/* AUTOMATIC FLAG VALUE GENERATION STOP */
uint32_t assert_flags; /* Debugging assertion information */
- void *huffman_key; /* Key huffman encoding */
void *huffman_value; /* Value huffman encoding */
enum {
@@ -168,6 +167,7 @@ struct __wt_btree {
uint64_t write_gen; /* Write generation */
uint64_t base_write_gen; /* Write generation on startup. */
+ uint64_t run_write_gen; /* Runtime write generation. */
uint64_t rec_max_txn; /* Maximum txn seen (clean trees) */
wt_timestamp_t rec_max_timestamp;
diff --git a/src/third_party/wiredtiger/src/include/btree_inline.h b/src/third_party/wiredtiger/src/include/btree_inline.h
index 9a18ae24d7b..dfa6adf21af 100644
--- a/src/third_party/wiredtiger/src/include/btree_inline.h
+++ b/src/third_party/wiredtiger/src/include/btree_inline.h
@@ -887,10 +887,9 @@ __wt_row_leaf_key_info(
* A row-store leaf page key is in one of two places: if instantiated,
* the WT_ROW pointer references a WT_IKEY structure, otherwise, it
* references an on-page offset. Further, on-page keys are in one of
- * two states: if the key is a simple key (not an overflow key, prefix
- * compressed or Huffman encoded, all of which are likely), the key's
- * offset/size is encoded in the pointer. Otherwise, the offset is to
- * the key's on-page cell.
+ * two states: if the key is a simple key (not an overflow key or prefix
+ * compressed, all of which are likely), the key's offset/size is encoded
+ * in the pointer. Otherwise, the offset is to the key's on-page cell.
*
* Now the magic: allocated memory must be aligned to store any standard
* type, and we expect some standard type to require at least quad-byte
diff --git a/src/third_party/wiredtiger/src/include/cell_inline.h b/src/third_party/wiredtiger/src/include/cell_inline.h
index 37a8f31305e..b065b72c472 100644
--- a/src/third_party/wiredtiger/src/include/cell_inline.h
+++ b/src/third_party/wiredtiger/src/include/cell_inline.h
@@ -1081,6 +1081,7 @@ __cell_data_ref(WT_SESSION_IMPL *session, WT_PAGE *page, int page_type,
void *huffman;
btree = S2BT(session);
+ huffman = NULL;
/* Reference the cell's data, optionally decode it. */
switch (unpack->type) {
@@ -1089,8 +1090,6 @@ __cell_data_ref(WT_SESSION_IMPL *session, WT_PAGE *page, int page_type,
store->size = unpack->size;
if (page_type == WT_PAGE_ROW_INT)
return (0);
-
- huffman = btree->huffman_key;
break;
case WT_CELL_VALUE:
store->data = unpack->data;
@@ -1101,8 +1100,6 @@ __cell_data_ref(WT_SESSION_IMPL *session, WT_PAGE *page, int page_type,
WT_RET(__wt_ovfl_read(session, page, unpack, store, &decoded));
if (page_type == WT_PAGE_ROW_INT || decoded)
return (0);
-
- huffman = btree->huffman_key;
break;
case WT_CELL_VALUE_OVFL:
WT_RET(__wt_ovfl_read(session, page, unpack, store, &decoded));
diff --git a/src/third_party/wiredtiger/src/include/ctype_inline.h b/src/third_party/wiredtiger/src/include/ctype_inline.h
index e751eedc090..0daf748f3b2 100644
--- a/src/third_party/wiredtiger/src/include/ctype_inline.h
+++ b/src/third_party/wiredtiger/src/include/ctype_inline.h
@@ -29,6 +29,16 @@ __wt_isalpha(u_char c)
}
/*
+ * __wt_isascii --
+ * Wrap the ctype function without sign extension.
+ */
+static inline bool
+__wt_isascii(u_char c)
+{
+ return (isascii(c) != 0);
+}
+
+/*
* __wt_isdigit --
* Wrap the ctype function without sign extension.
*/
diff --git a/src/third_party/wiredtiger/src/include/cursor.h b/src/third_party/wiredtiger/src/include/cursor.h
index c344710120d..2edff95e833 100644
--- a/src/third_party/wiredtiger/src/include/cursor.h
+++ b/src/third_party/wiredtiger/src/include/cursor.h
@@ -62,14 +62,16 @@ struct __wt_cursor_backup {
/* AUTOMATIC FLAG VALUE GENERATION START */
#define WT_CURBACKUP_CKPT_FAKE 0x001u /* Object has fake checkpoint */
-#define WT_CURBACKUP_DUP 0x002u /* Duplicated backup cursor */
-#define WT_CURBACKUP_FORCE_FULL 0x004u /* Force full file copy for this cursor */
-#define WT_CURBACKUP_FORCE_STOP 0x008u /* Force stop incremental backup */
-#define WT_CURBACKUP_HAS_CB_INFO 0x010u /* Object has checkpoint backup info */
-#define WT_CURBACKUP_INCR 0x020u /* Incremental backup cursor */
-#define WT_CURBACKUP_INCR_INIT 0x040u /* Cursor traversal initialized */
-#define WT_CURBACKUP_LOCKER 0x080u /* Hot-backup started */
-#define WT_CURBACKUP_RENAME 0x100u /* Object had a rename */
+#define WT_CURBACKUP_CONSOLIDATE 0x002u /* Consolidate returned info on this object */
+#define WT_CURBACKUP_DUP 0x004u /* Duplicated backup cursor */
+#define WT_CURBACKUP_FORCE_FULL 0x008u /* Force full file copy for this cursor */
+#define WT_CURBACKUP_FORCE_STOP 0x010u /* Force stop incremental backup */
+#define WT_CURBACKUP_HAS_CB_INFO 0x020u /* Object has checkpoint backup info */
+#define WT_CURBACKUP_INCR 0x040u /* Incremental backup cursor */
+#define WT_CURBACKUP_INCR_INIT 0x080u /* Cursor traversal initialized */
+#define WT_CURBACKUP_LOCKER 0x100u /* Hot-backup started */
+#define WT_CURBACKUP_QUERYID 0x200u /* Backup cursor for incremental ids */
+#define WT_CURBACKUP_RENAME 0x400u /* Object had a rename */
/* AUTOMATIC FLAG VALUE GENERATION STOP */
uint32_t flags;
};
@@ -157,14 +159,14 @@ struct __wt_cursor_btree {
WT_COL *cip_saved; /* Last iteration reference */
/*
- * We don't instantiate prefix-compressed keys on pages where there's no Huffman encoding
- * because we don't want to waste memory if only moving a cursor through the page, and it's
- * faster to build keys while moving through the page than to roll-forward from a previously
- * instantiated key (we don't instantiate all of the keys, just the ones at binary search
- * points). We can't use the application's WT_CURSOR key field as a copy of the last-returned
- * key because it may have been altered by the API layer, for example, dump cursors. Instead we
- * store the last-returned key in a temporary buffer. The rip_saved field is used to determine
- * if the key in the temporary buffer has the prefix needed for building the current key.
+ * We don't instantiate prefix-compressed keys on pages because we don't want to waste memory if
+ * only moving a cursor through the page, and it's faster to build keys while moving through the
+ * page than to roll-forward from a previously instantiated key (we don't instantiate all of the
+ * keys, just the ones at binary search points). We can't use the application's WT_CURSOR key
+ * field as a copy of the last-returned key because it may have been altered by the API layer,
+ * for example, dump cursors. Instead we store the last-returned key in a temporary buffer. The
+ * rip_saved field is used to determine if the key in the temporary buffer has the prefix needed
+ * for building the current key.
*/
WT_ROW *rip_saved; /* Last-returned key reference */
@@ -280,6 +282,13 @@ struct __wt_cursor_dump {
WT_CURSOR *child;
};
+struct __wt_cursor_hs {
+ WT_CURSOR iface;
+
+ WT_CURSOR *file_cursor; /* Queries of regular history store data */
+ WT_TIME_WINDOW time_window;
+};
+
struct __wt_cursor_index {
WT_CURSOR iface;
diff --git a/src/third_party/wiredtiger/src/include/cursor_inline.h b/src/third_party/wiredtiger/src/include/cursor_inline.h
index ad2ffcc6538..7675b495b44 100644
--- a/src/third_party/wiredtiger/src/include/cursor_inline.h
+++ b/src/third_party/wiredtiger/src/include/cursor_inline.h
@@ -423,7 +423,6 @@ static inline int
__cursor_row_slot_key_return(
WT_CURSOR_BTREE *cbt, WT_ROW *rip, WT_CELL_UNPACK_KV *kpack, bool *kpack_used)
{
- WT_BTREE *btree;
WT_CELL *cell;
WT_ITEM *kb;
WT_PAGE *page;
@@ -433,7 +432,6 @@ __cursor_row_slot_key_return(
*kpack_used = false;
session = CUR2S(cbt);
- btree = S2BT(session);
page = cbt->ref->page;
kb = &cbt->iface.key;
@@ -453,10 +451,6 @@ __cursor_row_slot_key_return(
if (__wt_row_leaf_key_info(page, copy, NULL, &cell, &kb->data, &kb->size))
return (0);
- /* Huffman encoded keys are a slow path in all cases. */
- if (btree->huffman_key != NULL)
- goto slow;
-
/*
* Unpack the cell and deal with overflow and prefix-compressed keys. Inline building simple
* prefix-compressed keys from a previous key, otherwise build from scratch.
@@ -487,7 +481,6 @@ __cursor_row_slot_key_return(
* Call __wt_row_leaf_key_work instead of __wt_row_leaf_key: we already did
* __wt_row_leaf_key's fast-path checks inline.
*/
-slow:
WT_RET(__wt_row_leaf_key_work(session, page, rip, cbt->row_key, false));
}
kb->data = cbt->row_key->data;
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index 044c32d7e98..d902d72ff01 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -35,6 +35,8 @@ extern bool __wt_page_evict_urgent(WT_SESSION_IMPL *session, WT_REF *ref)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern bool __wt_rwlock_islocked(WT_SESSION_IMPL *session, WT_RWLOCK *l)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern bool __wt_txn_active(WT_SESSION_IMPL *session, uint64_t txnid)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern char *__wt_time_aggregate_to_string(WT_TIME_AGGREGATE *ta, char *ta_string)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern char *__wt_time_point_to_string(wt_timestamp_t ts, wt_timestamp_t durable_ts,
@@ -484,6 +486,8 @@ extern int __wt_curfile_next_random(WT_CURSOR *cursor)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner,
const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_curhs_open(WT_SESSION_IMPL *session, WT_CURSOR *owner, WT_CURSOR **cursorp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_curindex_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner,
const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, WT_INDEX *idx,
@@ -730,10 +734,6 @@ extern int __wt_hex2byte(const u_char *from, u_char *to)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_hex_to_raw(WT_SESSION_IMPL *session, const char *from, WT_ITEM *to)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_history_store_verify(WT_SESSION_IMPL *session)
- WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_history_store_verify_one(WT_SESSION_IMPL *session)
- WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_hs_config(WT_SESSION_IMPL *session, const char **cfg)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_hs_cursor_cache(WT_SESSION_IMPL *session)
@@ -767,13 +767,17 @@ extern int __wt_hs_open(WT_SESSION_IMPL *session, const char **cfg)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_hs_row_search(WT_CURSOR_BTREE *hs_cbt, WT_ITEM *srch_key, bool insert)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_hs_verify(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_hs_verify_one(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_huffman_decode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg,
size_t from_len, WT_ITEM *to_buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_huffman_encode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg,
size_t from_len, WT_ITEM *to_buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_huffman_open(WT_SESSION_IMPL *session, void *symbol_frequency_array, u_int symcnt,
u_int numbytes, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_import_repair(WT_SESSION_IMPL *session, const char *uri, char **fileconfp)
+extern int __wt_import_repair(WT_SESSION_IMPL *session, const char *uri, char **configp)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_inmem_unsupported_op(WT_SESSION_IMPL *session, const char *tag)
WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -1034,12 +1038,14 @@ extern int __wt_meta_checkpoint_last_name(WT_SESSION_IMPL *session, const char *
const char **namep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_meta_ckptlist_get(WT_SESSION_IMPL *session, const char *fname, bool update,
WT_CKPT **ckptbasep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_meta_ckptlist_get_with_config(WT_SESSION_IMPL *session, bool update,
+extern int __wt_meta_ckptlist_get_from_config(WT_SESSION_IMPL *session, bool update,
WT_CKPT **ckptbasep, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_meta_ckptlist_set(WT_SESSION_IMPL *session, const char *fname, WT_CKPT *ckptbase,
WT_LSN *ckptlsn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_meta_ckptlist_to_meta(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, WT_ITEM *buf)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_meta_ckptlist_update_config(WT_SESSION_IMPL *session, WT_CKPT *ckptbase,
+ const char *oldcfg, char **newcfgp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_meta_sysinfo_set(WT_SESSION_IMPL *session)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_meta_track_checkpoint(WT_SESSION_IMPL *session)
@@ -1809,6 +1815,7 @@ static inline bool __wt_eviction_updates_needed(WT_SESSION_IMPL *session, double
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline bool __wt_isalnum(u_char c) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline bool __wt_isalpha(u_char c) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline bool __wt_isascii(u_char c) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline bool __wt_isdigit(u_char c) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline bool __wt_isprint(u_char c) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline bool __wt_isspace(u_char c) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
diff --git a/src/third_party/wiredtiger/src/include/gcc.h b/src/third_party/wiredtiger/src/include/gcc.h
index 85ba3214d3e..b620f614b07 100644
--- a/src/third_party/wiredtiger/src/include/gcc.h
+++ b/src/third_party/wiredtiger/src/include/gcc.h
@@ -217,7 +217,14 @@ WT_ATOMIC_FUNC(size, size_t, size_t *vp, size_t v)
} while (0)
#elif defined(__aarch64__)
-#define WT_PAUSE() __asm__ volatile("yield" ::: "memory")
+/*
+ * Use an isb instruction here to be closer to the original x86 pause instruction. The yield
+ * instruction that was previously here is a nop that is intended to provide a hint that a
+ * thread in a SMT system could yield. This is different from the x86 pause instruction
+ * which delays execution by O(100) cycles. The isb will typically delay execution by about
+ * 50 cycles so it's a reasonable alternative.
+ */
+#define WT_PAUSE() __asm__ volatile("isb" ::: "memory")
/*
* dmb are chosen here because they are sufficient to guarantee the ordering described above. We
diff --git a/src/third_party/wiredtiger/src/include/lsm.h b/src/third_party/wiredtiger/src/include/lsm.h
index 9b3dba57dc7..5b91e001892 100644
--- a/src/third_party/wiredtiger/src/include/lsm.h
+++ b/src/third_party/wiredtiger/src/include/lsm.h
@@ -131,7 +131,7 @@ struct __wt_lsm_chunk {
/* AUTOMATIC FLAG VALUE GENERATION START */
#define WT_LSM_WORK_BLOOM 0x01u /* Create a bloom filter */
#define WT_LSM_WORK_DROP 0x02u /* Drop unused chunks */
-#define WT_LSM_WORK_ENABLE_EVICT 0x04u /* Create a bloom filter */
+#define WT_LSM_WORK_ENABLE_EVICT 0x04u /* Allow eviction of pinned chunk */
#define WT_LSM_WORK_FLUSH 0x08u /* Flush a chunk to disk */
#define WT_LSM_WORK_MERGE 0x10u /* Look for a tree merge */
#define WT_LSM_WORK_SWITCH 0x20u /* Switch the in-memory chunk */
diff --git a/src/third_party/wiredtiger/src/include/meta.h b/src/third_party/wiredtiger/src/include/meta.h
index 9e274e9f3cc..b4902740837 100644
--- a/src/third_party/wiredtiger/src/include/meta.h
+++ b/src/third_party/wiredtiger/src/include/meta.h
@@ -137,7 +137,8 @@ struct __wt_ckpt {
uint64_t size; /* Checkpoint size */
- uint64_t write_gen; /* Write generation */
+ uint64_t write_gen; /* Write generation */
+ uint64_t run_write_gen; /* Runtime write generation. */
char *block_metadata; /* Block-stored metadata */
char *block_checkpoint; /* Block-stored checkpoint */
diff --git a/src/third_party/wiredtiger/src/include/session.h b/src/third_party/wiredtiger/src/include/session.h
index 3492905ec0b..0186b98f4cc 100644
--- a/src/third_party/wiredtiger/src/include/session.h
+++ b/src/third_party/wiredtiger/src/include/session.h
@@ -170,31 +170,32 @@ struct __wt_session_impl {
#define WT_SESSION_CACHE_CURSORS 0x00000004u
#define WT_SESSION_CAN_WAIT 0x00000008u
#define WT_SESSION_IGNORE_CACHE_SIZE 0x00000010u
-#define WT_SESSION_IMPORT_REPAIR 0x00000020u
-#define WT_SESSION_INSTANTIATE_PREPARE 0x00000040u
-#define WT_SESSION_INTERNAL 0x00000080u
-#define WT_SESSION_LOCKED_CHECKPOINT 0x00000100u
-#define WT_SESSION_LOCKED_HANDLE_LIST_READ 0x00000200u
-#define WT_SESSION_LOCKED_HANDLE_LIST_WRITE 0x00000400u
-#define WT_SESSION_LOCKED_HOTBACKUP_READ 0x00000800u
-#define WT_SESSION_LOCKED_HOTBACKUP_WRITE 0x00001000u
-#define WT_SESSION_LOCKED_METADATA 0x00002000u
-#define WT_SESSION_LOCKED_PASS 0x00004000u
-#define WT_SESSION_LOCKED_SCHEMA 0x00008000u
-#define WT_SESSION_LOCKED_SLOT 0x00010000u
-#define WT_SESSION_LOCKED_TABLE_READ 0x00020000u
-#define WT_SESSION_LOCKED_TABLE_WRITE 0x00040000u
-#define WT_SESSION_LOCKED_TURTLE 0x00080000u
-#define WT_SESSION_LOGGING_INMEM 0x00100000u
-#define WT_SESSION_NO_DATA_HANDLES 0x00200000u
-#define WT_SESSION_NO_LOGGING 0x00400000u
-#define WT_SESSION_NO_RECONCILE 0x00800000u
-#define WT_SESSION_NO_SCHEMA_LOCK 0x01000000u
-#define WT_SESSION_QUIET_CORRUPT_FILE 0x02000000u
-#define WT_SESSION_READ_WONT_NEED 0x04000000u
-#define WT_SESSION_RESOLVING_TXN 0x08000000u
-#define WT_SESSION_ROLLBACK_TO_STABLE 0x10000000u
-#define WT_SESSION_SCHEMA_TXN 0x20000000u
+#define WT_SESSION_IMPORT 0x00000020u
+#define WT_SESSION_IMPORT_REPAIR 0x00000040u
+#define WT_SESSION_INSTANTIATE_PREPARE 0x00000080u
+#define WT_SESSION_INTERNAL 0x00000100u
+#define WT_SESSION_LOCKED_CHECKPOINT 0x00000200u
+#define WT_SESSION_LOCKED_HANDLE_LIST_READ 0x00000400u
+#define WT_SESSION_LOCKED_HANDLE_LIST_WRITE 0x00000800u
+#define WT_SESSION_LOCKED_HOTBACKUP_READ 0x00001000u
+#define WT_SESSION_LOCKED_HOTBACKUP_WRITE 0x00002000u
+#define WT_SESSION_LOCKED_METADATA 0x00004000u
+#define WT_SESSION_LOCKED_PASS 0x00008000u
+#define WT_SESSION_LOCKED_SCHEMA 0x00010000u
+#define WT_SESSION_LOCKED_SLOT 0x00020000u
+#define WT_SESSION_LOCKED_TABLE_READ 0x00040000u
+#define WT_SESSION_LOCKED_TABLE_WRITE 0x00080000u
+#define WT_SESSION_LOCKED_TURTLE 0x00100000u
+#define WT_SESSION_LOGGING_INMEM 0x00200000u
+#define WT_SESSION_NO_DATA_HANDLES 0x00400000u
+#define WT_SESSION_NO_LOGGING 0x00800000u
+#define WT_SESSION_NO_RECONCILE 0x01000000u
+#define WT_SESSION_NO_SCHEMA_LOCK 0x02000000u
+#define WT_SESSION_QUIET_CORRUPT_FILE 0x04000000u
+#define WT_SESSION_READ_WONT_NEED 0x08000000u
+#define WT_SESSION_RESOLVING_TXN 0x10000000u
+#define WT_SESSION_ROLLBACK_TO_STABLE 0x20000000u
+#define WT_SESSION_SCHEMA_TXN 0x40000000u
/* AUTOMATIC FLAG VALUE GENERATION STOP */
uint32_t flags;
diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h
index eaff9f721c4..200d63759a1 100644
--- a/src/third_party/wiredtiger/src/include/stat.h
+++ b/src/third_party/wiredtiger/src/include/stat.h
@@ -401,7 +401,7 @@ struct __wt_connection_stats {
int64_t cache_hs_key_truncate_rts;
int64_t cache_hs_key_truncate;
int64_t cache_hs_key_truncate_onpage_removal;
- int64_t cache_hs_key_truncate_mix_ts;
+ int64_t cache_hs_key_truncate_non_ts;
int64_t cache_hs_write_squash;
int64_t cache_inmem_splittable;
int64_t cache_inmem_split;
@@ -836,7 +836,20 @@ struct __wt_dsrc_stats {
int64_t cache_eviction_walk_from_root;
int64_t cache_eviction_walk_saved_pos;
int64_t cache_eviction_hazard;
+ int64_t cache_hs_insert;
+ int64_t cache_hs_insert_restart;
+ int64_t cache_hs_order_lose_durable_timestamp;
+ int64_t cache_hs_order_fixup_move;
+ int64_t cache_hs_order_fixup_insert;
int64_t cache_hs_read;
+ int64_t cache_hs_read_miss;
+ int64_t cache_hs_read_squash;
+ int64_t cache_hs_key_truncate_rts_unstable;
+ int64_t cache_hs_key_truncate_rts;
+ int64_t cache_hs_key_truncate;
+ int64_t cache_hs_key_truncate_onpage_removal;
+ int64_t cache_hs_key_truncate_non_ts;
+ int64_t cache_hs_write_squash;
int64_t cache_inmem_splittable;
int64_t cache_inmem_split;
int64_t cache_eviction_internal;
@@ -893,8 +906,10 @@ struct __wt_dsrc_stats {
int64_t cursor_reopen;
int64_t cursor_cache;
int64_t cursor_create;
+ int64_t cursor_next_hs_tombstone;
int64_t cursor_next_skip_ge_100;
int64_t cursor_next_skip_lt_100;
+ int64_t cursor_prev_hs_tombstone;
int64_t cursor_prev_skip_ge_100;
int64_t cursor_prev_skip_lt_100;
int64_t cursor_insert;
@@ -956,6 +971,12 @@ struct __wt_dsrc_stats {
int64_t rec_time_window_stop_txn;
int64_t session_compact;
int64_t txn_read_race_prepare_update;
+ int64_t txn_rts_hs_stop_older_than_newer_start;
+ int64_t txn_rts_keys_removed;
+ int64_t txn_rts_keys_restored;
+ int64_t txn_rts_hs_restore_tombstones;
+ int64_t txn_rts_sweep_hs_keys;
+ int64_t txn_rts_hs_removed;
int64_t txn_update_conflict;
};
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index 7d4aed9df66..f5568a8b7e1 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -877,29 +877,33 @@ struct __wt_session {
* @config{incremental = (, configure the cursor for block incremental backup usage. These
* formats are only compatible with the backup data source; see @ref backup., a set of
* related configuration options defined below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;enabled,
- * whether to configure this backup as the starting point for a subsequent incremental
- * backup., a boolean flag; default \c false.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;file, the
- * file name when opening a duplicate incremental backup cursor. That duplicate cursor will
- * return the block modifications relevant to the given file name., a string; default
- * empty.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;force_stop, causes all block incremental backup
- * information to be released. This is on an open_cursor call and the resources will be
- * released when this cursor is closed. No other operations should be done on this open
- * cursor., a boolean flag; default \c false.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;granularity,
- * this setting manages the granularity of how WiredTiger maintains modification maps
- * internally. The larger the granularity\, the smaller amount of information WiredTiger
- * need to maintain., an integer between 4KB and 2GB; default \c 16MB.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;src_id, a string that identifies a previous checkpoint
- * backup source as the source of this incremental backup. This identifier must have
- * already been created by use of the 'this_id' configuration in an earlier backup. A
- * source id is required to begin an incremental backup., a string; default empty.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;this_id, a string that identifies the current system
- * state as a future backup source for an incremental backup via 'src_id'. This identifier
- * is required when opening an incremental backup cursor and an error will be returned if
- * one is not provided., a string; default empty.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;
+ * consolidate, causes block incremental backup information to be consolidated if adjacent
+ * granularity blocks are modified. If false\, information will be returned in granularity
+ * sized blocks only. This must be set on the primary backup cursor and it applies to all
+ * files for this backup., a boolean flag; default \c false.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;enabled, whether to configure this backup as the starting
+ * point for a subsequent incremental backup., a boolean flag; default \c false.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;file, the file name when opening a duplicate incremental
+ * backup cursor. That duplicate cursor will return the block modifications relevant to the
+ * given file name., a string; default empty.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;force_stop,
+ * causes all block incremental backup information to be released. This is on an
+ * open_cursor call and the resources will be released when this cursor is closed. No other
+ * operations should be done on this open cursor., a boolean flag; default \c false.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;granularity, this setting manages the granularity of how
+ * WiredTiger maintains modification maps internally. The larger the granularity\, the
+ * smaller amount of information WiredTiger need to maintain., an integer between 4KB and
+ * 2GB; default \c 16MB.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;src_id, a string that identifies a
+ * previous checkpoint backup source as the source of this incremental backup. This
+ * identifier must have already been created by use of the 'this_id' configuration in an
+ * earlier backup. A source id is required to begin an incremental backup., a string;
+ * default empty.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;this_id, a string that identifies the
+ * current system state as a future backup source for an incremental backup via 'src_id'.
+ * This identifier is required when opening an incremental backup cursor and an error will
+ * be returned if one is not provided., a string; default empty.}
* @config{ ),,}
* @config{next_random, configure the cursor to return a pseudo-random record from the
* object when the WT_CURSOR::next method is called; valid only for row-store cursors. See
@@ -1075,9 +1079,6 @@ struct __wt_session {
* \c none.}
* @config{format, the file format., a string\, chosen from the following options: \c
* "btree"; default \c btree.}
- * @config{huffman_key, configure Huffman encoding for keys. Permitted values are \c
- * "none"\, \c "english"\, \c "utf8<file>" or \c "utf16<file>". See @ref huffman for more
- * information., a string; default \c none.}
* @config{huffman_value, configure Huffman encoding for values. Permitted values are \c
* "none"\, \c "english"\, \c "utf8<file>" or \c "utf16<file>". See @ref huffman for more
* information., a string; default \c none.}
@@ -4928,9 +4929,9 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_ONPAGE_REMOVAL 1105
/*!
* cache: history store table truncation to remove range of updates due
- * to mixed timestamps
+ * to non-timestamped update on data page
*/
-#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_MIX_TS 1106
+#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_NON_TS 1106
/*! cache: history store table writes requiring squashed modifies */
#define WT_STAT_CONN_CACHE_HS_WRITE_SQUASH 1107
/*! cache: in-memory page passed criteria to be split */
@@ -5960,366 +5961,438 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_DSRC_CACHE_EVICTION_WALK_SAVED_POS 2061
/*! cache: hazard pointer blocked page eviction */
#define WT_STAT_DSRC_CACHE_EVICTION_HAZARD 2062
+/*! cache: history store table insert calls */
+#define WT_STAT_DSRC_CACHE_HS_INSERT 2063
+/*! cache: history store table insert calls that returned restart */
+#define WT_STAT_DSRC_CACHE_HS_INSERT_RESTART 2064
+/*!
+ * cache: history store table out-of-order resolved updates that lose
+ * their durable timestamp
+ */
+#define WT_STAT_DSRC_CACHE_HS_ORDER_LOSE_DURABLE_TIMESTAMP 2065
+/*!
+ * cache: history store table out-of-order updates that were fixed up by
+ * moving existing records
+ */
+#define WT_STAT_DSRC_CACHE_HS_ORDER_FIXUP_MOVE 2066
+/*!
+ * cache: history store table out-of-order updates that were fixed up
+ * during insertion
+ */
+#define WT_STAT_DSRC_CACHE_HS_ORDER_FIXUP_INSERT 2067
/*! cache: history store table reads */
-#define WT_STAT_DSRC_CACHE_HS_READ 2063
+#define WT_STAT_DSRC_CACHE_HS_READ 2068
+/*! cache: history store table reads missed */
+#define WT_STAT_DSRC_CACHE_HS_READ_MISS 2069
+/*! cache: history store table reads requiring squashed modifies */
+#define WT_STAT_DSRC_CACHE_HS_READ_SQUASH 2070
+/*!
+ * cache: history store table truncation by rollback to stable to remove
+ * an unstable update
+ */
+#define WT_STAT_DSRC_CACHE_HS_KEY_TRUNCATE_RTS_UNSTABLE 2071
+/*!
+ * cache: history store table truncation by rollback to stable to remove
+ * an update
+ */
+#define WT_STAT_DSRC_CACHE_HS_KEY_TRUNCATE_RTS 2072
+/*! cache: history store table truncation to remove an update */
+#define WT_STAT_DSRC_CACHE_HS_KEY_TRUNCATE 2073
+/*!
+ * cache: history store table truncation to remove range of updates due
+ * to key being removed from the data page during reconciliation
+ */
+#define WT_STAT_DSRC_CACHE_HS_KEY_TRUNCATE_ONPAGE_REMOVAL 2074
+/*!
+ * cache: history store table truncation to remove range of updates due
+ * to non-timestamped update on data page
+ */
+#define WT_STAT_DSRC_CACHE_HS_KEY_TRUNCATE_NON_TS 2075
+/*! cache: history store table writes requiring squashed modifies */
+#define WT_STAT_DSRC_CACHE_HS_WRITE_SQUASH 2076
/*! cache: in-memory page passed criteria to be split */
-#define WT_STAT_DSRC_CACHE_INMEM_SPLITTABLE 2064
+#define WT_STAT_DSRC_CACHE_INMEM_SPLITTABLE 2077
/*! cache: in-memory page splits */
-#define WT_STAT_DSRC_CACHE_INMEM_SPLIT 2065
+#define WT_STAT_DSRC_CACHE_INMEM_SPLIT 2078
/*! cache: internal pages evicted */
-#define WT_STAT_DSRC_CACHE_EVICTION_INTERNAL 2066
+#define WT_STAT_DSRC_CACHE_EVICTION_INTERNAL 2079
/*! cache: internal pages split during eviction */
-#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_INTERNAL 2067
+#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_INTERNAL 2080
/*! cache: leaf pages split during eviction */
-#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_LEAF 2068
+#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_LEAF 2081
/*! cache: modified pages evicted */
-#define WT_STAT_DSRC_CACHE_EVICTION_DIRTY 2069
+#define WT_STAT_DSRC_CACHE_EVICTION_DIRTY 2082
/*! cache: overflow pages read into cache */
-#define WT_STAT_DSRC_CACHE_READ_OVERFLOW 2070
+#define WT_STAT_DSRC_CACHE_READ_OVERFLOW 2083
/*! cache: page split during eviction deepened the tree */
-#define WT_STAT_DSRC_CACHE_EVICTION_DEEPEN 2071
+#define WT_STAT_DSRC_CACHE_EVICTION_DEEPEN 2084
/*! cache: page written requiring history store records */
-#define WT_STAT_DSRC_CACHE_WRITE_HS 2072
+#define WT_STAT_DSRC_CACHE_WRITE_HS 2085
/*! cache: pages read into cache */
-#define WT_STAT_DSRC_CACHE_READ 2073
+#define WT_STAT_DSRC_CACHE_READ 2086
/*! cache: pages read into cache after truncate */
-#define WT_STAT_DSRC_CACHE_READ_DELETED 2074
+#define WT_STAT_DSRC_CACHE_READ_DELETED 2087
/*! cache: pages read into cache after truncate in prepare state */
-#define WT_STAT_DSRC_CACHE_READ_DELETED_PREPARED 2075
+#define WT_STAT_DSRC_CACHE_READ_DELETED_PREPARED 2088
/*! cache: pages requested from the cache */
-#define WT_STAT_DSRC_CACHE_PAGES_REQUESTED 2076
+#define WT_STAT_DSRC_CACHE_PAGES_REQUESTED 2089
/*! cache: pages seen by eviction walk */
-#define WT_STAT_DSRC_CACHE_EVICTION_PAGES_SEEN 2077
+#define WT_STAT_DSRC_CACHE_EVICTION_PAGES_SEEN 2090
/*! cache: pages written from cache */
-#define WT_STAT_DSRC_CACHE_WRITE 2078
+#define WT_STAT_DSRC_CACHE_WRITE 2091
/*! cache: pages written requiring in-memory restoration */
-#define WT_STAT_DSRC_CACHE_WRITE_RESTORE 2079
+#define WT_STAT_DSRC_CACHE_WRITE_RESTORE 2092
/*! cache: tracked dirty bytes in the cache */
-#define WT_STAT_DSRC_CACHE_BYTES_DIRTY 2080
+#define WT_STAT_DSRC_CACHE_BYTES_DIRTY 2093
/*! cache: unmodified pages evicted */
-#define WT_STAT_DSRC_CACHE_EVICTION_CLEAN 2081
+#define WT_STAT_DSRC_CACHE_EVICTION_CLEAN 2094
/*!
* cache_walk: Average difference between current eviction generation
* when the page was last considered, only reported if cache_walk or all
* statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_GEN_AVG_GAP 2082
+#define WT_STAT_DSRC_CACHE_STATE_GEN_AVG_GAP 2095
/*!
* cache_walk: Average on-disk page image size seen, only reported if
* cache_walk or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_AVG_WRITTEN_SIZE 2083
+#define WT_STAT_DSRC_CACHE_STATE_AVG_WRITTEN_SIZE 2096
/*!
* cache_walk: Average time in cache for pages that have been visited by
* the eviction server, only reported if cache_walk or all statistics are
* enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_AVG_VISITED_AGE 2084
+#define WT_STAT_DSRC_CACHE_STATE_AVG_VISITED_AGE 2097
/*!
* cache_walk: Average time in cache for pages that have not been visited
* by the eviction server, only reported if cache_walk or all statistics
* are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_AVG_UNVISITED_AGE 2085
+#define WT_STAT_DSRC_CACHE_STATE_AVG_UNVISITED_AGE 2098
/*!
* cache_walk: Clean pages currently in cache, only reported if
* cache_walk or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_PAGES_CLEAN 2086
+#define WT_STAT_DSRC_CACHE_STATE_PAGES_CLEAN 2099
/*!
* cache_walk: Current eviction generation, only reported if cache_walk
* or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_GEN_CURRENT 2087
+#define WT_STAT_DSRC_CACHE_STATE_GEN_CURRENT 2100
/*!
* cache_walk: Dirty pages currently in cache, only reported if
* cache_walk or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_PAGES_DIRTY 2088
+#define WT_STAT_DSRC_CACHE_STATE_PAGES_DIRTY 2101
/*!
* cache_walk: Entries in the root page, only reported if cache_walk or
* all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_ROOT_ENTRIES 2089
+#define WT_STAT_DSRC_CACHE_STATE_ROOT_ENTRIES 2102
/*!
* cache_walk: Internal pages currently in cache, only reported if
* cache_walk or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_PAGES_INTERNAL 2090
+#define WT_STAT_DSRC_CACHE_STATE_PAGES_INTERNAL 2103
/*!
* cache_walk: Leaf pages currently in cache, only reported if cache_walk
* or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_PAGES_LEAF 2091
+#define WT_STAT_DSRC_CACHE_STATE_PAGES_LEAF 2104
/*!
* cache_walk: Maximum difference between current eviction generation
* when the page was last considered, only reported if cache_walk or all
* statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_GEN_MAX_GAP 2092
+#define WT_STAT_DSRC_CACHE_STATE_GEN_MAX_GAP 2105
/*!
* cache_walk: Maximum page size seen, only reported if cache_walk or all
* statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_MAX_PAGESIZE 2093
+#define WT_STAT_DSRC_CACHE_STATE_MAX_PAGESIZE 2106
/*!
* cache_walk: Minimum on-disk page image size seen, only reported if
* cache_walk or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_MIN_WRITTEN_SIZE 2094
+#define WT_STAT_DSRC_CACHE_STATE_MIN_WRITTEN_SIZE 2107
/*!
* cache_walk: Number of pages never visited by eviction server, only
* reported if cache_walk or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_UNVISITED_COUNT 2095
+#define WT_STAT_DSRC_CACHE_STATE_UNVISITED_COUNT 2108
/*!
* cache_walk: On-disk page image sizes smaller than a single allocation
* unit, only reported if cache_walk or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_SMALLER_ALLOC_SIZE 2096
+#define WT_STAT_DSRC_CACHE_STATE_SMALLER_ALLOC_SIZE 2109
/*!
* cache_walk: Pages created in memory and never written, only reported
* if cache_walk or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_MEMORY 2097
+#define WT_STAT_DSRC_CACHE_STATE_MEMORY 2110
/*!
* cache_walk: Pages currently queued for eviction, only reported if
* cache_walk or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_QUEUED 2098
+#define WT_STAT_DSRC_CACHE_STATE_QUEUED 2111
/*!
* cache_walk: Pages that could not be queued for eviction, only reported
* if cache_walk or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_NOT_QUEUEABLE 2099
+#define WT_STAT_DSRC_CACHE_STATE_NOT_QUEUEABLE 2112
/*!
* cache_walk: Refs skipped during cache traversal, only reported if
* cache_walk or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_REFS_SKIPPED 2100
+#define WT_STAT_DSRC_CACHE_STATE_REFS_SKIPPED 2113
/*!
* cache_walk: Size of the root page, only reported if cache_walk or all
* statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_ROOT_SIZE 2101
+#define WT_STAT_DSRC_CACHE_STATE_ROOT_SIZE 2114
/*!
* cache_walk: Total number of pages currently in cache, only reported if
* cache_walk or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_PAGES 2102
+#define WT_STAT_DSRC_CACHE_STATE_PAGES 2115
/*! checkpoint-cleanup: pages added for eviction */
-#define WT_STAT_DSRC_CC_PAGES_EVICT 2103
+#define WT_STAT_DSRC_CC_PAGES_EVICT 2116
/*! checkpoint-cleanup: pages removed */
-#define WT_STAT_DSRC_CC_PAGES_REMOVED 2104
+#define WT_STAT_DSRC_CC_PAGES_REMOVED 2117
/*! checkpoint-cleanup: pages skipped during tree walk */
-#define WT_STAT_DSRC_CC_PAGES_WALK_SKIPPED 2105
+#define WT_STAT_DSRC_CC_PAGES_WALK_SKIPPED 2118
/*! checkpoint-cleanup: pages visited */
-#define WT_STAT_DSRC_CC_PAGES_VISITED 2106
+#define WT_STAT_DSRC_CC_PAGES_VISITED 2119
/*!
* compression: compressed page maximum internal page size prior to
* compression
*/
-#define WT_STAT_DSRC_COMPRESS_PRECOMP_INTL_MAX_PAGE_SIZE 2107
+#define WT_STAT_DSRC_COMPRESS_PRECOMP_INTL_MAX_PAGE_SIZE 2120
/*!
* compression: compressed page maximum leaf page size prior to
* compression
*/
-#define WT_STAT_DSRC_COMPRESS_PRECOMP_LEAF_MAX_PAGE_SIZE 2108
+#define WT_STAT_DSRC_COMPRESS_PRECOMP_LEAF_MAX_PAGE_SIZE 2121
/*! compression: compressed pages read */
-#define WT_STAT_DSRC_COMPRESS_READ 2109
+#define WT_STAT_DSRC_COMPRESS_READ 2122
/*! compression: compressed pages written */
-#define WT_STAT_DSRC_COMPRESS_WRITE 2110
+#define WT_STAT_DSRC_COMPRESS_WRITE 2123
/*! compression: page written failed to compress */
-#define WT_STAT_DSRC_COMPRESS_WRITE_FAIL 2111
+#define WT_STAT_DSRC_COMPRESS_WRITE_FAIL 2124
/*! compression: page written was too small to compress */
-#define WT_STAT_DSRC_COMPRESS_WRITE_TOO_SMALL 2112
+#define WT_STAT_DSRC_COMPRESS_WRITE_TOO_SMALL 2125
/*! cursor: Total number of entries skipped by cursor next calls */
-#define WT_STAT_DSRC_CURSOR_NEXT_SKIP_TOTAL 2113
+#define WT_STAT_DSRC_CURSOR_NEXT_SKIP_TOTAL 2126
/*! cursor: Total number of entries skipped by cursor prev calls */
-#define WT_STAT_DSRC_CURSOR_PREV_SKIP_TOTAL 2114
+#define WT_STAT_DSRC_CURSOR_PREV_SKIP_TOTAL 2127
/*!
* cursor: Total number of entries skipped to position the history store
* cursor
*/
-#define WT_STAT_DSRC_CURSOR_SKIP_HS_CUR_POSITION 2115
+#define WT_STAT_DSRC_CURSOR_SKIP_HS_CUR_POSITION 2128
/*! cursor: bulk loaded cursor insert calls */
-#define WT_STAT_DSRC_CURSOR_INSERT_BULK 2116
+#define WT_STAT_DSRC_CURSOR_INSERT_BULK 2129
/*! cursor: cache cursors reuse count */
-#define WT_STAT_DSRC_CURSOR_REOPEN 2117
+#define WT_STAT_DSRC_CURSOR_REOPEN 2130
/*! cursor: close calls that result in cache */
-#define WT_STAT_DSRC_CURSOR_CACHE 2118
+#define WT_STAT_DSRC_CURSOR_CACHE 2131
/*! cursor: create calls */
-#define WT_STAT_DSRC_CURSOR_CREATE 2119
+#define WT_STAT_DSRC_CURSOR_CREATE 2132
+/*!
+ * cursor: cursor next calls that skip due to a globally visible history
+ * store tombstone
+ */
+#define WT_STAT_DSRC_CURSOR_NEXT_HS_TOMBSTONE 2133
/*!
* cursor: cursor next calls that skip greater than or equal to 100
* entries
*/
-#define WT_STAT_DSRC_CURSOR_NEXT_SKIP_GE_100 2120
+#define WT_STAT_DSRC_CURSOR_NEXT_SKIP_GE_100 2134
/*! cursor: cursor next calls that skip less than 100 entries */
-#define WT_STAT_DSRC_CURSOR_NEXT_SKIP_LT_100 2121
+#define WT_STAT_DSRC_CURSOR_NEXT_SKIP_LT_100 2135
+/*!
+ * cursor: cursor prev calls that skip due to a globally visible history
+ * store tombstone
+ */
+#define WT_STAT_DSRC_CURSOR_PREV_HS_TOMBSTONE 2136
/*!
* cursor: cursor prev calls that skip greater than or equal to 100
* entries
*/
-#define WT_STAT_DSRC_CURSOR_PREV_SKIP_GE_100 2122
+#define WT_STAT_DSRC_CURSOR_PREV_SKIP_GE_100 2137
/*! cursor: cursor prev calls that skip less than 100 entries */
-#define WT_STAT_DSRC_CURSOR_PREV_SKIP_LT_100 2123
+#define WT_STAT_DSRC_CURSOR_PREV_SKIP_LT_100 2138
/*! cursor: insert calls */
-#define WT_STAT_DSRC_CURSOR_INSERT 2124
+#define WT_STAT_DSRC_CURSOR_INSERT 2139
/*! cursor: insert key and value bytes */
-#define WT_STAT_DSRC_CURSOR_INSERT_BYTES 2125
+#define WT_STAT_DSRC_CURSOR_INSERT_BYTES 2140
/*! cursor: modify */
-#define WT_STAT_DSRC_CURSOR_MODIFY 2126
+#define WT_STAT_DSRC_CURSOR_MODIFY 2141
/*! cursor: modify key and value bytes affected */
-#define WT_STAT_DSRC_CURSOR_MODIFY_BYTES 2127
+#define WT_STAT_DSRC_CURSOR_MODIFY_BYTES 2142
/*! cursor: modify value bytes modified */
-#define WT_STAT_DSRC_CURSOR_MODIFY_BYTES_TOUCH 2128
+#define WT_STAT_DSRC_CURSOR_MODIFY_BYTES_TOUCH 2143
/*! cursor: next calls */
-#define WT_STAT_DSRC_CURSOR_NEXT 2129
+#define WT_STAT_DSRC_CURSOR_NEXT 2144
/*! cursor: open cursor count */
-#define WT_STAT_DSRC_CURSOR_OPEN_COUNT 2130
+#define WT_STAT_DSRC_CURSOR_OPEN_COUNT 2145
/*! cursor: operation restarted */
-#define WT_STAT_DSRC_CURSOR_RESTART 2131
+#define WT_STAT_DSRC_CURSOR_RESTART 2146
/*! cursor: prev calls */
-#define WT_STAT_DSRC_CURSOR_PREV 2132
+#define WT_STAT_DSRC_CURSOR_PREV 2147
/*! cursor: remove calls */
-#define WT_STAT_DSRC_CURSOR_REMOVE 2133
+#define WT_STAT_DSRC_CURSOR_REMOVE 2148
/*! cursor: remove key bytes removed */
-#define WT_STAT_DSRC_CURSOR_REMOVE_BYTES 2134
+#define WT_STAT_DSRC_CURSOR_REMOVE_BYTES 2149
/*! cursor: reserve calls */
-#define WT_STAT_DSRC_CURSOR_RESERVE 2135
+#define WT_STAT_DSRC_CURSOR_RESERVE 2150
/*! cursor: reset calls */
-#define WT_STAT_DSRC_CURSOR_RESET 2136
+#define WT_STAT_DSRC_CURSOR_RESET 2151
/*! cursor: search calls */
-#define WT_STAT_DSRC_CURSOR_SEARCH 2137
+#define WT_STAT_DSRC_CURSOR_SEARCH 2152
/*! cursor: search history store calls */
-#define WT_STAT_DSRC_CURSOR_SEARCH_HS 2138
+#define WT_STAT_DSRC_CURSOR_SEARCH_HS 2153
/*! cursor: search near calls */
-#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2139
+#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2154
/*! cursor: truncate calls */
-#define WT_STAT_DSRC_CURSOR_TRUNCATE 2140
+#define WT_STAT_DSRC_CURSOR_TRUNCATE 2155
/*! cursor: update calls */
-#define WT_STAT_DSRC_CURSOR_UPDATE 2141
+#define WT_STAT_DSRC_CURSOR_UPDATE 2156
/*! cursor: update key and value bytes */
-#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2142
+#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2157
/*! cursor: update value size change */
-#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES_CHANGED 2143
+#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES_CHANGED 2158
/*! reconciliation: approximate byte size of timestamps in pages written */
-#define WT_STAT_DSRC_REC_TIME_WINDOW_BYTES_TS 2144
+#define WT_STAT_DSRC_REC_TIME_WINDOW_BYTES_TS 2159
/*!
* reconciliation: approximate byte size of transaction IDs in pages
* written
*/
-#define WT_STAT_DSRC_REC_TIME_WINDOW_BYTES_TXN 2145
+#define WT_STAT_DSRC_REC_TIME_WINDOW_BYTES_TXN 2160
/*! reconciliation: dictionary matches */
-#define WT_STAT_DSRC_REC_DICTIONARY 2146
+#define WT_STAT_DSRC_REC_DICTIONARY 2161
/*! reconciliation: fast-path pages deleted */
-#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2147
+#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2162
/*!
* reconciliation: internal page key bytes discarded using suffix
* compression
*/
-#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2148
+#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2163
/*! reconciliation: internal page multi-block writes */
-#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2149
+#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2164
/*! reconciliation: internal-page overflow keys */
-#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2150
+#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2165
/*! reconciliation: leaf page key bytes discarded using prefix compression */
-#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2151
+#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2166
/*! reconciliation: leaf page multi-block writes */
-#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2152
+#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2167
/*! reconciliation: leaf-page overflow keys */
-#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2153
+#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2168
/*! reconciliation: maximum blocks required for a page */
-#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2154
+#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2169
/*! reconciliation: overflow values written */
-#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2155
+#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2170
/*! reconciliation: page checksum matches */
-#define WT_STAT_DSRC_REC_PAGE_MATCH 2156
+#define WT_STAT_DSRC_REC_PAGE_MATCH 2171
/*! reconciliation: page reconciliation calls */
-#define WT_STAT_DSRC_REC_PAGES 2157
+#define WT_STAT_DSRC_REC_PAGES 2172
/*! reconciliation: page reconciliation calls for eviction */
-#define WT_STAT_DSRC_REC_PAGES_EVICTION 2158
+#define WT_STAT_DSRC_REC_PAGES_EVICTION 2173
/*! reconciliation: pages deleted */
-#define WT_STAT_DSRC_REC_PAGE_DELETE 2159
+#define WT_STAT_DSRC_REC_PAGE_DELETE 2174
/*!
* reconciliation: pages written including an aggregated newest start
* durable timestamp
*/
-#define WT_STAT_DSRC_REC_TIME_AGGR_NEWEST_START_DURABLE_TS 2160
+#define WT_STAT_DSRC_REC_TIME_AGGR_NEWEST_START_DURABLE_TS 2175
/*!
* reconciliation: pages written including an aggregated newest stop
* durable timestamp
*/
-#define WT_STAT_DSRC_REC_TIME_AGGR_NEWEST_STOP_DURABLE_TS 2161
+#define WT_STAT_DSRC_REC_TIME_AGGR_NEWEST_STOP_DURABLE_TS 2176
/*!
* reconciliation: pages written including an aggregated newest stop
* timestamp
*/
-#define WT_STAT_DSRC_REC_TIME_AGGR_NEWEST_STOP_TS 2162
+#define WT_STAT_DSRC_REC_TIME_AGGR_NEWEST_STOP_TS 2177
/*!
* reconciliation: pages written including an aggregated newest stop
* transaction ID
*/
-#define WT_STAT_DSRC_REC_TIME_AGGR_NEWEST_STOP_TXN 2163
+#define WT_STAT_DSRC_REC_TIME_AGGR_NEWEST_STOP_TXN 2178
/*!
* reconciliation: pages written including an aggregated newest
* transaction ID
*/
-#define WT_STAT_DSRC_REC_TIME_AGGR_NEWEST_TXN 2164
+#define WT_STAT_DSRC_REC_TIME_AGGR_NEWEST_TXN 2179
/*!
* reconciliation: pages written including an aggregated oldest start
* timestamp
*/
-#define WT_STAT_DSRC_REC_TIME_AGGR_OLDEST_START_TS 2165
+#define WT_STAT_DSRC_REC_TIME_AGGR_OLDEST_START_TS 2180
/*! reconciliation: pages written including an aggregated prepare */
-#define WT_STAT_DSRC_REC_TIME_AGGR_PREPARED 2166
+#define WT_STAT_DSRC_REC_TIME_AGGR_PREPARED 2181
/*! reconciliation: pages written including at least one prepare */
-#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_PREPARED 2167
+#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_PREPARED 2182
/*!
* reconciliation: pages written including at least one start durable
* timestamp
*/
-#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_DURABLE_START_TS 2168
+#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_DURABLE_START_TS 2183
/*! reconciliation: pages written including at least one start timestamp */
-#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_START_TS 2169
+#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_START_TS 2184
/*!
* reconciliation: pages written including at least one start transaction
* ID
*/
-#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_START_TXN 2170
+#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_START_TXN 2185
/*!
* reconciliation: pages written including at least one stop durable
* timestamp
*/
-#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_DURABLE_STOP_TS 2171
+#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_DURABLE_STOP_TS 2186
/*! reconciliation: pages written including at least one stop timestamp */
-#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_STOP_TS 2172
+#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_STOP_TS 2187
/*!
* reconciliation: pages written including at least one stop transaction
* ID
*/
-#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_STOP_TXN 2173
+#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_STOP_TXN 2188
/*! reconciliation: records written including a prepare */
-#define WT_STAT_DSRC_REC_TIME_WINDOW_PREPARED 2174
+#define WT_STAT_DSRC_REC_TIME_WINDOW_PREPARED 2189
/*! reconciliation: records written including a start durable timestamp */
-#define WT_STAT_DSRC_REC_TIME_WINDOW_DURABLE_START_TS 2175
+#define WT_STAT_DSRC_REC_TIME_WINDOW_DURABLE_START_TS 2190
/*! reconciliation: records written including a start timestamp */
-#define WT_STAT_DSRC_REC_TIME_WINDOW_START_TS 2176
+#define WT_STAT_DSRC_REC_TIME_WINDOW_START_TS 2191
/*! reconciliation: records written including a start transaction ID */
-#define WT_STAT_DSRC_REC_TIME_WINDOW_START_TXN 2177
+#define WT_STAT_DSRC_REC_TIME_WINDOW_START_TXN 2192
/*! reconciliation: records written including a stop durable timestamp */
-#define WT_STAT_DSRC_REC_TIME_WINDOW_DURABLE_STOP_TS 2178
+#define WT_STAT_DSRC_REC_TIME_WINDOW_DURABLE_STOP_TS 2193
/*! reconciliation: records written including a stop timestamp */
-#define WT_STAT_DSRC_REC_TIME_WINDOW_STOP_TS 2179
+#define WT_STAT_DSRC_REC_TIME_WINDOW_STOP_TS 2194
/*! reconciliation: records written including a stop transaction ID */
-#define WT_STAT_DSRC_REC_TIME_WINDOW_STOP_TXN 2180
+#define WT_STAT_DSRC_REC_TIME_WINDOW_STOP_TXN 2195
/*! session: object compaction */
-#define WT_STAT_DSRC_SESSION_COMPACT 2181
+#define WT_STAT_DSRC_SESSION_COMPACT 2196
/*! transaction: race to read prepared update retry */
-#define WT_STAT_DSRC_TXN_READ_RACE_PREPARE_UPDATE 2182
+#define WT_STAT_DSRC_TXN_READ_RACE_PREPARE_UPDATE 2197
+/*!
+ * transaction: rollback to stable hs records with stop timestamps older
+ * than newer records
+ */
+#define WT_STAT_DSRC_TXN_RTS_HS_STOP_OLDER_THAN_NEWER_START 2198
+/*! transaction: rollback to stable keys removed */
+#define WT_STAT_DSRC_TXN_RTS_KEYS_REMOVED 2199
+/*! transaction: rollback to stable keys restored */
+#define WT_STAT_DSRC_TXN_RTS_KEYS_RESTORED 2200
+/*! transaction: rollback to stable restored tombstones from history store */
+#define WT_STAT_DSRC_TXN_RTS_HS_RESTORE_TOMBSTONES 2201
+/*! transaction: rollback to stable sweeping history store keys */
+#define WT_STAT_DSRC_TXN_RTS_SWEEP_HS_KEYS 2202
+/*! transaction: rollback to stable updates removed from history store */
+#define WT_STAT_DSRC_TXN_RTS_HS_REMOVED 2203
/*! transaction: update conflicts */
-#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2183
+#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2204
/*!
* @}
diff --git a/src/third_party/wiredtiger/src/include/wt_internal.h b/src/third_party/wiredtiger/src/include/wt_internal.h
index b0e0b4c4585..a64fb5acdfe 100644
--- a/src/third_party/wiredtiger/src/include/wt_internal.h
+++ b/src/third_party/wiredtiger/src/include/wt_internal.h
@@ -141,6 +141,8 @@ struct __wt_cursor_data_source;
typedef struct __wt_cursor_data_source WT_CURSOR_DATA_SOURCE;
struct __wt_cursor_dump;
typedef struct __wt_cursor_dump WT_CURSOR_DUMP;
+struct __wt_cursor_hs;
+typedef struct __wt_cursor_hs WT_CURSOR_HS;
struct __wt_cursor_index;
typedef struct __wt_cursor_index WT_CURSOR_INDEX;
struct __wt_cursor_join;
diff --git a/src/third_party/wiredtiger/src/meta/meta_ckpt.c b/src/third_party/wiredtiger/src/meta/meta_ckpt.c
index aa9191bdc6a..150880625a6 100644
--- a/src/third_party/wiredtiger/src/meta/meta_ckpt.c
+++ b/src/third_party/wiredtiger/src/meta/meta_ckpt.c
@@ -501,7 +501,7 @@ __wt_meta_ckptlist_get(
config = NULL;
WT_ERR(__wt_metadata_search(session, fname, &config));
- WT_ERR(__wt_meta_ckptlist_get_with_config(session, update, ckptbasep, config));
+ WT_ERR(__wt_meta_ckptlist_get_from_config(session, update, ckptbasep, config));
err:
__wt_free(session, config);
@@ -509,11 +509,11 @@ err:
}
/*
- * __wt_meta_ckptlist_get_with_config --
+ * __wt_meta_ckptlist_get_from_config --
* Provided a metadata config, load all available checkpoint information for a file.
*/
int
-__wt_meta_ckptlist_get_with_config(
+__wt_meta_ckptlist_get_from_config(
WT_SESSION_IMPL *session, bool update, WT_CKPT **ckptbasep, const char *config)
{
WT_CKPT *ckpt, *ckptbase;
@@ -697,6 +697,16 @@ __ckpt_load(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *k, WT_CONFIG_ITEM *v, WT_C
goto format;
ckpt->write_gen = (uint64_t)a.val;
+ /*
+ * If runtime write generation isn't supplied, this means that we're doing an upgrade and that
+ * we're opening the tree for the first time. We should just leave it as 0 so it is recognized
+ * as part of a previous run.
+ */
+ ret = __wt_config_subgets(session, v, "run_write_gen", &a);
+ WT_RET_NOTFOUND_OK(ret);
+ if (ret != WT_NOTFOUND && a.len != 0)
+ ckpt->run_write_gen = (uint64_t)a.val;
+
return (0);
format:
@@ -789,12 +799,13 @@ __wt_meta_ckptlist_to_meta(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, WT_ITEM
"=(addr=\"%.*s\",order=%" PRId64 ",time=%" PRIu64 ",size=%" PRId64
",newest_start_durable_ts=%" PRId64 ",oldest_start_ts=%" PRId64 ",newest_txn=%" PRId64
",newest_stop_durable_ts=%" PRId64 ",newest_stop_ts=%" PRId64 ",newest_stop_txn=%" PRId64
- ",prepare=%d,write_gen=%" PRId64 ")",
+ ",prepare=%d,write_gen=%" PRId64 ",run_write_gen=%" PRId64 ")",
(int)ckpt->addr.size, (char *)ckpt->addr.data, ckpt->order, ckpt->sec,
(int64_t)ckpt->size, (int64_t)ckpt->ta.newest_start_durable_ts,
(int64_t)ckpt->ta.oldest_start_ts, (int64_t)ckpt->ta.newest_txn,
(int64_t)ckpt->ta.newest_stop_durable_ts, (int64_t)ckpt->ta.newest_stop_ts,
- (int64_t)ckpt->ta.newest_stop_txn, (int)ckpt->ta.prepare, (int64_t)ckpt->write_gen));
+ (int64_t)ckpt->ta.newest_stop_txn, (int)ckpt->ta.prepare, (int64_t)ckpt->write_gen,
+ (int64_t)ckpt->run_write_gen));
}
WT_RET(__wt_buf_catfmt(session, buf, ")"));
@@ -860,6 +871,44 @@ __wt_ckpt_blkmod_to_meta(WT_SESSION_IMPL *session, WT_ITEM *buf, WT_CKPT *ckpt)
}
/*
+ * __wt_meta_ckptlist_update_config --
+ * Provided a metadata config and list of checkpoints, set a file's checkpoint value.
+ */
+int
+__wt_meta_ckptlist_update_config(
+ WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *oldcfg, char **newcfgp)
+{
+ WT_CKPT *ckpt;
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
+ char *newcfg;
+ const char *cfg[3];
+
+ newcfg = NULL;
+ WT_RET(__wt_scr_alloc(session, 1024, &buf));
+ WT_ERR(__wt_meta_ckptlist_to_meta(session, ckptbase, buf));
+
+ /* Add backup block modifications for any added checkpoint. */
+ WT_CKPT_FOREACH (ckptbase, ckpt)
+ if (F_ISSET(ckpt, WT_CKPT_ADD))
+ WT_ERR(__wt_ckpt_blkmod_to_meta(session, buf, ckpt));
+
+ /* Replace the checkpoint entry. */
+ cfg[0] = oldcfg;
+ cfg[1] = buf->mem;
+ cfg[2] = NULL;
+ WT_ERR(__wt_config_collapse(session, cfg, &newcfg));
+
+ *newcfgp = newcfg;
+
+err:
+ if (ret != 0)
+ __wt_free(session, newcfg);
+ __wt_scr_free(session, &buf);
+ return (ret);
+}
+
+/*
* __wt_meta_ckptlist_set --
* Set a file's checkpoint value from the WT_CKPT list.
*/
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_row.c b/src/third_party/wiredtiger/src/reconcile/rec_row.c
index cac21e793bc..9f6c0449059 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_row.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_row.c
@@ -156,11 +156,6 @@ __rec_cell_build_leaf_key(
WT_RET(__wt_buf_set(session, &key->buf, (uint8_t *)data + pfx, size - pfx));
}
- /* Optionally compress the key using the Huffman engine. */
- if (btree->huffman_key != NULL)
- WT_RET(__wt_huffman_encode(
- session, btree->huffman_key, key->buf.data, (uint32_t)key->buf.size, &key->buf));
-
/* Create an overflow object if the data won't fit. */
if (key->buf.size > btree->maxleafkey) {
/*
@@ -923,6 +918,7 @@ __wt_rec_row_leaf(
session, btree->id, tmpkey, WT_TS_NONE, false));
WT_ERR(__wt_hs_cursor_close(session));
WT_STAT_CONN_INCR(session, cache_hs_key_truncate_onpage_removal);
+ WT_STAT_DATA_INCR(session, cache_hs_key_truncate_onpage_removal);
}
}
@@ -971,8 +967,7 @@ __wt_rec_row_leaf(
kpack = &_kpack;
__wt_cell_unpack_kv(session, page->dsk, cell, kpack);
- if (btree->huffman_key == NULL && kpack->type == WT_CELL_KEY &&
- tmpkey->size >= kpack->prefix && tmpkey->size != 0) {
+ if (kpack->type == WT_CELL_KEY && tmpkey->size >= kpack->prefix && tmpkey->size != 0) {
/*
* Grow the buffer as necessary, ensuring data data has been copied into local
* buffer space, then append the suffix to the prefix already in the buffer.
diff --git a/src/third_party/wiredtiger/src/schema/schema_create.c b/src/third_party/wiredtiger/src/schema/schema_create.c
index 84af4dffa0a..f49bea7250e 100644
--- a/src/third_party/wiredtiger/src/schema/schema_create.c
+++ b/src/third_party/wiredtiger/src/schema/schema_create.c
@@ -62,7 +62,7 @@ __check_imported_ts(WT_SESSION_IMPL *session, const char *uri, const char *confi
ckptbase = NULL;
txn_global = &S2C(session)->txn_global;
- WT_ERR_NOTFOUND_OK(__wt_meta_ckptlist_get_with_config(session, false, &ckptbase, config), true);
+ WT_ERR_NOTFOUND_OK(__wt_meta_ckptlist_get_from_config(session, false, &ckptbase, config), true);
if (ret == WT_NOTFOUND)
WT_ERR_MSG(session, EINVAL,
"%s: import could not find any checkpoint information in supplied metadata", uri);
@@ -218,18 +218,18 @@ __create_file(
;
*p = val->data;
WT_ERR(__wt_config_collapse(session, filecfg, &fileconf));
- WT_ERR(__wt_metadata_insert(session, uri, fileconf));
} else {
- /* Read the data file's descriptor block and try to recreate the associated metadata. */
+ /* Try to recreate the associated metadata from the imported data source. */
WT_ERR(__wt_import_repair(session, uri, &fileconf));
}
+ WT_ERR(__wt_metadata_insert(session, uri, fileconf));
/*
* Ensure that the timestamps in the imported data file are not in the future relative to
* our oldest timestamp.
*/
if (import)
- WT_ERR(__check_imported_ts(session, filename, fileconf));
+ WT_ERR(__check_imported_ts(session, uri, fileconf));
}
/*
@@ -663,16 +663,18 @@ __create_table(
WT_CONFIG_ITEM cgkey, cgval, ckey, cval;
WT_DECL_RET;
WT_TABLE *table;
- size_t cgsize;
+ size_t len;
int ncolgroups, nkeys;
- char *tableconf, *cgname;
+ char *cgcfg, *cgname, *filecfg, *filename, *importcfg, *tablecfg;
const char *cfg[4] = {WT_CONFIG_BASE(session, table_meta), config, NULL, NULL};
const char *tablename;
bool import_repair;
- cgname = NULL;
+ import_repair = false;
+
+ cgcfg = filecfg = importcfg = tablecfg = NULL;
+ cgname = filename = NULL;
table = NULL;
- tableconf = NULL;
WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE));
@@ -680,7 +682,7 @@ __create_table(
WT_PREFIX_SKIP_REQUIRED(session, tablename, "table:");
/* Check if the table already exists. */
- if ((ret = __wt_metadata_search(session, uri, &tableconf)) != WT_NOTFOUND) {
+ if ((ret = __wt_metadata_search(session, uri, &tablecfg)) != WT_NOTFOUND) {
/*
* Regardless of the 'exclusive' flag, we should raise an error if we try to import an
* existing URI rather than just silently returning.
@@ -707,6 +709,13 @@ __create_table(
"'repair' option is provided",
uri);
WT_ERR_NOTFOUND_OK(ret, false);
+ } else {
+ /* Try to recreate the associated metadata from the imported data source. */
+ len = strlen("file:") + strlen(tablename) + strlen(".wt") + 1;
+ WT_ERR(__wt_calloc_def(session, len, &filename));
+ WT_ERR(__wt_snprintf(filename, len, "file:%s.wt", tablename));
+ WT_ERR(__wt_import_repair(session, filename, &filecfg));
+ cfg[2] = filecfg;
}
}
@@ -716,14 +725,24 @@ __create_table(
;
WT_ERR_NOTFOUND_OK(ret, false);
- WT_ERR(__wt_config_collapse(session, cfg, &tableconf));
- WT_ERR(__wt_metadata_insert(session, uri, tableconf));
+ WT_ERR(__wt_config_collapse(session, cfg, &tablecfg));
+ WT_ERR(__wt_metadata_insert(session, uri, tablecfg));
if (ncolgroups == 0) {
- cgsize = strlen("colgroup:") + strlen(tablename) + 1;
- WT_ERR(__wt_calloc_def(session, cgsize, &cgname));
- WT_ERR(__wt_snprintf(cgname, cgsize, "colgroup:%s", tablename));
- WT_ERR(__create_colgroup(session, cgname, exclusive, config));
+ len = strlen("colgroup:") + strlen(tablename) + 1;
+ WT_ERR(__wt_calloc_def(session, len, &cgname));
+ WT_ERR(__wt_snprintf(cgname, len, "colgroup:%s", tablename));
+ if (import_repair) {
+ len =
+ strlen(tablecfg) + strlen(",import=(enabled,file_metadata=())") + strlen(filecfg) + 1;
+ WT_ERR(__wt_calloc_def(session, len, &importcfg));
+ WT_ERR(__wt_snprintf(
+ importcfg, len, "%s,import=(enabled,file_metadata=(%s))", tablecfg, filecfg));
+ cfg[2] = importcfg;
+ WT_ERR(__wt_config_collapse(session, &cfg[1], &cgcfg));
+ WT_ERR(__create_colgroup(session, cgname, exclusive, cgcfg));
+ } else
+ WT_ERR(__create_colgroup(session, cgname, exclusive, config));
}
/*
@@ -739,8 +758,12 @@ __create_table(
err:
WT_TRET(__wt_schema_release_table(session, &table));
+ __wt_free(session, cgcfg);
__wt_free(session, cgname);
- __wt_free(session, tableconf);
+ __wt_free(session, filecfg);
+ __wt_free(session, filename);
+ __wt_free(session, importcfg);
+ __wt_free(session, tablecfg);
return (ret);
}
@@ -798,6 +821,8 @@ __schema_create(WT_SESSION_IMPL *session, const char *uri, const char *config)
* back it all out.
*/
WT_RET(__wt_meta_track_on(session));
+ if (import)
+ F_SET(session, WT_SESSION_IMPORT);
if (WT_PREFIX_MATCH(uri, "colgroup:"))
ret = __create_colgroup(session, uri, exclusive, config);
@@ -816,6 +841,7 @@ __schema_create(WT_SESSION_IMPL *session, const char *uri, const char *config)
ret = __wt_bad_object_type(session, uri);
session->dhandle = NULL;
+ F_CLR(session, WT_SESSION_IMPORT);
WT_TRET(__wt_meta_track_off(session, true, ret != 0));
return (ret);
diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c
index 340bddba73c..5700b72fd44 100644
--- a/src/third_party/wiredtiger/src/support/stat.c
+++ b/src/third_party/wiredtiger/src/support/stat.c
@@ -66,7 +66,22 @@ static const char *const __stats_dsrc_desc[] = {
"cache: eviction walks started from root of tree",
"cache: eviction walks started from saved location in tree",
"cache: hazard pointer blocked page eviction",
+ "cache: history store table insert calls",
+ "cache: history store table insert calls that returned restart",
+ "cache: history store table out-of-order resolved updates that lose their durable timestamp",
+ "cache: history store table out-of-order updates that were fixed up by moving existing records",
+ "cache: history store table out-of-order updates that were fixed up during insertion",
"cache: history store table reads",
+ "cache: history store table reads missed",
+ "cache: history store table reads requiring squashed modifies",
+ "cache: history store table truncation by rollback to stable to remove an unstable update",
+ "cache: history store table truncation by rollback to stable to remove an update",
+ "cache: history store table truncation to remove an update",
+ "cache: history store table truncation to remove range of updates due to key being removed from "
+ "the data page during reconciliation",
+ "cache: history store table truncation to remove range of updates due to non timestamped update "
+ "on data page",
+ "cache: history store table writes requiring squashed modifies",
"cache: in-memory page passed criteria to be split",
"cache: in-memory page splits",
"cache: internal pages evicted",
@@ -125,8 +140,10 @@ static const char *const __stats_dsrc_desc[] = {
"cursor: cache cursors reuse count",
"cursor: close calls that result in cache",
"cursor: create calls",
+ "cursor: cursor next calls that skip due to a globally visible history store tombstone",
"cursor: cursor next calls that skip greater than or equal to 100 entries",
"cursor: cursor next calls that skip less than 100 entries",
+ "cursor: cursor prev calls that skip due to a globally visible history store tombstone",
"cursor: cursor prev calls that skip greater than or equal to 100 entries",
"cursor: cursor prev calls that skip less than 100 entries",
"cursor: insert calls",
@@ -188,6 +205,12 @@ static const char *const __stats_dsrc_desc[] = {
"reconciliation: records written including a stop transaction ID",
"session: object compaction",
"transaction: race to read prepared update retry",
+ "transaction: rollback to stable hs records with stop timestamps older than newer records",
+ "transaction: rollback to stable keys removed",
+ "transaction: rollback to stable keys restored",
+ "transaction: rollback to stable restored tombstones from history store",
+ "transaction: rollback to stable sweeping history store keys",
+ "transaction: rollback to stable updates removed from history store",
"transaction: update conflicts",
};
@@ -292,7 +315,20 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats)
stats->cache_eviction_walk_from_root = 0;
stats->cache_eviction_walk_saved_pos = 0;
stats->cache_eviction_hazard = 0;
+ stats->cache_hs_insert = 0;
+ stats->cache_hs_insert_restart = 0;
+ stats->cache_hs_order_lose_durable_timestamp = 0;
+ stats->cache_hs_order_fixup_move = 0;
+ stats->cache_hs_order_fixup_insert = 0;
stats->cache_hs_read = 0;
+ stats->cache_hs_read_miss = 0;
+ stats->cache_hs_read_squash = 0;
+ stats->cache_hs_key_truncate_rts_unstable = 0;
+ stats->cache_hs_key_truncate_rts = 0;
+ stats->cache_hs_key_truncate = 0;
+ stats->cache_hs_key_truncate_onpage_removal = 0;
+ stats->cache_hs_key_truncate_non_ts = 0;
+ stats->cache_hs_write_squash = 0;
stats->cache_inmem_splittable = 0;
stats->cache_inmem_split = 0;
stats->cache_eviction_internal = 0;
@@ -349,8 +385,10 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats)
stats->cursor_reopen = 0;
stats->cursor_cache = 0;
stats->cursor_create = 0;
+ stats->cursor_next_hs_tombstone = 0;
stats->cursor_next_skip_ge_100 = 0;
stats->cursor_next_skip_lt_100 = 0;
+ stats->cursor_prev_hs_tombstone = 0;
stats->cursor_prev_skip_ge_100 = 0;
stats->cursor_prev_skip_lt_100 = 0;
stats->cursor_insert = 0;
@@ -412,6 +450,12 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats)
stats->rec_time_window_stop_txn = 0;
stats->session_compact = 0;
stats->txn_read_race_prepare_update = 0;
+ stats->txn_rts_hs_stop_older_than_newer_start = 0;
+ stats->txn_rts_keys_removed = 0;
+ stats->txn_rts_keys_restored = 0;
+ stats->txn_rts_hs_restore_tombstones = 0;
+ stats->txn_rts_sweep_hs_keys = 0;
+ stats->txn_rts_hs_removed = 0;
stats->txn_update_conflict = 0;
}
@@ -502,7 +546,20 @@ __wt_stat_dsrc_aggregate_single(WT_DSRC_STATS *from, WT_DSRC_STATS *to)
to->cache_eviction_walk_from_root += from->cache_eviction_walk_from_root;
to->cache_eviction_walk_saved_pos += from->cache_eviction_walk_saved_pos;
to->cache_eviction_hazard += from->cache_eviction_hazard;
+ to->cache_hs_insert += from->cache_hs_insert;
+ to->cache_hs_insert_restart += from->cache_hs_insert_restart;
+ to->cache_hs_order_lose_durable_timestamp += from->cache_hs_order_lose_durable_timestamp;
+ to->cache_hs_order_fixup_move += from->cache_hs_order_fixup_move;
+ to->cache_hs_order_fixup_insert += from->cache_hs_order_fixup_insert;
to->cache_hs_read += from->cache_hs_read;
+ to->cache_hs_read_miss += from->cache_hs_read_miss;
+ to->cache_hs_read_squash += from->cache_hs_read_squash;
+ to->cache_hs_key_truncate_rts_unstable += from->cache_hs_key_truncate_rts_unstable;
+ to->cache_hs_key_truncate_rts += from->cache_hs_key_truncate_rts;
+ to->cache_hs_key_truncate += from->cache_hs_key_truncate;
+ to->cache_hs_key_truncate_onpage_removal += from->cache_hs_key_truncate_onpage_removal;
+ to->cache_hs_key_truncate_non_ts += from->cache_hs_key_truncate_non_ts;
+ to->cache_hs_write_squash += from->cache_hs_write_squash;
to->cache_inmem_splittable += from->cache_inmem_splittable;
to->cache_inmem_split += from->cache_inmem_split;
to->cache_eviction_internal += from->cache_eviction_internal;
@@ -559,8 +616,10 @@ __wt_stat_dsrc_aggregate_single(WT_DSRC_STATS *from, WT_DSRC_STATS *to)
to->cursor_reopen += from->cursor_reopen;
to->cursor_cache += from->cursor_cache;
to->cursor_create += from->cursor_create;
+ to->cursor_next_hs_tombstone += from->cursor_next_hs_tombstone;
to->cursor_next_skip_ge_100 += from->cursor_next_skip_ge_100;
to->cursor_next_skip_lt_100 += from->cursor_next_skip_lt_100;
+ to->cursor_prev_hs_tombstone += from->cursor_prev_hs_tombstone;
to->cursor_prev_skip_ge_100 += from->cursor_prev_skip_ge_100;
to->cursor_prev_skip_lt_100 += from->cursor_prev_skip_lt_100;
to->cursor_insert += from->cursor_insert;
@@ -623,6 +682,12 @@ __wt_stat_dsrc_aggregate_single(WT_DSRC_STATS *from, WT_DSRC_STATS *to)
to->rec_time_window_stop_txn += from->rec_time_window_stop_txn;
to->session_compact += from->session_compact;
to->txn_read_race_prepare_update += from->txn_read_race_prepare_update;
+ to->txn_rts_hs_stop_older_than_newer_start += from->txn_rts_hs_stop_older_than_newer_start;
+ to->txn_rts_keys_removed += from->txn_rts_keys_removed;
+ to->txn_rts_keys_restored += from->txn_rts_keys_restored;
+ to->txn_rts_hs_restore_tombstones += from->txn_rts_hs_restore_tombstones;
+ to->txn_rts_sweep_hs_keys += from->txn_rts_sweep_hs_keys;
+ to->txn_rts_hs_removed += from->txn_rts_hs_removed;
to->txn_update_conflict += from->txn_update_conflict;
}
@@ -708,7 +773,23 @@ __wt_stat_dsrc_aggregate(WT_DSRC_STATS **from, WT_DSRC_STATS *to)
to->cache_eviction_walk_from_root += WT_STAT_READ(from, cache_eviction_walk_from_root);
to->cache_eviction_walk_saved_pos += WT_STAT_READ(from, cache_eviction_walk_saved_pos);
to->cache_eviction_hazard += WT_STAT_READ(from, cache_eviction_hazard);
+ to->cache_hs_insert += WT_STAT_READ(from, cache_hs_insert);
+ to->cache_hs_insert_restart += WT_STAT_READ(from, cache_hs_insert_restart);
+ to->cache_hs_order_lose_durable_timestamp +=
+ WT_STAT_READ(from, cache_hs_order_lose_durable_timestamp);
+ to->cache_hs_order_fixup_move += WT_STAT_READ(from, cache_hs_order_fixup_move);
+ to->cache_hs_order_fixup_insert += WT_STAT_READ(from, cache_hs_order_fixup_insert);
to->cache_hs_read += WT_STAT_READ(from, cache_hs_read);
+ to->cache_hs_read_miss += WT_STAT_READ(from, cache_hs_read_miss);
+ to->cache_hs_read_squash += WT_STAT_READ(from, cache_hs_read_squash);
+ to->cache_hs_key_truncate_rts_unstable +=
+ WT_STAT_READ(from, cache_hs_key_truncate_rts_unstable);
+ to->cache_hs_key_truncate_rts += WT_STAT_READ(from, cache_hs_key_truncate_rts);
+ to->cache_hs_key_truncate += WT_STAT_READ(from, cache_hs_key_truncate);
+ to->cache_hs_key_truncate_onpage_removal +=
+ WT_STAT_READ(from, cache_hs_key_truncate_onpage_removal);
+ to->cache_hs_key_truncate_non_ts += WT_STAT_READ(from, cache_hs_key_truncate_non_ts);
+ to->cache_hs_write_squash += WT_STAT_READ(from, cache_hs_write_squash);
to->cache_inmem_splittable += WT_STAT_READ(from, cache_inmem_splittable);
to->cache_inmem_split += WT_STAT_READ(from, cache_inmem_split);
to->cache_eviction_internal += WT_STAT_READ(from, cache_eviction_internal);
@@ -767,8 +848,10 @@ __wt_stat_dsrc_aggregate(WT_DSRC_STATS **from, WT_DSRC_STATS *to)
to->cursor_reopen += WT_STAT_READ(from, cursor_reopen);
to->cursor_cache += WT_STAT_READ(from, cursor_cache);
to->cursor_create += WT_STAT_READ(from, cursor_create);
+ to->cursor_next_hs_tombstone += WT_STAT_READ(from, cursor_next_hs_tombstone);
to->cursor_next_skip_ge_100 += WT_STAT_READ(from, cursor_next_skip_ge_100);
to->cursor_next_skip_lt_100 += WT_STAT_READ(from, cursor_next_skip_lt_100);
+ to->cursor_prev_hs_tombstone += WT_STAT_READ(from, cursor_prev_hs_tombstone);
to->cursor_prev_skip_ge_100 += WT_STAT_READ(from, cursor_prev_skip_ge_100);
to->cursor_prev_skip_lt_100 += WT_STAT_READ(from, cursor_prev_skip_lt_100);
to->cursor_insert += WT_STAT_READ(from, cursor_insert);
@@ -835,6 +918,13 @@ __wt_stat_dsrc_aggregate(WT_DSRC_STATS **from, WT_DSRC_STATS *to)
to->rec_time_window_stop_txn += WT_STAT_READ(from, rec_time_window_stop_txn);
to->session_compact += WT_STAT_READ(from, session_compact);
to->txn_read_race_prepare_update += WT_STAT_READ(from, txn_read_race_prepare_update);
+ to->txn_rts_hs_stop_older_than_newer_start +=
+ WT_STAT_READ(from, txn_rts_hs_stop_older_than_newer_start);
+ to->txn_rts_keys_removed += WT_STAT_READ(from, txn_rts_keys_removed);
+ to->txn_rts_keys_restored += WT_STAT_READ(from, txn_rts_keys_restored);
+ to->txn_rts_hs_restore_tombstones += WT_STAT_READ(from, txn_rts_hs_restore_tombstones);
+ to->txn_rts_sweep_hs_keys += WT_STAT_READ(from, txn_rts_sweep_hs_keys);
+ to->txn_rts_hs_removed += WT_STAT_READ(from, txn_rts_hs_removed);
to->txn_update_conflict += WT_STAT_READ(from, txn_update_conflict);
}
@@ -951,7 +1041,8 @@ static const char *const __stats_connection_desc[] = {
"cache: history store table truncation to remove an update",
"cache: history store table truncation to remove range of updates due to key being removed from "
"the data page during reconciliation",
- "cache: history store table truncation to remove range of updates due to mixed timestamps",
+ "cache: history store table truncation to remove range of updates due to non timestamped update "
+ "on data page",
"cache: history store table writes requiring squashed modifies",
"cache: in-memory page passed criteria to be split",
"cache: in-memory page splits",
@@ -1467,7 +1558,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->cache_hs_key_truncate_rts = 0;
stats->cache_hs_key_truncate = 0;
stats->cache_hs_key_truncate_onpage_removal = 0;
- stats->cache_hs_key_truncate_mix_ts = 0;
+ stats->cache_hs_key_truncate_non_ts = 0;
stats->cache_hs_write_squash = 0;
stats->cache_inmem_splittable = 0;
stats->cache_inmem_split = 0;
@@ -1964,7 +2055,7 @@ __wt_stat_connection_aggregate(WT_CONNECTION_STATS **from, WT_CONNECTION_STATS *
to->cache_hs_key_truncate += WT_STAT_READ(from, cache_hs_key_truncate);
to->cache_hs_key_truncate_onpage_removal +=
WT_STAT_READ(from, cache_hs_key_truncate_onpage_removal);
- to->cache_hs_key_truncate_mix_ts += WT_STAT_READ(from, cache_hs_key_truncate_mix_ts);
+ to->cache_hs_key_truncate_non_ts += WT_STAT_READ(from, cache_hs_key_truncate_non_ts);
to->cache_hs_write_squash += WT_STAT_READ(from, cache_hs_write_squash);
to->cache_inmem_splittable += WT_STAT_READ(from, cache_inmem_splittable);
to->cache_inmem_split += WT_STAT_READ(from, cache_inmem_split);
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index ce0e736226c..de902b8d442 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -139,6 +139,49 @@ __wt_txn_release_snapshot(WT_SESSION_IMPL *session)
}
/*
+ * __wt_txn_active --
+ * Check if a transaction is still active. If not, it is either committed, prepared, or rolled
+ * back. It is possible that we race with commit, prepare or rollback and a transaction is still
+ * active before the start of the call is eventually reported as resolved.
+ */
+bool
+__wt_txn_active(WT_SESSION_IMPL *session, uint64_t txnid)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_TXN_GLOBAL *txn_global;
+ WT_TXN_SHARED *s;
+ uint64_t oldest_id;
+ uint32_t i, session_cnt;
+ bool active;
+
+ conn = S2C(session);
+ txn_global = &conn->txn_global;
+ active = true;
+
+ /* We're going to scan the table: wait for the lock. */
+ __wt_readlock(session, &txn_global->rwlock);
+ oldest_id = txn_global->oldest_id;
+
+ if (WT_TXNID_LT(txnid, oldest_id)) {
+ active = false;
+ goto done;
+ }
+
+ /* Walk the array of concurrent transactions. */
+ WT_ORDERED_READ(session_cnt, conn->session_cnt);
+ for (i = 0, s = txn_global->txn_shared_list; i < session_cnt; i++, s++) {
+ /* If the transaction is in the list, it is uncommitted. */
+ if (s->id == txnid)
+ goto done;
+ }
+
+ active = false;
+done:
+ __wt_readunlock(session, &txn_global->rwlock);
+ return (active);
+}
+
+/*
* __txn_get_snapshot_int --
* Allocate a snapshot, optionally update our shared txn ids.
*/
diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
index 54a13cedff0..8178e593f6b 100644
--- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c
+++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
@@ -1580,6 +1580,7 @@ __wt_checkpoint_tree_reconcile_update(WT_SESSION_IMPL *session, WT_TIME_AGGREGAT
WT_CKPT_FOREACH (ckptbase, ckpt)
if (F_ISSET(ckpt, WT_CKPT_ADD)) {
ckpt->write_gen = btree->write_gen;
+ ckpt->run_write_gen = btree->run_write_gen;
WT_TIME_AGGREGATE_COPY(&ckpt->ta, ta);
}
}
diff --git a/src/third_party/wiredtiger/src/txn/txn_recover.c b/src/third_party/wiredtiger/src/txn/txn_recover.c
index 0b5b7da92c0..6e042fc9b33 100644
--- a/src/third_party/wiredtiger/src/txn/txn_recover.c
+++ b/src/third_party/wiredtiger/src/txn/txn_recover.c
@@ -841,11 +841,19 @@ __wt_txn_recover(WT_SESSION_IMPL *session, const char *cfg[])
WT_ERR(ret);
}
- /* Check whether the history store exists. */
- WT_ERR(__hs_exists(session, metac, cfg, &hs_exists));
-
/* Scan the metadata to find the live files and their IDs. */
WT_ERR(__recovery_file_scan(&r));
+
+ /*
+ * Check whether the history store exists.
+ *
+ * This will open a dhandle on the history store and initialize its write gen so we must ensure
+ * that the connection-wide base write generation is stable at this point. Performing a recovery
+ * file scan will involve updating the connection-wide base write generation so we MUST do this
+ * before checking for the existence of a history store file.
+ */
+ WT_ERR(__hs_exists(session, metac, cfg, &hs_exists));
+
/*
* Clear this out. We no longer need it and it could have been re-allocated when scanning the
* files.
diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
index 75569b8b057..90ec9389deb 100644
--- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
+++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
@@ -267,8 +267,10 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
hs_stop_durable_ts <= newer_hs_durable_ts || hs_start_ts == hs_stop_durable_ts ||
first_record);
- if (hs_stop_durable_ts < newer_hs_durable_ts)
+ if (hs_stop_durable_ts < newer_hs_durable_ts) {
WT_STAT_CONN_INCR(session, txn_rts_hs_stop_older_than_newer_start);
+ WT_STAT_DATA_INCR(session, txn_rts_hs_stop_older_than_newer_start);
+ }
/*
* Stop processing when we find the newer version value of this key is stable according to
@@ -319,6 +321,8 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
WT_ERR(__wt_hs_modify(cbt, hs_upd));
WT_STAT_CONN_INCR(session, txn_rts_hs_removed);
WT_STAT_CONN_INCR(session, cache_hs_key_truncate_rts_unstable);
+ WT_STAT_DATA_INCR(session, txn_rts_hs_removed);
+ WT_STAT_DATA_INCR(session, cache_hs_key_truncate_rts_unstable);
}
if (replace) {
@@ -369,10 +373,12 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
tombstone->next = upd;
upd = tombstone;
WT_STAT_CONN_INCR(session, txn_rts_hs_restore_tombstones);
+ WT_STAT_DATA_INCR(session, txn_rts_hs_restore_tombstones);
}
} else {
WT_ERR(__wt_upd_alloc_tombstone(session, &upd, NULL));
WT_STAT_CONN_INCR(session, txn_rts_keys_removed);
+ WT_STAT_DATA_INCR(session, txn_rts_keys_removed);
__wt_verbose(session, WT_VERB_RTS, "%p: key removed", (void *)key);
}
@@ -385,6 +391,8 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
WT_ERR(__wt_hs_modify(cbt, hs_upd));
WT_STAT_CONN_INCR(session, txn_rts_hs_removed);
WT_STAT_CONN_INCR(session, cache_hs_key_truncate_rts);
+ WT_STAT_DATA_INCR(session, txn_rts_hs_removed);
+ WT_STAT_DATA_INCR(session, cache_hs_key_truncate_rts);
}
if (0) {
@@ -439,6 +447,7 @@ __rollback_abort_row_ondisk_kv(
__wt_timestamp_to_string(rollback_timestamp, ts_string[4]));
WT_RET(__wt_upd_alloc_tombstone(session, &upd, NULL));
WT_STAT_CONN_INCR(session, txn_rts_sweep_hs_keys);
+ WT_STAT_DATA_INCR(session, txn_rts_sweep_hs_keys);
} else
return (0);
} else if (vpack->tw.durable_start_ts > rollback_timestamp ||
@@ -458,6 +467,7 @@ __rollback_abort_row_ondisk_kv(
*/
WT_RET(__wt_upd_alloc_tombstone(session, &upd, NULL));
WT_STAT_CONN_INCR(session, txn_rts_keys_removed);
+ WT_STAT_DATA_INCR(session, txn_rts_keys_removed);
}
} else if (WT_TIME_WINDOW_HAS_STOP(&vpack->tw) &&
(vpack->tw.durable_stop_ts > rollback_timestamp || prepared)) {
@@ -473,6 +483,7 @@ __rollback_abort_row_ondisk_kv(
upd->start_ts = vpack->tw.start_ts;
F_SET(upd, WT_UPDATE_RESTORED_FROM_DS);
WT_STAT_CONN_INCR(session, txn_rts_keys_restored);
+ WT_STAT_DATA_INCR(session, txn_rts_keys_restored);
__wt_verbose(session, WT_VERB_RTS,
"key restored with commit timestamp: %s, durable timestamp: %s txnid: %" PRIu64
"and removed commit timestamp: %s, durable timestamp: %s, txnid: %" PRIu64
@@ -1055,6 +1066,8 @@ __rollback_to_stable_btree_hs_truncate(WT_SESSION_IMPL *session, uint32_t btree_
WT_ERR(__wt_hs_modify(cbt, hs_upd));
WT_STAT_CONN_INCR(session, txn_rts_hs_removed);
WT_STAT_CONN_INCR(session, cache_hs_key_truncate_rts);
+ WT_STAT_DATA_INCR(session, txn_rts_hs_removed);
+ WT_STAT_DATA_INCR(session, cache_hs_key_truncate_rts);
hs_upd = NULL;
}
WT_ERR_NOTFOUND_OK(ret, false);
diff --git a/src/third_party/wiredtiger/src/txn/txn_timestamp.c b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
index 7cb58074db2..03d5d06cff4 100644
--- a/src/third_party/wiredtiger/src/txn/txn_timestamp.c
+++ b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
@@ -617,11 +617,7 @@ __wt_txn_set_commit_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t commit_ts
__wt_timestamp_to_string(commit_ts, ts_string[0]),
__wt_timestamp_to_string(txn->first_commit_timestamp, ts_string[1]));
- /*
- * FIXME-WT-4780: Disabled to buy time to understand a test failure.
- * WT_RET(__txn_assert_after_reads(
- * session, "commit", commit_ts, NULL));
- */
+ WT_RET(__txn_assert_after_reads(session, "commit", commit_ts, NULL));
} else {
/*
* For a prepared transaction, the commit timestamp should not be less than the prepare
diff --git a/src/third_party/wiredtiger/test/csuite/incr_backup/main.c b/src/third_party/wiredtiger/test/csuite/incr_backup/main.c
index c9520f5621b..58cb40fee68 100644
--- a/src/third_party/wiredtiger/test/csuite/incr_backup/main.c
+++ b/src/third_party/wiredtiger/test/csuite/incr_backup/main.c
@@ -47,9 +47,13 @@
#define URI_MAX_LEN 32
#define URI_FORMAT "table:t%d-%d"
#define KEY_FORMAT "key-%d-%d"
+#define TABLE_FORMAT "key_format=S,value_format=u"
#define CONN_CONFIG_COMMON "timing_stress_for_test=[backup_rename]"
+#define NUM_ALLOC 5
+static const char *alloc_sizes[] = {"512B", "8K", "64K", "1M", "16M"};
+
static int verbose_level = 0;
static uint64_t seed = 0;
@@ -404,17 +408,31 @@ table_changes(WT_SESSION *session, TABLE *table)
* Create a table for the given slot.
*/
static void
-create_table(WT_SESSION *session, TABLE_INFO *tinfo, uint32_t slot)
+create_table(WT_SESSION *session, WT_RAND_STATE *rand, TABLE_INFO *tinfo, uint32_t slot)
{
- char *uri;
+ uint32_t alloc;
+ char buf[4096], *uri;
+ const char *allocstr;
testutil_assert(!TABLE_VALID(&tinfo->table[slot]));
uri = dcalloc(1, URI_MAX_LEN);
testutil_check(
__wt_snprintf(uri, URI_MAX_LEN, URI_FORMAT, (int)slot, (int)tinfo->table[slot].name_index++));
- VERBOSE(3, "create %s\n", uri);
- testutil_check(session->create(session, uri, "key_format=S,value_format=u"));
+ /*
+ * A quarter of the time use a non-default allocation size on the table. This is set
+ * independently of the granularity to stress mismatched values.
+ */
+ if (__wt_random(rand) % 4 == 0) {
+ alloc = __wt_random(rand) % NUM_ALLOC;
+ allocstr = alloc_sizes[alloc];
+ testutil_check(__wt_snprintf(buf, sizeof(buf),
+ "%s,allocation_size=%s,internal_page_max=%s,leaf_page_max=%s", TABLE_FORMAT, allocstr,
+ allocstr, allocstr));
+ } else
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "%s", TABLE_FORMAT));
+ VERBOSE(3, "create %s: %s\n", uri, buf);
+ testutil_check(session->create(session, uri, buf));
tinfo->table[slot].name = uri;
tinfo->tables_in_use++;
}
@@ -482,6 +500,7 @@ base_backup(WT_CONNECTION *conn, WT_RAND_STATE *rand, const char *home, const ch
char buf[4096];
char *filename;
char granularity_unit;
+ const char *cons;
nfiles = 0;
@@ -505,9 +524,13 @@ base_backup(WT_CONNECTION *conn, WT_RAND_STATE *rand, const char *home, const ch
granularity_unit = 'M';
granularity += 1;
}
+ if (__wt_random(rand) % 2 == 0)
+ cons = ",consolidate=true";
+ else
+ cons = ",consolidate=false";
testutil_check(__wt_snprintf(buf, sizeof(buf),
- "incremental=(granularity=%" PRIu32 "%c,enabled=true,this_id=ID%" PRIu32 ")", granularity,
- granularity_unit, tinfo->full_backup_number));
+ "incremental=(granularity=%" PRIu32 "%c,enabled=true,%s,this_id=ID%" PRIu32 ")", granularity,
+ granularity_unit, cons, tinfo->full_backup_number));
VERBOSE(3, "open_cursor(session, \"backup:\", NULL, \"%s\", &cursor)\n", buf);
testutil_check(session->open_cursor(session, "backup:", NULL, buf, &cursor));
@@ -753,11 +776,11 @@ main(int argc, char *argv[])
WT_RAND_STATE rnd;
WT_SESSION *session;
uint32_t file_max, iter, max_value_size, next_checkpoint, rough_size, slot;
- int ch, ncheckpoints, status;
+ int ch, ncheckpoints, nreopens, status;
const char *backup_verbose, *working_dir;
char conf[1024], home[1024], backup_check[1024], backup_dir[1024], command[4096];
- ncheckpoints = 0;
+ ncheckpoints = nreopens = 0;
(void)testutil_set_progname(argv);
custom_die = die; /* Set our own abort handler */
WT_CLEAR(tinfo);
@@ -859,7 +882,7 @@ main(int argc, char *argv[])
*/
slot = __wt_random(&rnd) % tinfo.table_count;
if (!TABLE_VALID(&tinfo.table[slot]))
- create_table(session, &tinfo, slot);
+ create_table(session, &rnd, &tinfo, slot);
else if (__wt_random(&rnd) % 3 == 0 && do_rename)
rename_table(session, &tinfo, slot);
else if (do_drop)
@@ -877,6 +900,15 @@ main(int argc, char *argv[])
}
}
+ /* Close and reopen the connection once in a while. */
+ if (__wt_random(&rnd) % 10 == 0) {
+ VERBOSE(2, "Close and reopen the connection %d\n", nreopens);
+ testutil_check(conn->close(conn, NULL));
+ testutil_check(wiredtiger_open(home, NULL, conf, &conn));
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ nreopens++;
+ }
+
if (iter == 0) {
base_backup(conn, &rnd, home, backup_dir, &tinfo, &active);
check_backup(backup_dir, backup_check, &tinfo);
diff --git a/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c b/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c
index 0179adddf46..7c4c97cb974 100644
--- a/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c
+++ b/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c
@@ -826,7 +826,8 @@ main(int argc, char *argv[])
cur_shadow->set_key(cur_shadow, kname);
/*
* The collection table should always only have the data as of the checkpoint. The
- * shadow table should always have the exact same data (or not) as the collection table.
+ * shadow table should always have the exact same data (or not) as the collection table,
+ * except for the last key that may be committed after the stable timestamp.
*/
if ((ret = cur_coll->search(cur_coll)) != 0) {
if (ret != WT_NOTFOUND)
@@ -850,7 +851,12 @@ main(int argc, char *argv[])
} else if ((ret = cur_shadow->search(cur_shadow)) != 0) {
if (ret != WT_NOTFOUND)
testutil_die(ret, "shadow search");
- else {
+ /*
+ * We respectively insert the record to the collection table at timestamp t and to
+ * the shadow table at t + 1. If the checkpoint finishes at timestamp t, the last
+ * shadow table record will be removed by rollback to stable after restart.
+ */
+ if (durable_fp <= stable_val) {
printf("%s: SHADOW no record with key %" PRIu64 "\n", fname, key);
absent_shadow++;
}
diff --git a/src/third_party/wiredtiger/test/evergreen.yml b/src/third_party/wiredtiger/test/evergreen.yml
index 8a627e7fac5..491d4b24cb3 100755
--- a/src/third_party/wiredtiger/test/evergreen.yml
+++ b/src/third_party/wiredtiger/test/evergreen.yml
@@ -284,6 +284,38 @@ functions:
rm -rf "wiredtiger"
rm -rf "wiredtiger.tgz"
+ "run wt hang analyzer":
+ command: shell.exec
+ params:
+ working_dir: "wiredtiger/build_posix"
+ script: |
+ set -o verbose
+
+ # Dump core (-c) and debugger outputs (-o)
+ wt_hang_analyzer_option="-c -o file -o stdout"
+
+ echo "Calling the wt hang analyzer ..."
+ PATH="/opt/mongodbtoolchain/gdb/bin:$PATH" ${python_binary|python3} ../test/wt_hang_analyzer/wt_hang_analyzer.py $wt_hang_analyzer_option
+
+ "save wt hang analyzer core/debugger files":
+ - command: archive.targz_pack
+ params:
+ target: "wt-hang-analyzer.tgz"
+ source_dir: "wiredtiger/build_posix"
+ include:
+ - "./*core*"
+ - "./debugger*.*"
+ - command: s3.put
+ params:
+ aws_secret: ${aws_secret}
+ aws_key: ${aws_key}
+ local_file: wt-hang-analyzer.tgz
+ bucket: build_external
+ permissions: public-read
+ content_type: application/tar
+ display_name: WT Hang Analyzer Output - Execution ${execution}
+ remote_file: wiredtiger/${build_variant}/${revision}/wt_hang_analyzer/wt-hang-analyzer_${task_name}_${build_id}${postfix|}.tgz
+
"dump stderr/stdout":
command: shell.exec
params:
@@ -423,8 +455,11 @@ post:
- func: "upload artifact"
vars:
postfix: -${execution}
+ - func: "save wt hang analyzer core/debugger files"
- func: "dump stderr/stdout"
- func: "cleanup"
+timeout:
+ - func: "run wt hang analyzer"
tasks:
# Base compile task on posix flavours
@@ -1658,6 +1693,17 @@ tasks:
set -o verbose
test/evergreen/compatibility_test_for_releases.sh -w
+ - name: import-compatibility-test
+ commands:
+ - func: "get project"
+ - command: shell.exec
+ params:
+ working_dir: "wiredtiger"
+ script: |
+ set -o errexit
+ set -o verbose
+ test/evergreen/import_compatibility_test.sh
+
- name: generate-datafile-little-endian
depends_on:
- name: compile
@@ -1921,7 +1967,7 @@ tasks:
extra_args: leak_memory=0 mmap=1 file_type=row checkpoints=0 in_memory=1 reverse=1 truncate=1
- func: "format test"
vars:
- extra_args: checkpoints=1 leak_memory=0 mmap=1 file_type=row compression=zlib huffman_key=1 huffman_value=1
+ extra_args: checkpoints=1 leak_memory=0 mmap=1 file_type=row compression=zlib huffman_value=1
# FIXME-WT-6668: temporarily disable lower isolation level test
# - func: "format test"
# vars:
@@ -2541,6 +2587,7 @@ buildvariants:
- ubuntu1804-test
tasks:
- name: compatibility-test-for-newer-releases
+ - name: import-compatibility-test
- name: windows-64
display_name: "! Windows 64-bit"
diff --git a/src/third_party/wiredtiger/test/evergreen/import_compatibility_test.sh b/src/third_party/wiredtiger/test/evergreen/import_compatibility_test.sh
new file mode 100755
index 00000000000..6f0ba25ee09
--- /dev/null
+++ b/src/third_party/wiredtiger/test/evergreen/import_compatibility_test.sh
@@ -0,0 +1,151 @@
+#!/usr/bin/env bash
+#
+# Test importing of files created in previous versions of WiredTiger.
+# Test that we can downgrade a database after importing a file.
+
+set -e
+
+# build_branch --
+# 1: branch
+build_branch()
+{
+ echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
+ echo "Building branch: \"$1\""
+ echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
+
+ # Clone if it doesn't already exist.
+ if [ ! -d "$1" ]; then
+ git clone --quiet https://github.com/wiredtiger/wiredtiger.git "$1"
+ fi
+ cd "$1"
+
+ git checkout --quiet "$1"
+
+ config=""
+ config+="--enable-snappy "
+ (sh build_posix/reconf &&
+ ./configure $config && make -j $(grep -c ^processor /proc/cpuinfo)) > /dev/null
+ cd ..
+}
+
+# create_file --
+# 1: branch
+# 2: file
+create_file()
+{
+ echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
+ echo "Branch \"$1\" creating and populating \"$2\""
+ echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
+
+ wt_cmd="$1/wt"
+ test_dir="$1/WT_TEST/"
+ uri="file:$2"
+
+ # Make the home directory.
+ mkdir -p $test_dir
+
+ # Create the file and populate with a few key/values.
+ $wt_cmd -h $test_dir create -c "key_format=S,value_format=S" $uri
+ $wt_cmd -h $test_dir write $uri abc 123 def 456 hij 789
+}
+
+# import_file --
+# 1: dest branch
+# 2: source branch
+# 3: file
+import_file()
+{
+ echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
+ echo "Importing file \"$3\" from \"$2\" to \"$1\""
+ echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
+
+ wt_cmd="$1/wt"
+ test_dir="$1/WT_TEST/"
+ mkdir -p $test_dir
+
+ # Move the file across to the destination branch's home directory.
+ import_file="$2/WT_TEST/$3"
+ cp $import_file $test_dir
+
+ # Run import via the wt tool.
+ uri="file:$3"
+ $wt_cmd -h $test_dir create -c "import=(enabled,repair=true)" $uri
+}
+
+# verify_file --
+# 1: branch
+# 2: file
+verify_file()
+{
+ echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
+ echo "Branch \"$1\" verifying \"$2\""
+ echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
+
+ wt_cmd="$1/wt"
+ test_dir="$1/WT_TEST/"
+ uri="file:$2"
+
+ $wt_cmd -h $test_dir verify $uri
+}
+
+# cleanup_branch --
+# 1: branch
+cleanup_branch()
+{
+ test_dir="$1/WT_TEST/"
+ if [ -d $test_dir ]; then
+ rm -rf $test_dir
+ fi
+}
+
+# import_compatibility_test --
+# 1: newer branch
+# 2: older branch
+import_compatibility_test()
+{
+ echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
+ echo "Testing import compatibility between \"$1\" and \"$2\""
+ echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
+
+ # Remove any leftover data files.
+ cleanup_branch $1
+ cleanup_branch $2
+
+ # Create a file in the older branch.
+ create_file $2 test_import
+
+ # Now import it into the newer branch and verify.
+ import_file $1 $2 test_import
+ verify_file $1 test_import
+
+ # Now downgrade by running wt from the older branch and dumping the table contents.
+ #
+ # Before trying this, we must remove the base configuration. The wt tool produces this file
+ # however MongoDB will not so we should emulate this.
+ rm $1/WT_TEST/WiredTiger.basecfg
+ $2/wt -h $1/WT_TEST/ dump file:test_import
+}
+
+# Release branches.
+#
+# Go all the way back to mongodb-4.2 since that's the first release where we don't support live
+# import.
+release_branches=(develop mongodb-5.0 mongodb-4.4 mongodb-4.2)
+
+# Build each of the release branches.
+for b in ${release_branches[@]}; do
+ build_branch $b
+done
+
+for i in ${!release_branches[@]}; do
+ newer=${release_branches[$i]}
+
+ # MongoDB v4.2 doesn't support live import so it should only ever be used as the "older" branch
+ # that we're importing from.
+ if [ $newer = mongodb-4.2 ]; then
+ continue
+ fi
+
+ older=${release_branches[$i+1]}
+ import_compatibility_test $newer $older
+done
diff --git a/src/third_party/wiredtiger/test/format/CONFIG.stress b/src/third_party/wiredtiger/test/format/CONFIG.stress
index 65a93e05821..e5ec1e5f754 100644
--- a/src/third_party/wiredtiger/test/format/CONFIG.stress
+++ b/src/third_party/wiredtiger/test/format/CONFIG.stress
@@ -1,5 +1,4 @@
# A reasonable configuration for stress testing.
-btree.huffman_key=0
btree.huffman_value=0
cache.minimum=20
runs.rows=1000000:5000000
diff --git a/src/third_party/wiredtiger/test/format/backup.c b/src/third_party/wiredtiger/test/format/backup.c
index 8a358343aeb..51eccbf75c9 100644
--- a/src/third_party/wiredtiger/test/format/backup.c
+++ b/src/third_party/wiredtiger/test/format/backup.c
@@ -242,7 +242,7 @@ copy_blocks(WT_SESSION *session, WT_CURSOR *bkup_c, const char *name)
WT_DECL_RET;
size_t len, tmp_sz;
ssize_t rdsize;
- uint64_t offset, size, type;
+ uint64_t offset, size, this_size, total, type;
int rfd, wfd1, wfd2;
char config[512], *tmp;
bool first_pass;
@@ -286,25 +286,35 @@ copy_blocks(WT_SESSION *session, WT_CURSOR *bkup_c, const char *name)
first_pass = false;
}
- if (tmp_sz < size) {
- tmp = drealloc(tmp, size);
- tmp_sz = size;
+ this_size = WT_MIN(size, BACKUP_MAX_COPY);
+ if (tmp_sz < this_size) {
+ tmp = drealloc(tmp, this_size);
+ tmp_sz = this_size;
}
- /*
- * Don't use the system checker for lseek. The system check macro uses an int which is
- * often 4 bytes and checks for any negative value. The offset returned from lseek is
- * 8 bytes and we can have a false positive error check.
- */
if (lseek(rfd, (wt_off_t)offset, SEEK_SET) == -1)
testutil_die(errno, "backup-read: lseek");
- error_sys_check(rdsize = read(rfd, tmp, size));
if (lseek(wfd1, (wt_off_t)offset, SEEK_SET) == -1)
testutil_die(errno, "backup-write1: lseek");
if (lseek(wfd2, (wt_off_t)offset, SEEK_SET) == -1)
testutil_die(errno, "backup-write2: lseek");
- /* Use the read size since we may have read less than the granularity. */
- error_sys_check(write(wfd1, tmp, (size_t)rdsize));
- error_sys_check(write(wfd2, tmp, (size_t)rdsize));
+ total = 0;
+ while (total < size) {
+ /*
+ * Don't use the system checker for lseek. The system check macro uses an int which
+ * is often 4 bytes and checks for any negative value. The offset returned from
+ * lseek is 8 bytes and we can have a false positive error check.
+ */
+ /* Use the read size since we may have read less than the granularity. */
+ error_sys_check(rdsize = read(rfd, tmp, this_size));
+ /* If we get EOF, we're done. */
+ if (rdsize == 0)
+ break;
+ error_sys_check(write(wfd1, tmp, (size_t)rdsize));
+ error_sys_check(write(wfd2, tmp, (size_t)rdsize));
+ total += (uint64_t)rdsize;
+ offset += (uint64_t)rdsize;
+ this_size = WT_MIN(this_size, size - total);
+ }
} else {
testutil_assert(type == WT_BACKUP_FILE);
testutil_assert(first_pass == true);
@@ -574,9 +584,10 @@ backup(void *arg)
else
active_now = &active[0];
src_id = g.backup_id - 1;
+ /* Use consolidation too. */
testutil_check(__wt_snprintf(cfg, sizeof(cfg),
- "incremental=(enabled,src_id=%" PRIu64 ",this_id=%" PRIu64 ")", src_id,
- g.backup_id));
+ "incremental=(enabled,consolidate=true,src_id=%" PRIu64 ",this_id=%" PRIu64 ")",
+ src_id, g.backup_id));
/* Restart a full incremental every once in a while. */
full = false;
incr_full = mmrand(NULL, 1, 8) == 1;
diff --git a/src/third_party/wiredtiger/test/format/config.h b/src/third_party/wiredtiger/test/format/config.h
index 605ae4655fb..321dc33ddd2 100644
--- a/src/third_party/wiredtiger/test/format/config.h
+++ b/src/third_party/wiredtiger/test/format/config.h
@@ -88,9 +88,6 @@ static CONFIG c[] = {
NULL},
/* 20% */
- {"btree.huffman_key", "configure huffman encoded keys", C_BOOL, 20, 0, 0, &g.c_huffman_key, NULL},
-
- /* 20% */
{"btree.huffman_value", "configure huffman encoded values", C_BOOL, 20, 0, 0, &g.c_huffman_value,
NULL},
diff --git a/src/third_party/wiredtiger/test/format/config_compat.c b/src/third_party/wiredtiger/test/format/config_compat.c
index 69011fddfea..6bd784c1031 100644
--- a/src/third_party/wiredtiger/test/format/config_compat.c
+++ b/src/third_party/wiredtiger/test/format/config_compat.c
@@ -89,8 +89,6 @@ static const char *list[] = {
"runs.type",
"firstfit=",
"disk.firstfit",
- "huffman_key=",
- "btree.huffman_key",
"huffman_value=",
"btree.huffman_value",
"in_memory=",
diff --git a/src/third_party/wiredtiger/test/format/config_compat.sed b/src/third_party/wiredtiger/test/format/config_compat.sed
index 5b138afb355..0f43b19fc6a 100644
--- a/src/third_party/wiredtiger/test/format/config_compat.sed
+++ b/src/third_party/wiredtiger/test/format/config_compat.sed
@@ -5,7 +5,6 @@ s/^backup=/backups=/
s/^btree.bitcnt=/bitcnt=/
s/^btree.compression=/compression=/
s/^btree.dictionary=/dictionary=/
-s/^btree.huffman_key=/huffman_key=/
s/^btree.huffman_value=/huffman_value=/
s/^btree.internal_key_truncation=/internal_key_truncation=/
s/^btree.internal_page_max=/internal_page_max=/
diff --git a/src/third_party/wiredtiger/test/format/failure_configs/CONFIG.WT-5637 b/src/third_party/wiredtiger/test/format/failure_configs/CONFIG.WT-5637
index 32f0baa246b..68338d050ae 100644
--- a/src/third_party/wiredtiger/test/format/failure_configs/CONFIG.WT-5637
+++ b/src/third_party/wiredtiger/test/format/failure_configs/CONFIG.WT-5637
@@ -9,7 +9,6 @@ backup.incr_granularity=3973
btree.bitcnt=4
btree.compression=none
btree.dictionary=0
-btree.huffman_key=0
btree.huffman_value=0
btree.internal_key_truncation=1
btree.internal_page_max=9
diff --git a/src/third_party/wiredtiger/test/format/failure_configs/CONFIG.WT-6725 b/src/third_party/wiredtiger/test/format/failure_configs/CONFIG.WT-6725
index 0e9041c57f3..144d9aaadd4 100644
--- a/src/third_party/wiredtiger/test/format/failure_configs/CONFIG.WT-6725
+++ b/src/third_party/wiredtiger/test/format/failure_configs/CONFIG.WT-6725
@@ -9,7 +9,6 @@
btree.bitcnt=6
btree.compression=none
btree.dictionary=0
- btree.huffman_key=0
btree.huffman_value=0
btree.internal_key_truncation=1
btree.internal_page_max=15
diff --git a/src/third_party/wiredtiger/test/format/failure_configs/CONFIG.WT-6727 b/src/third_party/wiredtiger/test/format/failure_configs/CONFIG.WT-6727
index 554d86c6c13..62b3f7bba5f 100644
--- a/src/third_party/wiredtiger/test/format/failure_configs/CONFIG.WT-6727
+++ b/src/third_party/wiredtiger/test/format/failure_configs/CONFIG.WT-6727
@@ -9,7 +9,6 @@ backup.incr_granularity=16
btree.bitcnt=7
btree.compression=snappy
btree.dictionary=0
-btree.huffman_key=0
btree.huffman_value=0
btree.internal_key_truncation=1
btree.internal_page_max=12
diff --git a/src/third_party/wiredtiger/test/format/format.h b/src/third_party/wiredtiger/test/format/format.h
index 05645d4c37a..27ed68ece19 100644
--- a/src/third_party/wiredtiger/test/format/format.h
+++ b/src/third_party/wiredtiger/test/format/format.h
@@ -53,6 +53,7 @@
#define BACKUP_INFO_FILE "BACKUP_INFO" /* Format's backup information for restart */
#define BACKUP_INFO_FILE_TMP "BACKUP_INFO.TMP" /* Format's backup information for restart */
+#define BACKUP_MAX_COPY MEGABYTE(64) /* Maximum size we'll read/write at a time */
#define WT_NAME "wt" /* Object name */
#define DATASOURCE(v) (strcmp(v, g.c_data_source) == 0 ? 1 : 0)
@@ -170,7 +171,6 @@ typedef struct {
char *c_file_type;
uint32_t c_firstfit;
uint32_t c_hs_cursor;
- uint32_t c_huffman_key;
uint32_t c_huffman_value;
uint32_t c_in_memory;
uint32_t c_independent_thread_rng;
diff --git a/src/third_party/wiredtiger/test/format/format.sh b/src/third_party/wiredtiger/test/format/format.sh
index 5cffe3a2274..8392d4a18ce 100755
--- a/src/third_party/wiredtiger/test/format/format.sh
+++ b/src/third_party/wiredtiger/test/format/format.sh
@@ -49,9 +49,9 @@ smoke_list=(
# "$smoke_base_1 file_type=var"
# Huffman key/value encoding.
- "$smoke_base_1 file_type=row huffman_key=1 huffman_value=1"
+ "$smoke_base_1 file_type=row huffman_value=1"
# Temporarily disabled
- # "$smoke_base_1 file_type=var huffman_key=1 huffman_value=1"
+ # "$smoke_base_1 file_type=var huffman_value=1"
# LSM
# Temporarily disabled
diff --git a/src/third_party/wiredtiger/test/format/wts.c b/src/third_party/wiredtiger/test/format/wts.c
index a3295d16f88..d017d28ff38 100644
--- a/src/third_party/wiredtiger/test/format/wts.c
+++ b/src/third_party/wiredtiger/test/format/wts.c
@@ -317,8 +317,6 @@ create_object(WT_CONNECTION *conn)
CONFIG_APPEND(p, ",value_format=%" PRIu32 "t", g.c_bitcnt);
break;
case ROW:
- if (g.c_huffman_key)
- CONFIG_APPEND(p, ",huffman_key=english");
if (g.c_prefix_compression)
CONFIG_APPEND(p, ",prefix_compression_min=%" PRIu32, g.c_prefix_compression_min);
else
diff --git a/src/third_party/wiredtiger/test/suite/test_backup11.py b/src/third_party/wiredtiger/test/suite/test_backup11.py
index 76fa70c4b2b..3e6bd347b03 100644
--- a/src/third_party/wiredtiger/test/suite/test_backup11.py
+++ b/src/third_party/wiredtiger/test/suite/test_backup11.py
@@ -44,7 +44,6 @@ class test_backup11(wttest.WiredTigerTestCase, suite_subprocess):
uri="table:test"
def add_data(self):
-
c = self.session.open_cursor(self.uri)
for i in range(0, self.nops):
num = i + (self.mult * self.nops)
@@ -136,6 +135,16 @@ class test_backup11(wttest.WiredTigerTestCase, suite_subprocess):
self.pr("Opened backup for error testing")
# Now test all the error cases with an incremental primary open.
+ # - We cannot specify consolidate on the duplicate cursor.
+ config = 'incremental=(consolidate=true,file=test.wt)'
+ msg = "/consolidation can only be specified on a primary/"
+ self.pr("Test consolidation on a dup")
+ self.pr("=========")
+ # Test multiple duplicate backup cursors.
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda:self.assertEquals(self.session.open_cursor(None,
+ bkup_c, config), 0), msg)
+
# - We cannot make multiple incremental duplcate backup cursors.
# - We cannot duplicate the duplicate backup cursor.
config = 'incremental=(file=test.wt)'
diff --git a/src/third_party/wiredtiger/test/suite/test_backup12.py b/src/third_party/wiredtiger/test/suite/test_backup12.py
index f5fadcee393..90ca31aa76e 100644
--- a/src/third_party/wiredtiger/test/suite/test_backup12.py
+++ b/src/third_party/wiredtiger/test/suite/test_backup12.py
@@ -51,7 +51,6 @@ class test_backup12(wttest.WiredTigerTestCase, suite_subprocess):
bigval = 'Value' * 100
def add_data(self, uri):
-
c = self.session.open_cursor(uri)
for i in range(0, self.nops):
num = i + (self.mult * self.nops)
diff --git a/src/third_party/wiredtiger/test/suite/test_backup13.py b/src/third_party/wiredtiger/test/suite/test_backup13.py
index 445cbaa6dc1..10faed087ca 100644
--- a/src/third_party/wiredtiger/test/suite/test_backup13.py
+++ b/src/third_party/wiredtiger/test/suite/test_backup13.py
@@ -48,8 +48,25 @@ class test_backup13(wttest.WiredTigerTestCase, suite_subprocess):
bigkey = 'Key' * 100
bigval = 'Value' * 100
- def add_data(self, uri):
+ def simulate_crash_restart(self, olddir, newdir):
+ ''' Simulate a crash from olddir and restart in newdir. '''
+ # with the connection still open, copy files to new directory
+ shutil.rmtree(newdir, ignore_errors=True)
+ os.mkdir(newdir)
+ for fname in os.listdir(olddir):
+ fullname = os.path.join(olddir, fname)
+ # Skip lock file on Windows since it is locked
+ if os.path.isfile(fullname) and \
+ "WiredTiger.lock" not in fullname and \
+ "Tmplog" not in fullname and \
+ "Preplog" not in fullname:
+ shutil.copy(fullname, newdir)
+ # close the original connection and open to new directory
+ self.close_conn()
+ self.conn = self.setUpConnectionOpen(newdir)
+ self.session = self.setUpSessionOpen(self.conn)
+ def add_data(self, uri):
c = self.session.open_cursor(uri)
for i in range(0, self.nops):
num = i + (self.mult * self.nops)
@@ -157,8 +174,15 @@ class test_backup13(wttest.WiredTigerTestCase, suite_subprocess):
# Make sure after a force stop we cannot access old backup info.
config = 'incremental=(src_id="ID1",this_id="ID3")'
+
self.assertRaises(wiredtiger.WiredTigerError,
lambda: self.session.open_cursor('backup:', None, config))
+
+ # Make sure after a crash we cannot access old backup info.
+ self.simulate_crash_restart(".", "RESTART")
+ self.assertRaises(wiredtiger.WiredTigerError,
+ lambda: self.session.open_cursor('backup:', None, config))
+
self.reopen_conn()
# Make sure after a restart we cannot access old backup info.
self.assertRaises(wiredtiger.WiredTigerError,
diff --git a/src/third_party/wiredtiger/test/suite/test_backup14.py b/src/third_party/wiredtiger/test/suite/test_backup14.py
index c312020bcef..67ebe68a8af 100644
--- a/src/third_party/wiredtiger/test/suite/test_backup14.py
+++ b/src/third_party/wiredtiger/test/suite/test_backup14.py
@@ -106,10 +106,10 @@ class test_backup14(wttest.WiredTigerTestCase, suite_subprocess):
newfile = cursor.get_key()
if self.counter == 0:
- # Take a full bakcup into each incremental directory
+ # Take a full backup into each incremental directory
for i in range(0, self.max_iteration):
copy_from = newfile
- # If it is log file, prepend the path.
+ # If it is a log file, prepend the path.
if ("WiredTigerLog" in newfile):
copy_to = self.home_incr + '.' + str(i) + '/' + self.logpath
else:
diff --git a/src/third_party/wiredtiger/test/suite/test_backup15.py b/src/third_party/wiredtiger/test/suite/test_backup15.py
index 0327abe9838..509efcacb4f 100644
--- a/src/third_party/wiredtiger/test/suite/test_backup15.py
+++ b/src/third_party/wiredtiger/test/suite/test_backup15.py
@@ -124,10 +124,10 @@ class test_backup15(wttest.WiredTigerTestCase, suite_subprocess):
newfile = bkup_c.get_key()
if self.counter == 0:
- # Take a full bakcup into each incremental directory
+ # Take a full backup into each incremental directory
for i in range(0, self.max_iteration):
copy_from = newfile
- # If it is log file, prepend the path.
+ # If it is a log file, prepend the path.
if ("WiredTigerLog" in newfile):
copy_to = self.home_incr + '.' + str(i) + '/' + self.logpath
else:
diff --git a/src/third_party/wiredtiger/test/suite/test_backup16.py b/src/third_party/wiredtiger/test/suite/test_backup16.py
index 0265c39b73f..82df7c39307 100644
--- a/src/third_party/wiredtiger/test/suite/test_backup16.py
+++ b/src/third_party/wiredtiger/test/suite/test_backup16.py
@@ -63,7 +63,6 @@ class test_backup16(wttest.WiredTigerTestCase, suite_subprocess):
bigval = 'Value' * 10
def add_data(self, uri):
-
c = self.session.open_cursor(uri)
for i in range(0, self.nops):
num = i + (self.mult * self.nops)
diff --git a/src/third_party/wiredtiger/test/suite/test_backup17.py b/src/third_party/wiredtiger/test/suite/test_backup17.py
new file mode 100644
index 00000000000..5fa250fd485
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_backup17.py
@@ -0,0 +1,165 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2020 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wiredtiger, wttest
+import os, shutil
+from helper import compare_files
+from suite_subprocess import suite_subprocess
+from wtdataset import simple_key
+from wtscenario import make_scenarios
+
+# test_backup17.py
+# Test cursor backup with a block-based incremental cursor and consolidate.
+class test_backup17(wttest.WiredTigerTestCase, suite_subprocess):
+ dir='backup.dir' # Backup directory name
+ gran="100K"
+ granval=100*1024
+ logmax="100K"
+ uri="table:test"
+ uri2="table:test2"
+ nops=1000
+ mult=0
+
+ conn_config='cache_size=1G,log=(enabled,file_max=%s)' % logmax
+
+ pfx = 'test_backup'
+ # Set the key and value big enough that we modify a few blocks.
+ bigkey = 'Key' * 100
+ bigval = 'Value' * 100
+
+ def add_data(self, uri):
+ c = self.session.open_cursor(uri)
+ for i in range(0, self.nops):
+ num = i + (self.mult * self.nops)
+ key = self.bigkey + str(num)
+ val = self.bigval + str(num)
+ c[key] = val
+ self.session.checkpoint()
+ c.close()
+
+ def take_incr_backup(self, id, consolidate):
+ # Open the backup data source for incremental backup.
+ buf = 'incremental=(src_id="ID' + str(id - 1) + '",this_id="ID' + str(id) + '"'
+ if consolidate:
+ buf += ',consolidate=true'
+ buf += ')'
+ bkup_c = self.session.open_cursor('backup:', None, buf)
+ lens = []
+ saw_multiple = False
+ while True:
+ ret = bkup_c.next()
+ if ret != 0:
+ break
+ newfile = bkup_c.get_key()
+ config = 'incremental=(file=' + newfile + ')'
+ self.pr('Open incremental cursor with ' + config)
+ dup_cnt = 0
+ dupc = self.session.open_cursor(None, bkup_c, config)
+ while True:
+ ret = dupc.next()
+ if ret != 0:
+ break
+ incrlist = dupc.get_keys()
+ offset = incrlist[0]
+ size = incrlist[1]
+ curtype = incrlist[2]
+ # 1 is WT_BACKUP_FILE
+ # 2 is WT_BACKUP_RANGE
+ self.assertTrue(curtype == 1 or curtype == 2)
+ if curtype == 1:
+ self.pr('Copy from: ' + newfile + ' (' + str(size) + ') to ' + self.dir)
+ shutil.copy(newfile, self.dir)
+ else:
+ self.pr('Range copy file ' + newfile + ' offset ' + str(offset) + ' len ' + str(size))
+ lens.append(size)
+ rfp = open(newfile, "r+b")
+ wfp = open(self.dir + '/' + newfile, "w+b")
+ rfp.seek(offset, 0)
+ wfp.seek(offset, 0)
+ if size > self.granval:
+ saw_multiple = True
+ buf = rfp.read(size)
+ wfp.write(buf)
+ rfp.close()
+ wfp.close()
+ dup_cnt += 1
+ dupc.close()
+ self.assertEqual(ret, wiredtiger.WT_NOTFOUND)
+ bkup_c.close()
+ if consolidate:
+ self.assertTrue(saw_multiple)
+ else:
+ self.assertFalse(saw_multiple)
+ return lens
+
+ def test_backup17(self):
+
+ self.session.create(self.uri, "key_format=S,value_format=S")
+ self.session.create(self.uri2, "key_format=S,value_format=S")
+ self.add_data(self.uri)
+ self.add_data(self.uri2)
+ self.mult += 1
+
+ # Open up the backup cursor. This causes a new log file to be created.
+ # That log file is not part of the list returned. This is a full backup
+ # primary cursor with incremental configured.
+ os.mkdir(self.dir)
+ config = 'incremental=(enabled,granularity=%s,this_id="ID1")' % self.gran
+ bkup_c = self.session.open_cursor('backup:', None, config)
+
+ # Now copy the files returned by the backup cursor.
+ all_files = []
+ while True:
+ ret = bkup_c.next()
+ if ret != 0:
+ break
+ newfile = bkup_c.get_key()
+ sz = os.path.getsize(newfile)
+ self.pr('Copy from: ' + newfile + ' (' + str(sz) + ') to ' + self.dir)
+ shutil.copy(newfile, self.dir)
+ all_files.append(newfile)
+ self.assertEqual(ret, wiredtiger.WT_NOTFOUND)
+ bkup_c.close()
+
+ # This is the main part of the test for consolidate. Add data to the first table.
+ # Then perform the incremental backup with consolidate off (the default). Then add the
+ # same data to the second table. Perform an incremental backup with consolidate on and
+ # verify we get fewer, consolidated values.
+ self.add_data(self.uri)
+ uri1_lens = self.take_incr_backup(2, False)
+
+ self.add_data(self.uri2)
+ uri2_lens = self.take_incr_backup(3, True)
+
+ # Assert that we recorded fewer lengths on the consolidated backup.
+ self.assertLess(len(uri2_lens), len(uri1_lens))
+ # Assert that we recorded the same total data length for both.
+ self.assertEqual(sum(uri2_lens), sum(uri1_lens))
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_backup18.py b/src/third_party/wiredtiger/test/suite/test_backup18.py
new file mode 100644
index 00000000000..2034c8ac6d1
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_backup18.py
@@ -0,0 +1,136 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2020 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wiredtiger, wttest
+import os, shutil
+from helper import compare_files
+from suite_subprocess import suite_subprocess
+from wtdataset import simple_key
+from wtscenario import make_scenarios
+
+# test_backup18.py
+# Test backup:query_id API.
+class test_backup18(wttest.WiredTigerTestCase, suite_subprocess):
+ conn_config= 'cache_size=1G,log=(enabled,file_max=100K)'
+ mult=0
+ nops=100
+ pfx = 'test_backup'
+ uri="table:test"
+
+ def id_check(self, expect):
+ got = []
+ bkup_c = self.session.open_cursor('backup:query_id', None, None)
+ # We cannot use 'for idstr in bkup_c:' usage because backup cursors don't have
+ # values and adding in get_values returns ENOTSUP and causes the usage to fail.
+ while True:
+ ret = bkup_c.next()
+ if ret != 0:
+ break
+ idstr = bkup_c.get_key()
+ got.append(idstr)
+ bkup_c.close()
+ got.sort()
+ expect.sort()
+ self.assertEqual(got, expect)
+
+ def add_data(self):
+ c = self.session.open_cursor(self.uri)
+ for i in range(0, self.nops):
+ num = i + (self.mult * self.nops)
+ key = 'key' + str(num)
+ val = 'value' + str(num)
+ c[key] = val
+ self.mult += 1
+ self.session.checkpoint()
+ c.close()
+
+ def test_backup18(self):
+ # We're not taking actual backups in this test, but we do want a table to
+ # exist for the backup cursor to generate something.
+ self.session.create(self.uri, "key_format=S,value_format=S")
+ self.add_data()
+
+ msg = "/is not configured/"
+ self.pr("Query IDs before any backup")
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda:self.assertEquals(self.session.open_cursor('backup:query_id',
+ None, None), 0), msg)
+
+ # Open up the backup cursor.
+ config = 'incremental=(enabled,this_id="ID1")'
+ bkup_c = self.session.open_cursor('backup:', None, config)
+
+ # Try to open the query cursor as a duplicate on the backup.
+ msg = "/should be passed either/"
+ self.pr("Query IDs as duplicate cursor")
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda:self.assertEquals(self.session.open_cursor('backup:query_id',
+ bkup_c, None), 0), msg)
+
+ # Try to open the query cursor while backup cursor is open.
+ msg = "/there is already a backup/"
+ self.pr("Query IDs while backup cursor open")
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda:self.assertEquals(self.session.open_cursor('backup:query_id',
+ None, None), 0), msg)
+ bkup_c.close()
+
+ # Check a few basic cases.
+ self.pr("Query IDs basic cases")
+ expect = ["ID1"]
+ self.id_check(expect)
+
+ config = 'incremental=(enabled,src_id="ID1",this_id="ID2")'
+ bkup_c = self.session.open_cursor('backup:', None, config)
+ bkup_c.close()
+ expect = ["ID1", "ID2"]
+ self.id_check(expect)
+
+ config = 'incremental=(enabled,src_id="ID2",this_id="ID3")'
+ bkup_c = self.session.open_cursor('backup:', None, config)
+ bkup_c.close()
+ expect = ["ID2", "ID3"]
+ self.id_check(expect)
+
+ self.reopen_conn()
+ self.pr("Query after reopen")
+ expect = ["ID2", "ID3"]
+ self.id_check(expect)
+
+ # Force stop and then recheck. Incremental is no longer configured.
+ msg = "/is not configured/"
+ self.pr("Query after force stop")
+ config = 'incremental=(force_stop=true)'
+ bkup_c = self.session.open_cursor('backup:', None, config)
+ bkup_c.close()
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda:self.assertEquals(self.session.open_cursor('backup:query_id',
+ None, None), 0), msg)
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_backup19.py b/src/third_party/wiredtiger/test/suite/test_backup19.py
new file mode 100644
index 00000000000..547445cafbe
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_backup19.py
@@ -0,0 +1,290 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2020 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wiredtiger, wttest
+import os, shutil
+from helper import compare_files
+from suite_subprocess import suite_subprocess
+from wtdataset import simple_key
+from wtscenario import make_scenarios
+import glob
+
+# test_backup19.py
+# Test cursor backup with a block-based incremental cursor source id only.
+class test_backup19(wttest.WiredTigerTestCase, suite_subprocess):
+ bkp_home = "WT_BLOCK"
+ counter=0
+ conn_config='cache_size=1G,log=(enabled,file_max=100K)'
+ logmax="100K"
+ mult=0
+ nops=10000
+ savefirst=0
+ savekey='NOTSET'
+ uri="table:main"
+
+ dir='backup.dir' # Backup directory name
+ home_full = "WT_BLOCK_LOG_FULL"
+ home_incr = "WT_BLOCK_LOG_INCR"
+
+ full_out = "./backup_block_full"
+ incr_out = "./backup_block_incr"
+ logpath = "logpath"
+ new_table=False
+ initial_backup=False
+
+ pfx = 'test_backup'
+ # Set the key and value big enough that we modify a few blocks.
+ bigkey = 'Key' * 100
+ bigval = 'Value' * 100
+
+ #
+ # Set up all the directories needed for the test. We have a full backup directory for each
+ # iteration and an incremental backup for each iteration. That way we can compare the full and
+ # incremental each time through.
+ #
+ def setup_directories(self):
+ # We're only coming through once so just set up the 0 and 1 directories.
+ for i in range(0, 2):
+ # The log directory is a subdirectory of the home directory,
+ # creating that will make the home directory also.
+ log_dir = self.home_incr + '.' + str(i) + '/' + self.logpath
+ os.makedirs(log_dir)
+ if i != 0:
+ log_dir = self.home_full + '.' + str(i) + '/' + self.logpath
+ os.makedirs(log_dir)
+
+ def range_copy(self, filename, offset, size):
+ read_from = filename
+ old_to = self.home_incr + '.' + str(self.counter - 1) + '/' + filename
+ write_to = self.home_incr + '.' + str(self.counter) + '/' + filename
+ rfp = open(read_from, "r+b")
+ self.pr('RANGE CHECK file ' + old_to + ' offset ' + str(offset) + ' len ' + str(size))
+ rfp2 = open(old_to, "r+b")
+ rfp.seek(offset, 0)
+ rfp2.seek(offset, 0)
+ buf = rfp.read(size)
+ buf2 = rfp2.read(size)
+ # This assertion tests that the offset range we're given actually changed
+ # from the previous backup.
+ self.assertNotEqual(buf, buf2)
+ wfp = open(write_to, "w+b")
+ wfp.seek(offset, 0)
+ wfp.write(buf)
+ rfp.close()
+ rfp2.close()
+ wfp.close()
+
+ def take_full_backup(self):
+ if self.counter != 0:
+ hdir = self.home_full + '.' + str(self.counter)
+ else:
+ hdir = self.home_incr
+
+ #
+ # First time through we take a full backup into the incremental directories. Otherwise only
+ # into the appropriate full directory.
+ #
+ buf = None
+ if self.initial_backup == True:
+ buf = 'incremental=(granularity=1M,enabled=true,this_id=ID0)'
+
+ bkup_c = self.session.open_cursor('backup:', None, buf)
+ # We cannot use 'for newfile in bkup_c:' usage because backup cursors don't have
+ # values and adding in get_values returns ENOTSUP and causes the usage to fail.
+ # If that changes then this, and the use of the duplicate below can change.
+ while True:
+ ret = bkup_c.next()
+ if ret != 0:
+ break
+ newfile = bkup_c.get_key()
+
+ if self.counter == 0:
+ # Take a full backup into each incremental directory
+ for i in range(0, 2):
+ copy_from = newfile
+ # If it is a log file, prepend the path.
+ if ("WiredTigerLog" in newfile):
+ copy_to = self.home_incr + '.' + str(i) + '/' + self.logpath
+ else:
+ copy_to = self.home_incr + '.' + str(i)
+ shutil.copy(copy_from, copy_to)
+ else:
+ copy_from = newfile
+ # If it is a log file, prepend the path.
+ if ("WiredTigerLog" in newfile):
+ copy_to = hdir + '/' + self.logpath
+ else:
+ copy_to = hdir
+
+ shutil.copy(copy_from, copy_to)
+ self.assertEqual(ret, wiredtiger.WT_NOTFOUND)
+ bkup_c.close()
+
+ def take_incr_backup(self):
+ self.assertTrue(self.counter > 0)
+ # Open the backup data source for incremental backup.
+ buf = 'incremental=(src_id="ID' + str(self.counter - 1) + '")'
+ self.pr(buf)
+ bkup_c = self.session.open_cursor('backup:', None, buf)
+
+ # We cannot use 'for newfile in bkup_c:' usage because backup cursors don't have
+ # values and adding in get_values returns ENOTSUP and causes the usage to fail.
+ # If that changes then this, and the use of the duplicate below can change.
+ while True:
+ ret = bkup_c.next()
+ if ret != 0:
+ break
+ newfile = bkup_c.get_key()
+ h = self.home_incr + '.0'
+ copy_from = newfile
+ # If it is a log file, prepend the path.
+ if ("WiredTigerLog" in newfile):
+ copy_to = h + '/' + self.logpath
+ else:
+ copy_to = h
+
+ shutil.copy(copy_from, copy_to)
+ first = True
+ config = 'incremental=(file=' + newfile + ')'
+ dup_cnt = 0
+ # For each file listed, open a duplicate backup cursor and copy the blocks.
+ incr_c = self.session.open_cursor(None, bkup_c, config)
+
+ # We cannot use 'for newfile in incr_c:' usage because backup cursors don't have
+ # values and adding in get_values returns ENOTSUP and causes the usage to fail.
+ # If that changes then this, and the use of the duplicate below can change.
+ while True:
+ ret = incr_c.next()
+ if ret != 0:
+ break
+ incrlist = incr_c.get_keys()
+ offset = incrlist[0]
+ size = incrlist[1]
+ curtype = incrlist[2]
+ self.assertTrue(curtype == wiredtiger.WT_BACKUP_FILE or curtype == wiredtiger.WT_BACKUP_RANGE)
+ if curtype == wiredtiger.WT_BACKUP_FILE:
+ # Copy the whole file.
+ if first == True:
+ h = self.home_incr + '.' + str(self.counter)
+ first = False
+
+ copy_from = newfile
+ if ("WiredTigerLog" in newfile):
+ copy_to = h + '/' + self.logpath
+ else:
+ copy_to = h
+ shutil.copy(copy_from, copy_to)
+ else:
+ # Copy the block range.
+ self.pr('Range copy file ' + newfile + ' offset ' + str(offset) + ' len ' + str(size))
+ self.range_copy(newfile, offset, size)
+ dup_cnt += 1
+ self.assertEqual(ret, wiredtiger.WT_NOTFOUND)
+ incr_c.close()
+
+ # For each file, we want to copy it into each of the later incremental directories.
+ for i in range(self.counter, 2):
+ h = self.home_incr + '.' + str(i)
+ copy_from = newfile
+ if ("WiredTigerLog" in newfile):
+ copy_to = h + '/' + self.logpath
+ else:
+ copy_to = h
+ shutil.copy(copy_from, copy_to)
+ self.assertEqual(ret, wiredtiger.WT_NOTFOUND)
+ bkup_c.close()
+
+ def compare_backups(self, t_uri):
+ # Run wt dump on full backup directory.
+ full_backup_out = self.full_out + '.' + str(self.counter)
+ home_dir = self.home_full + '.' + str(self.counter)
+ if self.counter == 0:
+ home_dir = self.home
+ self.runWt(['-R', '-h', home_dir, 'dump', t_uri], outfilename=full_backup_out)
+
+ # Run wt dump on incremental backup directory.
+ incr_backup_out = self.incr_out + '.' + str(self.counter)
+ home_dir = self.home_incr + '.' + str(self.counter)
+ self.runWt(['-R', '-h', home_dir, 'dump', t_uri], outfilename=incr_backup_out)
+
+ self.assertEqual(True,
+ compare_files(self, full_backup_out, incr_backup_out))
+
+ #
+ # Add data to the given uri.
+ #
+ def add_data(self, uri):
+ c = self.session.open_cursor(uri, None, None)
+ # The first time we want to add in a lot of data. Then after that we want to
+ # rapidly change a single key to create a hotspot in one block.
+ if self.savefirst < 2:
+ nops = self.nops
+ else:
+ nops = self.nops // 10
+ for i in range(0, nops):
+ num = i + (self.mult * nops)
+ if self.savefirst >= 2:
+ key = self.savekey
+ else:
+ key = str(num) + self.bigkey + str(num)
+ val = str(num) + self.bigval + str(num)
+ c[key] = val
+ if self.savefirst == 0:
+ self.savekey = key
+ self.savefirst += 1
+ c.close()
+
+ # Increase the multiplier so that later calls insert unique items.
+ self.mult += 1
+ # Increase the counter so that later backups have unique ids.
+ if self.initial_backup == False:
+ self.counter += 1
+
+ def test_backup19(self):
+ os.mkdir(self.bkp_home)
+ self.home = self.bkp_home
+ self.session.create(self.uri, "key_format=S,value_format=S")
+
+ self.setup_directories()
+
+ self.pr('*** Add data, checkpoint, take backups and validate ***')
+ self.pr('Adding initial data')
+ self.initial_backup = True
+ self.add_data(self.uri)
+ self.take_full_backup()
+ self.initial_backup = False
+ self.session.checkpoint()
+
+ self.add_data(self.uri)
+ self.session.checkpoint()
+ self.take_full_backup()
+ self.take_incr_backup()
+ self.compare_backups(self.uri)
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_base02.py b/src/third_party/wiredtiger/test/suite/test_base02.py
index 58685a3196a..7f328f02432 100644
--- a/src/third_party/wiredtiger/test/suite/test_base02.py
+++ b/src/third_party/wiredtiger/test/suite/test_base02.py
@@ -73,7 +73,7 @@ class test_base02(wttest.WiredTigerTestCase):
]
conf_encoding = [
None,
- 'huffman_key=,huffman_value=english',
+ 'huffman_value=english',
]
for size in conf_confsize:
for col in conf_col:
diff --git a/src/third_party/wiredtiger/test/suite/test_base05.py b/src/third_party/wiredtiger/test/suite/test_base05.py
index 8e63715838c..fcdebb90031 100755
--- a/src/third_party/wiredtiger/test/suite/test_base05.py
+++ b/src/third_party/wiredtiger/test/suite/test_base05.py
@@ -42,9 +42,7 @@ class test_base05(wttest.WiredTigerTestCase):
nentries = 1000
scenarios = make_scenarios([
('no_huffman', dict(extraconfig='')),
- ('huffman_key', dict(extraconfig='huffman_key="english"')),
('huffman_val', dict(extraconfig='huffman_value="english"')),
- ('huffman_keyval', dict(extraconfig='huffman_key="english",huffman_value="english"'))
])
def config_string(self):
diff --git a/src/third_party/wiredtiger/test/suite/test_huffman01.py b/src/third_party/wiredtiger/test/suite/test_huffman01.py
index 17683256355..abd44079371 100644
--- a/src/third_party/wiredtiger/test/suite/test_huffman01.py
+++ b/src/third_party/wiredtiger/test/suite/test_huffman01.py
@@ -40,28 +40,16 @@ class test_huffman01(wttest.WiredTigerTestCase, suite_subprocess):
"""
table_name = 'table:test_huff'
- huffkey = [
- ('none', dict(huffkey='huffman_key=none',kfile=None)),
- ('english', dict(huffkey='huffman_key=english',kfile=None)),
- ('utf8', dict(huffkey='huffman_key=utf8t8file',kfile='t8file')),
- ('utf16', dict(huffkey='huffman_key=utf16t16file',kfile='t16file')),
- ]
huffval = [
('none', dict(huffval=',huffman_value=none',vfile=None)),
('english', dict(huffval=',huffman_value=english',vfile=None)),
('utf8', dict(huffval=',huffman_value=utf8t8file',vfile='t8file')),
('utf16', dict(huffval=',huffman_value=utf16t16file',vfile='t16file')),
]
- scenarios = make_scenarios(huffkey, huffval)
+ scenarios = make_scenarios(huffval)
def test_huffman(self):
dir = self.conn.get_home()
- if self.kfile != None:
- # For the UTF settings write some made-up frequency information.
- f = open(dir + '/' + self.kfile, 'w')
- f.write('48 546233\n49 460946\n')
- f.write('0x4a 546233\n0x4b 460946\n')
- f.close()
# if self.vfile != None and not os.path.exists(self.vfile):
if self.vfile != None:
f = open(dir + '/' + self.vfile, 'w')
@@ -69,7 +57,7 @@ class test_huffman01(wttest.WiredTigerTestCase, suite_subprocess):
f.write('48 546233\n49 460946\n')
f.write('0x4a 546233\n0x4b 460946\n')
f.close()
- config=self.huffkey + self.huffval
+ config= self.huffval
self.session.create(self.table_name, config)
# Test Huffman encoding ranges.
@@ -82,7 +70,7 @@ class test_huffman_range(wttest.WiredTigerTestCase):
f = open(dir + '/t8file', 'w')
f.write('256 546233\n257 460946\n')
f.close()
- config="huffman_key=utf8t8file"
+ config="huffman_value=utf8t8file"
msg = '/not in range/'
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda: self.session.create(self.table_name, config), msg)
@@ -93,7 +81,7 @@ class test_huffman_range(wttest.WiredTigerTestCase):
f = open(dir + '/t16file', 'w')
f.write('65536 546233\n65537 460946\n')
f.close()
- config="huffman_key=utf16t16file"
+ config="huffman_value=utf16t16file"
msg = '/not in range/'
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda: self.session.create(self.table_name, config), msg)
@@ -105,7 +93,7 @@ class test_huffman_range(wttest.WiredTigerTestCase):
f = open(dir + '/t8file', 'w')
f.write('48 4294967296\n49 4294967297\n')
f.close()
- config="huffman_key=utf8t8file"
+ config="huffman_value=utf8t8file"
msg = '/not in range/'
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda: self.session.create(self.table_name, config), msg)
@@ -117,7 +105,7 @@ class test_huffman_range(wttest.WiredTigerTestCase):
f.write('100 546233\n101 460946\n')
f.write('102 546233\n100 460946\n')
f.close()
- config="huffman_key=utf8t8file"
+ config="huffman_value=utf8t8file"
msg = '/duplicate symbol/'
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda: self.session.create(self.table_name, config), msg)
diff --git a/src/third_party/wiredtiger/test/suite/test_huffman02.py b/src/third_party/wiredtiger/test/suite/test_huffman02.py
index a9dce6a0c3c..327386466b4 100644
--- a/src/third_party/wiredtiger/test/suite/test_huffman02.py
+++ b/src/third_party/wiredtiger/test/suite/test_huffman02.py
@@ -34,11 +34,6 @@ import wiredtiger, wttest
# test_huffman02.py
# Huffman key and value configurations test.
class test_huffman02(wttest.WiredTigerTestCase, suite_subprocess):
- huffkey = [
- ('bad', dict(keybad=1,huffkey=',huffman_key=bad')),
- ('english', dict(keybad=0,huffkey=',huffman_key=english')),
- ('none', dict(keybad=0,huffkey=',huffman_key=none')),
- ]
huffval = [
('bad', dict(valbad=1,huffval=',huffman_value=bad')),
('english', dict(valbad=0,huffval=',huffman_value=english')),
@@ -48,15 +43,15 @@ class test_huffman02(wttest.WiredTigerTestCase, suite_subprocess):
('file', dict(uri='file:huff')),
('table', dict(uri='table:huff')),
]
- scenarios = make_scenarios(type, huffkey, huffval)
+ scenarios = make_scenarios(type, huffval)
def test_huffman(self):
- if self.keybad or self.valbad:
+ if self.valbad:
msg = '/Invalid argument/'
self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda:
- self.session.create(self.uri, self.huffkey + self.huffval), msg)
+ self.session.create(self.uri, self.huffval), msg)
else:
- self.session.create(self.uri, self.huffkey + self.huffval)
+ self.session.create(self.uri, self.huffval)
if __name__ == '__main__':
wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_import09.py b/src/third_party/wiredtiger/test/suite/test_import09.py
new file mode 100644
index 00000000000..b6b747b9005
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_import09.py
@@ -0,0 +1,190 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2020 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# test_import09.py
+# Import a table with the repair option (no exported metadata).
+
+import os, random, shutil
+from test_import01 import test_import_base
+from wtscenario import make_scenarios
+
+class test_import09(test_import_base):
+ nrows = 100
+ ntables = 1
+ session_config = 'isolation=snapshot'
+
+ allocsizes = [
+ ('512', dict(allocsize='512')),
+ ('1024', dict(allocsize='1024')),
+ ('2048', dict(allocsize='2048')),
+ ('4096', dict(allocsize='4096')),
+ ]
+ compressors = [
+ ('none', dict(compressor='none')),
+ ('nop', dict(compressor='nop')),
+ ('lz4', dict(compressor='lz4')),
+ ('snappy', dict(compressor='snappy')),
+ ('zlib', dict(compressor='zlib')),
+ ('zstd', dict(compressor='zstd')),
+ ]
+ encryptors = [
+ ('none', dict(encryptor='none')),
+ ('nop', dict(encryptor='nop')),
+ ('rotn', dict(encryptor='rotn')),
+ ]
+ tables = [
+ ('simple_table', dict(
+ is_simple = True,
+ keys = [k for k in range(1, nrows+1)],
+ values = random.sample(range(1000000), k=nrows),
+ config = 'key_format=r,value_format=i')),
+ ('table_with_named_columns', dict(
+ is_simple = False,
+ keys = [k for k in range(1, 7)],
+ values = [('Australia', 'Canberra', 1),('Japan', 'Tokyo', 2),('Italy', 'Rome', 3),
+ ('China', 'Beijing', 4),('Germany', 'Berlin', 5),('South Korea', 'Seoul', 6)],
+ config = 'columns=(id,country,capital,population),key_format=r,value_format=SSi')),
+ ]
+ scenarios = make_scenarios(tables, allocsizes, compressors, encryptors)
+
+ # Check to verify table projections.
+ def check_projections(self, uri, keys, values):
+ for i in range(0, len(keys)):
+ self.check_record(uri + '(country,capital)',
+ keys[i], [values[i][0], values[i][1]])
+ self.check_record(uri + '(country,population)',
+ keys[i], [values[i][0], values[i][2]])
+ self.check_record(uri + '(capital,population)',
+ keys[i], [values[i][1], values[i][2]])
+
+ # Load the compressor extension, skip the test if missing.
+ def conn_extensions(self, extlist):
+ extlist.skip_if_missing = True
+ extlist.extension('compressors', self.compressor)
+ extlist.extension('encryptors', self.encryptor)
+
+ def conn_config(self):
+ return 'cache_size=50MB,log=(enabled),statistics=(all),encryption=(name={})'.format(
+ self.encryptor)
+
+ def test_import_table_repair(self):
+ # Add some tables & data and checkpoint.
+ self.populate(self.ntables, self.nrows)
+ self.session.checkpoint()
+
+ # Create the table targeted for import.
+ original_db_table = 'original_db_table'
+ uri = 'table:' + original_db_table
+ create_config = ('allocation_size={},log=(enabled=true),block_compressor={},'
+ 'encryption=(name={}),') + self.config
+ self.session.create(uri,
+ create_config.format(self.allocsize, self.compressor, self.encryptor))
+
+ keys = self.keys
+ values = self.values
+ ts = [10*k for k in range(1, len(keys)+1)]
+
+ # Add data to our target table and perform a checkpoint.
+ min_idx = 0
+ max_idx = len(keys) // 3
+ for i in range(min_idx, max_idx):
+ self.update(uri, keys[i], values[i], ts[i])
+ self.session.checkpoint()
+
+ # Add more data and checkpoint again.
+ min_idx = max_idx
+ max_idx = 2*len(keys) // 3
+ for i in range(min_idx, max_idx):
+ self.update(uri, keys[i], values[i], ts[i])
+ self.session.checkpoint()
+
+ # Export the file and table metadata so we can verify our repair later.
+ original_db_file_uri = 'file:' + original_db_table + '.wt'
+ c = self.session.open_cursor('metadata:', None, None)
+ original_db_table_config = c[uri]
+ original_db_file_config = c[original_db_file_uri]
+ c.close()
+
+ self.printVerbose(3, '\nFile configuration:\n' + original_db_file_config)
+ self.printVerbose(3, '\nTable configuration:\n' + original_db_table_config)
+
+ # Close the connection.
+ self.close_conn()
+
+ # Create a new database and connect to it.
+ newdir = 'IMPORT_DB'
+ shutil.rmtree(newdir, ignore_errors=True)
+ os.mkdir(newdir)
+ self.conn = self.setUpConnectionOpen(newdir)
+ self.session = self.setUpSessionOpen(self.conn)
+
+ # Make a bunch of files and fill them with data.
+ self.populate(self.ntables, self.nrows)
+ self.session.checkpoint()
+
+ # Bring forward the oldest to be past or equal to the timestamps we'll be importing.
+ self.conn.set_timestamp('oldest_timestamp=' + self.timestamp_str(ts[max_idx]))
+
+ # Copy over the datafile for the table we want to import.
+ self.copy_file(original_db_table + '.wt', '.', newdir)
+
+ # Construct the config string.
+ import_config = 'log=(enabled=true),import=(enabled,repair=true)'
+
+ # Import the file.
+ self.session.create(uri, import_config)
+
+ # Verify object.
+ self.session.verify(uri)
+
+ # Check that the previously inserted values survived the import.
+ self.check(uri, keys[:max_idx], values[:max_idx])
+
+ # Check against projections when the table is not simple.
+ if not self.is_simple:
+ self.check_projections(uri, keys[:max_idx], values[:max_idx])
+
+ # Compare configuration metadata.
+ c = self.session.open_cursor('metadata:', None, None)
+ new_db_file_config = c[original_db_file_uri]
+ new_db_table_config = c[uri]
+ c.close()
+ self.config_compare(original_db_file_config, new_db_file_config)
+ self.config_compare(original_db_table_config, new_db_table_config)
+
+ # Add some data and check that the table operates as usual after importing.
+ min_idx = max_idx
+ max_idx = len(keys)
+ for i in range(min_idx, max_idx):
+ self.update(uri, keys[i], values[i], ts[i])
+ self.check(uri, keys, values)
+ if not self.is_simple:
+ self.check_projections(uri, keys, values)
+
+ # Perform a checkpoint.
+ self.session.checkpoint()
diff --git a/src/third_party/wiredtiger/test/suite/test_jsondump02.py b/src/third_party/wiredtiger/test/suite/test_jsondump02.py
index 9ae8dead18c..080b698d7a2 100755
--- a/src/third_party/wiredtiger/test/suite/test_jsondump02.py
+++ b/src/third_party/wiredtiger/test/suite/test_jsondump02.py
@@ -92,9 +92,6 @@ class test_jsondump02(wttest.WiredTigerTestCase, suite_subprocess):
Create JSON cursors and test them directly, also test
dump/load commands.
"""
- import platform
- if platform.system() == 'Darwin':
- self.skipTest('JSON cursor test for OSX not yet working on Python3')
extra_params = ',allocation_size=512,' +\
'internal_page_max=16384,leaf_page_max=131072'
self.session.create(self.table_uri1,
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp20.py b/src/third_party/wiredtiger/test/suite/test_timestamp20.py
index 63a2503d915..442101fb57a 100644
--- a/src/third_party/wiredtiger/test/suite/test_timestamp20.py
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp20.py
@@ -66,14 +66,14 @@ class test_timestamp20(wttest.WiredTigerTestCase):
old_reader_session = self.conn.open_session()
old_reader_cursor = old_reader_session.open_cursor(uri)
- old_reader_session.begin_transaction('read_timestamp=' + timestamp_str(30))
+ old_reader_session.begin_transaction('read_timestamp=' + timestamp_str(20))
# Now put two updates out of order. 5 will go to the history store and will trigger a
# correction to the existing contents.
for i in range(1, 10000):
self.session.begin_transaction()
cursor[str(i)] = value4
- self.session.commit_transaction('commit_timestamp=' + timestamp_str(5))
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(25))
self.session.begin_transaction()
cursor[str(i)] = value5
self.session.commit_transaction('commit_timestamp=' + timestamp_str(40))
@@ -84,7 +84,7 @@ class test_timestamp20(wttest.WiredTigerTestCase):
self.session.rollback_transaction()
for i in range(1, 10000):
- self.assertEqual(old_reader_cursor[str(i)], value3)
+ self.assertEqual(old_reader_cursor[str(i)], value2)
old_reader_session.rollback_transaction()
# In this test we're using modifies since they are more sensitive to corruptions.
@@ -126,7 +126,7 @@ class test_timestamp20(wttest.WiredTigerTestCase):
# has been squashed into a full update.
old_reader_session = self.conn.open_session()
old_reader_cursor = old_reader_session.open_cursor(uri)
- old_reader_session.begin_transaction('read_timestamp=' + timestamp_str(30))
+ old_reader_session.begin_transaction('read_timestamp=' + timestamp_str(20))
# Now apply the last modify.
# This will be the end of the chain of modifies.
@@ -141,7 +141,7 @@ class test_timestamp20(wttest.WiredTigerTestCase):
for i in range(1, 10000):
self.session.begin_transaction()
cursor[str(i)] = value2
- self.session.commit_transaction('commit_timestamp=' + timestamp_str(5))
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(25))
self.session.begin_transaction()
cursor[str(i)] = value3
self.session.commit_transaction('commit_timestamp=' + timestamp_str(50))
@@ -156,7 +156,6 @@ class test_timestamp20(wttest.WiredTigerTestCase):
# Put together expected value.
expected = list(value1)
expected[100] = 'B'
- expected[200] = 'C'
expected = str().join(expected)
# On the other hand, this older transaction SHOULD be able to read past the 5.
diff --git a/src/third_party/wiredtiger/test/suite/test_txn25.py b/src/third_party/wiredtiger/test/suite/test_txn25.py
new file mode 100644
index 00000000000..befdd6cdf24
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_txn25.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2020 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# test_txn25.py
+# Test the write generation mechanism to ensure that transaction ids get wiped between runs.
+#
+
+import wiredtiger, wttest
+
+class test_txn25(wttest.WiredTigerTestCase):
+ conn_config = 'cache_size=50MB,log=(enabled),statistics=(all)'
+ session_config = 'isolation=snapshot'
+
+ def test_txn25(self):
+ uri = 'file:test_txn25'
+ create_config = 'allocation_size=512,key_format=S,value_format=S'
+ self.session.create(uri, create_config)
+
+ # Populate the file and ensure that we start seeing some high transaction IDs in the system.
+ value1 = 'aaaaa' * 100
+ value2 = 'bbbbb' * 100
+ value3 = 'ccccc' * 100
+
+ # Keep transaction ids around.
+ session2 = self.conn.open_session()
+ session2.begin_transaction()
+
+ cursor = self.session.open_cursor(uri)
+ for i in range(1, 1000):
+ self.session.begin_transaction()
+ cursor[str(i)] = value1
+ self.session.commit_transaction()
+
+ for i in range(1, 1000):
+ self.session.begin_transaction()
+ cursor[str(i)] = value2
+ self.session.commit_transaction()
+
+ for i in range(1, 1000):
+ self.session.begin_transaction()
+ cursor[str(i)] = value3
+ self.session.commit_transaction()
+
+ session2.rollback_transaction()
+ session2.close()
+
+ # Close and re-open the connection.
+ cursor.close()
+ self.conn.close()
+ self.conn = wiredtiger.wiredtiger_open(self.home, self.conn_config)
+ self.session = self.conn.open_session(self.session_config)
+
+ # Now that we've reopened, check that we can view the latest data from the previous run.
+ #
+ # Since we've restarted the system, our transaction IDs are going to begin from 1 again
+ # so we have to wipe the cell's transaction IDs in order to see them.
+ cursor = self.session.open_cursor(uri)
+ self.session.begin_transaction()
+ for i in range(1, 1000):
+ self.assertEqual(cursor[str(i)], value3)
+ self.session.rollback_transaction()
diff --git a/src/third_party/wiredtiger/test/suite/test_util01.py b/src/third_party/wiredtiger/test/suite/test_util01.py
index 9dae07f3ca6..3eb457ceb1f 100755
--- a/src/third_party/wiredtiger/test/suite/test_util01.py
+++ b/src/third_party/wiredtiger/test/suite/test_util01.py
@@ -133,7 +133,21 @@ class test_util01(wttest.WiredTigerTestCase, suite_subprocess):
def dump_kv_to_line(self, b):
# The output from dump is a 'u' format.
- return b.strip(b'\x00').decode() + '\n'
+ # Printable chars appear 'as is', unprintable chars
+ # appear as \hh where hh are hex digits.
+ # We can't decode the entire byte array, some Unicode decoders
+ # will complain as the set of bytes don't represent UTF-8 encoded
+ # characters.
+
+ # Create byte representation of printable ascii chars
+ printable_chars = bytes(string.printable, 'ascii')
+ result = ''
+ for byte in b.strip(b'\x00'):
+ if byte in printable_chars:
+ result += bytearray([byte]).decode()
+ else:
+ result += "\\{:02x}".format(byte)
+ return result + '\n'
def write_entries(self, cursor, expectout, hexoutput, commit_timestamp, write_expected):
if commit_timestamp is not None:
@@ -217,9 +231,6 @@ class test_util01(wttest.WiredTigerTestCase, suite_subprocess):
self.dump(False, True, None, None)
def test_dump_api(self):
- import platform
- if platform.system() == 'Darwin':
- self.skipTest('dump API test for OSX not yet working on Python3')
self.dump(True, False, None, None)
def test_dump_api_hex(self):
diff --git a/src/third_party/wiredtiger/test/wt_hang_analyzer/wt_hang_analyzer.py b/src/third_party/wiredtiger/test/wt_hang_analyzer/wt_hang_analyzer.py
new file mode 100644
index 00000000000..6c5b7832cb4
--- /dev/null
+++ b/src/third_party/wiredtiger/test/wt_hang_analyzer/wt_hang_analyzer.py
@@ -0,0 +1,603 @@
+#!/usr/bin/env python
+"""Hang Analyzer module.
+
+A prototype hang analyzer for Evergreen integration to help investigate test timeouts.
+
+1. Script supports taking dumps, and/or dumping a summary of useful information about a process.
+2. Script will iterate through a list of interesting processes,
+ and run the tools from step 1. The list of processes can be provided as an option.
+
+Currently only supports Linux. There are two issues with the MacOS and Windows implementations:
+1. WT-6918 - lldb cannot attach to processes in MacOS.
+2. WT-6919 - Windows cannot find the debug symbols.
+"""
+
+import csv, glob, itertools, logging, re, tempfile, traceback
+import os, sys, platform, signal, subprocess, threading, time
+from distutils import spawn
+from io import BytesIO, TextIOWrapper
+from optparse import OptionParser
+_IS_WINDOWS = (sys.platform == "win32")
+
+if _IS_WINDOWS:
+ import win32event
+ import win32api
+
+"""
+Helper class to read output of a subprocess.
+
+Used to avoid deadlocks from the pipe buffer filling up and blocking the subprocess while it's
+being waited on.
+"""
+class LoggerPipe(threading.Thread):
+ """Asynchronously reads the output of a subprocess and sends it to a logger."""
+
+ # The start() and join() methods are not intended to be called directly on the LoggerPipe
+ # instance. Since we override them for that effect, the super's version are preserved here.
+ __start = threading.Thread.start
+ __join = threading.Thread.join
+
+ def __init__(self, logger, level, pipe_out):
+ """Initialize the LoggerPipe with the specified arguments."""
+
+ threading.Thread.__init__(self)
+ # Main thread should not call join() when exiting.
+ self.daemon = True
+
+ self.__logger = logger
+ self.__level = level
+ self.__pipe_out = pipe_out
+
+ self.__lock = threading.Lock()
+ self.__condition = threading.Condition(self.__lock)
+
+ self.__started = False
+ self.__finished = False
+
+ LoggerPipe.__start(self)
+
+ def start(self):
+ """Start not implemented."""
+ raise NotImplementedError("start should not be called directly")
+
+ def run(self):
+ """Read the output from 'pipe_out' and logs each line to 'logger'."""
+
+ with self.__lock:
+ self.__started = True
+ self.__condition.notify_all()
+
+ # Close the pipe when all of the output has been read.
+ with self.__pipe_out:
+ # Avoid buffering the output from the pipe.
+ for line in iter(self.__pipe_out.readline, b""):
+ # Convert the output of the process from a bytestring to a UTF-8 string, and replace
+ # any characters that cannot be decoded with the official Unicode replacement
+ # character, U+FFFD.
+ line = line.decode("utf-8", "replace")
+ self.__logger.log(self.__level, line.rstrip())
+
+ with self.__lock:
+ self.__finished = True
+ self.__condition.notify_all()
+
+ def join(self, timeout=None):
+ """Join not implemented."""
+ raise NotImplementedError("join should not be called directly")
+
+ def wait_until_started(self):
+ """Wait until started."""
+ with self.__lock:
+ while not self.__started:
+ self.__condition.wait()
+
+ def wait_until_finished(self):
+ """Wait until finished."""
+ with self.__lock:
+ while not self.__finished:
+ self.__condition.wait()
+
+ # No need to pass a timeout to join() because the thread should already be done after
+ # notifying us it has finished reading output from the pipe.
+ LoggerPipe.__join(self)
+
+def call(args, logger):
+ """Call subprocess on args list."""
+ logger.info(str(args))
+
+ # Use a common pipe for stdout & stderr for logging.
+ process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+ logger_pipe = LoggerPipe(logger, logging.INFO, process.stdout)
+ logger_pipe.wait_until_started()
+
+ ret = process.wait()
+ logger_pipe.wait_until_finished()
+
+ if ret != 0:
+ logger.error("Bad exit code %d", ret)
+ raise Exception("Bad exit code %d from %s" % (ret, " ".join(args)))
+
+def callo(args, logger):
+ """Call subprocess on args string."""
+ logger.info("%s", str(args))
+
+ return subprocess.check_output(args)
+
+def find_program(prog, paths):
+ """Find the specified program in env PATH, or tries a set of paths."""
+ loc = spawn.find_executable(prog)
+
+ if loc is not None:
+ return loc
+
+ for loc in paths:
+ full_prog = os.path.join(loc, prog)
+ if os.path.exists(full_prog):
+ return full_prog
+
+ return None
+
+def get_process_logger(debugger_output, pid, process_name):
+ """Return the process logger from options specified."""
+ process_logger = logging.Logger("process", level=logging.DEBUG)
+ process_logger.mongo_process_filename = None
+
+ if 'stdout' in debugger_output:
+ s_handler = logging.StreamHandler(sys.stdout)
+ s_handler.setFormatter(logging.Formatter(fmt="%(message)s"))
+ process_logger.addHandler(s_handler)
+
+ if 'file' in debugger_output:
+ filename = "debugger_%s_%d.log" % (os.path.splitext(process_name)[0], pid)
+ process_logger.mongo_process_filename = filename
+ f_handler = logging.FileHandler(filename=filename, mode="w")
+ f_handler.setFormatter(logging.Formatter(fmt="%(message)s"))
+ process_logger.addHandler(f_handler)
+
+ return process_logger
+
+class WindowsDumper(object):
+ """WindowsDumper class."""
+
+ @staticmethod
+ def __find_debugger(logger, debugger):
+ """Find the installed debugger."""
+ # We are looking for c:\Program Files (x86)\Windows Kits\8.1\Debuggers\x64.
+ cdb = spawn.find_executable(debugger)
+ if cdb is not None:
+ return cdb
+ from win32com.shell import shell, shellcon
+
+ # Cygwin via sshd does not expose the normal environment variables.
+ # Use the shell api to get the variable instead.
+ root_dir = shell.SHGetFolderPath(0, shellcon.CSIDL_PROGRAM_FILESX86, None, 0)
+
+ # Construct the debugger search paths in most-recent order.
+ debugger_paths = [os.path.join(root_dir, "Windows Kits", "10", "Debuggers", "x64")]
+ for idx in reversed(range(0, 2)):
+ debugger_paths.append(
+ os.path.join(root_dir, "Windows Kits", "8." + str(idx), "Debuggers", "x64"))
+
+ for dbg_path in debugger_paths:
+ logger.info("Checking for debugger in %s", dbg_path)
+ if os.path.exists(dbg_path):
+ return os.path.join(dbg_path, debugger)
+
+ return None
+
+ def dump_info(self, root_logger, logger, pid, process_name, take_dump):
+ """Dump useful information to the console."""
+ debugger = "cdb.exe"
+ dbg = self.__find_debugger(root_logger, debugger)
+
+ if dbg is None:
+ root_logger.warning("Debugger %s not found, skipping dumping of %d", debugger, pid)
+ return
+
+ root_logger.info("Debugger %s, analyzing %s process with PID %d", dbg, process_name, pid)
+
+ dump_command = ""
+ if take_dump:
+ # Dump to file, dump_<process name>.<pid>.mdmp.
+ dump_file = "dump_%s.%d.%s" % (os.path.splitext(process_name)[0], pid,
+ self.get_dump_ext())
+ dump_command = ".dump /ma %s" % dump_file
+ root_logger.info("Dumping core to %s", dump_file)
+
+ cmds = [
+ ".symfix", # Fixup symbol path.
+ "!sym noisy", # Enable noisy symbol loading.
+ ".symopt +0x10", # Enable line loading (off by default in CDB, on by default in WinDBG).
+ ".reload", # Reload symbols.
+ "!peb", # Dump current exe & environment variables.
+ "lm", # Dump loaded modules.
+ dump_command,
+ "!uniqstack -pn", # Dump all unique threads with function arguments.
+ "!cs -l", # Dump all locked critical sections.
+ ".detach", # Detach.
+ "q" # Quit.
+ ]
+
+ call([dbg, '-c', ";".join(cmds), '-p', str(pid)], logger)
+
+ root_logger.info("Done analyzing %s process with PID %d", process_name, pid)
+
+ @staticmethod
+ def get_dump_ext():
+ """Return the dump file extension."""
+ return "mdmp"
+
+class WindowsProcessList(object):
+ """WindowsProcessList class."""
+
+ @staticmethod
+ def __find_ps():
+ """Find tasklist."""
+ return os.path.join(os.environ["WINDIR"], "system32", "tasklist.exe")
+
+ def dump_processes(self, logger):
+ """Get list of [Pid, Process Name]."""
+ ps = self.__find_ps()
+
+ logger.info("Getting list of processes using %s", ps)
+
+ ret = callo([ps, "/FO", "CSV"], logger)
+
+ buff = TextIOWrapper(BytesIO(ret))
+ csv_reader = csv.reader(buff)
+
+ return [[int(row[1]), row[0]] for row in csv_reader if row[1] != "PID"]
+
+# LLDB dumper is for MacOS X.
+class LLDBDumper(object):
+ """LLDBDumper class."""
+
+ @staticmethod
+ def __find_debugger(debugger):
+ """Find the installed debugger."""
+ return find_program(debugger, ['/usr/bin'])
+
+ def dump_info(self, root_logger, logger, pid, process_name, take_dump):
+ """Dump info."""
+ debugger = "lldb"
+ dbg = self.__find_debugger(debugger)
+
+ if dbg is None:
+ root_logger.warning("Debugger %s not found, skipping dumping of %d", debugger, pid)
+ return
+
+ root_logger.info("Debugger %s, analyzing %s process with PID %d", dbg, process_name, pid)
+
+ lldb_version = callo([dbg, "--version"], logger)
+
+ logger.info(lldb_version)
+
+ # Do we have the XCode or LLVM version of lldb?
+ # Old versions of lldb do not work well when taking commands via a file.
+ # XCode (7.2): lldb-340.4.119.
+ # LLVM - lldb version 3.7.0 ( revision ).
+
+ lldb_version = str(lldb_version)
+ if 'version' not in lldb_version:
+ # We have XCode's lldb.
+ lldb_version = lldb_version[lldb_version.index("lldb-"):]
+ lldb_version = lldb_version.replace('lldb-', '')
+ lldb_major_version = int(lldb_version[:lldb_version.index('.')])
+ if lldb_major_version < 340:
+ logger.warning("Debugger lldb is too old, please upgrade to XCode 7.2")
+ return
+
+ dump_command = ""
+ if take_dump:
+ # Dump to file, dump_<process name>.<pid>.core.
+ dump_file = "dump_%s.%d.%s" % (process_name, pid, self.get_dump_ext())
+ dump_command = "process save-core %s" % dump_file
+ root_logger.info("Dumping core to %s", dump_file)
+
+ cmds = [
+ "attach -p %d" % pid,
+ "target modules list",
+ "thread backtrace all",
+ dump_command,
+ "settings set interpreter.prompt-on-quit false",
+ "quit",
+ ]
+
+ tf = tempfile.NamedTemporaryFile(mode='w', encoding='utf-8')
+
+ for cmd in cmds:
+ tf.write(cmd + "\n")
+
+ tf.flush()
+
+    # Works on MacOS 10.9 & later.
+ #call([dbg] + list( itertools.chain.from_iterable([['-o', b] for b in cmds])), logger)
+ call(['cat', tf.name], logger)
+ call([dbg, '--source', tf.name], logger)
+
+ root_logger.info("Done analyzing %s process with PID %d", process_name, pid)
+
+ @staticmethod
+ def get_dump_ext():
+ """Return the dump file extension."""
+ return "core"
+
+class DarwinProcessList(object):
+ """DarwinProcessList class."""
+
+ @staticmethod
+ def __find_ps():
+ """Find ps."""
+ return find_program('ps', ['/bin'])
+
+ def dump_processes(self, logger):
+ """Get list of [Pid, Process Name]."""
+ ps = self.__find_ps()
+
+ logger.info("Getting list of processes using %s", ps)
+
+ ret = callo([ps, "-axco", "pid,comm"], logger)
+
+ buff = TextIOWrapper(BytesIO(ret))
+ csv_reader = csv.reader(buff, delimiter=' ', quoting=csv.QUOTE_NONE, skipinitialspace=True)
+
+ return [[int(row[0]), row[1]] for row in csv_reader if row[0] != "PID"]
+
+# GDB dumper is for Linux.
+class GDBDumper(object):
+ """GDBDumper class."""
+
+ @staticmethod
+ def __find_debugger(debugger):
+ """Find the installed debugger."""
+ return find_program(debugger, ['/opt/mongodbtoolchain/v3/bin/gdb', '/usr/bin'])
+
+ def dump_info(self, root_logger, logger, pid, process_name, take_dump):
+ """Dump info."""
+ debugger = "gdb"
+ dbg = self.__find_debugger(debugger)
+
+ if dbg is None:
+ logger.warning("Debugger %s not found, skipping dumping of %d", debugger, pid)
+ return
+
+ root_logger.info("Debugger %s, analyzing %s process with PID %d", dbg, process_name, pid)
+
+ dump_command = ""
+ if take_dump:
+ # Dump to file, dump_<process name>.<pid>.core.
+ dump_file = "dump_%s.%d.%s" % (process_name, pid, self.get_dump_ext())
+ dump_command = "gcore %s" % dump_file
+ root_logger.info("Dumping core to %s", dump_file)
+
+ call([dbg, "--version"], logger)
+
+ cmds = [
+ "set interactive-mode off",
+ "set print thread-events off", # Suppress GDB messages of threads starting/finishing.
+ "file %s" % process_name,
+ "attach %d" % pid,
+ "info sharedlibrary",
+ "info threads", # Dump a simple list of commands to get the thread name.
+ "thread apply all bt",
+ "set python print-stack full",
+            # Lock the scheduler before running commands that execute code in the attached process.
+ "set scheduler-locking on",
+ dump_command,
+ "set confirm off",
+ "quit",
+ ]
+
+ call([dbg, "--quiet", "--nx"] +
+ list(itertools.chain.from_iterable([['-ex', b] for b in cmds])), logger)
+
+ root_logger.info("Done analyzing %s process with PID %d", process_name, pid)
+
+ @staticmethod
+ def get_dump_ext():
+ """Return the dump file extension."""
+ return "core"
+
+ @staticmethod
+ def _find_gcore():
+ """Find the installed gcore."""
+ dbg = "/usr/bin/gcore"
+ if os.path.exists(dbg):
+ return dbg
+
+ return None
+
+class LinuxProcessList(object):
+ """LinuxProcessList class."""
+
+ @staticmethod
+ def __find_ps():
+ """Find ps."""
+ return find_program('ps', ['/bin', '/usr/bin'])
+
+ def dump_processes(self, logger):
+ """Get list of [Pid, Process Name]."""
+ ps = self.__find_ps()
+
+ logger.info("Getting list of processes using %s", ps)
+
+ call([ps, "--version"], logger)
+
+ ret = callo([ps, "-eo", "pid,args"], logger)
+
+ buff = TextIOWrapper(BytesIO(ret))
+ csv_reader = csv.reader(buff, delimiter=' ', quoting=csv.QUOTE_NONE, skipinitialspace=True)
+
+ return [[int(row[0]), os.path.split(row[1])[1]] for row in csv_reader if row[0] != "PID"]
+
+def get_hang_analyzers():
+ """Return hang analyzers."""
+
+ dbg = None
+ ps = None
+
+    # Skip taking the dump on MacOS, where attempting it currently results in an error.
+ # FIXME : WT-6918 - Remove the skip block of code after fixing the issues.
+ if sys.platform == "darwin":
+ return [ps, dbg]
+
+ if sys.platform.startswith("linux"):
+ dbg = GDBDumper()
+ ps = LinuxProcessList()
+ elif _IS_WINDOWS or sys.platform == "cygwin":
+ dbg = WindowsDumper()
+ ps = WindowsProcessList()
+ elif sys.platform == "darwin":
+ dbg = LLDBDumper()
+ ps = DarwinProcessList()
+
+ return [ps, dbg]
+
+def check_dump_quota(quota, ext):
+ """Check if sum of the files with ext is within the specified quota in megabytes."""
+
+ files = glob.glob("*." + ext)
+
+ size_sum = 0
+ for file_name in files:
+ size_sum += os.path.getsize(file_name)
+
+ return size_sum <= quota
+
+def pname_match(exact_match, pname, processes):
+ """Return True if the pname matches in processes."""
+ pname = os.path.splitext(pname)[0]
+ for ip in processes:
+ if exact_match and pname == ip or not exact_match and ip in pname:
+ return True
+ return False
+
+# Basic procedure
+#
+# 1. Get a list of interesting processes.
+# 2. Dump useful information or take dumps.
+def main():
+ """Execute Main program."""
+ root_logger = logging.Logger("hang_analyzer", level=logging.DEBUG)
+
+ handler = logging.StreamHandler(sys.stdout)
+ handler.setFormatter(logging.Formatter(fmt="%(message)s"))
+ root_logger.addHandler(handler)
+
+ root_logger.info("Python Version: %s", sys.version)
+ root_logger.info("OS: %s", platform.platform())
+
+ try:
+ if _IS_WINDOWS or sys.platform == "cygwin":
+ distro = platform.win32_ver()
+ root_logger.info("Windows Distribution: %s", distro)
+ else:
+ distro = platform.linux_distribution()
+ root_logger.info("Linux Distribution: %s", distro)
+
+ except AttributeError:
+ root_logger.warning("Cannot determine Linux distro since Python is too old")
+
+ try:
+ uid = os.getuid()
+ root_logger.info("Current User: %s", uid)
+ current_login = os.getlogin()
+ root_logger.info("Current Login: %s", current_login)
+ except OSError:
+ root_logger.warning("Cannot determine Unix Current Login")
+ except AttributeError:
+ root_logger.warning("Cannot determine Unix Current Login, not supported on Windows")
+
+ contain_processes = ["ex_", "intpack-test", "python", "test_"]
+ exact_processes = ["cursor_order", "packing-test", "t"]
+ process_ids = []
+
+ parser = OptionParser(description=__doc__)
+ parser.add_option('-p', '--process-contains-names', dest='process_contains_names',
+ help='Comma separated list of process patterns to analyze')
+ parser.add_option('-e', '--process-names', dest='process_exact_names',
+ help='Comma separated list of exact process names to analyze')
+ parser.add_option('-d', '--process-ids', dest='process_ids', default=None,
+ help='Comma separated list of process ids (PID) to analyze, overrides -p & e')
+ parser.add_option('-c', '--dump-core', dest='dump_core', action="store_true", default=False,
+ help='Dump core file for each analyzed process')
+ parser.add_option('-s', '--max-core-dumps-size', dest='max_core_dumps_size', default=10000,
+ help='Maximum total size of core dumps to keep in megabytes')
+ parser.add_option('-o', '--debugger-output', dest='debugger_output', action="append",
+ choices=['file', 'stdout'], default=None,
+ help="If 'stdout', then the debugger's output is written to the Python"
+ " process's stdout. If 'file', then the debugger's output is written"
+ " to a file named debugger_<process>_<pid>.log for each process it"
+ " attaches to. This option can be specified multiple times on the"
+ " command line to have the debugger's output written to multiple"
+ " locations. By default, the debugger's output is written only to the"
+ " Python process's stdout.")
+
+ (options, _) = parser.parse_args()
+
+ if options.debugger_output is None:
+ options.debugger_output = ['stdout']
+
+ if options.process_ids is not None:
+ # process_ids is an int list of PIDs.
+ process_ids = [int(pid) for pid in options.process_ids.split(',')]
+
+ if options.process_exact_names is not None:
+ exact_processes = options.process_exact_names.split(',')
+
+ if options.process_contains_names is not None:
+ contain_processes = options.process_contains_names.split(',')
+
+ [ps, dbg] = get_hang_analyzers()
+
+ if ps is None or dbg is None:
+ root_logger.warning("hang_analyzer.py: Unsupported platform: %s", sys.platform)
+ exit(1)
+
+ all_processes = ps.dump_processes(root_logger)
+
+ # Canonicalize the process names to lowercase to handle cases where the name of the Python
+ # process is /System/Library/.../Python on OS X and -p python is specified.
+ all_processes = [(pid, process_name.lower()) for (pid, process_name) in all_processes]
+
+ # Find all running interesting processes:
+ # If a list of process_ids is supplied, match on that.
+ # Otherwise, do a substring match on interesting_processes.
+ if process_ids:
+ processes = [(pid, pname) for (pid, pname) in all_processes
+ if pid in process_ids and pid != os.getpid()]
+
+ running_pids = set([pid for (pid, pname) in all_processes])
+ missing_pids = set(process_ids) - running_pids
+ if missing_pids:
+ root_logger.warning("The following requested process ids are not running %s",
+ list(missing_pids))
+ else:
+ processes = [(pid, pname) for (pid, pname) in all_processes
+ if (pname_match(True, pname, exact_processes) or pname_match(False, pname, contain_processes)) and pid != os.getpid()]
+
+ root_logger.info("Found %d interesting processes %s", len(processes), processes)
+
+ max_dump_size_bytes = int(options.max_core_dumps_size) * 1024 * 1024
+
+ trapped_exceptions = []
+
+ # Dump all processes.
+ for (pid, process_name) in processes:
+ process_logger = get_process_logger(options.debugger_output, pid, process_name)
+ try:
+ dbg.dump_info(root_logger, process_logger, pid, process_name, options.dump_core
+ and check_dump_quota(max_dump_size_bytes, dbg.get_dump_ext()))
+ except Exception as err:
+ root_logger.info("Error encountered when invoking debugger %s", err)
+ trapped_exceptions.append(traceback.format_exc())
+
+ root_logger.info("Done analyzing all processes for hangs")
+
+ for exception in trapped_exceptions:
+ root_logger.info(exception)
+ if trapped_exceptions:
+ sys.exit(1)
+
+if __name__ == "__main__":
+ main()