diff options
author | Michael Cahill <michael.cahill@mongodb.com> | 2015-07-17 22:40:41 +1000 |
---|---|---|
committer | Michael Cahill <michael.cahill@mongodb.com> | 2015-07-17 22:40:41 +1000 |
commit | d7e9b92a8117edab8869c132a4f7bfae3d3ff2ff (patch) | |
tree | 662253d1f3793c5119e5a9fdb608a3aa520621b6 /src | |
parent | 5170a33c1a3632cff838c4b9291938cc3a4ad41c (diff) | |
download | mongo-d7e9b92a8117edab8869c132a4f7bfae3d3ff2ff.tar.gz |
Import wiredtiger-wiredtiger-2.6.1-284-g42823c9.tar.gz from wiredtiger branch mongodb-3.2
Diffstat (limited to 'src')
65 files changed, 1009 insertions, 798 deletions
diff --git a/src/third_party/wiredtiger/build_posix/configure.ac.in b/src/third_party/wiredtiger/build_posix/configure.ac.in index 4bfb4df7fa2..de2f8963629 100644 --- a/src/third_party/wiredtiger/build_posix/configure.ac.in +++ b/src/third_party/wiredtiger/build_posix/configure.ac.in @@ -13,7 +13,9 @@ AC_CONFIG_SRCDIR([RELEASE_INFO]) : ${CFLAGS=-O3 -g} : ${CXXFLAGS=-O3 -g} -AM_INIT_AUTOMAKE([1.11 foreign parallel-tests subdir-objects]) +# We rely on some automake features for testing (like AM_TEST_ENVIRONMENT) +# that didn't work before 1.11.6. +AM_INIT_AUTOMAKE([1.11.6 foreign parallel-tests subdir-objects]) m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([no])]) # Configure options. The AM_OPTIONS and the libtool configuration diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py index 28a0ed8adb6..43b585a6c6d 100644 --- a/src/third_party/wiredtiger/dist/api_data.py +++ b/src/third_party/wiredtiger/dist/api_data.py @@ -402,7 +402,8 @@ connection_runtime_config = [ to close''', min=0), Config('close_idle_time', '30', r''' amount of time in seconds a file handle needs to be idle - before attempting to close it''', min=1, max=100000), + before attempting to close it. A setting of 0 means that idle + handles are not closed''', min=0, max=100000), Config('close_scan_interval', '10', r''' interval in seconds at which to check for files that are inactive and close them''', min=1, max=100000), diff --git a/src/third_party/wiredtiger/dist/s_all b/src/third_party/wiredtiger/dist/s_all index 77db7df9b3d..60e8b8f1551 100644..100755 --- a/src/third_party/wiredtiger/dist/s_all +++ b/src/third_party/wiredtiger/dist/s_all @@ -2,7 +2,8 @@ # Run standard scripts. t=__wt.$$ -trap 'rm -f $t *.pyc __tmp __wt.*' 0 1 2 3 13 15 +t_pfx=__s_all_tmp +trap 'rm -f $t *.pyc __tmp __wt.* __s_all_tmp*' 0 1 2 3 13 15 # We require python which may not be installed. type python > /dev/null 2>&1 || { @@ -10,23 +11,6 @@ type python > /dev/null 2>&1 || { exit 1 } -run() -{ - printf "WiredTiger: $2..." - $1 > $t - - if `grep 'skipped' $t > /dev/null 2>&1`; then - printf " " && cat $t - elif `test -s $t`; then - echo - sed -e 's/^/ /' $t - else - echo ' OK' - fi - rm -f $t - return 0 -} - echo 'dist/s_all run started...' force= @@ -44,41 +28,77 @@ while : esac done -run "sh ./s_version $force" "Updating files that include the package version" +echo "Updating files that include the package version" && + sh ./s_version $force test "$reconf" -eq 0 || { - (cd ../build_posix && - run "sh ./reconf" "Rebuilding GNU tools library support") + (echo "Rebuilding GNU tools library support" && + cd ../build_posix && 2>&1 sh ./reconf | sed -e 's/^/ /') } -run "sh ./s_readme $force" "building README file" -run "python api_config.py" "building WiredTiger API" -run "python api_err.py" "building WiredTiger error returns" -run "python flags.py" "building flags" -run "python log.py" "building logging layer" -run "python stat.py" "building statistics support" -run "python java_doc.py" "building Java documentation index" -run "sh ./s_typedef -b" "building standard typedefs" -run "sh ./s_prototypes" "building function prototypes" -run "sh ./s_tags" "building tags files" +errchk() +{ + if ! `test -s $2`; then + return + fi -run "sh ./s_copyright" "checking copyright notices" -run "sh ./s_define" "checking for unused #defines" -run "sh ./s_typedef -c" "checking for unused typedefs" -run "sh ./s_funcs" "checking for unused functions" -run "sh ./s_export" "checking external symbol names" -run "sh ./s_getopt" "checking for incorrect getopt usage" -run "sh ./s_label" "checking error/return labels" -run "sh ./s_lang" "checking for SWIG generated name conflicts" -run "sh ./s_longlines" "checking for long lines" -run "sh ./s_stat" "checking for unused statistics fields" -run "sh ./s_string" "checking string spelling" -run "python style.py" "checking style (pass 1)" -run "sh ./s_style" "checking style (pass 2)" -run "sh ./s_python" "checking Python style" -run "sh ./s_whitespace" "checking whitespace" -run "sh ./s_win" "checking windows config" + echo "####################### ERROR ############################" + echo "s_all run of: \"$1\" resulted in:" + sed -e 's/^/ /' $2 + echo "#######################" + + rm -f $2 +} -run "sh ./s_docs" "generating documentation" +run() +{ + 2>&1 $1 > $t + errchk "$1" $t +} + +# Non parallelizable scripts The following scripts either modify files or +# already parallelize internally. +run "sh ./s_readme $force" +run "python api_config.py" +run "python api_err.py" +run "python flags.py" +run "python log.py" +run "python stat.py" +run "python java_doc.py" +run "sh ./s_prototypes" +run "sh ./s_typedef -b" +run "sh ./s_copyright" +run "sh ./s_style" + +COMMANDS=" +2>&1 ./s_tags > ${t_pfx}tags +2>&1 ./s_define > ${t_pfx}s_define +2>&1 ./s_typedef -c > ${t_pfx}s_typedef_c +2>&1 ./s_funcs > ${t_pfx}s_funcs +2>&1 ./s_export > ${t_pfx}s_export +2>&1 ./s_getopt > ${t_pfx}s_getopt +2>&1 ./s_label > ${t_pfx}s_label +2>&1 ./s_lang > ${t_pfx}s_lang +2>&1 ./s_longlines > ${t_pfx}s_longlines +2>&1 ./s_stat > ${t_pfx}_stat +2>&1 ./s_string > ${t_pfx}s_string +2>&1 python style.py > ${t_pfx}py_style +2>&1 ./s_python > ${t_pfx}s_python +2>&1 ./s_whitespace > ${t_pfx}s_whitespace +2>&1 ./s_win > ${t_pfx}s_win +2>&1 ./s_docs > ${t_pfx}s_docs" + +echo "$COMMANDS" | xargs -P 20 -I{} /bin/sh -c {} + +for f in `find . -name ${t_pfx}\*`; do + if ! `test -s $f`; then + continue + fi + LOCAL_NAME=`basename $f` + # Find original command and trim redirect garbage + FAILED_CMD=`echo "$COMMANDS" | grep $LOCAL_NAME | \ + sed -e 's/ >.*//' -e 's/.* //'` + errchk "$FAILED_CMD" $f +done echo 'dist/s_all run finished' diff --git a/src/third_party/wiredtiger/dist/s_copyright b/src/third_party/wiredtiger/dist/s_copyright index 73f45ffc4aa..020be6ae33d 100755 --- a/src/third_party/wiredtiger/dist/s_copyright +++ b/src/third_party/wiredtiger/dist/s_copyright @@ -2,10 +2,50 @@ # Check the copyrights. -c1=__wt.1$$ -c2=__wt.2$$ -c3=__wt.3$$ -c4=__wt.4$$ +c1=__wt.copyright.1 +c2=__wt.copyright.2 +c3=__wt.copyright.3 +c4=__wt.copyright.4 + +check() +{ + # Skip files in which WiredTiger holds no rights. + if `egrep "skip $1" dist/s_copyright.list > /dev/null`; then + return; + fi + + # It's okay if the file doesn't exist: we may be running in a release + # tree with some files removed. + test -f $1 || return + + # Check for a correct copyright header. + if `sed -e 2,5p -e 6q -e d $1 | diff - dist/$c1 > /dev/null` ; then + return; + fi + if `sed -e 2,4p -e 5q -e d $1 | diff - dist/$c2 > /dev/null` ; then + return; + fi + if `sed -e 3,6p -e 7q -e d $1 | diff - dist/$c3 > /dev/null` ; then + return; + fi + if `sed -e 3,5p -e 6q -e d $1 | diff - dist/$c4 > /dev/null` ; then + return; + fi + if `sed -e 1,3p -e 4q -e d $1 | diff - dist/$c4 > /dev/null` ; then + return; + fi + + echo "$1: copyright information is incorrect" + exit 1 +} + +# s_copyright is re-entrant, calling itself with individual file names. +# Any single argument call is a file name, check its copyright. +if [ $# -ne 0 ]; then + check $1 + exit 0 +fi + trap 'rm -f $c1 $c2 $c3 $c4; exit 0' 0 1 2 3 13 15 year=`date +%Y` @@ -41,57 +81,27 @@ cat > $c4 <<ENDOFTEXT # This is free and unencumbered software released into the public domain. ENDOFTEXT -check() -{ - # Skip files in which WiredTiger holds no rights. - if `egrep "skip $1" s_copyright.list > /dev/null`; then - return; - fi - - # It's okay if the file doesn't exist: we may be running in a release - # tree with some files removed. - test -f ../$i || return - - # Check for a correct copyright header. - if `sed -e 2,5p -e 6q -e d ../$1 | diff - $c1 > /dev/null` ; then - return; - fi - if `sed -e 2,4p -e 5q -e d ../$1 | diff - $c2 > /dev/null` ; then - return; - fi - if `sed -e 3,6p -e 7q -e d ../$1 | diff - $c3 > /dev/null` ; then - return; - fi - if `sed -e 3,5p -e 6q -e d ../$1 | diff - $c4 > /dev/null` ; then - return; - fi - if `sed -e 1,3p -e 4q -e d ../$1 | diff - $c4 > /dev/null` ; then - return; - fi - - echo "$1: copyright information is incorrect" -} - # Search for files, skipping some well-known 3rd party directories. -for i in `cd .. && - find [a-z]* -name '*.[chi]' \ - -o -name '*.cxx' -o -name '*.in' -o -name '*.java' -o -name '*.py' | +(cd .. && find [a-z]* -name '*.[chi]' \ + -o -name '*.cxx' \ + -o -name '*.in' \ + -o -name '*.java' \ + -o -name '*.py' | sed -e '/Makefile.in/d' \ -e '/^build_posix\//d' \ - -e '/api\/leveldb\/basho\//d' \ - -e '/api\/leveldb\/hyperleveldb\//d' \ - -e '/api\/leveldb\/leveldb\//d' \ - -e '/api\/leveldb\/rocksdb\//d' \ - -e '/test\/3rdparty\//d' \ - -e '/tools\/wtperf_stats\/3rdparty\//d' \ - -e 's/^\.\///'` -do - check $i -done - -# A few special cases: LICENSE, documentation, wt utility, some of which -# have more than one copyright notice in the file. For files that have -# only a single copyright notice, we give it to MongoDB, from 2008 to now. + -e '/api\/leveldb\/basho\//d' \ + -e '/api\/leveldb\/hyperleveldb\//d' \ + -e '/api\/leveldb\/leveldb\//d' \ + -e '/api\/leveldb\/rocksdb\//d' \ + -e '/\/3rdparty\//d' \ + -e '/\/node_modules\//d' \ + -e '/dist\/__/d' \ + -e 's/^\.\///' | + xargs -P 20 -n 1 -I{} sh dist/s_copyright {}) + +# A few special cases: LICENSE, documentation, wt utility, some of which have +# more than one copyright notice in the file. For files that have only a single +# copyright notice, we give it to MongoDB, from 2008 to now. string1="Copyright \(c\) 2014-$year MongoDB, Inc." string2="Copyright \(c\) 2008-$year MongoDB, Inc." string3="printf.*Copyright \(c\) 2008-$year MongoDB, Inc." diff --git a/src/third_party/wiredtiger/dist/s_define b/src/third_party/wiredtiger/dist/s_define index 7809bf14918..7809bf14918 100644..100755 --- a/src/third_party/wiredtiger/dist/s_define +++ b/src/third_party/wiredtiger/dist/s_define diff --git a/src/third_party/wiredtiger/dist/s_export b/src/third_party/wiredtiger/dist/s_export index 1212b5b2c1f..1212b5b2c1f 100644..100755 --- a/src/third_party/wiredtiger/dist/s_export +++ b/src/third_party/wiredtiger/dist/s_export diff --git a/src/third_party/wiredtiger/dist/s_funcs b/src/third_party/wiredtiger/dist/s_funcs index 3769ccc4aa7..3769ccc4aa7 100644..100755 --- a/src/third_party/wiredtiger/dist/s_funcs +++ b/src/third_party/wiredtiger/dist/s_funcs diff --git a/src/third_party/wiredtiger/dist/s_getopt b/src/third_party/wiredtiger/dist/s_getopt index 745de80503a..745de80503a 100644..100755 --- a/src/third_party/wiredtiger/dist/s_getopt +++ b/src/third_party/wiredtiger/dist/s_getopt diff --git a/src/third_party/wiredtiger/dist/s_label b/src/third_party/wiredtiger/dist/s_label index d1785aee54a..d1785aee54a 100644..100755 --- a/src/third_party/wiredtiger/dist/s_label +++ b/src/third_party/wiredtiger/dist/s_label diff --git a/src/third_party/wiredtiger/dist/s_longlines b/src/third_party/wiredtiger/dist/s_longlines index 15ca5603385..15ca5603385 100644..100755 --- a/src/third_party/wiredtiger/dist/s_longlines +++ b/src/third_party/wiredtiger/dist/s_longlines diff --git a/src/third_party/wiredtiger/dist/s_python b/src/third_party/wiredtiger/dist/s_python index ce955328288..ce955328288 100644..100755 --- a/src/third_party/wiredtiger/dist/s_python +++ b/src/third_party/wiredtiger/dist/s_python diff --git a/src/third_party/wiredtiger/dist/s_readme b/src/third_party/wiredtiger/dist/s_readme index be809a6455c..be809a6455c 100644..100755 --- a/src/third_party/wiredtiger/dist/s_readme +++ b/src/third_party/wiredtiger/dist/s_readme diff --git a/src/third_party/wiredtiger/dist/s_stat b/src/third_party/wiredtiger/dist/s_stat index 152097f14be..152097f14be 100644..100755 --- a/src/third_party/wiredtiger/dist/s_stat +++ b/src/third_party/wiredtiger/dist/s_stat diff --git a/src/third_party/wiredtiger/dist/s_string b/src/third_party/wiredtiger/dist/s_string index 08d066f5929..08d066f5929 100644..100755 --- a/src/third_party/wiredtiger/dist/s_string +++ b/src/third_party/wiredtiger/dist/s_string diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok index 1e430efe403..ba5717d1b4a 100644 --- a/src/third_party/wiredtiger/dist/s_string.ok +++ b/src/third_party/wiredtiger/dist/s_string.ok @@ -674,6 +674,7 @@ logtest lookup lookups lossy +lrt lru lseek lsm @@ -844,6 +845,7 @@ sd secretkey sed sessionp +setkv setstr setv sfence diff --git a/src/third_party/wiredtiger/dist/s_style b/src/third_party/wiredtiger/dist/s_style index c1f54f381ff..e5411748a31 100755 --- a/src/third_party/wiredtiger/dist/s_style +++ b/src/third_party/wiredtiger/dist/s_style @@ -4,12 +4,38 @@ t=__wt.$$ trap 'rm -f $t; exit 0' 0 1 2 3 13 15 -cd .. +# s_style is re-entrant, when run with no parameters it calls itself +# again for each file that needs checking. +if [ $# -ne 1 ]; then + cd .. + + find bench examples ext src test \ + -name '*.[chisy]' -o -name '*.in' -o -name '*.dox' | + sed -e '/Makefile.in/d' -e '/build_win\/wiredtiger_config.h/d' | + xargs -P 20 -n 1 -I{} sh ./dist/s_style {} +else + # General style correction and cleanup for a single file + f=$1 + fname=`basename $f` + t=__wt_s_style.$fname.$$ + + if [ ! -e $f ]; then + echo "$0 error $1 does not exist" + exit 1; + fi + + egrep -w 'a a|an an|and and|are are|be be|by by|for for|from from|if if|in in|is is|it it|of of|the the|this this|to to|was was|were were|when when|with with|a an|an a|a the|the a' $f > $t + test -s $t && { + echo "paired typo" + echo "============================" + cat $t + } + + extension="${fname##*.}" + if [ "x$extension" = "xdox" ]; then + exit 0; + fi -for f in \ - `find bench examples ext src test -name '*.[chisy]' -o -name '*.in' | - sed -e '/Makefile.in/d' \ - -e '/build_win\/wiredtiger_config.h/d'`; do if grep "^[^}]*while (0);" $f > $t; then echo "$f: while (0) has trailing semi-colon" cat $t @@ -111,14 +137,4 @@ for f in \ -e 's/^#define /#define /' >$t cmp $t $f > /dev/null 2>&1 || (echo "modifying $f" && cp $t $f) -done - -# Check for common typos (Wikipedia's list). -find bench examples ext src test \ - -name '*.[chisy]' -o -name '*.in' -o -name '*.dox' | -xargs egrep -w 'a a|an an|and and|are are|be be|by by|for for|from from|if if|in in|is is|it it|of of|the the|this this|to to|was was|were were|when when|with with|a an|an a|a the|the a' > $t - test -s $t && { - echo "paired typo" - echo "============================" - cat $t - } +fi diff --git a/src/third_party/wiredtiger/dist/s_tags b/src/third_party/wiredtiger/dist/s_tags index faed132d05b..faed132d05b 100644..100755 --- a/src/third_party/wiredtiger/dist/s_tags +++ b/src/third_party/wiredtiger/dist/s_tags diff --git a/src/third_party/wiredtiger/dist/s_whitespace b/src/third_party/wiredtiger/dist/s_whitespace index 3a51b251bfe..3a51b251bfe 100644..100755 --- a/src/third_party/wiredtiger/dist/s_whitespace +++ b/src/third_party/wiredtiger/dist/s_whitespace diff --git a/src/third_party/wiredtiger/dist/s_wtstats b/src/third_party/wiredtiger/dist/s_wtstats index 834b011110e..834b011110e 100644..100755 --- a/src/third_party/wiredtiger/dist/s_wtstats +++ b/src/third_party/wiredtiger/dist/s_wtstats diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py index 791cb30e99c..ee9c8782594 100644 --- a/src/third_party/wiredtiger/dist/stat_data.py +++ b/src/third_party/wiredtiger/dist/stat_data.py @@ -218,7 +218,6 @@ connection_stats = [ ########################################## # Logging statistics ########################################## - LogStat('log_buffer_grow', 'log buffer size increases'), LogStat('log_buffer_size', 'total log buffer size', 'no_clear,no_scale'), LogStat('log_bytes_payload', 'log bytes of payload data'), LogStat('log_bytes_written', 'log bytes written'), diff --git a/src/third_party/wiredtiger/lang/java/Makefile.am b/src/third_party/wiredtiger/lang/java/Makefile.am index cf18f2ff3dd..b9871cf2599 100644 --- a/src/third_party/wiredtiger/lang/java/Makefile.am +++ b/src/third_party/wiredtiger/lang/java/Makefile.am @@ -49,6 +49,8 @@ JAVA_JUNIT = \ $(JAVATEST)/CursorTest02.java \ $(JAVATEST)/ExceptionTest.java \ $(JAVATEST)/PackTest.java \ + $(JAVATEST)/PackTest02.java \ + $(JAVATEST)/PackTest03.java \ $(JAVATEST)/WiredTigerSuite.java dist_java_JAVA = $(JAVA_SRC) @JAVA_JUNIT@ diff --git a/src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackFormatInputStream.java b/src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackFormatInputStream.java index c53938d0a58..0d0ce42d375 100644 --- a/src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackFormatInputStream.java +++ b/src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackFormatInputStream.java @@ -180,5 +180,12 @@ public class PackFormatInputStream { } return valueLen; } -} + /** + * Return whether there is an explicit length indicated in the format + * string. + */ + protected boolean hasLength() { + return (getIntFromFormat(false) > 0); + } +} diff --git a/src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackInputStream.java b/src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackInputStream.java index a49b2e01f17..77eb19d1de1 100644 --- a/src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackInputStream.java +++ b/src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackInputStream.java @@ -119,7 +119,6 @@ public class PackInputStream { throws WiredTigerPackingException { format.checkType('U', false); getByteArrayInternal(getByteArrayLength(), dest, off, len); - } /** @@ -128,6 +127,7 @@ public class PackInputStream { */ public byte[] getByteArray() throws WiredTigerPackingException { + format.checkType('U', false); int itemLen = getByteArrayLength(); byte[] unpacked = new byte[itemLen]; getByteArrayInternal(itemLen, unpacked, 0, itemLen); @@ -141,8 +141,17 @@ public class PackInputStream { private int getByteArrayLength() throws WiredTigerPackingException { int itemLen = 0; - /* The rest of the buffer is a byte array. */ - if (format.available() == 1) { + + if (format.hasLength()) { + // If the format has a length, it's always used. + itemLen = format.getLengthFromFormat(true); + } else if (format.getType() == 'U') { + // The 'U' format is used internally, and may be exposed to us. + // It indicates that the size is always stored unless there + // is a size in the format. + itemLen = unpackInt(false); + } else if (format.available() == 1) { + // The rest of the buffer is a byte array. itemLen = valueLen - valueOff; } else { itemLen = unpackInt(false); @@ -156,7 +165,6 @@ public class PackInputStream { private void getByteArrayInternal( int itemLen, byte[] dest, int off, int destLen) throws WiredTigerPackingException { - /* TODO: padding. */ int copyLen = itemLen; if (itemLen > destLen) { copyLen = destLen; @@ -171,11 +179,11 @@ public class PackInputStream { */ public int getInt() throws WiredTigerPackingException { - boolean signed = false; + boolean signed = true; format.checkType('i', false); if (format.getType() == 'I' || format.getType() == 'L') { - signed = true; + signed = false; } format.consume(); return unpackInt(signed); @@ -186,10 +194,10 @@ public class PackInputStream { */ public long getLong() throws WiredTigerPackingException { - boolean signed = false; + boolean signed = true; format.checkType('q', false); if (format.getType() == 'Q') { - signed = true; + signed = false; } format.consume(); return unpackLong(signed); @@ -210,10 +218,10 @@ public class PackInputStream { */ public short getShort() throws WiredTigerPackingException { - boolean signed = false; + boolean signed = true; format.checkType('h', false); if (format.getType() == 'H') { - signed = true; + signed = false; } format.consume(); return unpackShort(signed); diff --git a/src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackOutputStream.java b/src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackOutputStream.java index e79b4c63498..cc8b93e3457 100644 --- a/src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackOutputStream.java +++ b/src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackOutputStream.java @@ -109,14 +109,31 @@ public class PackOutputStream { */ public void addByteArray(byte[] value, int off, int len) throws WiredTigerPackingException { - format.checkType('U', true); - // If this is not the last item, store the size. - if (format.available() > 0) { - packLong(len, false); + int padBytes = 0; + + format.checkType('U', false); + boolean havesize = format.hasLength(); + char type = format.getType(); + if (havesize) { + int size = format.getLengthFromFormat(true); + if (len > size) { + len = size; + } else if (size > len) { + padBytes = size - len; + } } + // We're done pulling information from the field now. + format.consume(); + // If this is not the last item and the format does not have the + // size, or we're using the internal 'U' format, store the size. + if (!havesize && (format.available() > 0 || type == 'U')) { + packLong(len, false); + } packed.write(value, off, len); - /* TODO: padding. */ + while(padBytes-- > 0) { + packed.write(0); + } } /** @@ -178,17 +195,33 @@ public class PackOutputStream { // Strings have two possible encodings. A lower case 's' is not null // terminated, and has a length define in the format (default 1). An // upper case 'S' is variable length and has a null terminator. - if (fieldFormat == 's') { - stringLen = format.getLengthFromFormat(true); - valLen = value.length(); - if (stringLen > valLen) { - padBytes = stringLen - valLen; - stringLen = valLen; - } + + // Logic from python packing.py: + boolean havesize = format.hasLength(); + int nullpos = value.indexOf('\0'); + int size = 0; + + if (fieldFormat == 'S' && nullpos >= 0) { + stringLen = nullpos; } else { stringLen = value.length(); - padBytes = 1; // Null terminator } + if (havesize) { + size = format.getLengthFromFormat(true); + if (stringLen > size) { + stringLen = size; + } + } else if (fieldFormat == 's') { + havesize = true; + size = 1; + } + + if (fieldFormat == 'S' && !havesize) { + padBytes = 1; + } else if (size > stringLen) { + padBytes = size - stringLen; + } + // We're done pulling information from the field now. format.consume(); @@ -249,6 +282,12 @@ public class PackOutputStream { intBuf[offset++] = (byte)(PackUtil.POS_2BYTE_MARKER | PackUtil.GET_BITS(x, 13, 8)); intBuf[offset++] = PackUtil.GET_BITS(x, 8, 0); + } else if (x == PackUtil.POS_2BYTE_MAX + 1) { + // This is a special case where we could store the value with + // just a single byte, but we append a zero byte so that the + // encoding doesn't get shorter for this one value. + intBuf[offset++] = (byte)(PackUtil.POS_MULTI_MARKER | 0x01); + intBuf[offset++] = 0; } else { x -= PackUtil.POS_2BYTE_MAX + 1; intBuf[offset] = PackUtil.POS_MULTI_MARKER; diff --git a/src/third_party/wiredtiger/lang/java/wiredtiger.i b/src/third_party/wiredtiger/lang/java/wiredtiger.i index fbdfbb32212..ae370ec89f5 100644 --- a/src/third_party/wiredtiger/lang/java/wiredtiger.i +++ b/src/third_party/wiredtiger/lang/java/wiredtiger.i @@ -177,6 +177,9 @@ static void throwWiredTigerException(JNIEnv *jenv, int err) { } %enddef +/* + * 'Declare' a WiredTiger class. This sets up boilerplate typemaps. + */ %define WT_CLASS(type, class, name) /* * Extra 'self' elimination. @@ -210,17 +213,32 @@ static void throwWiredTigerException(JNIEnv *jenv, int err) { */" %enddef -%define WT_CLASS_WITH_CLOSE_HANDLER(type, class, name, closeHandler, priv) +/* + * Declare a WT_CLASS so that close methods call a specified closeHandler, + * after the WT core close function has completed. Arguments to the + * closeHandler are saved in advance since, as macro args, they may refer to + * values that are freed/zeroed by the close. + */ +%define WT_CLASS_WITH_CLOSE_HANDLER(type, class, name, closeHandler, + sess, priv) WT_CLASS(type, class, name) -%typemap(in, numinputs=0) class ## _CLOSED *name (JAVA_CALLBACK *jcb) { +/* + * This typemap recognizes a close function via a special declaration on its + * first argument. See WT_HANDLE_CLOSED in wiredtiger.h . Like + * WT_CURSOR_NULLABLE, the WT_{CURSOR,SESSION,CONNECTION}_CLOSED typedefs + * are only visible to the SWIG parser. + */ +%typemap(in, numinputs=0) class ## _CLOSED *name ( + WT_SESSION *savesess, JAVA_CALLBACK *jcb) { $1 = *(type **)&jarg1; NULL_CHECK($1, $1_name) + savesess = sess; jcb = (JAVA_CALLBACK *)(priv); } %typemap(freearg, numinputs=0) class ## _CLOSED *name { - closeHandler(jcb2); + closeHandler(jenv, savesess2, jcb2); priv = NULL; } @@ -239,11 +257,11 @@ WT_CLASS(type, class, name) %} WT_CLASS_WITH_CLOSE_HANDLER(struct __wt_connection, WT_CONNECTION, connection, - closeHandler, ((WT_CONNECTION_IMPL *)$1)->lang_private) + closeHandler, NULL, ((WT_CONNECTION_IMPL *)$1)->lang_private) WT_CLASS_WITH_CLOSE_HANDLER(struct __wt_session, WT_SESSION, session, - closeHandler, ((WT_SESSION_IMPL *)$1)->lang_private) + closeHandler, $1, ((WT_SESSION_IMPL *)$1)->lang_private) WT_CLASS_WITH_CLOSE_HANDLER(struct __wt_cursor, WT_CURSOR, cursor, - cursorCloseHandler, ((WT_CURSOR *)$1)->lang_private) + cursorCloseHandler, $1->session, ((WT_CURSOR *)$1)->lang_private) WT_CLASS(struct __wt_async_op, WT_ASYNC_OP, op) %define COPYDOC(SIGNATURE_CLASS, CLASS, METHOD) @@ -281,6 +299,7 @@ WT_CLASS(struct __wt_async_op, WT_ASYNC_OP, op) %ignore __wt_cursor::set_value; %ignore __wt_cursor::insert; %ignore __wt_cursor::remove; +%ignore __wt_cursor::reset; %ignore __wt_cursor::search; %ignore __wt_cursor::search_near; %ignore __wt_cursor::update; @@ -312,21 +331,35 @@ enum SearchStatus { FOUND, NOTFOUND, SMALLER, LARGER }; %wrapper %{ /* Zero out SWIG's pointer to the C object, * equivalent to 'jobj.swigCPtr = 0;' in java. + * We expect that either env in non-null (if called + * via an explicit session/cursor close() call), or + * that session is non-null (if called implicitly + * as part of connection/session close). */ static int -javaClose(JNIEnv *env, JAVA_CALLBACK *jcb, jfieldID *pfid) +javaClose(JNIEnv *env, WT_SESSION *session, JAVA_CALLBACK *jcb, jfieldID *pfid) { jclass cls; jfieldID fid; + WT_CONNECTION_IMPL *conn; + /* If we were not called via an implicit close call, + * we won't have a JNIEnv yet. Get one from the connection, + * since the thread that started the session may have + * terminated. + */ + if (env == NULL) { + conn = (WT_CONNECTION_IMPL *)session->connection; + env = ((JAVA_CALLBACK *)conn->lang_private)->jnienv; + } if (pfid == NULL || *pfid == NULL) { cls = (*env)->GetObjectClass(env, jcb->jobj); fid = (*env)->GetFieldID(env, cls, "swigCPtr", "J"); if (pfid != NULL) *pfid = fid; - } else { + } else fid = *pfid; - } + (*env)->SetLongField(env, jcb->jobj, fid, 0L); (*env)->DeleteGlobalRef(env, jcb->jobj); __wt_free(jcb->session, jcb); @@ -335,20 +368,22 @@ javaClose(JNIEnv *env, JAVA_CALLBACK *jcb, jfieldID *pfid) /* Connection and Session close handler. */ static int -closeHandler(JAVA_CALLBACK *jcb) +closeHandler(JNIEnv *env, WT_SESSION *session, JAVA_CALLBACK *jcb) { - return (javaClose(jcb->jnienv, jcb, NULL)); + return (javaClose(env, session, jcb, NULL)); } /* Cursor specific close handler. */ static int -cursorCloseHandler(JAVA_CALLBACK *jcb) +cursorCloseHandler(JNIEnv *env, WT_SESSION *wt_session, JAVA_CALLBACK *jcb) { int ret; JAVA_CALLBACK *sess_jcb; + WT_SESSION_IMPL *session; - sess_jcb = (JAVA_CALLBACK *)jcb->session->lang_private; - ret = javaClose(jcb->jnienv, jcb, + session = (WT_SESSION_IMPL *)wt_session; + sess_jcb = (JAVA_CALLBACK *)session->lang_private; + ret = javaClose(env, wt_session, jcb, sess_jcb ? &sess_jcb->cptr_fid : NULL); return (ret); @@ -364,9 +399,10 @@ javaCloseHandler(WT_EVENT_HANDLER *handler, WT_SESSION *session, WT_UNUSED(handler); if (cursor != NULL) - ret = cursorCloseHandler((JAVA_CALLBACK *)cursor->lang_private); + ret = cursorCloseHandler(NULL, session, (JAVA_CALLBACK *) + cursor->lang_private); else - ret = closeHandler((JAVA_CALLBACK *) + ret = closeHandler(NULL, session, (JAVA_CALLBACK *) ((WT_SESSION_IMPL *)session)->lang_private); return (ret); } @@ -474,7 +510,7 @@ err: __wt_err(session, ret, "Java async callback error"); } /* Invalidate the AsyncOp, further use throws NullPointerException. */ - ret = javaClose(jenv, jcb, &conn_jcb->asynccptr_fid); + ret = javaClose(jenv, NULL, jcb, &conn_jcb->asynccptr_fid); (*jenv)->DeleteGlobalRef(jenv, jcallback); @@ -1103,6 +1139,11 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler}; return $self->remove($self); } + %javamethodmodifiers reset_wrap "protected"; + int reset_wrap() { + return $self->reset($self); + } + %javamethodmodifiers search_wrap "protected"; int search_wrap(WT_ITEM *k) { $self->set_key($self, k); @@ -1671,6 +1712,21 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler}; } /** + * Reset a cursor. + * + * \return The status of the operation. + */ + public int reset() + throws WiredTigerException { + int ret = reset_wrap(); + keyPacker.reset(); + valuePacker.reset(); + keyUnpacker = null; + valueUnpacker = null; + return ret; + } + + /** * Search for an item in the table. * * \return The result of the comparison. @@ -1891,8 +1947,8 @@ err: if (ret != 0) jcb->jnienv = jenv; jcb->session = connimpl->default_session; (*jenv)->GetJavaVM(jenv, &jcb->javavm); - jcb->jcallback = JCALL1(NewGlobalRef, jcb->jnienv, callbackObject); - JCALL1(DeleteLocalRef, jcb->jnienv, callbackObject); + jcb->jcallback = JCALL1(NewGlobalRef, jenv, callbackObject); + JCALL1(DeleteLocalRef, jenv, callbackObject); asyncop->c.lang_private = jcb; asyncop->c.flags |= WT_CURSTD_RAW; diff --git a/src/third_party/wiredtiger/lang/python/wiredtiger/intpack-test.py b/src/third_party/wiredtiger/lang/python/wiredtiger/intpack-test.py deleted file mode 100644 index 8855dc4e72d..00000000000 --- a/src/third_party/wiredtiger/lang/python/wiredtiger/intpack-test.py +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env python -# -# Public Domain 2014-2015 MongoDB, Inc. -# Public Domain 2008-2014 WiredTiger, Inc. -# -# This is free and unencumbered software released into the public domain. -# -# Anyone is free to copy, modify, publish, use, compile, sell, or -# distribute this software, either in source code form or as a compiled -# binary, for any purpose, commercial or non-commercial, and by any -# means. -# -# In jurisdictions that recognize copyright laws, the author or authors -# of this software dedicate any and all copyright interest in the -# software to the public domain. We make this dedication for the benefit -# of the public at large and to the detriment of our heirs and -# successors. We intend this dedication to be an overt act of -# relinquishment in perpetuity of all present and future rights to this -# software under copyright law. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. -# - -from intpacking import compress_int - -i = 1 -while i < 1 << 60: - print -i, ''.join('%02x' % ord(c) for c in compress_int(-i)) - print i, ''.join('%02x' % ord(c) for c in compress_int(i)) - i <<= 1 diff --git a/src/third_party/wiredtiger/lang/python/wiredtiger/intpacking.py b/src/third_party/wiredtiger/lang/python/wiredtiger/intpacking.py index 239bc84069d..fe2e93f3dca 100644 --- a/src/third_party/wiredtiger/lang/python/wiredtiger/intpacking.py +++ b/src/third_party/wiredtiger/lang/python/wiredtiger/intpacking.py @@ -89,6 +89,11 @@ def pack_int(x): elif x <= POS_2BYTE_MAX: x -= (POS_1BYTE_MAX + 1) return chr(POS_2BYTE_MARKER | getbits(x, 13, 8)) + chr(getbits(x, 8)) + elif x == POS_2BYTE_MAX + 1: + # This is a special case where we could store the value with + # just a single byte, but we append a zero byte so that the + # encoding doesn't get shorter for this one value. + return chr(POS_MULTI_MARKER | 0x1) + chr(0) else: packed = struct.pack('>Q', x - (POS_2BYTE_MAX + 1)) while packed and packed[0] == '\x00': diff --git a/src/third_party/wiredtiger/lang/python/wiredtiger/packing-test.py b/src/third_party/wiredtiger/lang/python/wiredtiger/packing-test.py deleted file mode 100644 index 2eb0baa1d47..00000000000 --- a/src/third_party/wiredtiger/lang/python/wiredtiger/packing-test.py +++ /dev/null @@ -1,78 +0,0 @@ -#!/usr/bin/env python -# -# Public Domain 2014-2015 MongoDB, Inc. -# Public Domain 2008-2014 WiredTiger, Inc. -# -# This is free and unencumbered software released into the public domain. -# -# Anyone is free to copy, modify, publish, use, compile, sell, or -# distribute this software, either in source code form or as a compiled -# binary, for any purpose, commercial or non-commercial, and by any -# means. -# -# In jurisdictions that recognize copyright laws, the author or authors -# of this software dedicate any and all copyright interest in the -# software to the public domain. We make this dedication for the benefit -# of the public at large and to the detriment of our heirs and -# successors. We intend this dedication to be an overt act of -# relinquishment in perpetuity of all present and future rights to this -# software under copyright law. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. -# - -from packing import pack, unpack - -def check_common(fmt, verbose, *v): - v = list(v) - packed = pack(fmt, *v) - unpacked = unpack(fmt, packed) - if unpacked == v: - result = 'ok' - else: - result = '** FAIL!' - print '* %s as %s: %s' % (repr(v), fmt, result) - if verbose or unpacked != v: - print '** packed: ', ''.join('%02x' % ord(c) for c in packed) - print '** unpacked: ', unpacked - -def check(fmt, *v): - check_common(fmt, False, *v) - -def check_verbose(fmt, *v): - check_common(fmt, True, *v) - - -if __name__ == '__main__': - import sys - if 'verbose' in sys.argv: - check = check_verbose - check('iii', 0, 101, -99) - check('3i', 0, 101, -99) - check('iS', 42, "forty two") - - # - check('S', 'abc') - check('9S', 'a' * 9) - check('9SS', "forty two", "spam egg") - check('42S', 'a' * 42) - check('42SS', 'a' * 42, 'something') - check('S42S', 'something', 'a' * 42) - # nul terminated string with padding - check('10SS', 'aaaaa\x00\x00\x00\x00\x00', 'something') - check('S10S', 'something', 'aaaaa\x00\x00\x00\x00\x00') - - check('u', r"\x42" * 20) - check('uu', r"\x42" * 10, r"\x42" * 10) - check('3u', r"\x4") - check('3uu', r"\x4", r"\x42" * 10) - check('u3u', r"\x42" * 10, r"\x4") - - check('s', "4") - check("2s", "42") diff --git a/src/third_party/wiredtiger/lang/python/wiredtiger/packing.py b/src/third_party/wiredtiger/lang/python/wiredtiger/packing.py index 103c0471724..b0e055490b0 100644 --- a/src/third_party/wiredtiger/lang/python/wiredtiger/packing.py +++ b/src/third_party/wiredtiger/lang/python/wiredtiger/packing.py @@ -87,17 +87,19 @@ def unpack(fmt, s): size = 1 s = s[size:] # Note: no value, don't increment i - elif f in 'Ssu': + elif f in 'SsUu': if not havesize: if f == 's': size = 1 elif f == 'S': size = s.find('\0') - elif f == 'u': - if offset == len(fmt) - 1: - size = len(s) - else: - size, s = unpack_int(s) + elif f == 'u' and offset == len(fmt) - 1: + size = len(s) + else: + # Note: 'U' is used internally, and may be exposed to us. + # It indicates that the size is always stored unless there + # is a size in the format. + size, s = unpack_int(s) result.append(s[:size]) if f == 'S' and not havesize: size += 1 @@ -108,6 +110,16 @@ def unpack(fmt, s): size = 1 result.append(ord(s[0:1])) s = s[1:] + elif f in 'Bb': + # byte type + if not havesize: + size = 1 + for i in xrange(size): + v = ord(s[0:1]) + if f != 'B': + v -= 0x80 + result.append(v) + s = s[1:] else: # integral type if not havesize: @@ -122,7 +134,7 @@ def __pack_iter_fmt(fmt, values): for offset, havesize, size, char in __unpack_iter_fmt(fmt): if char == 'x': # padding no value yield offset, havesize, size, char, None - elif char in 'Ssut': + elif char in 'SsUut': yield offset, havesize, size, char, values[index] index += 1 else: # integral type @@ -147,7 +159,7 @@ def pack(fmt, *values): else: result += '\0' * size # Note: no value, don't increment i - elif f in 'Ssu': + elif f in 'SsUu': if f == 'S' and '\0' in val: l = val.find('\0') else: @@ -157,7 +169,7 @@ def pack(fmt, *values): l = size elif f == 's': havesize = size = 1 - elif f == 'u' and offset != len(fmt) - 1: + elif (f == 'u' and offset != len(fmt) - 1) or f == 'U': result += pack_int(l) if type(val) is unicode and f in 'Ss': result += str(val[:l]) @@ -177,6 +189,19 @@ def pack(fmt, *values): if (mask & val) != val: raise ValueError("value out of range for 't' encoding") result += chr(val) + elif f in 'Bb': + # byte type + if not havesize: + size = 1 + for i in xrange(size): + if f == 'B': + v = val + else: + # Translate to maintain ordering with the sign bit. + v = val + 0x80 + if v > 255 or v < 0: + raise ValueError("value out of range for 'B' encoding") + result += chr(v) else: # integral type result += pack_int(val) diff --git a/src/third_party/wiredtiger/src/block/block_open.c b/src/third_party/wiredtiger/src/block/block_open.c index 8e45ec85a97..df42a14816f 100644 --- a/src/third_party/wiredtiger/src/block/block_open.c +++ b/src/third_party/wiredtiger/src/block/block_open.c @@ -100,11 +100,10 @@ __wt_block_manager_create( WT_TRET(__wt_close(session, &fh)); /* - * If checkpoint syncing is enabled, some filesystems require that we - * sync the directory to be confident that the file will appear. + * Some filesystems require that we sync the directory to be confident + * that the file will appear. */ - if (ret == 0 && F_ISSET(S2C(session), WT_CONN_CKPT_SYNC) && - (ret = __wt_filename(session, filename, &path)) == 0) { + if (ret == 0 && (ret = __wt_filename(session, filename, &path)) == 0) { ret = __wt_directory_sync(session, path); __wt_free(session, path); } @@ -180,10 +179,10 @@ __wt_block_open(WT_SESSION_IMPL *session, WT_DECL_RET; uint64_t bucket, hash; - WT_TRET(__wt_verbose(session, WT_VERB_BLOCK, "open: %s", filename)); + WT_RET(__wt_verbose(session, WT_VERB_BLOCK, "open: %s", filename)); conn = S2C(session); - *blockp = NULL; + *blockp = block = NULL; hash = __wt_hash_city64(filename, strlen(filename)); bucket = hash % WT_HASH_ARRAY_SIZE; __wt_spin_lock(session, &conn->block_lock); @@ -264,7 +263,8 @@ __wt_block_open(WT_SESSION_IMPL *session, __wt_spin_unlock(session, &conn->block_lock); return (0); -err: WT_TRET(__block_destroy(session, block)); +err: if (block != NULL) + WT_TRET(__block_destroy(session, block)); __wt_spin_unlock(session, &conn->block_lock); return (ret); } diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c index 4625865fbf7..77d80cdb3a2 100644 --- a/src/third_party/wiredtiger/src/btree/bt_debug.c +++ b/src/third_party/wiredtiger/src/btree/bt_debug.c @@ -334,6 +334,8 @@ __wt_debug_disk( if (F_ISSET(dsk, WT_PAGE_COMPRESSED)) __dmsg(ds, ", compressed"); + if (F_ISSET(dsk, WT_PAGE_ENCRYPTED)) + __dmsg(ds, ", encrypted"); if (F_ISSET(dsk, WT_PAGE_EMPTY_V_ALL)) __dmsg(ds, ", empty-all"); if (F_ISSET(dsk, WT_PAGE_EMPTY_V_NONE)) diff --git a/src/third_party/wiredtiger/src/btree/bt_page.c b/src/third_party/wiredtiger/src/btree/bt_page.c index f08909a4b85..86edd992b28 100644 --- a/src/third_party/wiredtiger/src/btree/bt_page.c +++ b/src/third_party/wiredtiger/src/btree/bt_page.c @@ -21,7 +21,7 @@ static int __inmem_row_leaf_entries( * Check if a page matches the criteria for forced eviction. */ static int -__evict_force_check(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags) +__evict_force_check(WT_SESSION_IMPL *session, WT_PAGE *page) { WT_BTREE *btree; @@ -35,10 +35,6 @@ __evict_force_check(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags) if (WT_PAGE_IS_INTERNAL(page)) return (0); - /* Eviction may be turned off. */ - if (LF_ISSET(WT_READ_NO_EVICT) || F_ISSET(btree, WT_BTREE_NO_EVICTION)) - return (0); - /* * It's hard to imagine a page with a huge memory footprint that has * never been modified, but check to be sure. @@ -68,11 +64,14 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags #endif ) { + WT_BTREE *btree; WT_DECL_RET; WT_PAGE *page; u_int sleep_cnt, wait_cnt; int busy, cache_work, force_attempts, oldgen; + btree = S2BT(session); + for (force_attempts = oldgen = 0, wait_cnt = 0;;) { switch (ref->state) { case WT_REF_DISK: @@ -115,7 +114,7 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags * be evicting if no hazard pointer is required, we're * done. */ - if (F_ISSET(S2BT(session), WT_BTREE_IN_MEMORY)) + if (F_ISSET(btree, WT_BTREE_IN_MEMORY)) goto skip_evict; /* @@ -140,7 +139,8 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags * the page's generation number. If eviction isn't being * done on this file, we're done. */ - if (F_ISSET(S2BT(session), WT_BTREE_NO_EVICTION)) + if (LF_ISSET(WT_READ_NO_EVICT) || + F_ISSET(btree, WT_BTREE_NO_EVICTION)) goto skip_evict; /* @@ -148,7 +148,7 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags */ page = ref->page; if (force_attempts < 10 && - __evict_force_check(session, page, flags)) { + __evict_force_check(session, page)) { ++force_attempts; ret = __wt_page_release_evict(session, ref); /* If forced eviction fails, stall. */ diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c index 6f27e077109..dbd4042129d 100644 --- a/src/third_party/wiredtiger/src/btree/bt_split.c +++ b/src/third_party/wiredtiger/src/btree/bt_split.c @@ -753,7 +753,7 @@ __split_multi_inmem( /* * We modified the page above, which will have set the first dirty - * transaction to the last transaction current running. However, the + * transaction to the last transaction currently running. However, the * updates we installed may be older than that. Set the first dirty * transaction to an impossibly old value so this page is never skipped * in a checkpoint. diff --git a/src/third_party/wiredtiger/src/btree/bt_sync.c b/src/third_party/wiredtiger/src/btree/bt_sync.c index 6c5b1fb98e8..838d778dadf 100644 --- a/src/third_party/wiredtiger/src/btree/bt_sync.c +++ b/src/third_party/wiredtiger/src/btree/bt_sync.c @@ -109,17 +109,6 @@ __sync_file(WT_SESSION_IMPL *session, int syncop) /* Write all dirty in-cache pages. */ flags |= WT_READ_NO_EVICT; for (walk = NULL;;) { - /* - * If we have a page, and it was ever modified, track - * the highest transaction ID in the tree. We do this - * here because we want the value after reconciling - * dirty pages. - */ - if (walk != NULL && walk->page != NULL && - (mod = walk->page->modify) != NULL && - WT_TXNID_LT(btree->rec_max_txn, mod->rec_max_txn)) - btree->rec_max_txn = mod->rec_max_txn; - WT_ERR(__wt_tree_walk(session, &walk, NULL, flags)); if (walk == NULL) break; @@ -190,6 +179,18 @@ err: /* On error, clear any left-over tree walk. */ if (btree->checkpointing) { /* + * Update the checkpoint generation for this handle so visible + * updates newer than the checkpoint can be evicted. + * + * This has to be published before eviction is enabled again, + * so that eviction knows that the checkpoint has completed. + */ + WT_PUBLISH(btree->checkpoint_gen, + S2C(session)->txn_global.checkpoint_gen); + WT_STAT_FAST_DATA_SET(session, + btree_checkpoint_generation, btree->checkpoint_gen); + + /* * Clear the checkpoint flag and push the change; not required, * but publishing the change means stalled eviction gets moving * as soon as possible. diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c index f7b65a8f73d..73837c46ee8 100644 --- a/src/third_party/wiredtiger/src/config/config_def.c +++ b/src/third_party/wiredtiger/src/config/config_def.c @@ -57,7 +57,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_file_manager_subconfigs[] = { { "close_handle_minimum", "int", NULL, "min=0", NULL, 0 }, { "close_idle_time", "int", - NULL, "min=1,max=100000", + NULL, "min=0,max=100000", NULL, 0 }, { "close_scan_interval", "int", NULL, "min=1,max=100000", diff --git a/src/third_party/wiredtiger/src/conn/conn_cache_pool.c b/src/third_party/wiredtiger/src/conn/conn_cache_pool.c index de7e9e3486f..fdc95a32387 100644 --- a/src/third_party/wiredtiger/src/conn/conn_cache_pool.c +++ b/src/third_party/wiredtiger/src/conn/conn_cache_pool.c @@ -11,15 +11,29 @@ /* * Tuning constants. */ -/* Threshold when a connection is allocated more cache */ -#define WT_CACHE_POOL_BUMP_THRESHOLD 6 -/* Threshold when a connection is allocated less cache */ -#define WT_CACHE_POOL_REDUCE_THRESHOLD 2 +/* + * Threshold when a connection is allocated more cache, as a percentage of + * the amount of pressure the busiest participant has. + */ +#define WT_CACHE_POOL_BUMP_THRESHOLD 60 +/* + * Threshold when a connection is allocated less cache, as a percentage of + * the amount of pressure the busiest participant has. + */ +#define WT_CACHE_POOL_REDUCE_THRESHOLD 20 /* Balancing passes after a bump before a connection is a candidate. */ #define WT_CACHE_POOL_BUMP_SKIPS 10 /* Balancing passes after a reduction before a connection is a candidate. */ #define WT_CACHE_POOL_REDUCE_SKIPS 5 +/* + * Constants that control how much influence different metrics have on + * the pressure calculation. + */ +#define WT_CACHE_POOL_APP_EVICT_MULTIPLIER 10 +#define WT_CACHE_POOL_APP_WAIT_MULTIPLIER 50 +#define WT_CACHE_POOL_READ_MULTIPLIER 1 + static int __cache_pool_adjust(WT_SESSION_IMPL *, uint64_t, uint64_t, int *); static int __cache_pool_assess(WT_SESSION_IMPL *, uint64_t *); static int __cache_pool_balance(WT_SESSION_IMPL *); @@ -441,10 +455,12 @@ __cache_pool_assess(WT_SESSION_IMPL *session, uint64_t *phighest) WT_CACHE_POOL *cp; WT_CACHE *cache; WT_CONNECTION_IMPL *entry; - uint64_t entries, highest, new; + uint64_t app_evicts, app_waits, reads; + uint64_t entries, highest, tmp; cp = __wt_process.cache_pool; - entries = highest = 0; + entries = 0; + highest = 1; /* Avoid divide by zero */ /* Generate read pressure information. */ TAILQ_FOREACH(entry, &cp->cache_pool_qh, cpq) { @@ -453,22 +469,54 @@ __cache_pool_assess(WT_SESSION_IMPL *session, uint64_t *phighest) continue; cache = entry->cache; ++entries; - new = cache->bytes_read; - /* Handle wrapping of eviction requests. */ - if (new >= cache->cp_saved_read) - cache->cp_current_read = new - cache->cp_saved_read; + + /* + * Figure out a delta since the last time we did an assessment + * for each metric we are tracking. Watch out for wrapping + * of values. + */ + tmp = cache->bytes_read; + if (tmp >= cache->cp_saved_read) + reads = tmp - cache->cp_saved_read; else - cache->cp_current_read = new; - cache->cp_saved_read = new; - if (cache->cp_current_read > highest) - highest = cache->cp_current_read; + reads = (UINT64_MAX - cache->cp_saved_read) + tmp; + cache->cp_saved_read = tmp; + + /* Update the application eviction count information */ + tmp = cache->app_evicts; + if (tmp >= cache->cp_saved_app_evicts) + app_evicts = tmp - cache->cp_saved_app_evicts; + else + app_evicts = + (UINT64_MAX - cache->cp_saved_app_evicts) + tmp; + cache->cp_saved_app_evicts = tmp; + + /* Update the eviction wait information */ + tmp = cache->app_waits; + if (tmp >= cache->cp_saved_app_waits) + app_waits = tmp - cache->cp_saved_app_waits; + else + app_waits = + (UINT64_MAX - cache->cp_saved_app_waits) + tmp; + cache->cp_saved_app_waits = tmp; + + /* Calculate the weighted pressure for this member */ + cache->cp_pass_pressure = + (app_evicts * WT_CACHE_POOL_APP_EVICT_MULTIPLIER) + + (app_waits * WT_CACHE_POOL_APP_WAIT_MULTIPLIER) + + (reads * WT_CACHE_POOL_READ_MULTIPLIER); + + if (cache->cp_pass_pressure > highest) + highest = cache->cp_pass_pressure; + + WT_RET(__wt_verbose(session, WT_VERB_SHARED_CACHE, + "Assess entry. reads: %" PRIu64 ", app evicts: %" PRIu64 + ", app waits: %" PRIu64 ", pressure: %" PRIu64, + reads, app_evicts, app_waits, cache->cp_pass_pressure)); } WT_RET(__wt_verbose(session, WT_VERB_SHARED_CACHE, "Highest eviction count: %" PRIu64 ", entries: %" PRIu64, highest, entries)); - /* Normalize eviction information across connections. */ - highest = highest / (entries + 1); - ++highest; /* Avoid divide by zero. */ *phighest = highest; return (0); @@ -487,18 +535,21 @@ __cache_pool_adjust(WT_SESSION_IMPL *session, WT_CACHE_POOL *cp; WT_CACHE *cache; WT_CONNECTION_IMPL *entry; - uint64_t adjusted, reserved, read_pressure; + uint64_t adjusted, highest_percentile, pressure, reserved; int force, grew; *adjustedp = 0; cp = __wt_process.cache_pool; force = (cp->currently_used > cp->size); grew = 0; + /* Highest as a percentage, avoid 0 */ + highest_percentile = (highest / 100) + 1; + if (WT_VERBOSE_ISSET(session, WT_VERB_SHARED_CACHE)) { WT_RET(__wt_verbose(session, WT_VERB_SHARED_CACHE, "Cache pool distribution: ")); WT_RET(__wt_verbose(session, WT_VERB_SHARED_CACHE, - "\t" "cache_size, read_pressure, skips: ")); + "\t" "cache_size, pressure, skips: ")); } TAILQ_FOREACH(entry, &cp->cache_pool_qh, cpq) { @@ -506,10 +557,17 @@ __cache_pool_adjust(WT_SESSION_IMPL *session, reserved = cache->cp_reserved; adjusted = 0; - read_pressure = cache->cp_current_read / highest; + /* + * The read pressure is calculated as a percentage of how + * much read pressure there is on this participant compared + * to the participant with the most activity. The closer we + * are to the most active the more cache we should get + * assigned. + */ + pressure = cache->cp_pass_pressure / highest_percentile; WT_RET(__wt_verbose(session, WT_VERB_SHARED_CACHE, "\t%" PRIu64 ", %" PRIu64 ", %" PRIu32, - entry->cache_size, read_pressure, cache->cp_skip_count)); + entry->cache_size, pressure, cache->cp_skip_count)); /* Allow to stabilize after changes. */ if (cache->cp_skip_count > 0 && --cache->cp_skip_count > 0) @@ -523,6 +581,7 @@ __cache_pool_adjust(WT_SESSION_IMPL *session, if (entry->cache_size < reserved) { grew = 1; adjusted = reserved - entry->cache_size; + /* * Conditions for reducing the amount of resources for an * entry: @@ -534,9 +593,9 @@ __cache_pool_adjust(WT_SESSION_IMPL *session, * space in the pool. */ } else if ((force && entry->cache_size > reserved) || - (read_pressure < WT_CACHE_POOL_REDUCE_THRESHOLD && - highest > 1 && entry->cache_size > reserved && - cp->currently_used >= cp->size)) { + (pressure < WT_CACHE_POOL_REDUCE_THRESHOLD && + highest > 1 && entry->cache_size > reserved && + cp->currently_used >= cp->size)) { grew = 0; /* * Shrink by a chunk size if that doesn't drop us @@ -553,14 +612,15 @@ __cache_pool_adjust(WT_SESSION_IMPL *session, * - This entry is using less than the entire cache pool * - The connection is using enough cache to require eviction * - There is space available in the pool - * - Additional cache would benefit the connection + * - Additional cache would benefit the connection OR + * - The pool is less than half distributed */ - } else if (highest > 1 && - entry->cache_size < cp->size && - cache->bytes_inmem >= - (entry->cache_size * cache->eviction_target) / 100 && - cp->currently_used < cp->size && - read_pressure > bump_threshold) { + } else if (entry->cache_size < cp->size && + __wt_cache_bytes_inuse(cache) >= + (entry->cache_size * cache->eviction_target) / 100 && + ((cp->currently_used < cp->size && + pressure > bump_threshold) || + cp->currently_used < cp->size * 0.5)) { grew = 1; adjusted = WT_MIN(cp->chunk, cp->size - cp->currently_used); diff --git a/src/third_party/wiredtiger/src/conn/conn_handle.c b/src/third_party/wiredtiger/src/conn/conn_handle.c index 0e7ab0a03d8..94e69897c1d 100644 --- a/src/third_party/wiredtiger/src/conn/conn_handle.c +++ b/src/third_party/wiredtiger/src/conn/conn_handle.c @@ -53,7 +53,8 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn) WT_RET(__wt_spin_init(session, &conn->dhandle_lock, "data handle")); WT_RET(__wt_spin_init(session, &conn->encryptor_lock, "encryptor")); WT_RET(__wt_spin_init(session, &conn->fh_lock, "file list")); - WT_RET(__wt_spin_init(session, &conn->hot_backup_lock, "hot backup")); + WT_RET(__wt_rwlock_alloc(session, + &conn->hot_backup_lock, "hot backup")); WT_RET(__wt_spin_init(session, &conn->reconfig_lock, "reconfigure")); WT_RET(__wt_spin_init(session, &conn->schema_lock, "schema")); WT_RET(__wt_spin_init(session, &conn->table_lock, "table creation")); @@ -136,7 +137,7 @@ __wt_connection_destroy(WT_CONNECTION_IMPL *conn) __wt_spin_destroy(session, &conn->dhandle_lock); __wt_spin_destroy(session, &conn->encryptor_lock); __wt_spin_destroy(session, &conn->fh_lock); - __wt_spin_destroy(session, &conn->hot_backup_lock); + WT_TRET(__wt_rwlock_destroy(session, &conn->hot_backup_lock)); __wt_spin_destroy(session, &conn->reconfig_lock); __wt_spin_destroy(session, &conn->schema_lock); __wt_spin_destroy(session, &conn->table_lock); diff --git a/src/third_party/wiredtiger/src/conn/conn_log.c b/src/third_party/wiredtiger/src/conn/conn_log.c index bc80152f6bf..be7ce2e9344 100644 --- a/src/third_party/wiredtiger/src/conn/conn_log.c +++ b/src/third_party/wiredtiger/src/conn/conn_log.c @@ -139,7 +139,7 @@ __log_archive_once(WT_SESSION_IMPL *session, uint32_t backup_file) * We can only archive files if a hot backup is not in progress or * if we are the backup. */ - __wt_spin_lock(session, &conn->hot_backup_lock); + WT_RET(__wt_readlock(session, conn->hot_backup_lock)); locked = 1; if (conn->hot_backup == 0 || backup_file != 0) { for (i = 0; i < logcount; i++) { @@ -151,7 +151,7 @@ __log_archive_once(WT_SESSION_IMPL *session, uint32_t backup_file) } } } - __wt_spin_unlock(session, &conn->hot_backup_lock); + WT_ERR(__wt_readunlock(session, conn->hot_backup_lock)); locked = 0; __wt_log_files_free(session, logfiles, logcount); logfiles = NULL; @@ -167,7 +167,7 @@ __log_archive_once(WT_SESSION_IMPL *session, uint32_t backup_file) if (0) err: __wt_err(session, ret, "log archive server error"); if (locked) - __wt_spin_unlock(session, &conn->hot_backup_lock); + WT_TRET(__wt_readunlock(session, conn->hot_backup_lock)); if (logfiles != NULL) __wt_log_files_free(session, logfiles, logcount); return (ret); @@ -207,9 +207,8 @@ __log_prealloc_once(WT_SESSION_IMPL *session) if (log->prep_missed > 0) { conn->log_prealloc += log->prep_missed; WT_ERR(__wt_verbose(session, WT_VERB_LOG, - "Now pre-allocating up to %" PRIu32, - conn->log_prealloc)); - log->prep_missed = 0; + "Missed %" PRIu32 ". Now pre-allocating up to %" PRIu32, + log->prep_missed, conn->log_prealloc)); } WT_STAT_FAST_CONN_SET(session, log_prealloc_max, conn->log_prealloc); @@ -221,6 +220,13 @@ __log_prealloc_once(WT_SESSION_IMPL *session) session, ++log->prep_fileid, WT_LOG_PREPNAME, 1)); WT_STAT_FAST_CONN_INCR(session, log_prealloc_files); } + /* + * Reset the missed count now. If we missed during pre-allocating + * the log files, it means the allocation is not keeping up, not that + * we didn't allocate enough. So we don't just want to keep adding + * in more. + */ + log->prep_missed = 0; if (0) err: __wt_err(session, ret, "log pre-alloc server error"); diff --git a/src/third_party/wiredtiger/src/conn/conn_sweep.c b/src/third_party/wiredtiger/src/conn/conn_sweep.c index 08137c9c9ff..ec6f628a02e 100644 --- a/src/third_party/wiredtiger/src/conn/conn_sweep.c +++ b/src/third_party/wiredtiger/src/conn/conn_sweep.c @@ -35,7 +35,8 @@ __sweep_mark(WT_SESSION_IMPL *session, int *dead_handlesp) continue; } if (dhandle->session_inuse != 0 || - now <= dhandle->timeofdeath + conn->sweep_idle_time) + now <= dhandle->timeofdeath + conn->sweep_idle_time || + conn->sweep_idle_time == 0) continue; if (dhandle->timeofdeath == 0) { dhandle->timeofdeath = now; @@ -121,6 +122,10 @@ __sweep_expire(WT_SESSION_IMPL *session) conn = S2C(session); + /* If sweep_idle_time is 0, then we won't expire any cursors */ + if (conn->sweep_idle_time == 0) + return (0); + /* Don't discard handles that have been open recently. */ WT_RET(__wt_seconds(session, &now)); @@ -265,8 +270,14 @@ __sweep_server(void *arg) */ WT_ERR(__sweep_mark(session, &dead_handles)); + /* + * We only want to flush and expire if there are no dead handles + * and if either the sweep_idle_time is not 0, or if we have + * reached the configured limit of handles. + */ if (dead_handles == 0 && - conn->open_file_count < conn->sweep_handles_min) + (conn->open_file_count < conn->sweep_handles_min || + conn->sweep_idle_time != 0)) continue; /* Close handles if we have reached the configured limit */ diff --git a/src/third_party/wiredtiger/src/cursor/cur_backup.c b/src/third_party/wiredtiger/src/cursor/cur_backup.c index 8f43e98e2f7..60d94697189 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_backup.c +++ b/src/third_party/wiredtiger/src/cursor/cur_backup.c @@ -217,9 +217,9 @@ __backup_start( * could start a hot backup that would race with an already-started * checkpoint. */ - __wt_spin_lock(session, &conn->hot_backup_lock); + WT_RET(__wt_writelock(session, conn->hot_backup_lock)); conn->hot_backup = 1; - __wt_spin_unlock(session, &conn->hot_backup_lock); + WT_ERR(__wt_writeunlock(session, conn->hot_backup_lock)); /* Create the hot backup file. */ WT_ERR(__backup_file_create(session, cb, 0)); @@ -318,9 +318,9 @@ __backup_stop(WT_SESSION_IMPL *session) ret = __wt_backup_file_remove(session); /* Checkpoint deletion can proceed, as can the next hot backup. */ - __wt_spin_lock(session, &conn->hot_backup_lock); + WT_TRET(__wt_writelock(session, conn->hot_backup_lock)); conn->hot_backup = 0; - __wt_spin_unlock(session, &conn->hot_backup_lock); + WT_TRET(__wt_writeunlock(session, conn->hot_backup_lock)); return (ret); } diff --git a/src/third_party/wiredtiger/src/cursor/cur_metadata.c b/src/third_party/wiredtiger/src/cursor/cur_metadata.c index 9860eb65a55..460c46c0d29 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_metadata.c +++ b/src/third_party/wiredtiger/src/cursor/cur_metadata.c @@ -30,15 +30,42 @@ WT_CURSTD_VALUE_EXT); \ } while (0) -#define WT_MD_SET_KEY_VALUE(c, mc, fc) do { \ - (c)->key.data = (fc)->key.data; \ - (c)->key.size = (fc)->key.size; \ - (c)->value.data = (fc)->value.data; \ - (c)->value.size = (fc)->value.size; \ - F_SET((c), WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT); \ - F_CLR((mc), WT_MDC_ONMETADATA); \ - F_SET((mc), WT_MDC_POSITIONED); \ -} while (0) +/* + * __curmetadata_setkv -- + * Copy key/value into the public cursor, stripping internal metadata for + * "create-only" cursors. + */ +static int +__curmetadata_setkv(WT_CURSOR_METADATA *mdc, WT_CURSOR *fc) +{ + WT_CURSOR *c; + WT_DECL_RET; + WT_SESSION_IMPL *session; + char *value; + + c = &mdc->iface; + session = (WT_SESSION_IMPL *)c->session; + + c->key.data = fc->key.data; + c->key.size = fc->key.size; + if (F_ISSET(mdc, WT_MDC_CREATEONLY)) { + WT_RET(__wt_schema_create_strip( + session, fc->value.data, NULL, &value)); + ret = __wt_buf_set( + session, &c->value, value, strlen(value) + 1); + __wt_free(session, value); + WT_RET(ret); + } else { + c->value.data = fc->value.data; + c->value.size = fc->value.size; + } + + F_SET(c, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT); + F_CLR(mdc, WT_MDC_ONMETADATA); + F_SET(mdc, WT_MDC_POSITIONED); + + return (0); +} /* * Check if a key matches the metadata. The public value is "metadata:", @@ -57,17 +84,21 @@ __curmetadata_metadata_search(WT_SESSION_IMPL *session, WT_CURSOR *cursor) { WT_CURSOR_METADATA *mdc; WT_DECL_RET; - char *value; + char *value, *stripped; mdc = (WT_CURSOR_METADATA *)cursor; /* The metadata search interface allocates a new string in value. */ WT_RET(__wt_metadata_search(session, WT_METAFILE_URI, &value)); - /* - * Copy the value to the underlying btree cursor's tmp item which will - * be freed when the cursor is closed. - */ + if (F_ISSET(mdc, WT_MDC_CREATEONLY)) { + ret = __wt_schema_create_strip( + session, value, NULL, &stripped); + __wt_free(session, value); + WT_RET(ret); + value = stripped; + } + ret = __wt_buf_setstr(session, &cursor->value, value); __wt_free(session, value); WT_RET(ret); @@ -141,7 +172,7 @@ __curmetadata_next(WT_CURSOR *cursor) WT_ERR(__curmetadata_metadata_search(session, cursor)); else { WT_ERR(file_cursor->next(mdc->file_cursor)); - WT_MD_SET_KEY_VALUE(cursor, mdc, file_cursor); + WT_ERR(__curmetadata_setkv(mdc, file_cursor)); } err: if (ret != 0) { @@ -174,9 +205,9 @@ __curmetadata_prev(WT_CURSOR *cursor) } ret = file_cursor->prev(file_cursor); - if (ret == 0) { - WT_MD_SET_KEY_VALUE(cursor, mdc, file_cursor); - } else if (ret == WT_NOTFOUND) + if (ret == 0) + WT_ERR(__curmetadata_setkv(mdc, file_cursor)); + else if (ret == WT_NOTFOUND) WT_ERR(__curmetadata_metadata_search(session, cursor)); err: if (ret != 0) { @@ -234,7 +265,7 @@ __curmetadata_search(WT_CURSOR *cursor) WT_ERR(__curmetadata_metadata_search(session, cursor)); else { WT_ERR(file_cursor->search(file_cursor)); - WT_MD_SET_KEY_VALUE(cursor, mdc, file_cursor); + WT_ERR(__curmetadata_setkv(mdc, file_cursor)); } err: if (ret != 0) { @@ -268,7 +299,7 @@ __curmetadata_search_near(WT_CURSOR *cursor, int *exact) *exact = 1; } else { WT_ERR(file_cursor->search_near(file_cursor, exact)); - WT_MD_SET_KEY_VALUE(cursor, mdc, file_cursor); + WT_ERR(__curmetadata_setkv(mdc, file_cursor)); } err: if (ret != 0) { @@ -438,6 +469,10 @@ __wt_curmetadata_open(WT_SESSION_IMPL *session, WT_ERR(__wt_cursor_init(cursor, uri, owner, cfg, cursorp)); + /* If we are only returning create config, strip internal metadata. */ + if (WT_STREQ(uri, "metadata:create")) + F_SET(mdc, WT_MDC_CREATEONLY); + /* * Metadata cursors default to readonly; if not set to not-readonly, * they are permanently readonly and cannot be reconfigured. diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c index 1bf62fc7130..513da401ae6 100644 --- a/src/third_party/wiredtiger/src/evict/evict_lru.c +++ b/src/third_party/wiredtiger/src/evict/evict_lru.c @@ -1475,7 +1475,7 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, int busy, int pct_full) * to make sure there is free space in the cache. */ txn_global = &conn->txn_global; - txn_state = &txn_global->states[session->id]; + txn_state = WT_SESSION_TXN_STATE(session); txn_busy = txn_state->id != WT_TXN_NONE || session->nhazard > 0 || (txn_state->snap_min != WT_TXN_NONE && @@ -1512,6 +1512,7 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, int busy, int pct_full) q_found = 0; switch (ret = __evict_page(session, 0)) { case 0: + cache->app_evicts++; if (--count == 0) return (0); @@ -1550,6 +1551,7 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, int busy, int pct_full) WT_RET( __wt_cond_wait(session, cache->evict_waiter_cond, 100000)); + cache->app_waits++; /* Check if things have changed so that we are busy. */ if (!busy && txn_state->snap_min != WT_TXN_NONE && txn_global->current != txn_global->oldest_id) diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c index 8680a644421..1e5faf45de2 100644 --- a/src/third_party/wiredtiger/src/evict/evict_page.c +++ b/src/third_party/wiredtiger/src/evict/evict_page.c @@ -59,6 +59,9 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, int closing) conn = S2C(session); + /* Checkpoints should never do eviction. */ + WT_ASSERT(session, !WT_SESSION_IS_CHECKPOINT(session)); + page = ref->page; forced_eviction = page->read_gen == WT_READGEN_OLDEST; inmem_split = 0; diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h index 4809d257e7e..f13504d66ca 100644 --- a/src/third_party/wiredtiger/src/include/btmem.h +++ b/src/third_party/wiredtiger/src/include/btmem.h @@ -195,6 +195,11 @@ struct __wt_page_modify { /* The largest update transaction ID (approximate). */ uint64_t update_txn; +#ifdef HAVE_DIAGNOSTIC + /* Check that transaction time moves forward. */ + uint64_t last_oldest_id; +#endif + /* Dirty bytes added to the cache. */ size_t bytes_dirty; diff --git a/src/third_party/wiredtiger/src/include/cache.h b/src/third_party/wiredtiger/src/include/cache.h index cb7e66d2bbd..ed93f82538c 100644 --- a/src/third_party/wiredtiger/src/include/cache.h +++ b/src/third_party/wiredtiger/src/include/cache.h @@ -65,6 +65,9 @@ struct __wt_cache { uint64_t pages_dirty; uint64_t bytes_read; /* Bytes read into memory */ + uint64_t app_evicts; /* Pages evicted by user threads */ + uint64_t app_waits; /* User threads waited for cache */ + uint64_t evict_max_page_size; /* Largest page seen at eviction */ /* @@ -105,12 +108,15 @@ struct __wt_cache { /* * Cache pool information. */ - uint64_t cp_saved_read; /* Read count from last pass */ - uint64_t cp_current_read; /* Read count from current pass */ - uint32_t cp_skip_count; /* Post change stabilization */ + uint64_t cp_pass_pressure; /* Calculated pressure from this pass */ uint64_t cp_reserved; /* Base size for this cache */ WT_SESSION_IMPL *cp_session; /* May be used for cache management */ + uint32_t cp_skip_count; /* Post change stabilization */ wt_thread_t cp_tid; /* Thread ID for cache pool manager */ + /* State seen at the last pass of the shared cache manager */ + uint64_t cp_saved_app_evicts; /* User eviction count at last review */ + uint64_t cp_saved_app_waits; /* User wait count at last review */ + uint64_t cp_saved_read; /* Read count at last review */ /* * Flags. diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h index d6a2bb0b17a..cd55aadfc07 100644 --- a/src/third_party/wiredtiger/src/include/connection.h +++ b/src/third_party/wiredtiger/src/include/connection.h @@ -266,7 +266,7 @@ struct __wt_connection_impl { WT_TXN_GLOBAL txn_global; /* Global transaction state */ - WT_SPINLOCK hot_backup_lock; /* Hot backup serialization */ + WT_RWLOCK *hot_backup_lock; /* Hot backup serialization */ int hot_backup; WT_SESSION_IMPL *ckpt_session; /* Checkpoint thread session */ diff --git a/src/third_party/wiredtiger/src/include/cursor.h b/src/third_party/wiredtiger/src/include/cursor.h index 9b61318aacc..36f36f2c46c 100644 --- a/src/third_party/wiredtiger/src/include/cursor.h +++ b/src/third_party/wiredtiger/src/include/cursor.h @@ -291,8 +291,9 @@ struct __wt_cursor_metadata { WT_CURSOR *file_cursor; /* Queries of regular metadata */ -#define WT_MDC_POSITIONED 0x01 +#define WT_MDC_CREATEONLY 0x01 #define WT_MDC_ONMETADATA 0x02 +#define WT_MDC_POSITIONED 0x04 uint32_t flags; }; diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index e17b309cf5d..87099ac839f 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -361,7 +361,6 @@ extern int __wt_log_slot_notify(WT_SESSION_IMPL *session, WT_LOGSLOT *slot); extern int __wt_log_slot_wait(WT_SESSION_IMPL *session, WT_LOGSLOT *slot); extern int64_t __wt_log_slot_release(WT_LOGSLOT *slot, uint64_t size); extern int __wt_log_slot_free(WT_SESSION_IMPL *session, WT_LOGSLOT *slot); -extern int __wt_log_slot_grow_buffers(WT_SESSION_IMPL *session, size_t newsize); extern int __wt_clsm_request_switch(WT_CURSOR_LSM *clsm); extern int __wt_clsm_await_switch(WT_CURSOR_LSM *clsm); extern int __wt_clsm_init_merge( WT_CURSOR *cursor, u_int start_chunk, uint32_t start_id, u_int nchunks); @@ -533,6 +532,7 @@ extern int __wt_bulk_wrapup(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk); extern int __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk); extern int __wt_bulk_insert_fix(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk); extern int __wt_bulk_insert_var(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk); +extern int __wt_schema_create_strip(WT_SESSION_IMPL *session, const char *v1, const char *v2, char **value_ret); extern int __wt_direct_io_size_check(WT_SESSION_IMPL *session, const char **cfg, const char *config_name, uint32_t *allocsizep); extern int __wt_schema_colgroup_source(WT_SESSION_IMPL *session, WT_TABLE *table, const char *cgname, const char *config, WT_ITEM *buf); extern int __wt_schema_index_source(WT_SESSION_IMPL *session, WT_TABLE *table, const char *idxname, const char *config, WT_ITEM *buf); @@ -575,7 +575,6 @@ extern int __wt_schema_worker(WT_SESSION_IMPL *session, const char *uri, int (*f extern int __wt_session_reset_cursors(WT_SESSION_IMPL *session, int free_buffers); extern int __wt_session_copy_values(WT_SESSION_IMPL *session); extern int __wt_open_cursor(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp); -extern int __wt_session_create_strip(WT_SESSION *wt_session, const char *v1, const char *v2, char **value_ret); extern int __wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name, int uses_dhandles, int open_metadata, WT_SESSION_IMPL **sessionp); extern int __wt_open_session(WT_CONNECTION_IMPL *conn, WT_EVENT_HANDLER *event_handler, const char *config, int open_metadata, WT_SESSION_IMPL **sessionp); extern int __wt_compact_uri_analyze(WT_SESSION_IMPL *session, const char *uri, int *skip); diff --git a/src/third_party/wiredtiger/src/include/intpack.i b/src/third_party/wiredtiger/src/include/intpack.i index e1bcdb42ebd..d3fdfeaf1a6 100644 --- a/src/third_party/wiredtiger/src/include/intpack.i +++ b/src/third_party/wiredtiger/src/include/intpack.i @@ -300,7 +300,6 @@ __wt_vunpack_int(const uint8_t **pp, size_t maxlen, int64_t *xp) *xp = (int64_t)(GET_BITS(*p++, 5, 0) << 8); *xp |= *p++; *xp += NEG_2BYTE_MIN; - p += 2; break; case NEG_1BYTE_MARKER: case NEG_1BYTE_MARKER | 0x10: diff --git a/src/third_party/wiredtiger/src/include/log.h b/src/third_party/wiredtiger/src/include/log.h index 3de72b8b9a6..051f9fb262e 100644 --- a/src/third_party/wiredtiger/src/include/log.h +++ b/src/third_party/wiredtiger/src/include/log.h @@ -12,7 +12,7 @@ /* Logging subsystem declarations. */ #define WT_LOG_ALIGN 128 -#define WT_LOG_SLOT_BUF_INIT_SIZE 64 * 1024 +#define WT_LOG_SLOT_BUF_SIZE 256 * 1024 #define WT_INIT_LSN(l) do { \ (l)->file = 1; \ @@ -91,11 +91,10 @@ typedef WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) struct { WT_ITEM slot_buf; /* Buffer for grouped writes */ int32_t slot_churn; /* Active slots are scarce. */ -#define WT_SLOT_BUF_GROW 0x01 /* Grow buffer on release */ -#define WT_SLOT_BUFFERED 0x02 /* Buffer writes */ -#define WT_SLOT_CLOSEFH 0x04 /* Close old fh on release */ -#define WT_SLOT_SYNC 0x08 /* Needs sync on release */ -#define WT_SLOT_SYNC_DIR 0x10 /* Directory sync on release */ +#define WT_SLOT_BUFFERED 0x01 /* Buffer writes */ +#define WT_SLOT_CLOSEFH 0x02 /* Close old fh on release */ +#define WT_SLOT_SYNC 0x04 /* Needs sync on release */ +#define WT_SLOT_SYNC_DIR 0x08 /* Directory sync on release */ uint32_t flags; /* Flags */ } WT_LOGSLOT; @@ -117,6 +116,7 @@ typedef struct { */ uint32_t fileid; /* Current log file number */ uint32_t prep_fileid; /* Pre-allocated file number */ + uint32_t tmp_fileid; /* Temporary file number */ uint32_t prep_missed; /* Pre-allocated file misses */ WT_FH *log_fh; /* Logging file handle */ WT_FH *log_close_fh; /* Logging file handle to close */ @@ -157,10 +157,11 @@ typedef struct { * slot count of one. */ #define WT_SLOT_ACTIVE 1 -#define WT_SLOT_POOL 16 +#define WT_SLOT_POOL 128 uint32_t pool_index; /* Global pool index */ WT_LOGSLOT *slot_array[WT_SLOT_ACTIVE]; /* Active slots */ WT_LOGSLOT slot_pool[WT_SLOT_POOL]; /* Pool of all slots */ + uint32_t slot_buf_size; /* Buffer size for slots */ #define WT_LOG_FORCE_CONSOLIDATE 0x01 /* Disable direct writes */ uint32_t flags; diff --git a/src/third_party/wiredtiger/src/include/packing.i b/src/third_party/wiredtiger/src/include/packing.i index b97b3a322ce..bf6b5abce67 100644 --- a/src/third_party/wiredtiger/src/include/packing.i +++ b/src/third_party/wiredtiger/src/include/packing.i @@ -181,6 +181,7 @@ next: if (pack->cur == pack->end) /* Integral types repeat <size> times. */ if (pv->size == 0) goto next; + pv->havesize = 0; pack->repeats = pv->size - 1; pack->lastv = *pv; return (0); @@ -322,18 +323,19 @@ __pack_write( *pp += pv->size; break; case 's': + WT_SIZE_CHECK(pv->size, maxlen); + memcpy(*pp, pv->u.s, pv->size); + *pp += pv->size; + break; case 'S': - /* - * XXX if pv->havesize, only want to know if there is a - * '\0' in the first pv->size characters. - */ s = strlen(pv->u.s); - if ((pv->type == 's' || pv->havesize) && pv->size < s) { - s = pv->size; - pad = 0; - } else if (pv->havesize) - pad = pv->size - s; - else + if (pv->havesize) { + if (pv->size < s) { + s = pv->size; + pad = 0; + } else + pad = pv->size - s; + } else pad = 1; WT_SIZE_CHECK(s + pad, maxlen); if (s > 0) @@ -665,6 +667,7 @@ __wt_struct_unpackv(WT_SESSION_IMPL *session, if (fmt[0] != '\0' && fmt[1] == '\0') { pv.type = fmt[0]; + pv.size = 1; if ((ret = __unpack_read(session, &pv, &p, size)) == 0) WT_UNPACK_PUT(session, pv, ap); return (0); diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h index 728c8c9fe8e..f05d87c058b 100644 --- a/src/third_party/wiredtiger/src/include/stat.h +++ b/src/third_party/wiredtiger/src/include/stat.h @@ -203,7 +203,6 @@ struct __wt_connection_stats { WT_STATS dh_session_handles; WT_STATS dh_session_sweeps; WT_STATS file_open; - WT_STATS log_buffer_grow; WT_STATS log_buffer_size; WT_STATS log_bytes_payload; WT_STATS log_bytes_written; diff --git a/src/third_party/wiredtiger/src/include/txn.h b/src/third_party/wiredtiger/src/include/txn.h index 85c11e19685..7a67f713244 100644 --- a/src/third_party/wiredtiger/src/include/txn.h +++ b/src/third_party/wiredtiger/src/include/txn.h @@ -25,6 +25,9 @@ #define WT_SESSION_TXN_STATE(s) (&S2C(s)->txn_global.states[(s)->id]) +#define WT_SESSION_IS_CHECKPOINT(s) \ + ((s)->id != 0 && (s)->id == S2C(s)->txn_global.checkpoint_id) + struct __wt_named_snapshot { const char *name; @@ -64,7 +67,7 @@ struct __wt_txn_global { */ volatile uint32_t checkpoint_id; /* Checkpoint's session ID */ volatile uint64_t checkpoint_gen; - volatile uint64_t checkpoint_snap_min; + volatile uint64_t checkpoint_pinned; /* Named snapshot state. */ WT_RWLOCK *nsnap_rwlock; diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i index 1e3afbd4df3..a9b54d26e47 100644 --- a/src/third_party/wiredtiger/src/include/txn.i +++ b/src/third_party/wiredtiger/src/include/txn.i @@ -105,19 +105,20 @@ __wt_txn_oldest_id(WT_SESSION_IMPL *session) { WT_BTREE *btree; WT_TXN_GLOBAL *txn_global; - uint64_t checkpoint_snap_min, oldest_id; - uint32_t checkpoint_id; + uint64_t checkpoint_gen, checkpoint_pinned, oldest_id; txn_global = &S2C(session)->txn_global; btree = S2BT_SAFE(session); /* * Take a local copy of these IDs in case they are updated while we are - * checking visibility. + * checking visibility. Only the generation needs to be carefully + * ordered: if a checkpoint is starting and the generation is bumped, + * we take the minimum of the other two IDs, which is what we want. */ - checkpoint_id = txn_global->checkpoint_id; - checkpoint_snap_min = txn_global->checkpoint_snap_min; oldest_id = txn_global->oldest_id; + WT_ORDERED_READ(checkpoint_gen, txn_global->checkpoint_gen); + checkpoint_pinned = txn_global->checkpoint_pinned; /* * Checkpoint transactions often fall behind ordinary application @@ -129,17 +130,13 @@ __wt_txn_oldest_id(WT_SESSION_IMPL *session) * checkpoint, or this handle is up to date with the active checkpoint * then it's safe to ignore the checkpoint ID in the visibility check. */ - if (checkpoint_snap_min != WT_TXN_NONE && - checkpoint_id != session->id && (btree == NULL || - btree->checkpoint_gen != txn_global->checkpoint_gen) && - WT_TXNID_LT(checkpoint_snap_min, oldest_id)) - /* - * Use the checkpoint ID for the visibility check if it is the - * oldest ID in the system. - */ - oldest_id = checkpoint_snap_min; + if (checkpoint_pinned == WT_TXN_NONE || + WT_TXNID_LT(oldest_id, checkpoint_pinned) || + WT_SESSION_IS_CHECKPOINT(session) || + (btree != NULL && btree->checkpoint_gen == checkpoint_gen)) + return (oldest_id); - return (oldest_id); + return (checkpoint_pinned); } /* @@ -355,7 +352,7 @@ __wt_txn_id_check(WT_SESSION_IMPL *session) if (!F_ISSET(txn, WT_TXN_HAS_ID)) { conn = S2C(session); txn_global = &conn->txn_global; - txn_state = &txn_global->states[session->id]; + txn_state = WT_SESSION_TXN_STATE(session); WT_ASSERT(session, txn_state->id == WT_TXN_NONE); @@ -447,7 +444,7 @@ __wt_txn_cursor_op(WT_SESSION_IMPL *session) txn = &session->txn; txn_global = &S2C(session)->txn_global; - txn_state = &txn_global->states[session->id]; + txn_state = WT_SESSION_TXN_STATE(session); /* * If there is no transaction running (so we don't have an ID), and no diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in index 77cccfcf9d3..096fea3eeb3 100644 --- a/src/third_party/wiredtiger/src/include/wiredtiger.in +++ b/src/third_party/wiredtiger/src/include/wiredtiger.in @@ -1721,8 +1721,9 @@ struct __wt_connection { * handles open before the file manager will look for handles to close., * an integer greater than or equal to 0; default \c 250.} * @config{ close_idle_time, amount of time in - * seconds a file handle needs to be idle before attempting to close - * it., an integer between 1 and 100000; default \c 30.} + * seconds a file handle needs to be idle before attempting to close it. + * A setting of 0 means that idle handles are not closed., an integer + * between 0 and 100000; default \c 30.} * @config{ close_scan_interval, interval in * seconds at which to check for files that are inactive and close * them., an integer between 1 and 100000; default \c 10.} @@ -2152,11 +2153,12 @@ struct __wt_connection { * before the file manager will look for handles to close., an integer greater * than or equal to 0; default \c 250.} * @config{ close_idle_time, amount of time in seconds a - * file handle needs to be idle before attempting to close it., an integer - * between 1 and 100000; default \c 30.} - * @config{ close_scan_interval, interval in seconds at - * which to check for files that are inactive and close them., an integer - * between 1 and 100000; default \c 10.} + * file handle needs to be idle before attempting to close it. A setting of 0 + * means that idle handles are not closed., an integer between 0 and 100000; + * default \c 30.} + * @config{ close_scan_interval, interval + * in seconds at which to check for files that are inactive and close them., an + * integer between 1 and 100000; default \c 10.} * @config{ ),,} * @config{hazard_max, maximum number of simultaneous hazard pointers per * session handle., an integer greater than or equal to 15; default \c 1000.} @@ -3682,150 +3684,148 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_DH_SESSION_SWEEPS 1069 /*! connection: files currently open */ #define WT_STAT_CONN_FILE_OPEN 1070 -/*! log: log buffer size increases */ -#define WT_STAT_CONN_LOG_BUFFER_GROW 1071 /*! log: total log buffer size */ -#define WT_STAT_CONN_LOG_BUFFER_SIZE 1072 +#define WT_STAT_CONN_LOG_BUFFER_SIZE 1071 /*! log: log bytes of payload data */ -#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1073 +#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1072 /*! log: log bytes written */ -#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1074 +#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1073 /*! log: yields waiting for previous log file close */ -#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1075 +#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1074 /*! log: total size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_LEN 1076 +#define WT_STAT_CONN_LOG_COMPRESS_LEN 1075 /*! log: total in-memory size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_MEM 1077 +#define WT_STAT_CONN_LOG_COMPRESS_MEM 1076 /*! log: log records too small to compress */ -#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1078 +#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1077 /*! log: log records not compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1079 +#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1078 /*! log: log records compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1080 +#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1079 /*! log: maximum log file size */ -#define WT_STAT_CONN_LOG_MAX_FILESIZE 1081 +#define WT_STAT_CONN_LOG_MAX_FILESIZE 1080 /*! log: pre-allocated log files prepared */ -#define WT_STAT_CONN_LOG_PREALLOC_FILES 1082 +#define WT_STAT_CONN_LOG_PREALLOC_FILES 1081 /*! log: number of pre-allocated log files to create */ -#define WT_STAT_CONN_LOG_PREALLOC_MAX 1083 +#define WT_STAT_CONN_LOG_PREALLOC_MAX 1082 /*! log: pre-allocated log files used */ -#define WT_STAT_CONN_LOG_PREALLOC_USED 1084 +#define WT_STAT_CONN_LOG_PREALLOC_USED 1083 /*! log: log release advances write LSN */ -#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1085 +#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1084 /*! log: records processed by log scan */ -#define WT_STAT_CONN_LOG_SCAN_RECORDS 1086 +#define WT_STAT_CONN_LOG_SCAN_RECORDS 1085 /*! log: log scan records requiring two reads */ -#define WT_STAT_CONN_LOG_SCAN_REREADS 1087 +#define WT_STAT_CONN_LOG_SCAN_REREADS 1086 /*! log: log scan operations */ -#define WT_STAT_CONN_LOG_SCANS 1088 +#define WT_STAT_CONN_LOG_SCANS 1087 /*! log: consolidated slot closures */ -#define WT_STAT_CONN_LOG_SLOT_CLOSES 1089 +#define WT_STAT_CONN_LOG_SLOT_CLOSES 1088 /*! log: logging bytes consolidated */ -#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1090 +#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1089 /*! log: consolidated slot joins */ -#define WT_STAT_CONN_LOG_SLOT_JOINS 1091 +#define WT_STAT_CONN_LOG_SLOT_JOINS 1090 /*! log: consolidated slot join races */ -#define WT_STAT_CONN_LOG_SLOT_RACES 1092 +#define WT_STAT_CONN_LOG_SLOT_RACES 1091 /*! log: slots selected for switching that were unavailable */ -#define WT_STAT_CONN_LOG_SLOT_SWITCH_FAILS 1093 +#define WT_STAT_CONN_LOG_SLOT_SWITCH_FAILS 1092 /*! log: record size exceeded maximum */ -#define WT_STAT_CONN_LOG_SLOT_TOOBIG 1094 +#define WT_STAT_CONN_LOG_SLOT_TOOBIG 1093 /*! log: failed to find a slot large enough for record */ -#define WT_STAT_CONN_LOG_SLOT_TOOSMALL 1095 +#define WT_STAT_CONN_LOG_SLOT_TOOSMALL 1094 /*! log: consolidated slot join transitions */ -#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1096 +#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1095 /*! log: log sync operations */ -#define WT_STAT_CONN_LOG_SYNC 1097 +#define WT_STAT_CONN_LOG_SYNC 1096 /*! log: log sync_dir operations */ -#define WT_STAT_CONN_LOG_SYNC_DIR 1098 +#define WT_STAT_CONN_LOG_SYNC_DIR 1097 /*! log: log server thread advances write LSN */ -#define WT_STAT_CONN_LOG_WRITE_LSN 1099 +#define WT_STAT_CONN_LOG_WRITE_LSN 1098 /*! log: log write operations */ -#define WT_STAT_CONN_LOG_WRITES 1100 +#define WT_STAT_CONN_LOG_WRITES 1099 /*! LSM: sleep for LSM checkpoint throttle */ -#define WT_STAT_CONN_LSM_CHECKPOINT_THROTTLE 1101 +#define WT_STAT_CONN_LSM_CHECKPOINT_THROTTLE 1100 /*! LSM: sleep for LSM merge throttle */ -#define WT_STAT_CONN_LSM_MERGE_THROTTLE 1102 +#define WT_STAT_CONN_LSM_MERGE_THROTTLE 1101 /*! LSM: rows merged in an LSM tree */ -#define WT_STAT_CONN_LSM_ROWS_MERGED 1103 +#define WT_STAT_CONN_LSM_ROWS_MERGED 1102 /*! LSM: application work units currently queued */ -#define WT_STAT_CONN_LSM_WORK_QUEUE_APP 1104 +#define WT_STAT_CONN_LSM_WORK_QUEUE_APP 1103 /*! LSM: merge work units currently queued */ -#define WT_STAT_CONN_LSM_WORK_QUEUE_MANAGER 1105 +#define WT_STAT_CONN_LSM_WORK_QUEUE_MANAGER 1104 /*! LSM: tree queue hit maximum */ -#define WT_STAT_CONN_LSM_WORK_QUEUE_MAX 1106 +#define WT_STAT_CONN_LSM_WORK_QUEUE_MAX 1105 /*! LSM: switch work units currently queued */ -#define WT_STAT_CONN_LSM_WORK_QUEUE_SWITCH 1107 +#define WT_STAT_CONN_LSM_WORK_QUEUE_SWITCH 1106 /*! LSM: tree maintenance operations scheduled */ -#define WT_STAT_CONN_LSM_WORK_UNITS_CREATED 1108 +#define WT_STAT_CONN_LSM_WORK_UNITS_CREATED 1107 /*! LSM: tree maintenance operations discarded */ -#define WT_STAT_CONN_LSM_WORK_UNITS_DISCARDED 1109 +#define WT_STAT_CONN_LSM_WORK_UNITS_DISCARDED 1108 /*! LSM: tree maintenance operations executed */ -#define WT_STAT_CONN_LSM_WORK_UNITS_DONE 1110 +#define WT_STAT_CONN_LSM_WORK_UNITS_DONE 1109 /*! connection: memory allocations */ -#define WT_STAT_CONN_MEMORY_ALLOCATION 1111 +#define WT_STAT_CONN_MEMORY_ALLOCATION 1110 /*! connection: memory frees */ -#define WT_STAT_CONN_MEMORY_FREE 1112 +#define WT_STAT_CONN_MEMORY_FREE 1111 /*! connection: memory re-allocations */ -#define WT_STAT_CONN_MEMORY_GROW 1113 +#define WT_STAT_CONN_MEMORY_GROW 1112 /*! thread-yield: page acquire busy blocked */ -#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1114 +#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1113 /*! thread-yield: page acquire eviction blocked */ -#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1115 +#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1114 /*! thread-yield: page acquire locked blocked */ -#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1116 +#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1115 /*! thread-yield: page acquire read blocked */ -#define WT_STAT_CONN_PAGE_READ_BLOCKED 1117 +#define WT_STAT_CONN_PAGE_READ_BLOCKED 1116 /*! thread-yield: page acquire time sleeping (usecs) */ -#define WT_STAT_CONN_PAGE_SLEEP 1118 +#define WT_STAT_CONN_PAGE_SLEEP 1117 /*! connection: total read I/Os */ -#define WT_STAT_CONN_READ_IO 1119 +#define WT_STAT_CONN_READ_IO 1118 /*! reconciliation: page reconciliation calls */ -#define WT_STAT_CONN_REC_PAGES 1120 +#define WT_STAT_CONN_REC_PAGES 1119 /*! reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_CONN_REC_PAGES_EVICTION 1121 +#define WT_STAT_CONN_REC_PAGES_EVICTION 1120 /*! reconciliation: split bytes currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1122 +#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1121 /*! reconciliation: split objects currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1123 +#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1122 /*! connection: pthread mutex shared lock read-lock calls */ -#define WT_STAT_CONN_RWLOCK_READ 1124 +#define WT_STAT_CONN_RWLOCK_READ 1123 /*! connection: pthread mutex shared lock write-lock calls */ -#define WT_STAT_CONN_RWLOCK_WRITE 1125 +#define WT_STAT_CONN_RWLOCK_WRITE 1124 /*! session: open cursor count */ -#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1126 +#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1125 /*! session: open session count */ -#define WT_STAT_CONN_SESSION_OPEN 1127 +#define WT_STAT_CONN_SESSION_OPEN 1126 /*! transaction: transaction begins */ -#define WT_STAT_CONN_TXN_BEGIN 1128 +#define WT_STAT_CONN_TXN_BEGIN 1127 /*! transaction: transaction checkpoints */ -#define WT_STAT_CONN_TXN_CHECKPOINT 1129 +#define WT_STAT_CONN_TXN_CHECKPOINT 1128 /*! transaction: transaction checkpoint generation */ -#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1130 +#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1129 /*! transaction: transaction checkpoint currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1131 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1130 /*! transaction: transaction checkpoint max time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1132 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1131 /*! transaction: transaction checkpoint min time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1133 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1132 /*! transaction: transaction checkpoint most recent time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1134 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1133 /*! transaction: transaction checkpoint total time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1135 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1134 /*! transaction: transactions committed */ -#define WT_STAT_CONN_TXN_COMMIT 1136 +#define WT_STAT_CONN_TXN_COMMIT 1135 /*! transaction: transaction failures due to cache overflow */ -#define WT_STAT_CONN_TXN_FAIL_CACHE 1137 +#define WT_STAT_CONN_TXN_FAIL_CACHE 1136 /*! transaction: transaction range of IDs currently pinned by a checkpoint */ -#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1138 +#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1137 /*! transaction: transaction range of IDs currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_RANGE 1139 +#define WT_STAT_CONN_TXN_PINNED_RANGE 1138 /*! transaction: transactions rolled back */ -#define WT_STAT_CONN_TXN_ROLLBACK 1140 +#define WT_STAT_CONN_TXN_ROLLBACK 1139 /*! transaction: transaction sync calls */ -#define WT_STAT_CONN_TXN_SYNC 1141 +#define WT_STAT_CONN_TXN_SYNC 1140 /*! connection: total write I/Os */ -#define WT_STAT_CONN_WRITE_IO 1142 +#define WT_STAT_CONN_WRITE_IO 1141 /*! * @} diff --git a/src/third_party/wiredtiger/src/log/log.c b/src/third_party/wiredtiger/src/log/log.c index 7776b68e3d2..77ae0383cbe 100644 --- a/src/third_party/wiredtiger/src/log/log.c +++ b/src/third_party/wiredtiger/src/log/log.c @@ -577,7 +577,6 @@ __log_file_header( tmp.slot_fh = fh; } else { WT_ASSERT(session, fh == NULL); - log->prep_missed++; WT_ERR(__log_acquire(session, logrec->len, &tmp)); } WT_ERR(__log_fill(session, &myslot, 1, buf, NULL)); @@ -777,25 +776,28 @@ __wt_log_allocfile( WT_DECL_RET; WT_FH *log_fh; WT_LOG *log; + uint32_t tmp_id; conn = S2C(session); log = conn->log; log_fh = NULL; + /* * Preparing a log file entails creating a temporary file: * - Writing the header. * - Truncating to the offset of the first record. * - Pre-allocating the file if needed. - * - Renaming it to the pre-allocated file name. + * - Renaming it to the desired file name. */ WT_RET(__wt_scr_alloc(session, 0, &from_path)); WT_ERR(__wt_scr_alloc(session, 0, &to_path)); - WT_ERR(__log_filename(session, lognum, WT_LOG_TMPNAME, from_path)); + tmp_id = WT_ATOMIC_ADD4(log->tmp_fileid, 1); + WT_ERR(__log_filename(session, tmp_id, WT_LOG_TMPNAME, from_path)); WT_ERR(__log_filename(session, lognum, dest, to_path)); /* * Set up the temporary file. */ - WT_ERR(__log_openfile(session, 1, &log_fh, WT_LOG_TMPNAME, lognum)); + WT_ERR(__log_openfile(session, 1, &log_fh, WT_LOG_TMPNAME, tmp_id)); WT_ERR(__log_file_header(session, log_fh, NULL, 1)); WT_ERR(__wt_ftruncate(session, log_fh, WT_LOG_FIRST_RECORD)); if (prealloc) @@ -1245,9 +1247,12 @@ __wt_log_newfile(WT_SESSION_IMPL *session, int conn_create, int *created) /* * If we need to create the log file, do so now. */ - if (create_log && (ret = __wt_log_allocfile( - session, log->fileid, WT_LOG_FILENAME, 0)) != 0) - return (ret); + if (create_log) { + log->prep_missed++; + if ((ret = __wt_log_allocfile( + session, log->fileid, WT_LOG_FILENAME, 0)) != 0) + return (ret); + } WT_RET(__log_openfile(session, 0, &log->log_fh, WT_LOG_FILENAME, log->fileid)); /* @@ -1811,11 +1816,6 @@ __log_write_internal(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, session, record, lsnp, flags)) == EAGAIN) ; WT_ERR(ret); - /* - * Increase the buffer size of any slots we can get access - * to, so future consolidations are likely to succeed. - */ - WT_ERR(__wt_log_slot_grow_buffers(session, 4 * rdup_len)); return (0); } WT_ERR(ret); diff --git a/src/third_party/wiredtiger/src/log/log_slot.c b/src/third_party/wiredtiger/src/log/log_slot.c index 741a8caf108..8723d492e13 100644 --- a/src/third_party/wiredtiger/src/log/log_slot.c +++ b/src/third_party/wiredtiger/src/log/log_slot.c @@ -54,13 +54,18 @@ __wt_log_slot_init(WT_SESSION_IMPL *session) * Allocate memory for buffers now that the arrays are setup. Split * this out to make error handling simpler. */ + /* + * Cap the slot buffer to the log file size. + */ + log->slot_buf_size = (uint32_t)WT_MIN( + conn->log_file_max, WT_LOG_SLOT_BUF_SIZE); for (i = 0; i < WT_SLOT_POOL; i++) { WT_ERR(__wt_buf_init(session, - &log->slot_pool[i].slot_buf, WT_LOG_SLOT_BUF_INIT_SIZE)); + &log->slot_pool[i].slot_buf, (size_t)log->slot_buf_size)); F_SET(&log->slot_pool[i], WT_SLOT_INIT_FLAGS); } WT_STAT_FAST_CONN_INCRV(session, - log_buffer_size, WT_LOG_SLOT_BUF_INIT_SIZE * WT_SLOT_POOL); + log_buffer_size, log->slot_buf_size * WT_SLOT_POOL); if (0) { err: while (--i >= 0) __wt_buf_free(session, &log->slot_pool[i].slot_buf); @@ -101,12 +106,16 @@ __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, WT_LOG *log; WT_LOGSLOT *slot; int64_t new_state, old_state; - uint32_t allocated_slot, slot_grow_attempts; + uint32_t allocated_slot, slot_attempts; conn = S2C(session); log = conn->log; - slot_grow_attempts = 0; + slot_attempts = 0; + if (mysize >= (uint64_t)log->slot_buf_size) { + WT_STAT_FAST_CONN_INCR(session, log_slot_toobig); + return (ENOMEM); + } find_slot: #if WT_SLOT_ACTIVE == 1 allocated_slot = 0; @@ -146,12 +155,11 @@ join_slot: goto find_slot; } /* - * If the slot buffer isn't big enough to hold this update, mark - * the slot for a buffer size increase and find another slot. + * If the slot buffer isn't big enough to hold this update, try + * to find another slot. */ if (new_state > (int64_t)slot->slot_buf.memsize) { - F_SET(slot, WT_SLOT_BUF_GROW); - if (++slot_grow_attempts > 5) { + if (++slot_attempts > 5) { WT_STAT_FAST_CONN_INCR(session, log_slot_toosmall); return (ENOMEM); } @@ -310,24 +318,8 @@ __wt_log_slot_release(WT_LOGSLOT *slot, uint64_t size) int __wt_log_slot_free(WT_SESSION_IMPL *session, WT_LOGSLOT *slot) { - WT_DECL_RET; - ret = 0; - /* - * Grow the buffer if needed before returning it to the pool. - */ - if (F_ISSET(slot, WT_SLOT_BUF_GROW)) { - WT_STAT_FAST_CONN_INCR(session, log_buffer_grow); - WT_STAT_FAST_CONN_INCRV(session, - log_buffer_size, slot->slot_buf.memsize); - WT_ERR(__wt_buf_grow(session, - &slot->slot_buf, slot->slot_buf.memsize * 2)); - } -err: - /* - * No matter if there is an error, we always want to free - * the slot back to the pool. - */ + WT_UNUSED(session); /* * Make sure flags don't get retained between uses. * We have to reset them them here because multiple threads may @@ -335,62 +327,5 @@ err: */ slot->flags = WT_SLOT_INIT_FLAGS; slot->slot_state = WT_LOG_SLOT_FREE; - return (ret); -} - -/* - * __wt_log_slot_grow_buffers -- - * Increase the buffer size of all available slots in the buffer pool. - * Go to some lengths to include active (but unused) slots to handle - * the case where all log write record sizes exceed the size of the - * active buffer. - */ -int -__wt_log_slot_grow_buffers(WT_SESSION_IMPL *session, size_t newsize) -{ - WT_CONNECTION_IMPL *conn; - WT_DECL_RET; - WT_LOG *log; - WT_LOGSLOT *slot; - int64_t orig_state; - uint64_t old_size, total_growth; - int i; - - conn = S2C(session); - log = conn->log; - total_growth = 0; - WT_STAT_FAST_CONN_INCR(session, log_buffer_grow); - /* - * Take the log slot lock to prevent other threads growing buffers - * at the same time. Could tighten the scope of this lock, or have - * a separate lock if there is contention. - */ - __wt_spin_lock(session, &log->log_slot_lock); - for (i = 0; i < WT_SLOT_POOL; i++) { - slot = &log->slot_pool[i]; - - /* Don't keep growing unrelated buffers. */ - if (slot->slot_buf.memsize > (10 * newsize) && - !F_ISSET(slot, WT_SLOT_BUF_GROW)) - continue; - - /* Avoid atomic operations if they won't succeed. */ - orig_state = slot->slot_state; - if ((orig_state != WT_LOG_SLOT_FREE && - orig_state != WT_LOG_SLOT_READY) || - !WT_ATOMIC_CAS8( - slot->slot_state, orig_state, WT_LOG_SLOT_PENDING)) - continue; - - /* We have a slot - now go ahead and grow the buffer. */ - old_size = slot->slot_buf.memsize; - F_CLR(slot, WT_SLOT_BUF_GROW); - WT_ERR(__wt_buf_grow(session, &slot->slot_buf, - WT_MAX(slot->slot_buf.memsize * 2, newsize))); - slot->slot_state = orig_state; - total_growth += slot->slot_buf.memsize - old_size; - } -err: __wt_spin_unlock(session, &log->log_slot_lock); - WT_STAT_FAST_CONN_INCRV(session, log_buffer_size, total_growth); - return (ret); + return (0); } diff --git a/src/third_party/wiredtiger/src/meta/meta_track.c b/src/third_party/wiredtiger/src/meta/meta_track.c index 5946f81290d..c887af58540 100644 --- a/src/third_party/wiredtiger/src/meta/meta_track.c +++ b/src/third_party/wiredtiger/src/meta/meta_track.c @@ -15,7 +15,7 @@ */ typedef struct __wt_meta_track { enum { - WT_ST_EMPTY, /* Unused slot */ + WT_ST_EMPTY = 0, /* Unused slot */ WT_ST_CHECKPOINT, /* Complete a checkpoint */ WT_ST_DROP_COMMIT, /* Drop post commit */ WT_ST_FILEOP, /* File operation */ @@ -67,6 +67,35 @@ __meta_track_next(WT_SESSION_IMPL *session, WT_META_TRACK **trkp) } /* + * __meta_track_clear -- + * Clear the structure. + */ +static void +__meta_track_clear(WT_SESSION_IMPL *session, WT_META_TRACK *trk) +{ + __wt_free(session, trk->a); + __wt_free(session, trk->b); + memset(trk, 0, sizeof(WT_META_TRACK)); +} + +/* + * __meta_track_err -- + * Drop the last operation off the end of the list, something went wrong + * during initialization. + */ +static void +__meta_track_err(WT_SESSION_IMPL *session) +{ + WT_META_TRACK *trk; + + trk = session->meta_track_next; + --trk; + __meta_track_clear(session, trk); + + session->meta_track_next = trk; +} + +/* * __wt_meta_track_discard -- * Cleanup metadata tracking when closing a session. */ @@ -185,10 +214,7 @@ __meta_track_apply(WT_SESSION_IMPL *session, WT_META_TRACK *trk, int unroll) WT_ILLEGAL_VALUE(session); } -free: trk->op = WT_ST_EMPTY; - __wt_free(session, trk->a); - __wt_free(session, trk->b); - trk->dhandle = NULL; +free: __meta_track_clear(session, trk); return (ret); } @@ -346,14 +372,17 @@ __wt_meta_track_checkpoint(WT_SESSION_IMPL *session) int __wt_meta_track_insert(WT_SESSION_IMPL *session, const char *key) { + WT_DECL_RET; WT_META_TRACK *trk; WT_RET(__meta_track_next(session, &trk)); trk->op = WT_ST_REMOVE; - WT_RET(__wt_strdup(session, key, &trk->a)); - + WT_ERR(__wt_strdup(session, key, &trk->a)); return (0); + +err: __meta_track_err(session); + return (ret); } /* @@ -369,7 +398,7 @@ __wt_meta_track_update(WT_SESSION_IMPL *session, const char *key) WT_RET(__meta_track_next(session, &trk)); trk->op = WT_ST_SET; - WT_RET(__wt_strdup(session, key, &trk->a)); + WT_ERR(__wt_strdup(session, key, &trk->a)); /* * If there was a previous value, keep it around -- if not, then this @@ -380,6 +409,10 @@ __wt_meta_track_update(WT_SESSION_IMPL *session, const char *key) trk->op = WT_ST_REMOVE; ret = 0; } + WT_ERR(ret); + return (0); + +err: __meta_track_err(session); return (ret); } @@ -391,14 +424,18 @@ int __wt_meta_track_fileop( WT_SESSION_IMPL *session, const char *olduri, const char *newuri) { + WT_DECL_RET; WT_META_TRACK *trk; WT_RET(__meta_track_next(session, &trk)); trk->op = WT_ST_FILEOP; - WT_RET(__wt_strdup(session, olduri, &trk->a)); - WT_RET(__wt_strdup(session, newuri, &trk->b)); + WT_ERR(__wt_strdup(session, olduri, &trk->a)); + WT_ERR(__wt_strdup(session, newuri, &trk->b)); return (0); + +err: __meta_track_err(session); + return (ret); } /* @@ -409,13 +446,17 @@ int __wt_meta_track_drop( WT_SESSION_IMPL *session, const char *filename) { + WT_DECL_RET; WT_META_TRACK *trk; WT_RET(__meta_track_next(session, &trk)); trk->op = WT_ST_DROP_COMMIT; - WT_RET(__wt_strdup(session, filename, &trk->a)); + WT_ERR(__wt_strdup(session, filename, &trk->a)); return (0); + +err: __meta_track_err(session); + return (ret); } /* diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c index c69344cb6b6..703bebb1597 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_write.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c @@ -29,7 +29,7 @@ typedef struct { /* Track whether all changes to the page are written. */ uint64_t max_txn; - uint64_t skipped_txn; + uint64_t first_dirty_txn; uint32_t orig_write_gen; /* @@ -162,7 +162,7 @@ typedef struct { * be evicted as new, in-memory pages, restoring the updates on * those pages. */ - WT_UPD_SKIPPED *skip; /* Skipped updates */ + WT_UPD_SKIPPED *skip; /* Skipped updates */ uint32_t skip_next; size_t skip_allocated; @@ -363,6 +363,19 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_STAT_FAST_DATA_INCR(session, rec_pages_eviction); } +#ifdef HAVE_DIAGNOSTIC + { + /* + * Check that transaction time always moves forward for a given page. + * If this check fails, reconciliation can free something that a future + * reconciliation will need. + */ + uint64_t oldest_id = __wt_txn_oldest_id(session); + WT_ASSERT(session, WT_TXNID_LE(mod->last_oldest_id, oldest_id)); + mod->last_oldest_id = oldest_id; + } +#endif + /* Record the most recent transaction ID we will *not* write. */ mod->disk_snap_min = session->txn.snap_min; @@ -689,7 +702,7 @@ __rec_write_init(WT_SESSION_IMPL *session, * Running transactions may update the page after we write it, so * this is the highest ID we can be confident we will see. */ - r->skipped_txn = S2C(session)->txn_global.last_running; + r->first_dirty_txn = S2C(session)->txn_global.last_running; return (0); } @@ -838,6 +851,7 @@ static inline int __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, WT_ROW *rip, WT_CELL_UNPACK *vpack, WT_UPDATE **updp) { + WT_DECL_RET; WT_ITEM ovfl; WT_PAGE *page; WT_UPDATE *upd, *upd_list, *upd_ovfl; @@ -850,12 +864,17 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, page = r->page; /* - * If we're called with an WT_INSERT reference, use its WT_UPDATE - * list, else is an on-page row-store WT_UPDATE list. + * If called with a WT_INSERT item, use its WT_UPDATE list (which must + * exist), otherwise check for an on-page row-store WT_UPDATE list + * (which may not exist). Return immediately if the item has no updates. */ - upd_list = ins == NULL ? WT_ROW_UPDATE(page, rip) : ins->upd; - skipped = 0; + if (ins == NULL) { + if ((upd_list = WT_ROW_UPDATE(page, rip)) == NULL) + return (0); + } else + upd_list = ins->upd; + skipped = 0; for (max_txn = WT_TXN_NONE, min_txn = UINT64_MAX, upd = upd_list; upd != NULL; upd = upd->next) { if ((txnid = upd->txnid) == WT_TXN_ABORTED) @@ -866,9 +885,9 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, max_txn = txnid; if (WT_TXNID_LT(txnid, min_txn)) min_txn = txnid; - if (WT_TXNID_LT(txnid, r->skipped_txn) && + if (WT_TXNID_LT(txnid, r->first_dirty_txn) && !__wt_txn_visible_all(session, txnid)) - r->skipped_txn = txnid; + r->first_dirty_txn = txnid; /* * Record whether any updates were skipped on the way to finding @@ -898,15 +917,15 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, r->max_txn = max_txn; /* - * If all updates are globally visible and no updates were skipped, the + * If no updates were skipped and all updates are globally visible, the * page can be marked clean and we're done, regardless of whether we're * evicting or checkpointing. * - * The oldest transaction ID may have moved while we were scanning the - * page, so it is possible to skip an update but then find that by the - * end of the scan, all updates are stable. + * We have to check both: the oldest transaction ID may have moved while + * we were scanning the update list, so it is possible to skip an update + * but then find that by the end of the scan, all updates are stable. */ - if (__wt_txn_visible_all(session, max_txn) && !skipped) + if (!skipped && __wt_txn_visible_all(session, max_txn)) return (0); /* @@ -976,8 +995,11 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, */ if (vpack != NULL && vpack->raw == WT_CELL_VALUE_OVFL_RM && !__wt_txn_visible_all(session, min_txn)) { - WT_RET(__wt_ovfl_txnc_search( - page, vpack->data, vpack->size, &ovfl)); + if ((ret = __wt_ovfl_txnc_search( + page, vpack->data, vpack->size, &ovfl)) != 0) + WT_PANIC_RET(session, ret, + "cached overflow item discarded early"); + /* * Create an update structure with an impossibly low transaction * ID and append it to the update list we're about to save. @@ -5064,23 +5086,37 @@ err: __wt_scr_free(session, &tkey); * be set before a subsequent checkpoint reads it, and because the * current checkpoint is waiting on this reconciliation to complete, * there's no risk of that happening). - * - * Otherwise, if no updates were skipped, we have a new maximum - * transaction written for the page (used to decide if a clean page can - * be evicted). The page only might be clean; if the write generation - * is unchanged since reconciliation started, clear it and update cache - * dirty statistics, if the write generation changed, then the page has - * been written since we started reconciliation, it cannot be - * discarded. */ if (r->leave_dirty) { - mod->first_dirty_txn = r->skipped_txn; + mod->first_dirty_txn = r->first_dirty_txn; btree->modified = 1; WT_FULL_BARRIER(); } else { + /* + * If no updates were skipped, we have a new maximum transaction + * written for the page (used to decide if a clean page can be + * evicted). Set the highest transaction ID for the page. + * + * Track the highest transaction ID for the tree (used to decide + * if it's safe to discard all of the pages in the tree without + * further checking). Reconciliation in the service of eviction + * is multi-threaded, only update the tree's maximum transaction + * ID when doing a checkpoint. That's sufficient, we only care + * about the highest transaction ID of any update currently in + * the tree, and checkpoint visits every dirty page in the tree. + */ mod->rec_max_txn = r->max_txn; + if (!F_ISSET(r, WT_EVICTING) && + !WT_TXNID_LT(btree->rec_max_txn, r->max_txn)) + btree->rec_max_txn = r->max_txn; + /* + * The page only might be clean; if the write generation is + * unchanged since reconciliation started, it's clean. If the + * write generation changed, the page has been written since + * we started reconciliation and remains dirty. + */ if (WT_ATOMIC_CAS4(mod->write_gen, r->orig_write_gen, 0)) __wt_cache_dirty_decr(session, page); } diff --git a/src/third_party/wiredtiger/src/schema/schema_create.c b/src/third_party/wiredtiger/src/schema/schema_create.c index c7c47a88f3c..4041a1d7b9f 100644 --- a/src/third_party/wiredtiger/src/schema/schema_create.c +++ b/src/third_party/wiredtiger/src/schema/schema_create.c @@ -9,6 +9,22 @@ #include "wt_internal.h" /* + * __wt_schema_create_strip -- + * Discard any configuration information from a schema entry that is not + * applicable to an session.create call, here for the wt dump command utility, + * which only wants to dump the schema information needed for load. + */ +int +__wt_schema_create_strip(WT_SESSION_IMPL *session, + const char *v1, const char *v2, char **value_ret) +{ + const char *cfg[] = + { WT_CONFIG_BASE(session, WT_SESSION_create), v1, v2, NULL }; + + return (__wt_config_collapse(session, cfg, value_ret)); +} + +/* * __wt_direct_io_size_check -- * Return a size from the configuration, complaining if it's insufficient * for direct I/O. diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c index 4f698806511..ef9735a8b98 100644 --- a/src/third_party/wiredtiger/src/session/session_api.c +++ b/src/third_party/wiredtiger/src/session/session_api.c @@ -314,8 +314,10 @@ __wt_open_cursor(WT_SESSION_IMPL *session, * copied. */ if ((*cursorp)->uri == NULL && - (ret = __wt_strdup(session, uri, &(*cursorp)->uri)) != 0) + (ret = __wt_strdup(session, uri, &(*cursorp)->uri)) != 0) { WT_TRET((*cursorp)->close(*cursorp)); + *cursorp = NULL; + } return (ret); } @@ -381,23 +383,6 @@ err: if (cursor != NULL) } /* - * __wt_session_create_strip -- - * Discard any configuration information from a schema entry that is not - * applicable to an session.create call, here for the wt dump command utility, - * which only wants to dump the schema information needed for load. - */ -int -__wt_session_create_strip(WT_SESSION *wt_session, - const char *v1, const char *v2, char **value_ret) -{ - WT_SESSION_IMPL *session = (WT_SESSION_IMPL *)wt_session; - const char *cfg[] = - { WT_CONFIG_BASE(session, WT_SESSION_create), v1, v2, NULL }; - - return (__wt_config_collapse(session, cfg, value_ret)); -} - -/* * __session_create -- * WT_SESSION->create method. */ diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c index 44c2daa3802..0310fdc207c 100644 --- a/src/third_party/wiredtiger/src/support/stat.c +++ b/src/third_party/wiredtiger/src/support/stat.c @@ -458,7 +458,6 @@ __wt_stat_init_connection_stats(WT_CONNECTION_STATS *stats) stats->log_slot_joins.desc = "log: consolidated slot joins"; stats->log_slot_toosmall.desc = "log: failed to find a slot large enough for record"; - stats->log_buffer_grow.desc = "log: log buffer size increases"; stats->log_bytes_payload.desc = "log: log bytes of payload data"; stats->log_bytes_written.desc = "log: log bytes written"; stats->log_compress_writes.desc = "log: log records compressed"; @@ -631,7 +630,6 @@ __wt_stat_refresh_connection_stats(void *stats_arg) stats->log_slot_transitions.v = 0; stats->log_slot_joins.v = 0; stats->log_slot_toosmall.v = 0; - stats->log_buffer_grow.v = 0; stats->log_bytes_payload.v = 0; stats->log_bytes_written.v = 0; stats->log_compress_writes.v = 0; diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c index 432746186fc..c9924056e91 100644 --- a/src/third_party/wiredtiger/src/txn/txn.c +++ b/src/third_party/wiredtiger/src/txn/txn.c @@ -98,7 +98,6 @@ __wt_txn_release_snapshot(WT_SESSION_IMPL *session) WT_ASSERT(session, txn_state->snap_min == WT_TXN_NONE || session->txn.isolation == WT_ISO_READ_UNCOMMITTED || - session->id == S2C(session)->txn_global.checkpoint_id || !__wt_txn_visible_all(session, txn_state->snap_min)); txn_state->snap_min = WT_TXN_NONE; @@ -118,13 +117,13 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session) WT_TXN_STATE *s, *txn_state; uint64_t current_id, id; uint64_t prev_oldest_id, snap_min; - uint32_t ckpt_id, i, n, session_cnt; + uint32_t i, n, session_cnt; int32_t count; conn = S2C(session); txn = &session->txn; txn_global = &conn->txn_global; - txn_state = &txn_global->states[session->id]; + txn_state = WT_SESSION_TXN_STATE(session); current_id = snap_min = txn_global->current; prev_oldest_id = txn_global->oldest_id; @@ -157,12 +156,7 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session) /* Walk the array of concurrent transactions. */ WT_ORDERED_READ(session_cnt, conn->session_cnt); - ckpt_id = txn_global->checkpoint_id; for (i = n = 0, s = txn_global->states; i < session_cnt; i++, s++) { - /* Skip the checkpoint transaction; it is never read from. */ - if (i == ckpt_id) - continue; - /* * Build our snapshot of any concurrent transaction IDs. * @@ -221,7 +215,7 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, int force) WT_TXN_GLOBAL *txn_global; WT_TXN_STATE *s; uint64_t current_id, id, oldest_id, prev_oldest_id, snap_min; - uint32_t ckpt_id, i, session_cnt; + uint32_t i, session_cnt; int32_t count; int last_running_moved; @@ -257,12 +251,7 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, int force) /* Walk the array of concurrent transactions. */ WT_ORDERED_READ(session_cnt, conn->session_cnt); - ckpt_id = txn_global->checkpoint_id; for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) { - /* Skip the checkpoint transaction; it is never read from. */ - if (i == ckpt_id) - continue; - /* * Update the oldest ID. * @@ -310,15 +299,7 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, int force) if (WT_TXNID_LT(prev_oldest_id, oldest_id) && WT_ATOMIC_CAS4(txn_global->scan_count, 1, -1)) { WT_ORDERED_READ(session_cnt, conn->session_cnt); - ckpt_id = txn_global->checkpoint_id; for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) { - /* - * Skip the checkpoint transaction; it is never read - * from. - */ - if (i == ckpt_id) - continue; - if ((id = s->id) != WT_TXN_NONE && WT_TXNID_LT(id, oldest_id)) oldest_id = id; @@ -408,19 +389,31 @@ __wt_txn_release(WT_SESSION_IMPL *session) WT_TXN *txn; WT_TXN_GLOBAL *txn_global; WT_TXN_STATE *txn_state; + int was_oldest; txn = &session->txn; WT_ASSERT(session, txn->mod_count == 0); txn->notify = NULL; txn_global = &S2C(session)->txn_global; - txn_state = &txn_global->states[session->id]; + txn_state = WT_SESSION_TXN_STATE(session); + was_oldest = 0; /* Clear the transaction's ID from the global table. */ - if (F_ISSET(txn, WT_TXN_HAS_ID)) { + if (WT_SESSION_IS_CHECKPOINT(session)) { + WT_ASSERT(session, txn_state->id == WT_TXN_NONE); + txn->id = WT_TXN_NONE; + + /* Clear the global checkpoint transaction IDs. */ + txn_global->checkpoint_id = 0; + txn_global->checkpoint_pinned = WT_TXN_NONE; + } else if (F_ISSET(txn, WT_TXN_HAS_ID)) { WT_ASSERT(session, txn_state->id != WT_TXN_NONE && txn->id != WT_TXN_NONE); WT_PUBLISH(txn_state->id, WT_TXN_NONE); + + /* Quick check for the oldest transaction. */ + was_oldest = (txn->id == txn_global->last_running); txn->id = WT_TXN_NONE; } @@ -439,6 +432,14 @@ __wt_txn_release(WT_SESSION_IMPL *session) txn->isolation = session->isolation; /* Ensure the transaction flags are cleared on exit */ txn->flags = 0; + + /* + * When the oldest transaction in the system completes, bump the oldest + * ID. This is racy and so not guaranteed, but in practice it keeps + * the oldest ID from falling too far behind. + */ + if (was_oldest) + __wt_txn_update_oldest(session, 1); } /* @@ -518,6 +519,7 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) */ __wt_txn_release_snapshot(session); ret = __wt_txn_log_commit(session, cfg); + WT_ASSERT(session, ret == 0); } /* @@ -648,19 +650,19 @@ __wt_txn_stats_update(WT_SESSION_IMPL *session) WT_TXN_GLOBAL *txn_global; WT_CONNECTION_IMPL *conn; WT_CONNECTION_STATS *stats; - uint64_t checkpoint_snap_min; + uint64_t checkpoint_pinned; conn = S2C(session); txn_global = &conn->txn_global; stats = &conn->stats; - checkpoint_snap_min = txn_global->checkpoint_snap_min; + checkpoint_pinned = txn_global->checkpoint_pinned; WT_STAT_SET(stats, txn_pinned_range, txn_global->current - txn_global->oldest_id); WT_STAT_SET(stats, txn_pinned_checkpoint_range, - checkpoint_snap_min == WT_TXN_NONE ? - 0 : txn_global->current - checkpoint_snap_min); + checkpoint_pinned == WT_TXN_NONE ? + 0 : txn_global->current - checkpoint_pinned); } /* diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c index cfc993418c5..f317a3dc697 100644 --- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c +++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c @@ -349,6 +349,7 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_TXN *txn; WT_TXN_GLOBAL *txn_global; WT_TXN_ISOLATION saved_isolation; + WT_TXN_STATE *txn_state; void *saved_meta_next; u_int i; int full, fullckpt_logging, idle, tracking; @@ -358,6 +359,7 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) conn = S2C(session); txn = &session->txn; txn_global = &conn->txn_global; + txn_state = WT_SESSION_TXN_STATE(session); saved_isolation = session->isolation; full = fullckpt_logging = idle = tracking = 0; @@ -429,6 +431,22 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_ERR(__checkpoint_verbose_track(session, "starting transaction", &verb_timer)); + if (full) + WT_ERR(__wt_epoch(session, &start)); + + /* + * Bump the global checkpoint generation, used to figure out whether + * checkpoint has visited a tree. There is no need for this to be + * atomic: it is only written while holding the checkpoint lock. + * + * We do need to update it before clearing the checkpoint's entry out + * of the transaction table, or a thread evicting in a tree could + * ignore the checkpoint's transaction. + */ + ++txn_global->checkpoint_gen; + WT_STAT_FAST_CONN_SET(session, + txn_checkpoint_generation, txn_global->checkpoint_gen); + /* * Start a snapshot transaction for the checkpoint. * @@ -436,30 +454,44 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) * side effects on cursors, which applications can hold open across * calls to checkpoint. */ - if (full) - WT_ERR(__wt_epoch(session, &start)); WT_ERR(__wt_txn_begin(session, txn_cfg)); /* Ensure a transaction ID is allocated prior to sharing it globally */ WT_ERR(__wt_txn_id_check(session)); /* - * Save a copy of the checkpoint session ID so that refresh can skip - * the checkpoint transactions. We never do checkpoints in the default - * session with id zero. Save a copy of the snap min so that visibility - * checks for the checkpoint use the right ID. + * Save the checkpoint session ID. We never do checkpoints in the + * default session (with id zero). */ - WT_ASSERT(session, session->id != 0); + WT_ASSERT(session, session->id != 0 && txn_global->checkpoint_id == 0); txn_global->checkpoint_id = session->id; - txn_global->checkpoint_snap_min = session->txn.snap_min; + + txn_global->checkpoint_pinned = + WT_MIN(txn_state->id, txn_state->snap_min); /* - * No need for this to be atomic it is only written while holding the - * checkpoint lock. + * We're about to clear the checkpoint transaction from the global + * state table so the oldest ID can move forward. Make sure everything + * we've done above is scheduled. */ - txn_global->checkpoint_gen += 1; - WT_STAT_FAST_CONN_SET(session, - txn_checkpoint_generation, txn_global->checkpoint_gen); + WT_FULL_BARRIER(); + + /* + * Sanity check that the oldest ID hasn't moved on before we have + * cleared our entry. + */ + WT_ASSERT(session, + WT_TXNID_LE(txn_global->oldest_id, txn_state->id) && + WT_TXNID_LE(txn_global->oldest_id, txn_state->snap_min)); + + /* + * Clear our entry from the global transaction session table. Any + * operation that needs to know about the ID for this checkpoint will + * consider the checkpoint ID in the global structure. Most operations + * can safely ignore the checkpoint ID (see the visible all check for + * details). + */ + txn_state->id = txn_state->snap_min = WT_TXN_NONE; /* Tell logging that we have started a database checkpoint. */ if (fullckpt_logging) @@ -478,10 +510,6 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) /* Release the snapshot so we aren't pinning pages in cache. */ __wt_txn_release_snapshot(session); - /* Clear the global checkpoint transaction IDs */ - txn_global->checkpoint_id = 0; - txn_global->checkpoint_snap_min = WT_TXN_NONE; - WT_ERR(__checkpoint_verbose_track(session, "committing transaction", &verb_timer)); @@ -558,10 +586,6 @@ err: /* WT_TRET(__wt_txn_rollback(session, NULL)); } - /* Ensure the checkpoint IDs are cleared on the error path. */ - txn_global->checkpoint_id = 0; - txn_global->checkpoint_snap_min = WT_TXN_NONE; - /* * Tell logging that we have finished a database checkpoint. Do not * write a log record if the database was idle. @@ -813,10 +837,8 @@ __checkpoint_worker( force = 1; } if (!btree->modified && !force) { - if (!is_checkpoint) { - F_SET(btree, WT_BTREE_SKIP_CKPT); - goto done; - } + if (!is_checkpoint) + goto nockpt; deleted = 0; WT_CKPT_FOREACH(ckptbase, ckpt) @@ -835,7 +857,12 @@ __checkpoint_worker( (WT_PREFIX_MATCH(name, WT_CHECKPOINT) && WT_PREFIX_MATCH((ckpt - 1)->name, WT_CHECKPOINT))) && deleted < 2) { - F_SET(btree, WT_BTREE_SKIP_CKPT); +nockpt: F_SET(btree, WT_BTREE_SKIP_CKPT); + WT_PUBLISH(btree->checkpoint_gen, + S2C(session)->txn_global.checkpoint_gen); + WT_STAT_FAST_DATA_SET(session, + btree_checkpoint_generation, + btree->checkpoint_gen); goto done; } } @@ -853,7 +880,7 @@ __checkpoint_worker( * Hold the lock until we're done (blocking hot backups from starting), * we don't want to race with a future hot backup. */ - __wt_spin_lock(session, &conn->hot_backup_lock); + WT_ERR(__wt_readlock(session, conn->hot_backup_lock)); hot_backup_locked = 1; if (conn->hot_backup) WT_CKPT_FOREACH(ckptbase, ckpt) { @@ -1063,16 +1090,8 @@ fake: /* WT_ERR(__wt_txn_checkpoint_log( session, 0, WT_TXN_LOG_CKPT_STOP, NULL)); - /* - * Update the checkpoint generation for this handle so visible - * updates newer than the checkpoint can be evicted. - */ -done: btree->checkpoint_gen = conn->txn_global.checkpoint_gen; - WT_STAT_FAST_DATA_SET(session, - btree_checkpoint_generation, btree->checkpoint_gen); - -err: - /* +done: +err: /* * If the checkpoint didn't complete successfully, make sure the * tree is marked dirty. */ @@ -1080,7 +1099,7 @@ err: btree->modified = 1; if (hot_backup_locked) - __wt_spin_unlock(session, &conn->hot_backup_lock); + WT_TRET(__wt_readunlock(session, conn->hot_backup_lock)); __wt_meta_ckptlist_free(session, ckptbase); __wt_free(session, name_alloc); diff --git a/src/third_party/wiredtiger/src/utilities/util_dump.c b/src/third_party/wiredtiger/src/utilities/util_dump.c index 0ae201ea21e..28d0309242d 100644 --- a/src/third_party/wiredtiger/src/utilities/util_dump.c +++ b/src/third_party/wiredtiger/src/utilities/util_dump.c @@ -149,9 +149,9 @@ dump_config(WT_SESSION *session, const char *uri, int hex) /* Open a metadata cursor. */ if ((ret = session->open_cursor( - session, WT_METADATA_URI, NULL, NULL, &cursor)) != 0) { - fprintf(stderr, "%s: %s: session.open_cursor: %s\n", - progname, WT_METADATA_URI, session->strerror(session, ret)); + session, "metadata:create", NULL, NULL, &cursor)) != 0) { + fprintf(stderr, "%s: %s: session.open_cursor: %s\n", progname, + "metadata:create", session->strerror(session, ret)); return (1); } /* @@ -225,7 +225,7 @@ dump_json_table_begin( { WT_DECL_RET; const char *name; - char *jsonconfig, *stripped; + char *jsonconfig; jsonconfig = NULL; @@ -236,12 +236,7 @@ dump_json_table_begin( } ++name; - if ((ret = - __wt_session_create_strip(session, config, NULL, &stripped)) != 0) - return (util_err(session, ret, NULL)); - ret = dup_json_string(stripped, &jsonconfig); - free(stripped); - if (ret != 0) + if ((ret = dup_json_string(config, &jsonconfig)) != 0) return (util_cerr(cursor, "config dup", ret)); if (printf(" \"%s\" : [\n {\n", uri) < 0) goto eio; @@ -278,7 +273,7 @@ dump_json_table_cg(WT_SESSION *session, WT_CURSOR *cursor, WT_DECL_RET; const char *key, *skip, *value; int exact, once; - char *jsonconfig, *stripped; + char *jsonconfig; static const char * const indent = " "; once = 0; @@ -326,12 +321,7 @@ match: if ((ret = cursor->get_key(cursor, &key)) != 0) if ((ret = cursor->get_value(cursor, &value)) != 0) return (util_cerr(cursor, "get_value", ret)); - if ((ret = __wt_session_create_strip( - session, value, NULL, &stripped)) != 0) - return (util_err(session, ret, NULL)); - ret = dup_json_string(stripped, &jsonconfig); - free(stripped); - if (ret != 0) + if ((ret = dup_json_string(value, &jsonconfig)) != 0) return (util_cerr(cursor, "config dup", ret)); ret = printf("%s\n" "%s{\n" @@ -362,67 +352,42 @@ dump_json_table_config(WT_SESSION *session, const char *uri) { WT_CURSOR *cursor; WT_DECL_RET; - WT_EXTENSION_API *wtext; int tret; char *value; /* Dump the config. */ - if (WT_PREFIX_MATCH(uri, "table:")) { - /* Open a metadata cursor. */ - if ((ret = session->open_cursor( - session, WT_METADATA_URI, NULL, NULL, &cursor)) != 0) { - fprintf(stderr, "%s: %s: session.open_cursor: %s\n", - progname, WT_METADATA_URI, - session->strerror(session, ret)); - return (1); - } + /* Open a metadata cursor. */ + if ((ret = session->open_cursor( + session, "metadata:create", NULL, NULL, &cursor)) != 0) { + fprintf(stderr, "%s: %s: session.open_cursor: %s\n", + progname, "metadata:create", + session->strerror(session, ret)); + return (1); + } - /* - * Search for the object itself, to make sure it - * exists, and get its config string. This where we - * find out a table object doesn't exist, use a simple - * error message. - */ - cursor->set_key(cursor, uri); - if ((ret = cursor->search(cursor)) == 0) { - if ((ret = cursor->get_value(cursor, &value)) != 0) - ret = util_cerr(cursor, "get_value", ret); - else if (dump_json_table_begin( - session, cursor, uri, value) != 0) - ret = 1; - } else if (ret == WT_NOTFOUND) - ret = util_err( - session, 0, "%s: No such object exists", uri); - else - ret = util_err(session, ret, "%s", uri); + /* + * Search for the object itself, to make sure it + * exists, and get its config string. This where we + * find out a table object doesn't exist, use a simple + * error message. + */ + cursor->set_key(cursor, uri); + if ((ret = cursor->search(cursor)) == 0) { + if ((ret = cursor->get_value(cursor, &value)) != 0) + ret = util_cerr(cursor, "get_value", ret); + else if (dump_json_table_begin( + session, cursor, uri, value) != 0) + ret = 1; + } else if (ret == WT_NOTFOUND) + ret = util_err( + session, 0, "%s: No such object exists", uri); + else + ret = util_err(session, ret, "%s", uri); - if ((tret = cursor->close(cursor)) != 0) { - tret = util_cerr(cursor, "close", tret); - if (ret == 0) - ret = tret; - } - } else { - /* - * We want to be able to dump the metadata file itself, but the - * configuration for that file lives in the turtle file. Reach - * down into the library and ask for the file's configuration, - * that will work in all cases. - * - * This where we find out a file object doesn't exist, use a - * simple error message. - */ - wtext = session-> - connection->get_extension_api(session->connection); - if ((ret = - wtext->metadata_search(wtext, session, uri, &value)) == 0) { - if (dump_json_table_begin( - session, NULL, uri, value) != 0) - ret = 1; - } else if (ret == WT_NOTFOUND) - ret = util_err( - session, 0, "%s: No such object exists", uri); - else - ret = util_err(session, ret, "%s", uri); + if ((tret = cursor->close(cursor)) != 0) { + tret = util_cerr(cursor, "close", tret); + if (ret == 0) + ret = tret; } return (ret); @@ -687,17 +652,19 @@ print_config(WT_SESSION *session, { WT_DECL_RET; char *value_ret; + const char *cfg[] = { v1, v2, NULL }; /* - * The underlying call will ignore v2 if v1 is NULL -- check here and - * swap in that case. + * The underlying call will stop if the first string is NULL -- check + * here and swap in that case. */ - if (v1 == NULL) { - v1 = v2; - v2 = NULL; + if (cfg[0] == NULL) { + cfg[0] = cfg[1]; + cfg[1] = NULL; } - if ((ret = __wt_session_create_strip(session, v1, v2, &value_ret)) != 0) + if ((ret = __wt_config_collapse( + (WT_SESSION_IMPL *)session, cfg, &value_ret)) != 0) return (util_err(session, ret, NULL)); ret = printf("%s\n%s\n", key, value_ret); free((char *)value_ret); |