diff options
author | Alex Gorrod <alexg@wiredtiger.com> | 2016-03-22 14:55:00 +1100 |
---|---|---|
committer | Alex Gorrod <alexg@wiredtiger.com> | 2016-03-22 14:55:05 +1100 |
commit | 945022433fa60895a6bf412414a89da5a8c14e8c (patch) | |
tree | d4038459477a402d62a5608e69db992be9fc2011 | |
parent | 8a5eb4206775a9497a44f01e9108743cc3cd25c4 (diff) | |
download | mongo-945022433fa60895a6bf412414a89da5a8c14e8c.tar.gz |
Import wiredtiger-wiredtiger-2.7.0-1122-g9cf8eb2.tar.gz from wiredtiger branch mongodb-3.4
ref: 444981a..9cf8eb2
SERVER-23040 Coverity analysis defect 98151: Dereference after null check
WT-2123 Don't clear allocated memory if not required
WT-2318 Configurable thread wake up time
WT-2322 Join cursor with isolation read-uncommitted may give different results with Bloom filters
WT-2345 Evicting tiny pages creates small pages on disk
WT-2375 Add tests for collators
WT-2381 Dump utility discards table config
WT-2384 lt, le conditions for ordering cursor in join cursor
WT-2391 De-prioritize eviction from indexes
WT-2404 Add streaming pack/unpack methods to the extension API
WT-2414 Avoid extractor calls for ordering cursor in join cursor
WT-2418 Rebalance operation failing with EBUSY
WT-2426 Deadlock caused by recent changes to checkpoint handle locking
WT-2431 Join statistics documentation needed
WT-2435 __wt_evict_file_exclusive_on/off cleanups
WT-2436 lt, le conditions for ref cursor with "strategy=bloom" in join cursor
WT-2443 Getting statistic for all indexes used in join cursor
WT-2444 Broken flag test in wtperf, whitespace
WT-2447 Join cursor reads main table
WT-2448 Add no_scale flag to relevant statistics
WT-2449 Configure should check for a 64-bit build
WT-2451 Allow eviction of metadata
WT-2454 checkpoint_sync=false does *not* prevent flushes/sync to disk.
WT-2456 Update Power8 CRC32 Code
WT-2457 Dropping an LSM table can fail with EBUSY when no user ops are active
WT-2459 Allow Configure scripts to provide the --tag option for libtool when compiling on PPC
WT-2460 Checkpoint failing with WT_ROLLBACK
WT-2461 Python sweep01 test failing
WT-2463 Test that measures idle CPU usage fails under valgrind
WT-2464 Valgrind errors.
WT-2465 Coverity 1352899: Dereference before null check
WT-2466 Coverity 1352893 Buffer not null terminated
WT-2467 Coverity 1352894: Logically dead code
WT-2468 Coverity 1352896: Explicit null dereferenced
WT-2469 Coverity 1352897: Integer overflowed argument
WT-2470 Coverity 1352898: Resource leak
WT-2471 Review WiredTiger "int" printf formats
WT-2473 MSVC doesn't support PRId64
WT-2475 Have reconf script remove cached configure results
WT-2476 btree->evict_lock is being accessed after being destroyed
WT-2477 Missing define in Windows wiredtiger_config.h
WT-2478 Valgrind test failures
WT-2481 Recent changes affect LSM performance
WT-2482 Coverity 1353015, 1353016, out-of-bounds access
WT-2483 readonly02 periodically fails
WT-2484 Coverity 1345809: unchecked return value
WT-2485 Test/format failure with Floating point exception
WT-2487 Release memory in manydbs test
WT-2489 Fix compiler warnings from /test/manydbs
WT-2490 search_near() returns wrong key for column-store
WT-2492 Windows test_config04.test_config04.test_invalid_config crashes
WT-2493 Verbose lsm_manager unsupported
WT-2494 Review calls to __wt_free, plus minor bug in an error path.
WT-2495 Missing memory initialization leads to crash on Windows
WT-2496 Testing revealed error unable to read root page
WT-2497 Enhance test/format to save a copy of backup
WT-2498 LSM tree drop hangs when a user cursor is open
WT-2499 LSM shutdown race causes segfault
WT-2501 Dropping a just opened LSM tree isn't safe
WT-2502 Memory leak in locking handles for checkpoint
WT-2503 Build warning in lsm_tree.c
WT-2506 Using an uninitialised value
135 files changed, 4213 insertions, 1433 deletions
diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf.c b/src/third_party/wiredtiger/bench/wtperf/wtperf.c index 5755e22dd2f..340c400ba7e 100644 --- a/src/third_party/wiredtiger/bench/wtperf/wtperf.c +++ b/src/third_party/wiredtiger/bench/wtperf/wtperf.c @@ -1668,7 +1668,7 @@ execute_workload(CONFIG *cfg) for (threads = cfg->workers, i = 0, workp = cfg->workload; i < cfg->workload_cnt; ++i, ++workp) { lprintf(cfg, 0, 1, - "Starting workload #%d: %" PRId64 " threads, inserts=%" + "Starting workload #%u: %" PRId64 " threads, inserts=%" PRId64 ", reads=%" PRId64 ", updates=%" PRId64 ", truncate=%" PRId64 ", throttle=%" PRId64, i + 1, workp->threads, workp->insert, @@ -2267,7 +2267,7 @@ main(int argc, char *argv[]) * the compact operation, but not for the workloads. */ if (cfg->async_threads > 0) { - if (F_ISSET(cfg, CFG_TRUNCATE) > 0) { + if (F_ISSET(cfg, CFG_TRUNCATE)) { lprintf(cfg, 1, 0, "Cannot run truncate and async\n"); goto err; } @@ -2285,7 +2285,7 @@ main(int argc, char *argv[]) req_len = strlen(",async=(enabled=true,threads=)") + 4; cfg->async_config = dcalloc(req_len, 1); snprintf(cfg->async_config, req_len, - ",async=(enabled=true,threads=%d)", + ",async=(enabled=true,threads=%" PRIu32 ")", cfg->async_threads); } if ((ret = config_compress(cfg)) != 0) diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf.h b/src/third_party/wiredtiger/bench/wtperf/wtperf.h index c591499b907..a2b497b3142 100644 --- a/src/third_party/wiredtiger/bench/wtperf/wtperf.h +++ b/src/third_party/wiredtiger/bench/wtperf/wtperf.h @@ -337,7 +337,7 @@ generate_key(CONFIG *cfg, char *key_buf, uint64_t keyno) static inline void extract_key(char *key_buf, uint64_t *keynop) { - sscanf(key_buf, "%" SCNu64, keynop); + (void)sscanf(key_buf, "%" SCNu64, keynop); } /* @@ -370,11 +370,11 @@ dmalloc(size_t len) * Call calloc, dying on failure. 
*/ static inline void * -dcalloc(size_t num, size_t len) +dcalloc(size_t num, size_t size) { void *p; - if ((p = calloc(len, num)) == NULL) + if ((p = calloc(num, size)) == NULL) die(errno, "calloc"); return (p); } @@ -416,11 +416,9 @@ static inline char * dstrndup(const char *str, const size_t len) { char *p; - p = dcalloc(len + 1, 1); - strncpy(p, str, len); - if (p == NULL) - die(errno, "dstrndup"); + p = dcalloc(len + 1, sizeof(char)); + memcpy(p, str, len); return (p); } #endif diff --git a/src/third_party/wiredtiger/build_posix/Make.subdirs b/src/third_party/wiredtiger/build_posix/Make.subdirs index 14258666d84..4e1f829c0c5 100644 --- a/src/third_party/wiredtiger/build_posix/Make.subdirs +++ b/src/third_party/wiredtiger/build_posix/Make.subdirs @@ -6,6 +6,7 @@ # If the directory exists, it is added to AUTO_SUBDIRS. # If a condition is included, the subdir is made conditional via AM_CONDITIONAL ext/collators/reverse +ext/collators/revint ext/compressors/lz4 LZ4 ext/compressors/nop ext/compressors/snappy SNAPPY @@ -30,6 +31,7 @@ test/cursor_order test/fops test/format test/huge +test/manydbs test/packing test/readonly test/recovery diff --git a/src/third_party/wiredtiger/build_posix/configure.ac.in b/src/third_party/wiredtiger/build_posix/configure.ac.in index 06d73e2fe12..9251873be73 100644 --- a/src/third_party/wiredtiger/build_posix/configure.ac.in +++ b/src/third_party/wiredtiger/build_posix/configure.ac.in @@ -34,6 +34,22 @@ AC_PROG_CC(cc gcc) AC_PROG_CXX(c++ g++) AM_PROG_AS(as gas) +# This is a workaround as part of WT-2459. Currently, clang (v3.7) does not +# support compiling the ASM code we have to perform the CRC checks on PowerPC. +# To compile with clang we need to override the ASM compiler with CCAS to use +# gcc. Unfortunately, doing the compilation in this manner means libtool can't +# determine what tag to use for that one .S file. 
If we catch that we are using +# two different compilers for CC and CCAS and we are on a PowerPC system we +# overload the libtool flags to provide CC by default. +if test "$CC" != "$CCAS"; then + AS_CASE([$host_cpu], + [ppc64*], [AM_LIBTOOLFLAGS+="--tag=CC"], + [elf64lppc], [AM_LIBTOOLFLAGS+="--tag=CC"], + [powerpc*], [AM_LIBTOOLFLAGS+="--tag=CC"], + []) +fi +AC_SUBST(AM_LIBTOOLFLAGS) + if test "$GCC" = "yes"; then # The Solaris gcc compiler gets the additional -pthreads flag. if test "`uname -s`" = "SunOS"; then @@ -97,6 +113,13 @@ AC_SYS_LARGEFILE AC_C_BIGENDIAN +AC_MSG_CHECKING([for a 64-bit build]) +AC_COMPUTE_INT(ac_cv_sizeof_void_p, [sizeof(void *)]) +if test "$ac_cv_sizeof_void_p" != "8" ; then + AC_MSG_ERROR([WiredTiger requires a 64-bit build.]) +fi +AC_MSG_RESULT(yes) + # Linux requires _GNU_SOURCE to be defined case "$host_os" in linux*) AM_CFLAGS="$AM_CFLAGS -D_GNU_SOURCE" ;; diff --git a/src/third_party/wiredtiger/build_posix/reconf b/src/third_party/wiredtiger/build_posix/reconf index 8700c5da43d..16d4002d9b9 100755 --- a/src/third_party/wiredtiger/build_posix/reconf +++ b/src/third_party/wiredtiger/build_posix/reconf @@ -24,6 +24,7 @@ clean() aclocal.m4 \ auto-includes.chk \ autom4te.cache \ + config.cache \ config.hin \ config.hin~ \ config.log \ diff --git a/src/third_party/wiredtiger/build_win/filelist.win b/src/third_party/wiredtiger/build_win/filelist.win index 0a313026793..b6a9caf4a74 100644 --- a/src/third_party/wiredtiger/build_win/filelist.win +++ b/src/third_party/wiredtiger/build_win/filelist.win @@ -155,6 +155,7 @@ src/session/session_compact.c src/session/session_dhandle.c src/session/session_salvage.c src/support/cksum.c +src/support/cond_auto.c src/support/crypto.c src/support/err.c src/support/filename.c diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py index 5575bd9f790..02aee1e8825 100644 --- a/src/third_party/wiredtiger/dist/api_data.py +++ 
b/src/third_party/wiredtiger/dist/api_data.py @@ -76,12 +76,12 @@ lsm_config = [ Config('bloom', 'true', r''' create bloom filters on LSM tree chunks as they are merged''', type='boolean'), - Config('bloom_config', '', r''' - config string used when creating Bloom filter files, passed - to WT_SESSION::create'''), Config('bloom_bit_count', '16', r''' the number of bits used per item for LSM bloom filters''', min='2', max='1000'), + Config('bloom_config', '', r''' + config string used when creating Bloom filter files, passed + to WT_SESSION::create'''), Config('bloom_hash_count', '8', r''' the number of hash values per item used for LSM bloom filters''', @@ -299,6 +299,15 @@ file_meta = file_config + [ the file version'''), ] +lsm_meta = file_config + lsm_config + [ + Config('last', '', r''' + the last allocated chunk ID'''), + Config('chunks', '', r''' + active chunks in the LSM tree'''), + Config('old_chunks', '', r''' + obsolete chunks in the LSM tree'''), +] + table_only_config = [ Config('colgroups', '', r''' comma-separated list of names of column groups. 
Each column @@ -741,12 +750,16 @@ cursor_runtime_config = [ ] methods = { -'file.meta' : Method(file_meta), - 'colgroup.meta' : Method(colgroup_meta), +'file.config' : Method(file_config), + +'file.meta' : Method(file_meta), + 'index.meta' : Method(index_meta), +'lsm.meta' : Method(lsm_meta), + 'table.meta' : Method(table_meta), 'WT_CURSOR.close' : Method([]), diff --git a/src/third_party/wiredtiger/dist/filelist b/src/third_party/wiredtiger/dist/filelist index 4ed7d7e3beb..350e0c50087 100644 --- a/src/third_party/wiredtiger/dist/filelist +++ b/src/third_party/wiredtiger/dist/filelist @@ -153,6 +153,7 @@ src/session/session_compact.c src/session/session_dhandle.c src/session/session_salvage.c src/support/cksum.c +src/support/cond_auto.c src/support/crypto.c src/support/err.c src/support/filename.c diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok index 2caaddcc15a..6762521ca76 100644 --- a/src/third_party/wiredtiger/dist/s_string.ok +++ b/src/third_party/wiredtiger/dist/s_string.ok @@ -758,6 +758,8 @@ majorp malloc marshall marshalled +maxcpu +maxdbs mbll mbss mem @@ -770,6 +772,7 @@ memset memsize metaconf metadata +metadata's metafile mfence minorp @@ -806,6 +809,7 @@ nfilename nhex nlpo nocase +noclear nocrypto nolock nonliteral @@ -844,8 +848,11 @@ parserp patchp pathname pathnames +pclose +pcpu perf pfx +popen poptable popthreads portably @@ -871,6 +878,7 @@ ps psp pthread ptr +ptrdiff pushms putK putV @@ -908,6 +916,7 @@ resize resizing ret retp +revint rf rle rmw @@ -988,6 +997,7 @@ t's tV tablename tcbench +td testutil th tid diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py index 09e5643a5d6..bd951e64999 100644 --- a/src/third_party/wiredtiger/dist/stat_data.py +++ b/src/third_party/wiredtiger/dist/stat_data.py @@ -109,6 +109,8 @@ connection_stats = [ ########################################## # System statistics 
########################################## + ConnStat('cond_auto_wait', 'auto adjusting condition wait calls'), + ConnStat('cond_auto_wait_reset', 'auto adjusting condition resets'), ConnStat('cond_wait', 'pthread mutex condition wait calls'), ConnStat('file_open', 'files currently open', 'no_clear,no_scale'), ConnStat('memory_allocation', 'memory allocations'), @@ -124,7 +126,7 @@ connection_stats = [ ########################################## AsyncStat('async_alloc_race', 'number of allocation state races'), AsyncStat('async_alloc_view', 'number of operation slots viewed for allocation'), - AsyncStat('async_cur_queue', 'current work queue length'), + AsyncStat('async_cur_queue', 'current work queue length', 'no_scale'), AsyncStat('async_flush', 'number of flush calls'), AsyncStat('async_full', 'number of times operation allocation failed'), AsyncStat('async_max_queue', 'maximum work queue length', 'no_clear,no_scale'), @@ -218,6 +220,8 @@ connection_stats = [ LogStat('log_compress_write_fails', 'log records not compressed'), LogStat('log_compress_writes', 'log records compressed'), LogStat('log_flush', 'log flush operations'), + LogStat('log_force_write', 'log force write operations'), + LogStat('log_force_write_skip', 'log force write operations skipped'), LogStat('log_max_filesize', 'maximum log file size', 'no_clear,no_scale,size'), LogStat('log_prealloc_files', 'pre-allocated log files prepared'), LogStat('log_prealloc_max', 'number of pre-allocated log files to create', 'no_clear,no_scale'), @@ -238,6 +242,7 @@ connection_stats = [ LogStat('log_sync', 'log sync operations'), LogStat('log_sync_dir', 'log sync_dir operations'), LogStat('log_write_lsn', 'log server thread advances write LSN'), + LogStat('log_write_lsn_skip', 'log server thread write LSN walk skipped'), LogStat('log_writes', 'log write operations'), LogStat('log_zero_fills', 'log files manually zero-filled'), @@ -397,7 +402,7 @@ dsrc_stats = [ BlockStat('block_magic', 'file magic number', 
'max_aggregate,no_scale'), BlockStat('block_major', 'file major version number', 'max_aggregate,no_scale'), BlockStat('block_minor', 'minor version number', 'max_aggregate,no_scale'), - BlockStat('block_reuse_bytes', 'file bytes available for reuse', 'size'), + BlockStat('block_reuse_bytes', 'file bytes available for reuse', 'no_scale,size'), BlockStat('block_size', 'file size in bytes', 'no_scale,size'), ########################################## diff --git a/src/third_party/wiredtiger/examples/c/ex_async.c b/src/third_party/wiredtiger/examples/c/ex_async.c index 584c3e54b87..ecdbd2f4fea 100644 --- a/src/third_party/wiredtiger/examples/c/ex_async.c +++ b/src/third_party/wiredtiger/examples/c/ex_async.c @@ -218,7 +218,7 @@ main(void) */ ret = conn->close(conn, NULL); - printf("Searched for %d keys\n", ex_asynckeys.num_keys); + printf("Searched for %" PRIu32 " keys\n", ex_asynckeys.num_keys); return (ret); } diff --git a/src/third_party/wiredtiger/examples/c/ex_config_parse.c b/src/third_party/wiredtiger/examples/c/ex_config_parse.c index 124eff21130..be3c78bedd4 100644 --- a/src/third_party/wiredtiger/examples/c/ex_config_parse.c +++ b/src/third_party/wiredtiger/examples/c/ex_config_parse.c @@ -30,6 +30,7 @@ * configuration strings. */ +#include <inttypes.h> #include <stdio.h> #include <string.h> @@ -99,7 +100,7 @@ main(void) while ((ret = parser->next(parser, &k, &v)) == 0) { printf("%.*s:", (int)k.len, k.str); if (v.type == WT_CONFIG_ITEM_NUM) - printf("%d\n", (int)v.val); + printf("%" PRId64 "\n", v.val); else printf("%.*s\n", (int)v.len, v.str); } @@ -126,7 +127,7 @@ main(void) "log.file_max configuration: %s", wiredtiger_strerror(ret)); return (ret); } - printf("log file max: %d\n", (int)v.val); + printf("log file max: %" PRId64 "\n", v.val); /*! 
[nested get] */ ret = parser->close(parser); diff --git a/src/third_party/wiredtiger/examples/c/ex_extractor.c b/src/third_party/wiredtiger/examples/c/ex_extractor.c index fff9c79f8e0..8623f4759fc 100644 --- a/src/third_party/wiredtiger/examples/c/ex_extractor.c +++ b/src/third_party/wiredtiger/examples/c/ex_extractor.c @@ -99,11 +99,13 @@ my_extract(WT_EXTRACTOR *extractor, WT_SESSION *session, * key(s). WiredTiger will perform the required operation * (such as a remove()). */ - fprintf(stderr, "EXTRACTOR: index op for year %d: %s %s\n", + fprintf(stderr, + "EXTRACTOR: index op for year %" PRIu16 ": %s %s\n", year, first_name, last_name); result_cursor->set_key(result_cursor, year); if ((ret = result_cursor->insert(result_cursor)) != 0) { - fprintf(stderr, "EXTRACTOR: op year %d: error %d\n", + fprintf(stderr, + "EXTRACTOR: op year %" PRIu16 ": error %d\n", year, ret); return (ret); } @@ -157,7 +159,7 @@ read_index(WT_SESSION *session) */ for (i = 0; i < 10 && RET_OK(ret); i++) { year = (uint16_t)((rand() % YEAR_SPAN) + YEAR_BASE); - printf("Year %d:\n", year); + printf("Year %" PRIu16 ":\n", year); cursor->set_key(cursor, year); if ((ret = cursor->search(cursor)) != 0) break; @@ -181,7 +183,7 @@ read_index(WT_SESSION *session) } } if (!RET_OK(ret)) - fprintf(stderr, "Error %d for year %d\n", ret, year); + fprintf(stderr, "Error %d for year %" PRIu16 "\n", ret, year); ret = cursor->close(cursor); return (ret); @@ -245,7 +247,8 @@ setup_table(WT_SESSION *session) cursor->set_key(cursor, p.id); cursor->set_value(cursor, p.last_name, p.first_name, p.term_start, p.term_end); - fprintf(stderr, "SETUP: table insert %d-%d: %s %s\n", + fprintf(stderr, + "SETUP: table insert %" PRIu16 "-%" PRIu16 ": %s %s\n", p.term_start, p.term_end, p.first_name, p.last_name); ret = cursor->insert(cursor); diff --git a/src/third_party/wiredtiger/examples/c/ex_schema.c b/src/third_party/wiredtiger/examples/c/ex_schema.c index fdf02d12302..70fc7eb2e62 100644 --- 
a/src/third_party/wiredtiger/examples/c/ex_schema.c +++ b/src/third_party/wiredtiger/examples/c/ex_schema.c @@ -69,7 +69,7 @@ main(void) { POP_RECORD *p; WT_CONNECTION *conn; - WT_CURSOR *cursor, *cursor2, *join_cursor; + WT_CURSOR *cursor, *cursor2, *join_cursor, *stat_cursor; WT_SESSION *session; const char *country; uint64_t recno, population; @@ -86,7 +86,8 @@ main(void) } else home = NULL; - if ((ret = wiredtiger_open(home, NULL, "create", &conn)) != 0) { + if ((ret = wiredtiger_open( + home, NULL, "create,statistics=(fast)", &conn)) != 0) { fprintf(stderr, "Error connecting to %s: %s\n", home, wiredtiger_strerror(ret)); return (ret); @@ -164,7 +165,8 @@ main(void) ret = cursor->get_key(cursor, &recno); ret = cursor->get_value(cursor, &country, &year, &population); printf("ID %" PRIu64, recno); - printf(": country %s, year %u, population %" PRIu64 "\n", + printf( + ": country %s, year %" PRIu16 ", population %" PRIu64 "\n", country, year, population); } ret = cursor->close(cursor); @@ -185,7 +187,8 @@ main(void) ret = wiredtiger_struct_unpack(session, value.data, value.size, "5sHQ", &country, &year, &population); - printf(": country %s, year %u, population %" PRIu64 "\n", + printf( + ": country %s, year %" PRIu16 ", population %" PRIu64 "\n", country, year, population); } /*! [List the records in the table using raw mode.] */ @@ -201,7 +204,9 @@ main(void) cursor->set_key(cursor, 2); if ((ret = cursor->search(cursor)) == 0) { ret = cursor->get_value(cursor, &country, &year, &population); - printf("ID 2: country %s, year %u, population %" PRIu64 "\n", + printf( + "ID 2: " + "country %s, year %" PRIu16 ", population %" PRIu64 "\n", country, year, population); } /*! 
[Read population from the primary column group] */ @@ -229,8 +234,8 @@ main(void) cursor->set_key(cursor, "AU\0\0\0"); ret = cursor->search(cursor); ret = cursor->get_value(cursor, &country, &year, &population); - printf("AU: country %s, year %u, population %" PRIu64 "\n", - country, (unsigned int)year, population); + printf("AU: country %s, year %" PRIu16 ", population %" PRIu64 "\n", + country, year, population); /*! [Search in a simple index] */ ret = cursor->close(cursor); @@ -241,8 +246,9 @@ main(void) cursor->set_key(cursor, "USA\0\0", (uint16_t)1900); ret = cursor->search(cursor); ret = cursor->get_value(cursor, &country, &year, &population); - printf("US 1900: country %s, year %u, population %" PRIu64 "\n", - country, (unsigned int)year, population); + printf( + "US 1900: country %s, year %" PRIu16 ", population %" PRIu64 "\n", + country, year, population); /*! [Search in a composite index] */ ret = cursor->close(cursor); @@ -255,7 +261,7 @@ main(void) "table:poptable(country,year)", NULL, NULL, &cursor); while ((ret = cursor->next(cursor)) == 0) { ret = cursor->get_value(cursor, &country, &year); - printf("country %s, year %u\n", country, year); + printf("country %s, year %" PRIu16 "\n", country, year); } /*! [Return a subset of values from the table] */ ret = cursor->close(cursor); @@ -273,7 +279,7 @@ main(void) ret = cursor->get_value(cursor, &value); ret = wiredtiger_struct_unpack( session, value.data, value.size, "5sH", &country, &year); - printf("country %s, year %u\n", country, year); + printf("country %s, year %" PRIu16 "\n", country, year); } /*! [Return a subset of values from the table using raw mode] */ ret = cursor->close(cursor); @@ -288,7 +294,7 @@ main(void) while ((ret = cursor->next(cursor)) == 0) { ret = cursor->get_key(cursor, &country, &year); ret = cursor->get_value(cursor, &recno); - printf("row ID %" PRIu64 ": country %s, year %u\n", + printf("row ID %" PRIu64 ": country %s, year %" PRIu16 "\n", recno, country, year); } /*! 
[Return the table's record number key using an index] */ @@ -305,7 +311,7 @@ main(void) while ((ret = cursor->next(cursor)) == 0) { ret = cursor->get_key(cursor, &country, &year); ret = cursor->get_value(cursor, &population); - printf("population %" PRIu64 ": country %s, year %u\n", + printf("population %" PRIu64 ": country %s, year %" PRIu16 "\n", population, country, year); } /*! [Return a subset of the value columns from an index] */ @@ -320,7 +326,7 @@ main(void) "index:poptable:country_plus_year()", NULL, NULL, &cursor); while ((ret = cursor->next(cursor)) == 0) { ret = cursor->get_key(cursor, &country, &year); - printf("country %s, year %u\n", country, year); + printf("country %s, year %" PRIu16 "\n", country, year); } /*! [Access only the index] */ ret = cursor->close(cursor); @@ -350,10 +356,19 @@ main(void) ret = join_cursor->get_value(join_cursor, &country, &year, &population); printf("ID %" PRIu64, recno); - printf(": country %s, year %u, population %" PRIu64 "\n", + printf( + ": country %s, year %" PRIu16 ", population %" PRIu64 "\n", country, year, population); } /*! [Join cursors] */ + + /*! [Statistics cursor join cursor] */ + ret = session->open_cursor(session, + "statistics:join", + join_cursor, NULL, &stat_cursor); + /*! 
[Statistics cursor join cursor] */ + + ret = stat_cursor->close(stat_cursor); ret = join_cursor->close(join_cursor); ret = cursor2->close(cursor2); ret = cursor->close(cursor); diff --git a/src/third_party/wiredtiger/examples/c/ex_stat.c b/src/third_party/wiredtiger/examples/c/ex_stat.c index 65402230eb8..6c5c15aacc6 100644 --- a/src/third_party/wiredtiger/examples/c/ex_stat.c +++ b/src/third_party/wiredtiger/examples/c/ex_stat.c @@ -39,6 +39,7 @@ int print_cursor(WT_CURSOR *); int print_database_stats(WT_SESSION *); int print_file_stats(WT_SESSION *); +int print_join_cursor_stats(WT_SESSION *); int print_overflow_pages(WT_SESSION *); int get_stat(WT_CURSOR *cursor, int stat_field, uint64_t *valuep); int print_derived_stats(WT_SESSION *); @@ -99,6 +100,37 @@ print_file_stats(WT_SESSION *session) } int +print_join_cursor_stats(WT_SESSION *session) +{ + WT_CURSOR *idx_cursor, *join_cursor, *stat_cursor; + int ret; + + ret = session->create( + session, "index:access:idx", "columns=(v)"); + ret = session->open_cursor( + session, "index:access:idx", NULL, NULL, &idx_cursor); + ret = idx_cursor->next(idx_cursor); + ret = session->open_cursor( + session, "join:table:access", NULL, NULL, &join_cursor); + ret = session->join(session, join_cursor, idx_cursor, "compare=gt"); + ret = join_cursor->next(join_cursor); + + /*! [statistics join cursor function] */ + if ((ret = session->open_cursor(session, + "statistics:join", join_cursor, NULL, &stat_cursor)) != 0) + return (ret); + + ret = print_cursor(stat_cursor); + ret = stat_cursor->close(stat_cursor); + /*! [statistics join cursor function] */ + + ret = join_cursor->close(join_cursor); + ret = idx_cursor->close(idx_cursor); + + return (ret); +} + +int print_overflow_pages(WT_SESSION *session) { /*! 
[statistics retrieve by key] */ @@ -204,7 +236,8 @@ main(void) ret = wiredtiger_open(home, NULL, "create,statistics=(all)", &conn); ret = conn->open_session(conn, NULL, NULL, &session); ret = session->create( - session, "table:access", "key_format=S,value_format=S"); + session, "table:access", + "key_format=S,value_format=S,columns=(k,v)"); ret = session->open_cursor( session, "table:access", NULL, NULL, &cursor); @@ -219,6 +252,8 @@ main(void) ret = print_file_stats(session); + ret = print_join_cursor_stats(session); + ret = print_overflow_pages(session); ret = print_derived_stats(session); diff --git a/src/third_party/wiredtiger/examples/java/com/wiredtiger/examples/ex_schema.java b/src/third_party/wiredtiger/examples/java/com/wiredtiger/examples/ex_schema.java index be1077ee2df..7cc26acb479 100644 --- a/src/third_party/wiredtiger/examples/java/com/wiredtiger/examples/ex_schema.java +++ b/src/third_party/wiredtiger/examples/java/com/wiredtiger/examples/ex_schema.java @@ -76,7 +76,7 @@ public class ex_schema { throws WiredTigerException { Connection conn; - Cursor cursor, cursor2, join_cursor; + Cursor cursor, cursor2, join_cursor, stat_cursor; Session session; String country; long recno, population; @@ -106,7 +106,7 @@ public class ex_schema { home = null; try { - conn = wiredtiger.open(home, "create"); + conn = wiredtiger.open(home, "create,statistics=(fast)"); session = conn.open_session(null); } catch (WiredTigerException wte) { System.err.println("WiredTigerException: " + wte); @@ -368,6 +368,13 @@ public class ex_schema { ", population " + population); } /*! [Join cursors] */ + + /*! [Statistics cursor join cursor] */ + stat_cursor = session.open_cursor( + "statistics:join", join_cursor, null); + /*! 
[Statistics cursor join cursor] */ + + ret = stat_cursor.close(); ret = join_cursor.close(); ret = cursor2.close(); ret = cursor.close(); diff --git a/src/third_party/wiredtiger/examples/java/com/wiredtiger/examples/ex_stat.java b/src/third_party/wiredtiger/examples/java/com/wiredtiger/examples/ex_stat.java index b0b83a2d3b2..f8877a4620e 100644 --- a/src/third_party/wiredtiger/examples/java/com/wiredtiger/examples/ex_stat.java +++ b/src/third_party/wiredtiger/examples/java/com/wiredtiger/examples/ex_stat.java @@ -92,6 +92,33 @@ public class ex_stat { } int + print_join_cursor_stats(Session session) + throws WiredTigerException + { + Cursor idx_cursor, join_cursor, stat_cursor; + int ret; + + ret = session.create("index:access:idx", "columns=(v)"); + idx_cursor = session.open_cursor("index:access:idx", null, null); + ret = idx_cursor.next(); + join_cursor = session.open_cursor("join:table:access", null, null); + ret = session.join(join_cursor, idx_cursor, "compare=gt"); + ret = join_cursor.next(); + + /*! [statistics join cursor function] */ + stat_cursor = session.open_cursor("statistics:join", join_cursor, null); + + ret = print_cursor(stat_cursor); + ret = stat_cursor.close(); + /*! 
[statistics join cursor function] */ + + ret = join_cursor.close(); + ret = idx_cursor.close(); + + return (ret); + } + + int print_overflow_pages(Session session) throws WiredTigerException { @@ -220,7 +247,8 @@ public class ex_stat { conn = wiredtiger.open(home, "create,statistics=(all)"); session = conn.open_session(null); - ret = session.create("table:access", "key_format=S,value_format=S"); + ret = session.create("table:access", + "key_format=S,value_format=S,columns=(k,v)"); cursor = session.open_cursor("table:access", null, null); cursor.putKeyString("key"); @@ -234,6 +262,8 @@ public class ex_stat { ret = print_file_stats(session); + ret = print_join_cursor_stats(session); + ret = print_overflow_pages(session); ret = print_derived_stats(session); diff --git a/src/third_party/wiredtiger/ext/collators/revint/Makefile.am b/src/third_party/wiredtiger/ext/collators/revint/Makefile.am new file mode 100644 index 00000000000..8c85c6a4701 --- /dev/null +++ b/src/third_party/wiredtiger/ext/collators/revint/Makefile.am @@ -0,0 +1,10 @@ +AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include + +noinst_LTLIBRARIES = libwiredtiger_revint_collator.la +libwiredtiger_revint_collator_la_SOURCES = revint_collator.c + +# libtool hack: noinst_LTLIBRARIES turns off building shared libraries as well +# as installation, it will only build static libraries. As far as I can tell, +# the "approved" libtool way to turn them back on is by adding -rpath. +libwiredtiger_revint_collator_la_LDFLAGS = \ + -avoid-version -module -rpath /nowhere diff --git a/src/third_party/wiredtiger/ext/collators/revint/revint_collator.c b/src/third_party/wiredtiger/ext/collators/revint/revint_collator.c new file mode 100644 index 00000000000..30b5dc67556 --- /dev/null +++ b/src/third_party/wiredtiger/ext/collators/revint/revint_collator.c @@ -0,0 +1,153 @@ +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. 
+ * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <stdlib.h> +#include <errno.h> +#include <stdint.h> +#include <wiredtiger_ext.h> + +/* + * A simple WiredTiger collator for indices having a single integer key, + * where the ordering is descending (reversed). This collator also + * requires that primary key be an integer. + */ + +/* Local collator structure. */ +typedef struct { + WT_COLLATOR collator; /* Must come first */ + WT_EXTENSION_API *wt_api; /* Extension API */ +} REVINT_COLLATOR; + +/* + * revint_compare -- + * WiredTiger reverse integer collation, used for tests. 
+ */ +static int +revint_compare(WT_COLLATOR *collator, + WT_SESSION *session, const WT_ITEM *k1, const WT_ITEM *k2, int *cmp) +{ + const REVINT_COLLATOR *revint_collator; + WT_EXTENSION_API *wtapi; + WT_PACK_STREAM *pstream; + int ret; + int64_t i1, i2, p1, p2; + + i1 = i2 = p1 = p2 = 0; + revint_collator = (const REVINT_COLLATOR *)collator; + wtapi = revint_collator->wt_api; + + /* + * All indices using this collator have an integer key, and the + * primary key is also an integer. A collator is usually passed the + * concatenation of index key and primary key (when available), + * hence we initially unpack using "ii". + * + * A collator may also be called with an item that includes a index + * key and no primary key. Among items having the same index key, + * an item with no primary key should sort before an item with a + * primary key. The reason is that if the application calls + * WT_CURSOR::search on a index key for which there are more than + * one value, the search key will not yet have a primary key. We + * want to position the cursor at the 'first' matching index key so + * that repeated calls to WT_CURSOR::next will see them all. + * + * To keep this code simple, we do not reverse the ordering + * when comparing primary keys. + */ + if ((ret = wtapi->unpack_start( + wtapi, session, "ii", k1->data, k1->size, &pstream)) != 0 || + (ret = wtapi->unpack_int(wtapi, pstream, &i1)) != 0) + goto err; + if ((ret = wtapi->unpack_int(wtapi, pstream, &p1)) != 0) + /* A missing primary key is OK and sorts first. */ + p1 = INT64_MIN; + if ((ret = wtapi->pack_close(wtapi, pstream, NULL)) != 0) + goto err; + + /* Unpack the second pair of numbers. */ + if ((ret = wtapi->unpack_start( + wtapi, session, "ii", k2->data, k2->size, &pstream)) != 0 || + (ret = wtapi->unpack_int(wtapi, pstream, &i2)) != 0) + goto err; + if ((ret = wtapi->unpack_int(wtapi, pstream, &p2)) != 0) + /* A missing primary key is OK and sorts first. 
*/ + p2 = INT64_MIN; + if ((ret = wtapi->pack_close(wtapi, pstream, NULL)) != 0) + goto err; + + /* sorting is reversed */ + if (i1 < i2) + *cmp = 1; + else if (i1 > i2) + *cmp = -1; + /* compare primary keys next, not reversed */ + else if (p1 < p2) + *cmp = -1; + else if (p1 > p2) + *cmp = 1; + else + *cmp = 0; /* index key and primary key are same */ + +err: return (ret); +} + +/* + * revint_terminate -- + * Terminate is called to free the collator and any associated memory. + */ +static int +revint_terminate(WT_COLLATOR *collator, WT_SESSION *session) +{ + (void)session; /* Unused parameters */ + + /* Free the allocated memory. */ + free(collator); + return (0); +} + +/* + * wiredtiger_extension_init -- + * WiredTiger revint collation extension. + */ +int +wiredtiger_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config) +{ + REVINT_COLLATOR *revint_collator; + + (void)config; /* Unused parameters */ + + if ((revint_collator = calloc(1, sizeof(REVINT_COLLATOR))) == NULL) + return (errno); + + revint_collator->collator.compare = revint_compare; + revint_collator->collator.terminate = revint_terminate; + revint_collator->wt_api = connection->get_extension_api(connection); + + return (connection->add_collator( + connection, "revint", &revint_collator->collator, NULL)); +} diff --git a/src/third_party/wiredtiger/src/async/async_op.c b/src/third_party/wiredtiger/src/async/async_op.c index 130c704757b..970c33c3360 100644 --- a/src/third_party/wiredtiger/src/async/async_op.c +++ b/src/third_party/wiredtiger/src/async/async_op.c @@ -349,14 +349,8 @@ __wt_async_op_init(WT_SESSION_IMPL *session) WT_ERR(__async_op_init(conn, op, i)); } return (0); -err: - if (async->async_ops != NULL) { - __wt_free(session, async->async_ops); - async->async_ops = NULL; - } - if (async->async_queue != NULL) { - __wt_free(session, async->async_queue); - async->async_queue = NULL; - } + +err: __wt_free(session, async->async_ops); + __wt_free(session, async->async_queue); return 
(ret); } diff --git a/src/third_party/wiredtiger/src/block/block_ckpt.c b/src/third_party/wiredtiger/src/block/block_ckpt.c index 03059c8f23a..812bf99acfb 100644 --- a/src/third_party/wiredtiger/src/block/block_ckpt.c +++ b/src/third_party/wiredtiger/src/block/block_ckpt.c @@ -812,8 +812,7 @@ __ckpt_string(WT_SESSION_IMPL *session, WT_RET(__wt_block_buffer_to_ckpt(session, block, addr, ci)); WT_RET(__wt_buf_fmt(session, buf, - "version=%d", - ci->version)); + "version=%" PRIu8, ci->version)); if (ci->root_offset == WT_BLOCK_INVALID_OFFSET) WT_RET(__wt_buf_catfmt(session, buf, ", root=[Empty]")); else diff --git a/src/third_party/wiredtiger/src/block/block_open.c b/src/third_party/wiredtiger/src/block/block_open.c index d9b2f908737..adb745c99e7 100644 --- a/src/third_party/wiredtiger/src/block/block_open.c +++ b/src/third_party/wiredtiger/src/block/block_open.c @@ -369,7 +369,7 @@ __desc_read(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_ERR_MSG(session, WT_ERROR, "unsupported WiredTiger file version: this build only " "supports major/minor versions up to %d/%d, and the file " - "is version %d/%d", + "is version %" PRIu16 "/%" PRIu16, WT_BLOCK_MAJOR_VERSION, WT_BLOCK_MINOR_VERSION, desc->majorv, desc->minorv); diff --git a/src/third_party/wiredtiger/src/block/block_write.c b/src/third_party/wiredtiger/src/block/block_write.c index 4c6ac198fe4..e05a430832e 100644 --- a/src/third_party/wiredtiger/src/block/block_write.c +++ b/src/third_party/wiredtiger/src/block/block_write.c @@ -206,10 +206,16 @@ __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, uint32_t cksum; bool local_locked; - blk = WT_BLOCK_HEADER_REF(buf->mem); fh = block->fh; /* + * Clear the block header to ensure all of it is initialized, even the + * unused fields. + */ + blk = WT_BLOCK_HEADER_REF(buf->mem); + memset(blk, 0, sizeof(*blk)); + + /* * Swap the page-header as needed; this doesn't belong here, but it's * the best place to catch all callers. 
*/ diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c index c11b7d35de6..1f3ac443495 100644 --- a/src/third_party/wiredtiger/src/btree/bt_cursor.c +++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c @@ -173,13 +173,18 @@ __cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp) */ break; case BTREE_COL_VAR: + /* The search function doesn't check for empty pages. */ + if (page->pg_var_entries == 0) + return (false); + WT_ASSERT(session, cbt->slot < page->pg_var_entries); + /* - * If search returned an insert object, there may or may not be - * a matching on-page object, we have to check. Variable-length - * column-store pages don't map one-to-one to keys, but have - * "slots", check if search returned a valid slot. + * Column-store updates aren't stored on the page, instead they + * are stored as "insert" objects. If search returned an insert + * object we can't return, the returned on-page object must be + * checked for a match. */ - if (cbt->slot >= page->pg_var_entries) + if (cbt->ins != NULL && !F_ISSET(cbt, WT_CBT_VAR_ONPAGE_MATCH)) return (false); /* @@ -194,6 +199,11 @@ __cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp) return (false); break; case BTREE_ROW: + /* The search function doesn't check for empty pages. */ + if (page->pg_row_entries == 0) + return (false); + WT_ASSERT(session, cbt->slot < page->pg_row_entries); + /* * See above: for row-store, no insert object can have the same * key as an on-page object, we're done. @@ -201,15 +211,6 @@ __cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp) if (cbt->ins != NULL) return (false); - /* - * Check if searched returned a valid slot (the failure mode is - * an empty page, the search function doesn't check, and so the - * more exact test is "page->pg_row_entries == 0", but this test - * mirrors the column-store test). - */ - if (cbt->slot >= page->pg_row_entries) - return (false); - /* Updates are stored on the page, check for a delete. 
*/ if (page->pg_row_upd != NULL && (upd = __wt_txn_read( session, page->pg_row_upd[cbt->slot])) != NULL) { @@ -1162,22 +1163,14 @@ int __wt_btcur_range_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop) { WT_BTREE *btree; - WT_CURSOR_BTREE *cbt; WT_DECL_RET; WT_SESSION_IMPL *session; - cbt = (start != NULL) ? start : stop; - session = (WT_SESSION_IMPL *)cbt->iface.session; - btree = cbt->btree; + session = (WT_SESSION_IMPL *)start->iface.session; + btree = start->btree; WT_STAT_FAST_DATA_INCR(session, cursor_truncate); /* - * We always delete in a forward direction because it's faster, assert - * our caller provided us with a start cursor. - */ - WT_ASSERT(session, start != NULL); - - /* * For recovery, log the start and stop keys for a truncate operation, * not the individual records removed. On the other hand, for rollback * we need to keep track of all the in-memory operations. diff --git a/src/third_party/wiredtiger/src/btree/bt_discard.c b/src/third_party/wiredtiger/src/btree/bt_discard.c index 795111d53f9..1f739c9572e 100644 --- a/src/third_party/wiredtiger/src/btree/bt_discard.c +++ b/src/third_party/wiredtiger/src/btree/bt_discard.c @@ -337,8 +337,7 @@ __free_page_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page) copy = WT_ROW_KEY_COPY(rip); (void)__wt_row_leaf_key_info( page, copy, &ikey, NULL, NULL, NULL); - if (ikey != NULL) - __wt_free(session, ikey); + __wt_free(session, ikey); } /* diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c index 52152a2fcac..1d33a7e7c9a 100644 --- a/src/third_party/wiredtiger/src/btree/bt_handle.c +++ b/src/third_party/wiredtiger/src/btree/bt_handle.c @@ -350,7 +350,7 @@ __btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt) /* Initialize locks. 
*/ WT_RET(__wt_rwlock_alloc( session, &btree->ovfl_lock, "btree overflow lock")); - WT_RET(__wt_spin_init(session, &btree->flush_lock, "btree flush lock")); + WT_RET(__wt_spin_init(session, &btree->flush_lock, "btree flush")); btree->checkpointing = WT_CKPT_OFF; /* Not checkpointing */ btree->modified = 0; /* Clean */ diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c index ac9faef4ff2..5cf6a9bf2bc 100644 --- a/src/third_party/wiredtiger/src/btree/bt_read.c +++ b/src/third_party/wiredtiger/src/btree/bt_read.c @@ -281,10 +281,8 @@ err: WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags)); * On error, upd points to a single unlinked WT_UPDATE structure, * first_upd points to a list. */ - if (upd != NULL) - __wt_free(session, upd); - if (first_upd != NULL) - __wt_free_update_list(session, first_upd); + __wt_free(session, upd); + __wt_free_update_list(session, first_upd); __wt_scr_free(session, &current_key); __wt_scr_free(session, &las_addr); @@ -460,12 +458,12 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags WT_DECL_RET; WT_PAGE *page; u_int sleep_cnt, wait_cnt; - bool busy, cache_work, oldgen, stalled; + bool busy, cache_work, evict_soon, stalled; int force_attempts; btree = S2BT(session); - for (oldgen = stalled = false, + for (evict_soon = stalled = false, force_attempts = 0, sleep_cnt = wait_cnt = 0;;) { switch (ref->state) { case WT_REF_DELETED: @@ -486,7 +484,16 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags WT_RET(__wt_cache_eviction_check( session, 1, NULL)); WT_RET(__page_read(session, ref)); - oldgen = LF_ISSET(WT_READ_WONT_NEED) || + + /* + * If configured to not trash the cache, leave the page + * generation unset, we'll set it before returning to + * the oldest read generation, so the page is forcibly + * evicted as soon as possible. We don't do that set + * here because we don't want to evict the page before + * we "acquire" it. 
+ */ + evict_soon = LF_ISSET(WT_READ_WONT_NEED) || F_ISSET(session, WT_SESSION_NO_CACHE); continue; case WT_REF_READING: @@ -575,20 +582,24 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags } /* - * If we read the page and we are configured to not - * trash the cache, set the oldest read generation so - * the page is forcibly evicted as soon as possible. + * If we read the page and are configured to not trash + * the cache, and no other thread has already used the + * page, set the oldest read generation so the page is + * forcibly evicted as soon as possible. * - * Otherwise, update the page's read generation. + * Otherwise, if we read the page, or, if configured to + * update the page's read generation and the page isn't + * already flagged for forced eviction, update the page + * read generation. */ page = ref->page; - if (oldgen && page->read_gen == WT_READGEN_NOTSET) - __wt_page_evict_soon(page); - else if (!LF_ISSET(WT_READ_NO_GEN) && - page->read_gen != WT_READGEN_OLDEST && - page->read_gen < __wt_cache_read_gen(session)) - page->read_gen = - __wt_cache_read_gen_bump(session); + if (page->read_gen == WT_READGEN_NOTSET) { + if (evict_soon) + __wt_page_evict_soon(page); + else + __wt_cache_read_gen_new(session, page); + } else if (!LF_ISSET(WT_READ_NO_GEN)) + __wt_cache_read_gen_bump(session, page); skip_evict: /* * Check if we need an autocommit transaction. 
diff --git a/src/third_party/wiredtiger/src/btree/bt_rebalance.c b/src/third_party/wiredtiger/src/btree/bt_rebalance.c index 86360e83ddf..d94eb2ddd80 100644 --- a/src/third_party/wiredtiger/src/btree/bt_rebalance.c +++ b/src/third_party/wiredtiger/src/btree/bt_rebalance.c @@ -412,6 +412,7 @@ __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[]) WT_UNUSED(cfg); btree = S2BT(session); + evict_reset = false; /* * If the tree has never been written to disk, we're done, rebalance @@ -438,7 +439,8 @@ __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[]) * cache is the root page, and that cannot be evicted; however, this way * eviction ignores the tree entirely.) */ - WT_ERR(__wt_evict_file_exclusive_on(session, &evict_reset)); + WT_ERR(__wt_evict_file_exclusive_on(session)); + evict_reset = true; /* Recursively walk the tree. */ switch (rs->type) { @@ -470,7 +472,10 @@ __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[]) btree->root.page = rs->root; rs->root = NULL; -err: /* Discard any leftover root page we created. */ +err: if (evict_reset) + __wt_evict_file_exclusive_off(session); + + /* Discard any leftover root page we created. 
*/ if (rs->root != NULL) { __wt_page_modify_clear(session, rs->root); __wt_page_out(session, &rs->root); diff --git a/src/third_party/wiredtiger/src/btree/bt_slvg.c b/src/third_party/wiredtiger/src/btree/bt_slvg.c index 8d78bda79fb..0e064d306b6 100644 --- a/src/third_party/wiredtiger/src/btree/bt_slvg.c +++ b/src/third_party/wiredtiger/src/btree/bt_slvg.c @@ -1206,8 +1206,7 @@ __slvg_col_build_internal( __wt_root_ref_init(&ss->root_ref, page, true); if (0) { -err: if (addr != NULL) - __wt_free(session, addr); +err: __wt_free(session, addr); __wt_page_out(session, &page); } return (ret); @@ -1868,8 +1867,7 @@ __slvg_row_build_internal( __wt_root_ref_init(&ss->root_ref, page, false); if (0) { -err: if (addr != NULL) - __wt_free(session, addr); +err: __wt_free(session, addr); __wt_page_out(session, &page); } return (ret); diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c index 3dea03316ce..4f16a290958 100644 --- a/src/third_party/wiredtiger/src/btree/bt_split.c +++ b/src/third_party/wiredtiger/src/btree/bt_split.c @@ -151,8 +151,7 @@ __wt_split_stash_discard_all( for (i = 0, stash = session->split_stash; i < session->split_stash_cnt; ++i, ++stash) - if (stash->p != NULL) - __wt_free(session_safe, stash->p); + __wt_free(session_safe, stash->p); __wt_free(session_safe, session->split_stash); session->split_stash_cnt = session->split_stash_alloc = 0; diff --git a/src/third_party/wiredtiger/src/btree/bt_sync.c b/src/third_party/wiredtiger/src/btree/bt_sync.c index 5273f0ee2c3..57056eb5c99 100644 --- a/src/third_party/wiredtiger/src/btree/bt_sync.c +++ b/src/third_party/wiredtiger/src/btree/bt_sync.c @@ -17,6 +17,7 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) { struct timespec end, start; WT_BTREE *btree; + WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_PAGE *page; WT_PAGE_MODIFY *mod; @@ -25,8 +26,8 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) uint64_t internal_bytes, internal_pages, 
leaf_bytes, leaf_pages; uint64_t oldest_id, saved_snap_min; uint32_t flags; - bool evict_reset; + conn = S2C(session); btree = S2BT(session); walk = NULL; txn = &session->txn; @@ -123,9 +124,8 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) */ WT_PUBLISH(btree->checkpointing, WT_CKPT_PREPARE); - WT_ERR(__wt_evict_file_exclusive_on(session, &evict_reset)); - if (evict_reset) - __wt_evict_file_exclusive_off(session); + WT_ERR(__wt_evict_file_exclusive_on(session)); + __wt_evict_file_exclusive_off(session); WT_PUBLISH(btree->checkpointing, WT_CKPT_RUNNING); @@ -223,7 +223,7 @@ err: /* On error, clear any left-over tree walk. */ * so that eviction knows that the checkpoint has completed. */ WT_PUBLISH(btree->checkpoint_gen, - S2C(session)->txn_global.checkpoint_gen); + conn->txn_global.checkpoint_gen); WT_STAT_FAST_DATA_SET(session, btree_checkpoint_generation, btree->checkpoint_gen); @@ -257,7 +257,8 @@ err: /* On error, clear any left-over tree walk. */ * before checkpointing the file). Start a flush to stable storage, * but don't wait for it. */ - if (ret == 0 && syncop == WT_SYNC_WRITE_LEAVES) + if (ret == 0 && + syncop == WT_SYNC_WRITE_LEAVES && F_ISSET(conn, WT_CONN_CKPT_SYNC)) WT_RET(btree->bm->sync(btree->bm, session, true)); return (ret); @@ -268,24 +269,18 @@ err: /* On error, clear any left-over tree walk. */ * Cache operations. */ int -__wt_cache_op(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, WT_CACHE_OP op) +__wt_cache_op(WT_SESSION_IMPL *session, WT_CACHE_OP op) { - WT_DECL_RET; - WT_BTREE *btree; - - btree = S2BT(session); - switch (op) { case WT_SYNC_CHECKPOINT: case WT_SYNC_CLOSE: /* - * Set the checkpoint reference for reconciliation; it's ugly, - * but drilling a function parameter path from our callers to - * the reconciliation of the tree's root page is going to be - * worse. 
+ * Make sure the checkpoint reference is set for + * reconciliation; it's ugly, but drilling a function parameter + * path from our callers to the reconciliation of the tree's + * root page is going to be worse. */ - WT_ASSERT(session, btree->ckpt == NULL); - btree->ckpt = ckptbase; + WT_ASSERT(session, S2BT(session)->ckpt != NULL); break; case WT_SYNC_DISCARD: case WT_SYNC_WRITE_LEAVES: @@ -295,23 +290,10 @@ __wt_cache_op(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, WT_CACHE_OP op) switch (op) { case WT_SYNC_CHECKPOINT: case WT_SYNC_WRITE_LEAVES: - WT_ERR(__sync_file(session, op)); - break; + return (__sync_file(session, op)); case WT_SYNC_CLOSE: case WT_SYNC_DISCARD: - WT_ERR(__wt_evict_file(session, op)); - break; + return (__wt_evict_file(session, op)); + WT_ILLEGAL_VALUE(session); } - -err: switch (op) { - case WT_SYNC_CHECKPOINT: - case WT_SYNC_CLOSE: - btree->ckpt = NULL; - break; - case WT_SYNC_DISCARD: - case WT_SYNC_WRITE_LEAVES: - break; - } - - return (ret); } diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy.c b/src/third_party/wiredtiger/src/btree/bt_vrfy.c index ae2c20be1b6..952298f2456 100644 --- a/src/third_party/wiredtiger/src/btree/bt_vrfy.c +++ b/src/third_party/wiredtiger/src/btree/bt_vrfy.c @@ -226,7 +226,7 @@ __wt_verify(WT_SESSION_IMPL *session, const char *cfg[]) WT_WITH_PAGE_INDEX(session, ret = __verify_tree(session, &btree->root, vs)); - WT_TRET(__wt_cache_op(session, NULL, WT_SYNC_DISCARD)); + WT_TRET(__wt_cache_op(session, WT_SYNC_DISCARD)); } /* Unload the checkpoint. 
*/ diff --git a/src/third_party/wiredtiger/src/btree/col_srch.c b/src/third_party/wiredtiger/src/btree/col_srch.c index 23eae75ec2b..4730267a545 100644 --- a/src/third_party/wiredtiger/src/btree/col_srch.c +++ b/src/third_party/wiredtiger/src/btree/col_srch.c @@ -211,7 +211,6 @@ descend: /* leaf_only: page = current->page; cbt->ref = current; - cbt->recno = recno; /* * Don't bother searching if the caller is appending a new record where @@ -225,13 +224,6 @@ leaf_only: } /* - * Set the on-page slot to an impossible value larger than any possible - * slot (it's used to interpret the search function's return after the - * search returns an insert list for a page that has no entries). - */ - cbt->slot = UINT32_MAX; - - /* * Search the leaf page. * * Search after a page is pinned does a search of the pinned page before @@ -244,28 +236,38 @@ leaf_only: * that's impossibly large for the page. We do have additional setup to * do in that case, the record may be appended to the page. */ - cbt->compare = 0; if (page->type == WT_PAGE_COL_FIX) { if (recno < page->pg_fix_recno) { + cbt->recno = page->pg_fix_recno; cbt->compare = 1; return (0); } if (recno >= page->pg_fix_recno + page->pg_fix_entries) { cbt->recno = page->pg_fix_recno + page->pg_fix_entries; goto past_end; - } else + } else { + cbt->recno = recno; + cbt->compare = 0; ins_head = WT_COL_UPDATE_SINGLE(page); + } } else { if (recno < page->pg_var_recno) { + cbt->recno = page->pg_var_recno; + cbt->slot = 0; cbt->compare = 1; return (0); } if ((cip = __col_var_search(page, recno, NULL)) == NULL) { cbt->recno = __col_var_last_recno(page); + cbt->slot = page->pg_var_entries == 0 ? 
+ 0 : page->pg_var_entries - 1; goto past_end; } else { + cbt->recno = recno; cbt->slot = WT_COL_SLOT(page, cip); + cbt->compare = 0; ins_head = WT_COL_UPDATE_SLOT(page, cbt->slot); + F_SET(cbt, WT_CBT_VAR_ONPAGE_MATCH); } } diff --git a/src/third_party/wiredtiger/src/btree/row_key.c b/src/third_party/wiredtiger/src/btree/row_key.c index 8b9e858ec18..9fff092d079 100644 --- a/src/third_party/wiredtiger/src/btree/row_key.c +++ b/src/third_party/wiredtiger/src/btree/row_key.c @@ -52,6 +52,7 @@ __wt_row_leaf_keys(WT_SESSION_IMPL *session, WT_PAGE *page) WT_RET(__wt_scr_alloc(session, 0, &key)); WT_RET(__wt_scr_alloc(session, (uint32_t)__bitstr_size(page->pg_row_entries), &tmp)); + memset(tmp->mem, 0, tmp->memsize); if ((gap = btree->key_gap) == 0) gap = 1; diff --git a/src/third_party/wiredtiger/src/btree/row_srch.c b/src/third_party/wiredtiger/src/btree/row_srch.c index 9d68c8e0ce7..6169a0a810a 100644 --- a/src/third_party/wiredtiger/src/btree/row_srch.c +++ b/src/third_party/wiredtiger/src/btree/row_srch.c @@ -461,6 +461,12 @@ leaf_only: cbt->ref = current; /* + * Clear current now that we have moved the reference into the btree + * cursor, so that cleanup never releases twice. + */ + current = NULL; + + /* * In the case of a right-side tree descent during an insert, do a fast * check for an append to the page, try to catch cursors appending data * into the tree. @@ -614,14 +620,7 @@ leaf_match: cbt->compare = 0; return (0); -err: /* - * Release the current page if the search started at the root. If the - * search didn't start at the root we should never have gone looking - * beyond the start page. 
- */ - WT_ASSERT(session, leaf == NULL || leaf == current); - if (leaf == NULL) - WT_TRET(__wt_page_release(session, current, 0)); +err: WT_TRET(__wt_page_release(session, current, 0)); return (ret); } diff --git a/src/third_party/wiredtiger/src/cache/cache_las.c b/src/third_party/wiredtiger/src/cache/cache_las.c index 3549e41e80d..8796ec6b2fc 100644 --- a/src/third_party/wiredtiger/src/cache/cache_las.c +++ b/src/third_party/wiredtiger/src/cache/cache_las.c @@ -205,7 +205,7 @@ __wt_las_cursor( * useful more than once. */ *session_flags = - F_ISSET(session, WT_SESSION_NO_CACHE | WT_SESSION_NO_EVICTION); + F_MASK(session, WT_SESSION_NO_CACHE | WT_SESSION_NO_EVICTION); conn = S2C(session); diff --git a/src/third_party/wiredtiger/src/config/config.c b/src/third_party/wiredtiger/src/config/config.c index f480ab83dbd..96ef7a4e62a 100644 --- a/src/third_party/wiredtiger/src/config/config.c +++ b/src/third_party/wiredtiger/src/config/config.c @@ -16,9 +16,9 @@ static int __config_err(WT_CONFIG *conf, const char *msg, int err) { WT_RET_MSG(conf->session, err, - "Error parsing '%.*s' at byte %u: %s", + "Error parsing '%.*s' at offset %" WT_PTRDIFFT_FMT ": %s", (int)(conf->end - conf->orig), conf->orig, - (u_int)(conf->cur - conf->orig), msg); + conf->cur - conf->orig, msg); } /* diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c index 6e88f9b4d14..c752e5eb265 100644 --- a/src/third_party/wiredtiger/src/config/config_def.c +++ b/src/third_party/wiredtiger/src/config/config_def.c @@ -391,6 +391,61 @@ static const WT_CONFIG_CHECK confchk_colgroup_meta[] = { { NULL, NULL, NULL, NULL, NULL, 0 } }; +static const WT_CONFIG_CHECK confchk_file_config[] = { + { "allocation_size", "int", + NULL, "min=512B,max=128MB", + NULL, 0 }, + { "app_metadata", "string", NULL, NULL, NULL, 0 }, + { "block_allocation", "string", + NULL, "choices=[\"first\",\"best\"]", + NULL, 0 }, + { "block_compressor", "string", NULL, NULL, NULL, 0 
}, + { "cache_resident", "boolean", NULL, NULL, NULL, 0 }, + { "checksum", "string", + NULL, "choices=[\"on\",\"off\",\"uncompressed\"]", + NULL, 0 }, + { "collator", "string", NULL, NULL, NULL, 0 }, + { "columns", "list", NULL, NULL, NULL, 0 }, + { "dictionary", "int", NULL, "min=0", NULL, 0 }, + { "encryption", "category", + NULL, NULL, + confchk_WT_SESSION_create_encryption_subconfigs, 2 }, + { "format", "string", NULL, "choices=[\"btree\"]", NULL, 0 }, + { "huffman_key", "string", NULL, NULL, NULL, 0 }, + { "huffman_value", "string", NULL, NULL, NULL, 0 }, + { "internal_item_max", "int", NULL, "min=0", NULL, 0 }, + { "internal_key_max", "int", NULL, "min=0", NULL, 0 }, + { "internal_key_truncate", "boolean", NULL, NULL, NULL, 0 }, + { "internal_page_max", "int", + NULL, "min=512B,max=512MB", + NULL, 0 }, + { "key_format", "format", __wt_struct_confchk, NULL, NULL, 0 }, + { "key_gap", "int", NULL, "min=0", NULL, 0 }, + { "leaf_item_max", "int", NULL, "min=0", NULL, 0 }, + { "leaf_key_max", "int", NULL, "min=0", NULL, 0 }, + { "leaf_page_max", "int", + NULL, "min=512B,max=512MB", + NULL, 0 }, + { "leaf_value_max", "int", NULL, "min=0", NULL, 0 }, + { "log", "category", + NULL, NULL, + confchk_WT_SESSION_create_log_subconfigs, 1 }, + { "memory_page_max", "int", + NULL, "min=512B,max=10TB", + NULL, 0 }, + { "os_cache_dirty_max", "int", NULL, "min=0", NULL, 0 }, + { "os_cache_max", "int", NULL, "min=0", NULL, 0 }, + { "prefix_compression", "boolean", NULL, NULL, NULL, 0 }, + { "prefix_compression_min", "int", NULL, "min=0", NULL, 0 }, + { "split_deepen_min_child", "int", NULL, NULL, NULL, 0 }, + { "split_deepen_per_child", "int", NULL, NULL, NULL, 0 }, + { "split_pct", "int", NULL, "min=25,max=100", NULL, 0 }, + { "value_format", "format", + __wt_struct_confchk, NULL, + NULL, 0 }, + { NULL, NULL, NULL, NULL, NULL, 0 } +}; + static const WT_CONFIG_CHECK confchk_file_meta[] = { { "allocation_size", "int", NULL, "min=512B,max=128MB", @@ -466,6 +521,67 @@ static const 
WT_CONFIG_CHECK confchk_index_meta[] = { { NULL, NULL, NULL, NULL, NULL, 0 } }; +static const WT_CONFIG_CHECK confchk_lsm_meta[] = { + { "allocation_size", "int", + NULL, "min=512B,max=128MB", + NULL, 0 }, + { "app_metadata", "string", NULL, NULL, NULL, 0 }, + { "block_allocation", "string", + NULL, "choices=[\"first\",\"best\"]", + NULL, 0 }, + { "block_compressor", "string", NULL, NULL, NULL, 0 }, + { "cache_resident", "boolean", NULL, NULL, NULL, 0 }, + { "checksum", "string", + NULL, "choices=[\"on\",\"off\",\"uncompressed\"]", + NULL, 0 }, + { "chunks", "string", NULL, NULL, NULL, 0 }, + { "collator", "string", NULL, NULL, NULL, 0 }, + { "columns", "list", NULL, NULL, NULL, 0 }, + { "dictionary", "int", NULL, "min=0", NULL, 0 }, + { "encryption", "category", + NULL, NULL, + confchk_WT_SESSION_create_encryption_subconfigs, 2 }, + { "format", "string", NULL, "choices=[\"btree\"]", NULL, 0 }, + { "huffman_key", "string", NULL, NULL, NULL, 0 }, + { "huffman_value", "string", NULL, NULL, NULL, 0 }, + { "internal_item_max", "int", NULL, "min=0", NULL, 0 }, + { "internal_key_max", "int", NULL, "min=0", NULL, 0 }, + { "internal_key_truncate", "boolean", NULL, NULL, NULL, 0 }, + { "internal_page_max", "int", + NULL, "min=512B,max=512MB", + NULL, 0 }, + { "key_format", "format", __wt_struct_confchk, NULL, NULL, 0 }, + { "key_gap", "int", NULL, "min=0", NULL, 0 }, + { "last", "string", NULL, NULL, NULL, 0 }, + { "leaf_item_max", "int", NULL, "min=0", NULL, 0 }, + { "leaf_key_max", "int", NULL, "min=0", NULL, 0 }, + { "leaf_page_max", "int", + NULL, "min=512B,max=512MB", + NULL, 0 }, + { "leaf_value_max", "int", NULL, "min=0", NULL, 0 }, + { "log", "category", + NULL, NULL, + confchk_WT_SESSION_create_log_subconfigs, 1 }, + { "lsm", "category", + NULL, NULL, + confchk_WT_SESSION_create_lsm_subconfigs, 11 }, + { "memory_page_max", "int", + NULL, "min=512B,max=10TB", + NULL, 0 }, + { "old_chunks", "string", NULL, NULL, NULL, 0 }, + { "os_cache_dirty_max", "int", NULL, 
"min=0", NULL, 0 }, + { "os_cache_max", "int", NULL, "min=0", NULL, 0 }, + { "prefix_compression", "boolean", NULL, NULL, NULL, 0 }, + { "prefix_compression_min", "int", NULL, "min=0", NULL, 0 }, + { "split_deepen_min_child", "int", NULL, NULL, NULL, 0 }, + { "split_deepen_per_child", "int", NULL, NULL, NULL, 0 }, + { "split_pct", "int", NULL, "min=25,max=100", NULL, 0 }, + { "value_format", "format", + __wt_struct_confchk, NULL, + NULL, 0 }, + { NULL, NULL, NULL, NULL, NULL, 0 } +}; + static const WT_CONFIG_CHECK confchk_table_meta[] = { { "app_metadata", "string", NULL, NULL, NULL, 0 }, { "colgroups", "list", NULL, NULL, NULL, 0 }, @@ -985,6 +1101,20 @@ static const WT_CONFIG_ENTRY config_entries[] = { "app_metadata=,collator=,columns=,source=,type=file", confchk_colgroup_meta, 5 }, + { "file.config", + "allocation_size=4KB,app_metadata=,block_allocation=best," + "block_compressor=,cache_resident=0,checksum=uncompressed," + "collator=,columns=,dictionary=0,encryption=(keyid=,name=)," + "format=btree,huffman_key=,huffman_value=,internal_item_max=0," + "internal_key_max=0,internal_key_truncate=,internal_page_max=4KB," + "key_format=u,key_gap=10,leaf_item_max=0,leaf_key_max=0," + "leaf_page_max=32KB,leaf_value_max=0,log=(enabled=)," + "memory_page_max=5MB,os_cache_dirty_max=0,os_cache_max=0," + "prefix_compression=0,prefix_compression_min=4," + "split_deepen_min_child=0,split_deepen_per_child=0,split_pct=75," + "value_format=u", + confchk_file_config, 33 + }, { "file.meta", "allocation_size=4KB,app_metadata=,block_allocation=best," "block_compressor=,cache_resident=0,checkpoint=,checkpoint_lsn=," @@ -1005,6 +1135,23 @@ static const WT_CONFIG_ENTRY config_entries[] = { "index_key_columns=,key_format=u,source=,type=file,value_format=u", confchk_index_meta, 10 }, + { "lsm.meta", + "allocation_size=4KB,app_metadata=,block_allocation=best," + "block_compressor=,cache_resident=0,checksum=uncompressed,chunks=" + ",collator=,columns=,dictionary=0,encryption=(keyid=,name=)," 
+ "format=btree,huffman_key=,huffman_value=,internal_item_max=0," + "internal_key_max=0,internal_key_truncate=,internal_page_max=4KB," + "key_format=u,key_gap=10,last=,leaf_item_max=0,leaf_key_max=0," + "leaf_page_max=32KB,leaf_value_max=0,log=(enabled=)," + "lsm=(auto_throttle=,bloom=,bloom_bit_count=16,bloom_config=," + "bloom_hash_count=8,bloom_oldest=0,chunk_count_limit=0," + "chunk_max=5GB,chunk_size=10MB,merge_max=15,merge_min=0)," + "memory_page_max=5MB,old_chunks=,os_cache_dirty_max=0," + "os_cache_max=0,prefix_compression=0,prefix_compression_min=4," + "split_deepen_min_child=0,split_deepen_per_child=0,split_pct=75," + "value_format=u", + confchk_lsm_meta, 37 + }, { "table.meta", "app_metadata=,colgroups=,collator=,columns=,key_format=u," "value_format=u", diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c index bb67185f5c9..6d115c8fdcd 100644 --- a/src/third_party/wiredtiger/src/conn/conn_api.c +++ b/src/third_party/wiredtiger/src/conn/conn_api.c @@ -772,6 +772,19 @@ __conn_get_extension_api(WT_CONNECTION *wt_conn) conn->extension_api.transaction_visible = __wt_ext_transaction_visible; conn->extension_api.version = wiredtiger_version; + /* Streaming pack/unpack API */ + conn->extension_api.pack_start = __wt_ext_pack_start; + conn->extension_api.unpack_start = __wt_ext_unpack_start; + conn->extension_api.pack_close = __wt_ext_pack_close; + conn->extension_api.pack_item = __wt_ext_pack_item; + conn->extension_api.pack_int = __wt_ext_pack_int; + conn->extension_api.pack_str = __wt_ext_pack_str; + conn->extension_api.pack_uint = __wt_ext_pack_uint; + conn->extension_api.unpack_item = __wt_ext_unpack_item; + conn->extension_api.unpack_int = __wt_ext_unpack_int; + conn->extension_api.unpack_str = __wt_ext_unpack_str; + conn->extension_api.unpack_uint = __wt_ext_unpack_uint; + return (&conn->extension_api); } @@ -1681,6 +1694,7 @@ __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[]) { 
"fileops", WT_VERB_FILEOPS }, { "log", WT_VERB_LOG }, { "lsm", WT_VERB_LSM }, + { "lsm_manager", WT_VERB_LSM_MANAGER }, { "metadata", WT_VERB_METADATA }, { "mutex", WT_VERB_MUTEX }, { "overflow", WT_VERB_OVERFLOW }, diff --git a/src/third_party/wiredtiger/src/conn/conn_cache.c b/src/third_party/wiredtiger/src/conn/conn_cache.c index 1831aad5895..9a2c394e9a6 100644 --- a/src/third_party/wiredtiger/src/conn/conn_cache.c +++ b/src/third_party/wiredtiger/src/conn/conn_cache.c @@ -140,6 +140,12 @@ __wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_RET(__wt_cache_config(session, false, cfg)); /* + * The lowest possible page read-generation has a special meaning, it + * marks a page for forcible eviction; don't let it happen by accident. + */ + cache->read_gen = WT_READGEN_START_VALUE; + + /* * The target size must be lower than the trigger size or we will never * get any work done. */ @@ -147,8 +153,8 @@ __wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_ERR_MSG(session, EINVAL, "eviction target must be lower than the eviction trigger"); - WT_ERR(__wt_cond_alloc(session, - "cache eviction server", false, &cache->evict_cond)); + WT_ERR(__wt_cond_auto_alloc(session, "cache eviction server", + false, 10000, WT_MILLION, &cache->evict_cond)); WT_ERR(__wt_cond_alloc(session, "eviction waiters", false, &cache->evict_waiter_cond)); WT_ERR(__wt_spin_init(session, &cache->evict_lock, "cache eviction")); @@ -246,7 +252,7 @@ __wt_cache_destroy(WT_SESSION_IMPL *session) " bytes dirty and %" PRIu64 " pages dirty", cache->bytes_dirty, cache->pages_dirty); - WT_TRET(__wt_cond_destroy(session, &cache->evict_cond)); + WT_TRET(__wt_cond_auto_destroy(session, &cache->evict_cond)); WT_TRET(__wt_cond_destroy(session, &cache->evict_waiter_cond)); __wt_spin_destroy(session, &cache->evict_lock); __wt_spin_destroy(session, &cache->evict_walk_lock); diff --git a/src/third_party/wiredtiger/src/conn/conn_dhandle.c b/src/third_party/wiredtiger/src/conn/conn_dhandle.c 
index 2fab08e3afa..5019ab59fe3 100644 --- a/src/third_party/wiredtiger/src/conn/conn_dhandle.c +++ b/src/third_party/wiredtiger/src/conn/conn_dhandle.c @@ -129,16 +129,19 @@ __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, bool final, bool force) WT_BTREE *btree; WT_DATA_HANDLE *dhandle; WT_DECL_RET; - bool evict_reset, marked_dead, no_schema_lock; + bool marked_dead, no_schema_lock; btree = S2BT(session); bm = btree->bm; dhandle = session->dhandle; - evict_reset = marked_dead = false; + marked_dead = false; if (!F_ISSET(dhandle, WT_DHANDLE_OPEN)) return (0); + /* Turn off eviction. */ + WT_RET(__wt_evict_file_exclusive_on(session)); + /* * If we don't already have the schema lock, make it an error to try * to acquire it. The problem is that we are holding an exclusive @@ -160,13 +163,6 @@ __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, bool final, bool force) __wt_spin_lock(session, &dhandle->close_lock); /* - * Ensure we aren't racing with the eviction server; inside the close - * lock so threads won't race setting/clearing the tree's "no eviction" - * flag. - */ - WT_ERR(__wt_evict_file_exclusive_on(session, &evict_reset)); - - /* * The close can fail if an update cannot be written, return the EBUSY * error to our caller for eventual retry. 
* @@ -204,13 +200,13 @@ __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, bool final, bool force) F_ISSET(dhandle, WT_DHANDLE_DEAD) || !F_ISSET(dhandle, WT_DHANDLE_OPEN)); -err: if (evict_reset) - __wt_evict_file_exclusive_off(session); - __wt_spin_unlock(session, &dhandle->close_lock); +err: __wt_spin_unlock(session, &dhandle->close_lock); if (no_schema_lock) F_CLR(session, WT_SESSION_NO_SCHEMA_LOCK); + __wt_evict_file_exclusive_off(session); + return (ret); } diff --git a/src/third_party/wiredtiger/src/conn/conn_log.c b/src/third_party/wiredtiger/src/conn/conn_log.c index 5999cf20b3b..757d69bf240 100644 --- a/src/third_party/wiredtiger/src/conn/conn_log.c +++ b/src/third_party/wiredtiger/src/conn/conn_log.c @@ -142,6 +142,8 @@ __logmgr_config( } WT_RET(__logmgr_sync_cfg(session, cfg)); + if (conn->log_cond != NULL) + WT_RET(__wt_cond_auto_signal(session, conn->log_cond)); return (0); } @@ -468,7 +470,7 @@ __log_file_server(void *arg) locked = false; __wt_spin_unlock(session, &log->log_sync_lock); } else { - WT_ERR(__wt_cond_signal( + WT_ERR(__wt_cond_auto_signal( session, conn->log_wrlsn_cond)); /* * We do not want to wait potentially a second @@ -667,31 +669,54 @@ __log_wrlsn_server(void *arg) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; + WT_LOG *log; + WT_LSN prev; WT_SESSION_IMPL *session; int yield; + bool did_work; session = arg; conn = S2C(session); + log = conn->log; yield = 0; + WT_INIT_LSN(&prev); + did_work = false; while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) { /* - * Write out any log record buffers. + * Write out any log record buffers if anything was done + * since last time. Only call the function to walk the + * slots if the system is not idle. On an idle system + * the alloc_lsn will not advance and the written lsn will + * match the alloc_lsn. 
*/ - WT_ERR(__wt_log_wrlsn(session, &yield)); + if (__wt_log_cmp(&prev, &log->alloc_lsn) != 0 || + __wt_log_cmp(&log->write_lsn, &log->alloc_lsn) != 0) + WT_ERR(__wt_log_wrlsn(session, &yield)); + else + WT_STAT_FAST_CONN_INCR(session, log_write_lsn_skip); + prev = log->alloc_lsn; + if (yield == 0) + did_work = true; + else + did_work = false; /* * If __wt_log_wrlsn did work we want to yield instead of sleep. */ if (yield++ < WT_THOUSAND) __wt_yield(); else - WT_ERR(__wt_cond_wait( - session, conn->log_wrlsn_cond, 10000)); + /* + * Send in false because if we did any work we would + * not be on this path. + */ + WT_ERR(__wt_cond_auto_wait( + session, conn->log_wrlsn_cond, did_work)); } /* * On close we need to do this one more time because there could * be straggling log writes that need to be written. */ - WT_ERR(__wt_log_force_write(session, 1)); + WT_ERR(__wt_log_force_write(session, 1, NULL)); WT_ERR(__wt_log_wrlsn(session, NULL)); if (0) { err: __wt_err(session, ret, "log wrlsn server error"); @@ -706,12 +731,13 @@ err: __wt_err(session, ret, "log wrlsn server error"); static WT_THREAD_RET __log_server(void *arg) { + struct timespec start, now; WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_LOG *log; WT_SESSION_IMPL *session; - int freq_per_sec; - bool locked, signalled; + uint64_t timediff; + bool did_work, locked, signalled; session = arg; conn = S2C(session); @@ -719,11 +745,10 @@ __log_server(void *arg) locked = signalled = false; /* - * Set this to the number of times per second we want to force out the - * log slot buffer. + * Set this to the number of milliseconds we want to run archive and + * pre-allocation. Start it so that we run on the first time through. */ -#define WT_FORCE_PER_SECOND 20 - freq_per_sec = WT_FORCE_PER_SECOND; + timediff = WT_THOUSAND; /* * The log server thread does a variety of work. 
It forces out any @@ -736,6 +761,7 @@ __log_server(void *arg) * don't want log records sitting in the buffer over the time it * takes to sync out an earlier file. */ + did_work = true; while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) { /* * Slots depend on future activity. Force out buffered @@ -744,15 +770,14 @@ __log_server(void *arg) * and a buffer may need to wait for the write_lsn to advance * in the case of a synchronous buffer. We end up with a hang. */ - WT_ERR_BUSY_OK(__wt_log_force_write(session, 0)); + WT_ERR_BUSY_OK(__wt_log_force_write(session, 0, &did_work)); /* * We don't want to archive or pre-allocate files as often as * we want to force out log buffers. Only do it once per second * or if the condition was signalled. */ - if (--freq_per_sec <= 0 || signalled) { - freq_per_sec = WT_FORCE_PER_SECOND; + if (timediff >= WT_THOUSAND || signalled) { /* * Perform log pre-allocation. @@ -793,8 +818,12 @@ __log_server(void *arg) } /* Wait until the next event. */ - WT_ERR(__wt_cond_wait_signal(session, conn->log_cond, - WT_MILLION / WT_FORCE_PER_SECOND, &signalled)); + + WT_ERR(__wt_epoch(session, &start)); + WT_ERR(__wt_cond_auto_wait_signal(session, conn->log_cond, + did_work, &signalled)); + WT_ERR(__wt_epoch(session, &now)); + timediff = WT_TIMEDIFF_MS(now, start); } if (0) { @@ -906,8 +935,9 @@ __wt_logmgr_open(WT_SESSION_IMPL *session) */ WT_RET(__wt_open_internal_session(conn, "log-wrlsn-server", false, session_flags, &conn->log_wrlsn_session)); - WT_RET(__wt_cond_alloc(conn->log_wrlsn_session, - "log write lsn server", false, &conn->log_wrlsn_cond)); + WT_RET(__wt_cond_auto_alloc(conn->log_wrlsn_session, + "log write lsn server", false, 10000, WT_MILLION, + &conn->log_wrlsn_cond)); WT_RET(__wt_thread_create(conn->log_wrlsn_session, &conn->log_wrlsn_tid, __log_wrlsn_server, conn->log_wrlsn_session)); conn->log_wrlsn_tid_set = true; @@ -921,13 +951,13 @@ __wt_logmgr_open(WT_SESSION_IMPL *session) if (conn->log_session != NULL) { WT_ASSERT(session, 
conn->log_cond != NULL); WT_ASSERT(session, conn->log_tid_set == true); - WT_RET(__wt_cond_signal(session, conn->log_cond)); + WT_RET(__wt_cond_auto_signal(session, conn->log_cond)); } else { /* The log server gets its own session. */ WT_RET(__wt_open_internal_session(conn, "log-server", false, session_flags, &conn->log_session)); - WT_RET(__wt_cond_alloc(conn->log_session, - "log server", false, &conn->log_cond)); + WT_RET(__wt_cond_auto_alloc(conn->log_session, + "log server", false, 50000, WT_MILLION, &conn->log_cond)); /* * Start the thread. @@ -963,7 +993,7 @@ __wt_logmgr_destroy(WT_SESSION_IMPL *session) return (0); } if (conn->log_tid_set) { - WT_TRET(__wt_cond_signal(session, conn->log_cond)); + WT_TRET(__wt_cond_auto_signal(session, conn->log_cond)); WT_TRET(__wt_thread_join(session, conn->log_tid)); conn->log_tid_set = false; } @@ -978,7 +1008,7 @@ __wt_logmgr_destroy(WT_SESSION_IMPL *session) conn->log_file_session = NULL; } if (conn->log_wrlsn_tid_set) { - WT_TRET(__wt_cond_signal(session, conn->log_wrlsn_cond)); + WT_TRET(__wt_cond_auto_signal(session, conn->log_wrlsn_cond)); WT_TRET(__wt_thread_join(session, conn->log_wrlsn_tid)); conn->log_wrlsn_tid_set = false; } @@ -999,9 +1029,9 @@ __wt_logmgr_destroy(WT_SESSION_IMPL *session) } /* Destroy the condition variables now that all threads are stopped */ - WT_TRET(__wt_cond_destroy(session, &conn->log_cond)); + WT_TRET(__wt_cond_auto_destroy(session, &conn->log_cond)); WT_TRET(__wt_cond_destroy(session, &conn->log_file_cond)); - WT_TRET(__wt_cond_destroy(session, &conn->log_wrlsn_cond)); + WT_TRET(__wt_cond_auto_destroy(session, &conn->log_wrlsn_cond)); WT_TRET(__wt_cond_destroy(session, &conn->log->log_sync_cond)); WT_TRET(__wt_cond_destroy(session, &conn->log->log_write_cond)); diff --git a/src/third_party/wiredtiger/src/conn/conn_open.c b/src/third_party/wiredtiger/src/conn/conn_open.c index 58577b4587d..aff422654d7 100644 --- a/src/third_party/wiredtiger/src/conn/conn_open.c +++ 
b/src/third_party/wiredtiger/src/conn/conn_open.c @@ -210,10 +210,8 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) /* * If hash arrays were allocated, free them now. */ - if (s->dhhash != NULL) - __wt_free(session, s->dhhash); - if (s->tablehash != NULL) - __wt_free(session, s->tablehash); + __wt_free(session, s->dhhash); + __wt_free(session, s->tablehash); __wt_free(session, s->hazard); } diff --git a/src/third_party/wiredtiger/src/cursor/cur_backup.c b/src/third_party/wiredtiger/src/cursor/cur_backup.c index b097a8c08aa..2fb0c464a76 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_backup.c +++ b/src/third_party/wiredtiger/src/cursor/cur_backup.c @@ -140,8 +140,9 @@ __wt_curbackup_open(WT_SESSION_IMPL *session, * Start the backup and fill in the cursor's list. Acquire the schema * lock, we need a consistent view when creating a copy. */ - WT_WITH_SCHEMA_LOCK(session, ret, - ret = __backup_start(session, cb, cfg)); + WT_WITH_CHECKPOINT_LOCK(session, ret, + WT_WITH_SCHEMA_LOCK(session, ret, + ret = __backup_start(session, cb, cfg))); WT_ERR(ret); /* __wt_cursor_init is last so we don't have to clean up on error. */ diff --git a/src/third_party/wiredtiger/src/cursor/cur_join.c b/src/third_party/wiredtiger/src/cursor/cur_join.c index fa6dd5c32f7..38a83217933 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_join.c +++ b/src/third_party/wiredtiger/src/cursor/cur_join.c @@ -8,6 +8,9 @@ #include "wt_internal.h" +static int __curjoin_insert_endpoint(WT_SESSION_IMPL *, + WT_CURSOR_JOIN_ENTRY *, u_int, WT_CURSOR_JOIN_ENDPOINT **); + /* * __curjoin_entry_iter_init -- * Initialize an iteration for the index managed by a join entry. 
@@ -17,42 +20,46 @@ static int __curjoin_entry_iter_init(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, WT_CURSOR_JOIN_ENTRY *entry, WT_CURSOR_JOIN_ITER **iterp) { - WT_CURSOR *newcur; WT_CURSOR *to_dup; WT_DECL_RET; const char *raw_cfg[] = { WT_CONFIG_BASE( session, WT_SESSION_open_cursor), "raw", NULL }; const char *def_cfg[] = { WT_CONFIG_BASE( session, WT_SESSION_open_cursor), NULL }; - const char *uri, **config; - char *uribuf; + const char *urimain, **config; + char *mainbuf, *uri; WT_CURSOR_JOIN_ITER *iter; size_t size; iter = NULL; - uribuf = NULL; + mainbuf = uri = NULL; to_dup = entry->ends[0].cursor; - uri = to_dup->uri; if (F_ISSET((WT_CURSOR *)cjoin, WT_CURSTD_RAW)) config = &raw_cfg[0]; else config = &def_cfg[0]; + size = strlen(to_dup->internal_uri) + 3; + WT_ERR(__wt_calloc(session, size, 1, &uri)); + snprintf(uri, size, "%s()", to_dup->internal_uri); + urimain = cjoin->table->name; if (cjoin->projection != NULL) { - size = strlen(uri) + strlen(cjoin->projection) + 1; - WT_ERR(__wt_calloc(session, size, 1, &uribuf)); - snprintf(uribuf, size, "%s%s", uri, cjoin->projection); - uri = uribuf; + size = strlen(urimain) + strlen(cjoin->projection) + 1; + WT_ERR(__wt_calloc(session, size, 1, &mainbuf)); + snprintf(mainbuf, size, "%s%s", urimain, cjoin->projection); + urimain = mainbuf; } - WT_ERR(__wt_open_cursor(session, uri, (WT_CURSOR *)cjoin, config, - &newcur)); - WT_ERR(__wt_cursor_dup_position(to_dup, newcur)); + WT_ERR(__wt_calloc_one(session, &iter)); + WT_ERR(__wt_open_cursor(session, uri, (WT_CURSOR *)cjoin, config, + &iter->cursor)); + WT_ERR(__wt_cursor_dup_position(to_dup, iter->cursor)); + WT_ERR(__wt_open_cursor(session, urimain, (WT_CURSOR *)cjoin, config, + &iter->main)); iter->cjoin = cjoin; iter->session = session; iter->entry = entry; - iter->cursor = newcur; iter->positioned = false; iter->isequal = (entry->ends_next == 1 && WT_CURJOIN_END_RANGE(&entry->ends[0]) == WT_CURJOIN_END_EQ); @@ -61,7 +68,8 @@ 
__curjoin_entry_iter_init(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, if (0) { err: __wt_free(session, iter); } - __wt_free(session, uribuf); + __wt_free(session, mainbuf); + __wt_free(session, uri); return (ret); } @@ -87,45 +95,80 @@ __curjoin_pack_recno(WT_SESSION_IMPL *session, uint64_t r, uint8_t *buf, } /* - * __curjoin_entry_iter_next -- - * Get the next item in an iteration. + * __curjoin_split_key -- + * Copy the primary key from a cursor (either main table or index) + * to another cursor. When copying from an index file, the index + * key is also returned. * */ static int -__curjoin_entry_iter_next(WT_CURSOR_JOIN_ITER *iter, WT_ITEM *primkey, - uint64_t *rp) +__curjoin_split_key(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, + WT_ITEM *idxkey, WT_CURSOR *tocur, WT_CURSOR *fromcur, + const char *repack_fmt, bool isindex) { WT_CURSOR *firstcg_cur; - WT_CURSOR_JOIN *cjoin; - WT_SESSION_IMPL *session; - uint64_t r; + WT_CURSOR_INDEX *cindex; + WT_ITEM *keyp; + const uint8_t *p; + + if (isindex) { + cindex = ((WT_CURSOR_INDEX *)fromcur); + /* + * Repack tells us where the index key ends; advance past + * that to get where the raw primary key starts. + */ + WT_RET(__wt_struct_repack(session, cindex->child->key_format, + repack_fmt != NULL ? 
repack_fmt : cindex->iface.key_format, + &cindex->child->key, idxkey)); + WT_ASSERT(session, cindex->child->key.size > idxkey->size); + tocur->key.data = (uint8_t *)idxkey->data + idxkey->size; + tocur->key.size = cindex->child->key.size - idxkey->size; + if (WT_CURSOR_RECNO(tocur)) { + p = (const uint8_t *)tocur->key.data; + WT_RET(__wt_vunpack_uint(&p, tocur->key.size, + &tocur->recno)); + } else + tocur->recno = 0; + } else { + firstcg_cur = ((WT_CURSOR_TABLE *)fromcur)->cg_cursors[0]; + keyp = &firstcg_cur->key; + if (WT_CURSOR_RECNO(tocur)) { + WT_ASSERT(session, keyp->size == sizeof(uint64_t)); + tocur->recno = *(uint64_t *)keyp->data; + WT_RET(__curjoin_pack_recno(session, tocur->recno, + cjoin->recno_buf, sizeof(cjoin->recno_buf), + &tocur->key)); + } else { + WT_ITEM_SET(tocur->key, *keyp); + tocur->recno = 0; + } + idxkey->data = NULL; + idxkey->size = 0; + } + return (0); +} +/* + * __curjoin_entry_iter_next -- + * Get the next item in an iteration. + * + */ +static int +__curjoin_entry_iter_next(WT_CURSOR_JOIN_ITER *iter, WT_CURSOR *cursor) +{ if (iter->positioned) WT_RET(iter->cursor->next(iter->cursor)); else iter->positioned = true; - session = iter->session; - cjoin = iter->cjoin; - /* * Set our key to the primary key, we'll also need this * to check membership. 
*/ - if (iter->entry->index != NULL) - firstcg_cur = ((WT_CURSOR_INDEX *)iter->cursor)->cg_cursors[0]; - else - firstcg_cur = ((WT_CURSOR_TABLE *)iter->cursor)->cg_cursors[0]; - if (WT_CURSOR_RECNO(&cjoin->iface)) { - r = *(uint64_t *)firstcg_cur->key.data; - WT_RET(__curjoin_pack_recno(session, r, cjoin->recno_buf, - sizeof(cjoin->recno_buf), primkey)); - *rp = r; - } else { - WT_ITEM_SET(*primkey, firstcg_cur->key); - *rp = 0; - } - iter->curkey = primkey; + WT_RET(__curjoin_split_key(iter->session, iter->cjoin, &iter->idxkey, + cursor, iter->cursor, iter->entry->repack_format, + iter->entry->index != NULL)); + iter->curkey = &cursor->key; iter->entry->stats.actual_count++; iter->entry->stats.accesses++; return (0); @@ -141,6 +184,7 @@ __curjoin_entry_iter_reset(WT_CURSOR_JOIN_ITER *iter) { if (iter->positioned) { WT_RET(iter->cursor->reset(iter->cursor)); + WT_RET(iter->main->reset(iter->main)); WT_RET(__wt_cursor_dup_position( iter->cjoin->entries[0].ends[0].cursor, iter->cursor)); iter->positioned = false; @@ -172,6 +216,8 @@ __curjoin_entry_iter_close(WT_CURSOR_JOIN_ITER *iter) if (iter->cursor != NULL) WT_TRET(iter->cursor->close(iter->cursor)); + if (iter->main != NULL) + WT_TRET(iter->main->close(iter->main)); __wt_free(iter->session, iter); return (ret); @@ -227,10 +273,8 @@ __curjoin_get_value(WT_CURSOR *cursor, ...) 
!__curjoin_entry_iter_ready(iter)) WT_ERR_MSG(session, EINVAL, "join cursor must be advanced with next()"); - if (iter->entry->index != NULL) - WT_ERR(__wt_curindex_get_valuev(iter->cursor, ap)); - else - WT_ERR(__wt_curtable_get_valuev(iter->cursor, ap)); + + WT_ERR(__wt_curtable_get_valuev(iter->main, ap)); err: va_end(ap); API_END_RET(session, ret); @@ -246,41 +290,26 @@ __curjoin_init_bloom(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, { WT_COLLATOR *collator; WT_CURSOR *c; - WT_CURSOR_INDEX *cindex; WT_CURSOR_JOIN_ENDPOINT *end, *endmax; WT_DECL_RET; WT_DECL_ITEM(uribuf); WT_ITEM curkey, curvalue; - WT_TABLE *maintable; const char *raw_cfg[] = { WT_CONFIG_BASE( session, WT_SESSION_open_cursor), "raw", NULL }; - const char *mainkey_str, *p; - size_t mainkey_len, size; - u_int i; + const char *uri; + size_t size; int cmp, skip; c = NULL; skip = 0; - if (entry->index != NULL) { + if (entry->index != NULL) /* - * Open a cursor having a projection of the keys of the - * index we're comparing against. Open it raw, we're - * going to compare it to the raw keys of the - * reference cursors. + * Open the raw index. We're avoiding any references + * to the main table, they may be expensive. */ - maintable = ((WT_CURSOR_TABLE *)entry->main)->table; - mainkey_str = maintable->colconf.str + 1; - for (p = mainkey_str, i = 0; - p != NULL && i < maintable->nkey_columns; i++) - p = strchr(p + 1, ','); - WT_ASSERT(session, p != 0); - mainkey_len = WT_PTRDIFF(p, mainkey_str); - size = strlen(entry->index->name) + mainkey_len + 3; - WT_ERR(__wt_scr_alloc(session, size, &uribuf)); - WT_ERR(__wt_buf_fmt(session, uribuf, "%s(%.*s)", - entry->index->name, (int)mainkey_len, mainkey_str)); - } else { + uri = entry->index->source; + else { /* * For joins on the main table, we just need the primary * key for comparison, we don't need any values. 
@@ -289,32 +318,38 @@ __curjoin_init_bloom(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, WT_ERR(__wt_scr_alloc(session, size, &uribuf)); WT_ERR(__wt_buf_fmt(session, uribuf, "%s()", cjoin->table->name)); + uri = uribuf->data; } - WT_ERR(__wt_open_cursor( - session, uribuf->data, &cjoin->iface, raw_cfg, &c)); + WT_ERR(__wt_open_cursor(session, uri, &cjoin->iface, raw_cfg, &c)); /* Initially position the cursor if necessary. */ endmax = &entry->ends[entry->ends_next]; - if ((end = &entry->ends[0]) < endmax && - F_ISSET(end, WT_CURJOIN_END_GE)) { - WT_ERR(__wt_cursor_dup_position(end->cursor, c)); - if (WT_CURJOIN_END_RANGE(end) == WT_CURJOIN_END_GE) - skip = 1; + if ((end = &entry->ends[0]) < endmax) { + if (F_ISSET(end, WT_CURJOIN_END_GT) || + WT_CURJOIN_END_RANGE(end) == WT_CURJOIN_END_EQ) { + WT_ERR(__wt_cursor_dup_position(end->cursor, c)); + if (WT_CURJOIN_END_RANGE(end) == WT_CURJOIN_END_GE) + skip = 1; + } else if (F_ISSET(end, WT_CURJOIN_END_LT)) { + if ((ret = c->next(c)) == WT_NOTFOUND) + goto done; + WT_ERR(ret); + } else + WT_ERR(__wt_illegal_value(session, NULL)); } collator = (entry->index == NULL) ? NULL : entry->index->collator; while (ret == 0) { WT_ERR(c->get_key(c, &curkey)); if (entry->index != NULL) { - cindex = (WT_CURSOR_INDEX *)c; /* * Repack so it's comparable to the * reference endpoints. */ WT_ERR(__wt_struct_repack(session, - cindex->child->key_format, + c->key_format, (entry->repack_format != NULL ? 
- entry->repack_format : cindex->iface.key_format), - &cindex->child->key, &curkey)); + entry->repack_format : entry->index->idxkey_format), + &c->key, &curkey)); } for (end = &entry->ends[skip]; end < endmax; end++) { WT_ERR(__wt_compare(session, collator, &curkey, @@ -335,8 +370,12 @@ __curjoin_init_bloom(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, goto done; } } - if (entry->index != NULL) - WT_ERR(c->get_value(c, &curvalue)); + if (entry->index != NULL) { + curvalue.data = + (unsigned char *)curkey.data + curkey.size; + WT_ASSERT(session, c->key.size > curkey.size); + curvalue.size = c->key.size - curkey.size; + } else WT_ERR(c->get_key(c, &curvalue)); WT_ERR(__wt_bloom_insert(bloom, &curvalue)); @@ -401,8 +440,13 @@ __curjoin_init_iter(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin) { WT_BLOOM *bloom; WT_DECL_RET; + WT_CURSOR *origcur; WT_CURSOR_JOIN_ENTRY *je, *jeend, *je2; WT_CURSOR_JOIN_ENDPOINT *end; + const char *def_cfg[] = { WT_CONFIG_BASE( + session, WT_SESSION_open_cursor), NULL }; + const char *raw_cfg[] = { WT_CONFIG_BASE( + session, WT_SESSION_open_cursor), "raw", NULL }; uint32_t f, k; if (cjoin->entries_next == 0) @@ -411,9 +455,27 @@ __curjoin_init_iter(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin) "cursors"); je = &cjoin->entries[0]; + jeend = &cjoin->entries[cjoin->entries_next]; + + /* + * For a single compare=le endpoint in the first iterated entry, + * construct a companion compare=ge endpoint that will actually + * be iterated. + */ + if (((je = cjoin->entries) != jeend) && + je->ends_next == 1 && F_ISSET(&je->ends[0], WT_CURJOIN_END_LT)) { + origcur = je->ends[0].cursor; + WT_RET(__curjoin_insert_endpoint(session, je, 0, &end)); + WT_RET(__wt_open_cursor(session, origcur->uri, + (WT_CURSOR *)cjoin, + F_ISSET(origcur, WT_CURSTD_RAW) ? 
raw_cfg : def_cfg, + &end->cursor)); + WT_RET(end->cursor->next(end->cursor)); + end->flags = WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ | + WT_CURJOIN_END_OWN_CURSOR; + } WT_RET(__curjoin_entry_iter_init(session, cjoin, je, &cjoin->iter)); - jeend = &cjoin->entries[cjoin->entries_next]; for (je = cjoin->entries; je < jeend; je++) { __wt_stat_join_init_single(&je->stats); for (end = &je->ends[0]; end < &je->ends[je->ends_next]; @@ -431,6 +493,10 @@ __curjoin_init_iter(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin) F_SET(cjoin, WT_CURJOIN_SKIP_FIRST_LEFT); if (F_ISSET(je, WT_CURJOIN_ENTRY_BLOOM)) { + if (session->txn.isolation == WT_ISO_READ_UNCOMMITTED) + WT_RET_MSG(session, EINVAL, + "join cursors with Bloom filters cannot be " + "used with read-uncommitted isolation"); if (je->bloom == NULL) { /* * Look for compatible filters to be shared, @@ -604,6 +670,8 @@ __curjoin_entry_member(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, WT_ITEM *key, v; bool bloom_found; + if (skip_left && entry->ends_next == 1) + return (0); /* no checks to make */ key = cjoin->iter->curkey; entry->stats.accesses++; bloom_found = false; @@ -626,20 +694,30 @@ __curjoin_entry_member(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, bloom_found = true; } if (entry->index != NULL) { - memset(&v, 0, sizeof(v)); /* Keep lint quiet. */ - c = entry->main; - c->set_key(c, key); - if ((ret = c->search(c)) == 0) - ret = c->get_value(c, &v); - else if (ret == WT_NOTFOUND) - WT_ERR_MSG(session, WT_ERROR, - "main table for join is missing entry."); - WT_TRET(c->reset(c)); - WT_ERR(ret); + /* + * If this entry is used by the iterator, then we already + * have the index key, and we won't have to do any extraction + * either. + */ + if (entry == cjoin->iter->entry) + WT_ITEM_SET(v, cjoin->iter->idxkey); + else { + memset(&v, 0, sizeof(v)); /* Keep lint quiet. 
*/ + c = entry->main; + c->set_key(c, key); + if ((ret = c->search(c)) == 0) + ret = c->get_value(c, &v); + else if (ret == WT_NOTFOUND) + WT_ERR_MSG(session, WT_ERROR, + "main table for join is missing entry"); + WT_TRET(c->reset(c)); + WT_ERR(ret); + } } else - v = *key; + WT_ITEM_SET(v, *key); - if ((idx = entry->index) != NULL && idx->extractor != NULL) { + if ((idx = entry->index) != NULL && idx->extractor != NULL && + entry != cjoin->iter->entry) { WT_CLEAR(extract_cursor); extract_cursor.iface = iface; extract_cursor.iface.session = &session->iface; @@ -667,7 +745,9 @@ err: if (ret == WT_NOTFOUND && bloom_found) static int __curjoin_next(WT_CURSOR *cursor) { + WT_CURSOR *c; WT_CURSOR_JOIN *cjoin; + WT_CURSOR_JOIN_ITER *iter; WT_DECL_RET; WT_SESSION_IMPL *session; bool skip_left; @@ -683,9 +763,11 @@ __curjoin_next(WT_CURSOR *cursor) if (!F_ISSET(cjoin, WT_CURJOIN_INITIALIZED)) WT_ERR(__curjoin_init_iter(session, cjoin)); + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + iter = cjoin->iter; + nextkey: - if ((ret = __curjoin_entry_iter_next(cjoin->iter, &cursor->key, - &cursor->recno)) == 0) { + if ((ret = __curjoin_entry_iter_next(iter, cursor)) == 0) { F_SET(cursor, WT_CURSTD_KEY_EXT); /* @@ -702,13 +784,26 @@ nextkey: * If this is compare=eq on our outer iterator, * and we've moved past it, we're done. */ - if (cjoin->iter->isequal && i == 0) + if (iter->isequal && i == 0) break; goto nextkey; } skip_left = false; WT_ERR(ret); } + } else if (ret != WT_NOTFOUND) + WT_ERR(ret); + + if (ret == 0) { + /* + * Position the 'main' cursor, this will be used to + * retrieve values from the cursor join. 
+ */ + c = iter->main; + c->set_key(c, iter->curkey); + if ((ret = c->search(c)) != 0) + WT_ERR(c->search(c)); + F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); } if (0) { @@ -772,8 +867,11 @@ __curjoin_close(WT_CURSOR *cursor) if (F_ISSET(entry, WT_CURJOIN_ENTRY_OWN_BLOOM)) WT_TRET(__wt_bloom_close(entry->bloom)); for (end = &entry->ends[0]; - end < &entry->ends[entry->ends_next]; end++) + end < &entry->ends[entry->ends_next]; end++) { F_CLR(end->cursor, WT_CURSTD_JOINED); + if (F_ISSET(end, WT_CURJOIN_END_OWN_CURSOR)) + WT_TRET(end->cursor->close(end->cursor)); + } __wt_free(session, entry->ends); __wt_free(session, entry->repack_format); } @@ -879,7 +977,7 @@ __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, uint64_t count, uint32_t bloom_bit_count, uint32_t bloom_hash_count) { WT_CURSOR_INDEX *cindex; - WT_CURSOR_JOIN_ENDPOINT *end, *newend; + WT_CURSOR_JOIN_ENDPOINT *end; WT_CURSOR_JOIN_ENTRY *entry; WT_DECL_RET; bool hasins, needbloom, range_eq; @@ -1000,17 +1098,10 @@ __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, entry->bloom_hash_count = WT_MAX(entry->bloom_hash_count, bloom_hash_count); } - WT_ERR(__wt_realloc_def(session, &entry->ends_allocated, - entry->ends_next + 1, &entry->ends)); - if (!hasins) - ins = entry->ends_next; - newend = &entry->ends[ins]; - memmove(newend + 1, newend, - (entry->ends_next - ins) * sizeof(WT_CURSOR_JOIN_ENDPOINT)); - memset(newend, 0, sizeof(WT_CURSOR_JOIN_ENDPOINT)); - entry->ends_next++; - newend->cursor = ref_cursor; - F_SET(newend, range); + WT_ERR(__curjoin_insert_endpoint(session, entry, + hasins ? ins : entry->ends_next, &end)); + end->cursor = ref_cursor; + F_SET(end, range); /* Open the main file with a projection of the indexed columns. 
*/ if (entry->main == NULL && idx != NULL) { @@ -1049,7 +1140,28 @@ __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, cindex->iface.key_format); } -err: if (main_uri != NULL) - __wt_free(session, main_uri); +err: __wt_free(session, main_uri); return (ret); } + +/* + * __curjoin_insert_endpoint -- + * Insert a new entry into the endpoint array for the join entry. + */ +static int +__curjoin_insert_endpoint(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry, + u_int pos, WT_CURSOR_JOIN_ENDPOINT **newendp) +{ + WT_CURSOR_JOIN_ENDPOINT *newend; + + WT_RET(__wt_realloc_def(session, &entry->ends_allocated, + entry->ends_next + 1, &entry->ends)); + newend = &entry->ends[pos]; + memmove(newend + 1, newend, + (entry->ends_next - pos) * sizeof(WT_CURSOR_JOIN_ENDPOINT)); + memset(newend, 0, sizeof(WT_CURSOR_JOIN_ENDPOINT)); + entry->ends_next++; + *newendp = newend; + + return (0); +} diff --git a/src/third_party/wiredtiger/src/cursor/cur_log.c b/src/third_party/wiredtiger/src/cursor/cur_log.c index 47436ac7237..0a13803da5d 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_log.c +++ b/src/third_party/wiredtiger/src/cursor/cur_log.c @@ -397,7 +397,7 @@ __wt_curlog_open(WT_SESSION_IMPL *session, * The user may be trying to read a log record they just wrote. * Log records may be buffered, so force out any now. */ - WT_ERR(__wt_log_force_write(session, 1)); + WT_ERR(__wt_log_force_write(session, 1, NULL)); /* Log cursors block archiving. 
*/ WT_ERR(__wt_readlock(session, log->log_archive_lock)); diff --git a/src/third_party/wiredtiger/src/cursor/cur_metadata.c b/src/third_party/wiredtiger/src/cursor/cur_metadata.c index df2cc3f546e..3d702e2ea8c 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_metadata.c +++ b/src/third_party/wiredtiger/src/cursor/cur_metadata.c @@ -31,6 +31,58 @@ } while (0) /* + * __wt_schema_create_final -- + * Create a single configuration line from a set of configuration strings, + * including all of the defaults declared for a session.create, and stripping + * any configuration strings that don't belong in a session.create. Here for + * the wt dump command utility, which reads a set of configuration strings and + * needs to add in the defaults and then collapse them into a single string for + * a subsequent load. + */ +int +__wt_schema_create_final( + WT_SESSION_IMPL *session, char *cfg_arg[], char **value_ret) +{ + WT_DECL_RET; + u_int i; + const char **cfg; + + /* + * Count the entries in the original, + * Allocate a copy with the defaults as the first entry, + * Collapse the whole thing into a single configuration string (which + * also strips any entries that don't appear in the first entry). + */ + for (i = 0; cfg_arg[i] != NULL; ++i) + ; + WT_RET(__wt_calloc_def(session, i + 2, &cfg)); + cfg[0] = WT_CONFIG_BASE(session, WT_SESSION_create); + for (i = 0; cfg_arg[i] != NULL; ++i) + cfg[i + 1] = cfg_arg[i]; + cfg[i + 1] = NULL; + + ret = __wt_config_collapse(session, cfg, value_ret); + + __wt_free(session, cfg); + return (ret); +} + +/* + * __schema_create_strip -- + * Discard any configuration information from a schema entry that is not + * applicable to a session.create call. Here for the metadata:create URI. 
+ */ +static int +__schema_create_strip( + WT_SESSION_IMPL *session, const char *value, char **value_ret) +{ + const char *cfg[] = + { WT_CONFIG_BASE(session, WT_SESSION_create), value, NULL }; + + return (__wt_config_collapse(session, cfg, value_ret)); +} + +/* * __curmetadata_setkv -- * Copy key/value into the public cursor, stripping internal metadata for * "create-only" cursors. @@ -49,8 +101,7 @@ __curmetadata_setkv(WT_CURSOR_METADATA *mdc, WT_CURSOR *fc) c->key.data = fc->key.data; c->key.size = fc->key.size; if (F_ISSET(mdc, WT_MDC_CREATEONLY)) { - WT_RET(__wt_schema_create_strip( - session, fc->value.data, NULL, &value)); + WT_RET(__schema_create_strip(session, fc->value.data, &value)); ret = __wt_buf_set( session, &c->value, value, strlen(value) + 1); __wt_free(session, value); @@ -92,8 +143,7 @@ __curmetadata_metadata_search(WT_SESSION_IMPL *session, WT_CURSOR *cursor) WT_RET(__wt_metadata_search(session, WT_METAFILE_URI, &value)); if (F_ISSET(mdc, WT_MDC_CREATEONLY)) { - ret = __wt_schema_create_strip( - session, value, NULL, &stripped); + ret = __schema_create_strip(session, value, &stripped); __wt_free(session, value); WT_RET(ret); value = stripped; diff --git a/src/third_party/wiredtiger/src/cursor/cur_stat.c b/src/third_party/wiredtiger/src/cursor/cur_stat.c index 34e64b34ccb..f7a8f5fc866 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_stat.c +++ b/src/third_party/wiredtiger/src/cursor/cur_stat.c @@ -207,6 +207,8 @@ __curstat_next(WT_CURSOR *cursor) if (cst->notpositioned) { cst->notpositioned = false; cst->key = WT_STAT_KEY_MIN(cst); + if (cst->next_set != NULL) + WT_ERR((*cst->next_set)(session, cst, true, true)); } else if (cst->key < WT_STAT_KEY_MAX(cst)) ++cst->key; else if (cst->next_set != NULL) @@ -249,6 +251,8 @@ __curstat_prev(WT_CURSOR *cursor) if (cst->notpositioned) { cst->notpositioned = false; cst->key = WT_STAT_KEY_MAX(cst); + if (cst->next_set != NULL) + WT_ERR((*cst->next_set)(session, cst, false, true)); } else if (cst->key 
> WT_STAT_KEY_MIN(cst)) --cst->key; else if (cst->next_set != NULL) @@ -558,9 +562,6 @@ __wt_curstat_init(WT_SESSION_IMPL *session, else return (__wt_bad_object_type(session, uri)); - if (cst->next_set != NULL) - WT_RET((*cst->next_set)(session, cst, false, true)); - return (0); } diff --git a/src/third_party/wiredtiger/src/docs/command-line.dox b/src/third_party/wiredtiger/src/docs/command-line.dox index e2b376d5e3f..0f5c56d25ce 100644 --- a/src/third_party/wiredtiger/src/docs/command-line.dox +++ b/src/third_party/wiredtiger/src/docs/command-line.dox @@ -41,7 +41,7 @@ by default and commands that only read data will not run recovery. Perform a backup of a database or set of data sources. The \c backup command performs a backup of the database, copying the -database files to a \c specified directory, which can be subsequently +underlying files to a \c specified directory, which can be subsequently opened as a WiredTiger database. See @ref backup for more information, and @ref file_permissions for specifics on the copied file permissions. @@ -58,10 +58,10 @@ the named data sources. <hr> @section util_compact wt compact -Compact a table or file. +Compact a table. -The \c compact command attempts to rewrite the specified table or file -to consume less disk space. +The \c compact command attempts to rewrite the specified table to +consume less disk space. @subsection util_compact_synopsis Synopsis <code>wt [-RVv] [-C config] [-E secretkey ] [-h directory] compact uri</code> @@ -71,7 +71,7 @@ The \c compact command has no command-specific options. <hr> @section util_create wt create -Create a table or file. +Create a table. The \c create command creates the specified \c uri with the specified configuration. It is equivalent to a call to WT_SESSION::create with @@ -88,7 +88,7 @@ Include a configuration string to be passed to WT_SESSION::create. <hr> @section util_drop wt drop -Drop a table or file. +Drop a table. The \c drop command drops the specified \c uri. 
It is equivalent to a call to WT_SESSION::drop with the "force" configuration argument. @@ -136,10 +136,10 @@ printable characters unencoded). <hr> @section util_list wt list -List the tables and files in the database. +List the tables in the database. -By default, the \c list command prints out the tables and files stored in -the database. If a URI is specified as an argument, only information about +By default, the \c list command prints out the tables stored in the +database. If a URI is specified as an argument, only information about that data source is printed. @subsection util_list_synopsis Synopsis @@ -158,16 +158,16 @@ value is printed. <hr> @section util_load wt load -Load a table or file from dump output. +Load a table from dump output. The \c load command reads the standard input for data and loads it into -a table or file, creating the table or file if it does not yet exist. -The data should be the format produced by the \c dump command; see -@ref dump_formats for details. +a table, creating the table if it does not yet exist. The data should +be the format produced by the \c dump command; see @ref dump_formats for +details. -By default, if the table or file already exists, data in the file or -table will be overwritten by the new data (use the \c -n option to -make an attempt to overwrite existing data return an error). +By default, if the table already exists, data in the table will be +overwritten by the new data (use the \c -n option to make an attempt to +overwrite existing data return an error). @subsection util_load_synopsis Synopsis <code>wt [-RVv] [-C config] [-E secretkey ] [-h directory] load [-ajn] [-f input] [-r name] [uri configuration ...]</code> @@ -182,8 +182,8 @@ number keys. The \c -a option is only applicable when loading into a column store. @par <code>-f</code> -By default, the \c load command reads from the standard input; the \c --f option reads the input from the specified file. 
+By default, the \c load command reads from the standard input; the \c -f +option reads the input from the specified file. @par <code>-j</code> Load input in the JSON (<a href="http://www.json.org">JavaScript Object @@ -196,7 +196,7 @@ load command to fail if there's an attempt to overwrite already existing data. @par <code>-r</code> -By default, the \c load command uses the table or file name taken from the +By default, the \c load command uses the table name taken from the input; the \c -r option renames the data source. Additionally, \c uri and \c configuration pairs may be specified to the @@ -227,24 +227,23 @@ table:xxx block_allocation=first table:xxx prefix_compress=false <hr> @section util_loadtext wt loadtext -Load text into a table or file. +Load text into a table. The \c loadtext command reads the standard input for text and loads it -into a table or file. The input data should be printable characters, -with newline delimiters for each key or value. +into a table. The input data should be printable characters, with +newline delimiters for each key or value. -The \c loadtext command does not create the file if it does not yet +The \c loadtext command does not create the object if it does not yet exist. -In the case of inserting values into a column-store table or file, each -value is appended to the table or file; in the case of inserting values -into a row-store table or file, lines are handled in pairs, where the -first line is the key and the second line is the value. If the -row-store table or file already exists, data in the table or file will -be overwritten by the new data. +In the case of inserting values into a column-store table, each value +is appended to the table; in the case of inserting values into a +row-store table, lines are handled in pairs, where the first line is the +key and the second line is the value. If the row-store table already +exists, data in the table will be overwritten by the new data. 
@subsection util_loadtext_synopsis Synopsis -<code>wt [-RVv] [-C config] [-E secretkey ] [-h directory] loadtext [-f input]</code> +<code>wt [-RVv] [-C config] [-E secretkey ] [-h directory] loadtext [-f input] uri</code> @subsection util_loadtext_options Options The following are command-specific options for the \c loadtext command: @@ -275,7 +274,7 @@ to the default string format. <hr> @section util_read wt read -Read records from a table or file. +Read records from a table. The \c read command prints out the records associated with the specified keys from the specified data source. The data source must be configured @@ -291,9 +290,9 @@ The \c read command has no command-specific options. <hr> @section util_rename wt rename -Rename a table or file. +Rename a table. -The \c rename command renames the specified table or file. +The \c rename command renames the specified table. @subsection util_rename_synopsis Synopsis <code>wt [-RVv] [-C config] [-E secretkey ] [-h directory] rename uri name</code> @@ -303,11 +302,11 @@ The \c rename command has no command-specific options. <hr> @section util_salvage wt salvage -Recover data from a corrupted file. +Recover data from a corrupted table. The \c salvage command salvages the specified data source, discarding any -data that cannot be recovered. Underlying files are re-written in -place, overwriting the original file contents. +data that cannot be recovered. Underlying files are re-written in place, +overwriting the original file contents. @subsection util_salvage_synopsis Synopsis <code>wt [-RVv] [-C config] [-E secretkey ] [-h directory] salvage [-F force] uri</code> @@ -316,9 +315,9 @@ place, overwriting the original file contents. The following are command-specific options for the \c salvage command: @par <code>-F</code> -By default, salvage will refuse to salvage files that fail basic tests -(for example, files that don't appear to be in a WiredTiger format). 
-The \c -F option forces the salvage of the file, regardless. +By default, salvage will refuse to salvage tables that fail basic tests +(for example, tables that don't appear to be in a WiredTiger format). +The \c -F option forces the salvage of the table, regardless. <hr> @section util_stat wt stat @@ -339,11 +338,11 @@ Include only "fast" statistics in the output (equivalent to passing <hr> @section util_upgrade wt upgrade -Upgrade a table or file. +Upgrade a table. -The \c upgrade command upgrades the specified table or file, exiting -success if the data source is up-to-date, and failure if the data source -cannot be upgraded. +The \c upgrade command upgrades the specified table, exiting success if +the data source is up-to-date, and failure if the data source cannot be +upgraded. @subsection util_upgrade_synopsis Synopsis <code>wt [-RVv] [-C config] [-E secretkey ] [-h directory] upgrade uri</code> @@ -353,11 +352,10 @@ The \c upgrade command has no command-specific options. <hr> @section util_verify wt verify -Check the structural integrity of a table or file. +Check the structural integrity of a table. -The \c verify command verifies the specified table or file, exiting -success if the data source is correct, and failure if the data source is -corrupted. +The \c verify command verifies the specified table, exiting success if +the data source is correct, and failure if the data source is corrupted. @subsection util_verify_synopsis Synopsis <code>wt [-RVv] [-C config] [-E secretkey ] [-h directory] verify uri</code> @@ -367,7 +365,7 @@ The \c verify command has no command-specific options. <hr> @section util_write wt write -Write records to a table or file. +Write records to a table. The \c write command stores records into the specified data source. 
The data source must be configured with string or record number keys and diff --git a/src/third_party/wiredtiger/src/docs/data-sources.dox b/src/third_party/wiredtiger/src/docs/data-sources.dox index d09d1cbc1b8..7f1879e0ffe 100644 --- a/src/third_party/wiredtiger/src/docs/data-sources.dox +++ b/src/third_party/wiredtiger/src/docs/data-sources.dox @@ -38,7 +38,7 @@ cursor types that give access to data managed by WiredTiger: key=<code>string</code>\, value=<code>string</code>\,<br> see @ref metadata for details} @row{<tt>statistics:[\<data source URI\>]</tt>, - database or data source statistics cursor, + database, data source or join statistics cursor, key=<code>int id</code>\,<br> value=<code>(string description\, string value\, uint64_t value)</code>\,<br> @@ -106,7 +106,9 @@ WiredTiger database as well as statistics for individual data sources. The statistics are at two levels: per-database and per-individual data source. Database-wide statistics are retrieved with the \c "statistics:" URI; individual data source statistics are available by specifying -\c "statistics:<data source URI>". +\c "statistics:<data source URI>". Additionally, statistics about a +join cursor can be retrieved by specifying \c "statistics:join" and +supplying the join cursor as an argument in the WT_SESSION::open_cursor call. The statistic key is an integer from the list of keys in @ref_single statistics_keys "Statistics Keys". Statistics cursors return @@ -127,7 +129,11 @@ The following is an example of printing statistics about a table: @snippet ex_stat.c statistics table function -Both examples can use a common display routine that iterates through the +The following is an example of printing statistics about a join cursor: + +@snippet ex_stat.c statistics join cursor function + +These three examples can use a common display routine that iterates through the statistics until the cursor returns the end of the list.
@snippet ex_stat.c statistics display function diff --git a/src/third_party/wiredtiger/src/docs/statistics.dox b/src/third_party/wiredtiger/src/docs/statistics.dox index 453da34c51a..0a29e351e4e 100644 --- a/src/third_party/wiredtiger/src/docs/statistics.dox +++ b/src/third_party/wiredtiger/src/docs/statistics.dox @@ -79,6 +79,15 @@ or logged: @snippet ex_all.c Statistics clear configuration +The following example opens a statistics cursor on an open join cursor: + +@snippet ex_schema.c Statistics cursor join cursor + +The statistics gathered will be organized by reference cursors participating +in the join (see WT_SESSION::join); the uri of each reference cursor appears +as a prefix in the description field returned as a value by the statistics +cursor. + @section statistics_log Statistics logging WiredTiger will optionally log database statistics into a file when the diff --git a/src/third_party/wiredtiger/src/docs/upgrading.dox b/src/third_party/wiredtiger/src/docs/upgrading.dox index df0a22ba0fe..8b3d61e4c19 100644 --- a/src/third_party/wiredtiger/src/docs/upgrading.dox +++ b/src/third_party/wiredtiger/src/docs/upgrading.dox @@ -2,6 +2,14 @@ @section version_271 Upgrading to Version 2.7.1 <dl> +<dt>LSM metadata</dt> +<dd> +There is a change to the format of LSM metadata in this release to fix bugs +in dump / load of tables of type LSM. Tables created with the old LSM metadata +format will be upgraded automatically, but once updated to the new version +<b>are no longer compatible with older releases of WiredTiger</b>. 
+</dd> + <dt>Column-store bulk-load cursors</dt> <dd> Historically, bulk-load of a column-store object ignored any key set in the diff --git a/src/third_party/wiredtiger/src/evict/evict_file.c b/src/third_party/wiredtiger/src/evict/evict_file.c index 641864a8baa..ca98b1bd62a 100644 --- a/src/third_party/wiredtiger/src/evict/evict_file.c +++ b/src/third_party/wiredtiger/src/evict/evict_file.c @@ -18,13 +18,12 @@ __wt_evict_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) WT_DECL_RET; WT_PAGE *page; WT_REF *next_ref, *ref; - bool evict_reset; /* * We need exclusive access to the file -- disable ordinary eviction * and drain any blocks already queued. */ - WT_RET(__wt_evict_file_exclusive_on(session, &evict_reset)); + WT_RET(__wt_evict_file_exclusive_on(session)); /* Make sure the oldest transaction ID is up-to-date. */ __wt_txn_update_oldest(session, true); @@ -98,8 +97,7 @@ err: /* On error, clear any left-over tree walk. */ session, next_ref, WT_READ_NO_EVICT)); } - if (evict_reset) - __wt_evict_file_exclusive_off(session); + __wt_evict_file_exclusive_off(session); return (ret); } diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c index 884c08a02df..50a00787f35 100644 --- a/src/third_party/wiredtiger/src/evict/evict_lru.c +++ b/src/third_party/wiredtiger/src/evict/evict_lru.c @@ -159,7 +159,7 @@ __wt_evict_server_wake(WT_SESSION_IMPL *session) bytes_max / WT_MEGABYTE)); } - return (__wt_cond_signal(session, cache->evict_cond)); + return (__wt_cond_auto_signal(session, cache->evict_cond)); } /* @@ -175,8 +175,8 @@ __evict_server(void *arg) WT_SESSION_IMPL *session; #ifdef HAVE_DIAGNOSTIC struct timespec now, stuck_ts; - uint64_t pages_evicted = 0; #endif + uint64_t pages_evicted = 0; u_int spins; session = arg; @@ -219,11 +219,11 @@ __evict_server(void *arg) /* Next time we wake up, reverse the sweep direction. 
*/ cache->flags ^= WT_CACHE_WALK_REVERSE; -#ifdef HAVE_DIAGNOSTIC pages_evicted = 0; } else if (pages_evicted != cache->pages_evict) { - WT_ERR(__wt_epoch(session, &stuck_ts)); pages_evicted = cache->pages_evict; +#ifdef HAVE_DIAGNOSTIC + WT_ERR(__wt_epoch(session, &stuck_ts)); } else { /* After being stuck for 5 minutes, give up. */ WT_ERR(__wt_epoch(session, &now)); @@ -238,7 +238,8 @@ __evict_server(void *arg) WT_ERR(__wt_verbose(session, WT_VERB_EVICTSERVER, "sleeping")); /* Don't rely on signals: check periodically. */ - WT_ERR(__wt_cond_wait(session, cache->evict_cond, 100000)); + WT_ERR(__wt_cond_auto_wait( + session, cache->evict_cond, pages_evicted != 0)); WT_ERR(__wt_verbose(session, WT_VERB_EVICTSERVER, "waking")); } @@ -720,12 +721,32 @@ __evict_clear_walks(WT_SESSION_IMPL *session) } /* - * __evict_request_walk_clear -- + * __evict_clear_all_walks -- + * Clear the eviction walk points for all files a session is waiting on. + */ +static int +__evict_clear_all_walks(WT_SESSION_IMPL *session) +{ + WT_CONNECTION_IMPL *conn; + WT_DATA_HANDLE *dhandle; + WT_DECL_RET; + + conn = S2C(session); + + TAILQ_FOREACH(dhandle, &conn->dhqh, q) + if (WT_PREFIX_MATCH(dhandle->name, "file:")) + WT_WITH_DHANDLE(session, + dhandle, WT_TRET(__evict_clear_walk(session))); + return (ret); +} + +/* + * __evict_request_clear_walk -- * Request that the eviction server clear the tree's current eviction * point. */ static int -__evict_request_walk_clear(WT_SESSION_IMPL *session) +__evict_request_clear_walk(WT_SESSION_IMPL *session) { WT_BTREE *btree; WT_CACHE *cache; @@ -753,32 +774,12 @@ __evict_request_walk_clear(WT_SESSION_IMPL *session) } /* - * __evict_clear_all_walks -- - * Clear the eviction walk points for all files a session is waiting on. 
- */ -static int -__evict_clear_all_walks(WT_SESSION_IMPL *session) -{ - WT_CONNECTION_IMPL *conn; - WT_DATA_HANDLE *dhandle; - WT_DECL_RET; - - conn = S2C(session); - - TAILQ_FOREACH(dhandle, &conn->dhqh, q) - if (WT_PREFIX_MATCH(dhandle->name, "file:")) - WT_WITH_DHANDLE(session, - dhandle, WT_TRET(__evict_clear_walk(session))); - return (ret); -} - -/* * __wt_evict_file_exclusive_on -- * Get exclusive eviction access to a file and discard any of the file's * blocks queued for eviction. */ int -__wt_evict_file_exclusive_on(WT_SESSION_IMPL *session, bool *evict_resetp) +__wt_evict_file_exclusive_on(WT_SESSION_IMPL *session) { WT_BTREE *btree; WT_CACHE *cache; @@ -786,40 +787,39 @@ __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session, bool *evict_resetp) WT_EVICT_ENTRY *evict; u_int i, elem; - *evict_resetp = false; - btree = S2BT(session); cache = S2C(session)->cache; - /* If the file was never evictable, there's no work to do. */ - if (F_ISSET(btree, WT_BTREE_NO_EVICTION)) - return (0); - /* - * Hold the walk lock to set the "no eviction" flag: no new pages from - * the file will be queued for eviction after this point. + * Hold the walk lock to set the no-eviction flag. + * + * The no-eviction flag can be set permanently, in which case we never + * increment the no-eviction count. */ __wt_spin_lock(session, &cache->evict_walk_lock); - if (!F_ISSET(btree, WT_BTREE_NO_EVICTION)) { - F_SET(btree, WT_BTREE_NO_EVICTION); - *evict_resetp = true; + if (F_ISSET(btree, WT_BTREE_NO_EVICTION)) { + if (btree->evict_disabled != 0) + ++btree->evict_disabled; + __wt_spin_unlock(session, &cache->evict_walk_lock); + return (0); } - __wt_spin_unlock(session, &cache->evict_walk_lock); + ++btree->evict_disabled; - /* If some other operation has disabled eviction, we're done. */ - if (!*evict_resetp) - return (0); + /* + * Ensure no new pages from the file will be queued for eviction after + * this point. 
+ */ + F_SET(btree, WT_BTREE_NO_EVICTION); + WT_FULL_BARRIER(); /* Clear any existing LRU eviction walk for the file. */ - WT_ERR(__evict_request_walk_clear(session)); - - /* Hold the evict lock to remove any queued pages from this file. */ - __wt_spin_lock(session, &cache->evict_lock); + WT_ERR(__evict_request_clear_walk(session)); /* * The eviction candidate list might reference pages from the file, - * clear it. + * clear it. Hold the evict lock to remove queued pages from a file. */ + __wt_spin_lock(session, &cache->evict_lock); elem = cache->evict_max; for (i = 0, evict = cache->evict_queue; i < elem; i++, evict++) if (evict->btree == btree) @@ -833,10 +833,11 @@ __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session, bool *evict_resetp) while (btree->evict_busy > 0) __wt_yield(); - return (0); - -err: F_CLR(btree, WT_BTREE_NO_EVICTION); - *evict_resetp = false; + if (0) { +err: --btree->evict_disabled; + F_CLR(btree, WT_BTREE_NO_EVICTION); + } + __wt_spin_unlock(session, &cache->evict_walk_lock); return (ret); } @@ -848,8 +849,10 @@ void __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session) { WT_BTREE *btree; + WT_CACHE *cache; btree = S2BT(session); + cache = S2C(session)->cache; /* * We have seen subtle bugs with multiple threads racing to turn @@ -857,10 +860,17 @@ __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session) */ WT_DIAGNOSTIC_YIELD; - WT_ASSERT(session, btree->evict_ref == NULL && - F_ISSET(btree, WT_BTREE_NO_EVICTION)); + WT_ASSERT(session, + btree->evict_ref == NULL && F_ISSET(btree, WT_BTREE_NO_EVICTION)); - F_CLR(btree, WT_BTREE_NO_EVICTION); + /* + * The no-eviction flag can be set permanently, in which case we never + * increment the no-eviction count. 
+ */ + __wt_spin_lock(session, &cache->evict_walk_lock); + if (btree->evict_disabled > 0 && --btree->evict_disabled == 0) + F_CLR(btree, WT_BTREE_NO_EVICTION); + __wt_spin_unlock(session, &cache->evict_walk_lock); } /* @@ -890,7 +900,7 @@ __evict_lru_walk(WT_SESSION_IMPL *session) { WT_CACHE *cache; WT_DECL_RET; - uint64_t cutoff; + uint64_t cutoff, read_gen_oldest; uint32_t candidates, entries; cache = S2C(session)->cache; @@ -931,34 +941,62 @@ __evict_lru_walk(WT_SESSION_IMPL *session) return (0); } - WT_ASSERT(session, cache->evict_queue[0].ref != NULL); - - /* Track the oldest read generation we have in the queue. */ - cache->read_gen_oldest = cache->evict_queue[0].ref->page->read_gen; - + /* Decide how many of the candidates we're going to try and evict. */ if (FLD_ISSET(cache->state, - WT_EVICT_PASS_AGGRESSIVE | WT_EVICT_PASS_WOULD_BLOCK)) + WT_EVICT_PASS_AGGRESSIVE | WT_EVICT_PASS_WOULD_BLOCK)) { /* * Take all candidates if we only gathered pages with an oldest * read generation set. */ cache->evict_candidates = entries; - else { - /* Find the bottom 25% of read generations. */ - cutoff = (3 * __evict_read_gen(&cache->evict_queue[0]) + - __evict_read_gen(&cache->evict_queue[entries - 1])) / 4; + } else { /* - * Don't take less than 10% or more than 50% of entries, - * regardless. That said, if there is only one entry, which is - * normal when populating an empty file, don't exclude it. + * Find the oldest read generation we have in the queue, used + * to set the initial value for pages read into the system. + * The queue is sorted, find the first "normal" generation. 
*/ - for (candidates = 1 + entries / 10; - candidates < entries / 2; - candidates++) - if (__evict_read_gen( - &cache->evict_queue[candidates]) > cutoff) + read_gen_oldest = WT_READGEN_OLDEST; + for (candidates = 0; candidates < entries; ++candidates) { + read_gen_oldest = + __evict_read_gen(&cache->evict_queue[candidates]); + if (read_gen_oldest != WT_READGEN_OLDEST) break; - cache->evict_candidates = candidates; + } + + /* + * Take all candidates if we only gathered pages with an oldest + * read generation set. + * + * We normally never take more than 50% of the entries; if 50% + * of the entries were at the oldest read generation, take them. + */ + if (read_gen_oldest == WT_READGEN_OLDEST) + cache->evict_candidates = entries; + else if (candidates >= entries / 2) + cache->evict_candidates = candidates; + else { + /* Save the calculated oldest generation. */ + cache->read_gen_oldest = read_gen_oldest; + + /* Find the bottom 25% of read generations. */ + cutoff = + (3 * read_gen_oldest + __evict_read_gen( + &cache->evict_queue[entries - 1])) / 4; + + /* + * Don't take less than 10% or more than 50% of entries, + * regardless. That said, if there is only one entry, + * which is normal when populating an empty file, don't + * exclude it. + */ + for (candidates = 1 + entries / 10; + candidates < entries / 2; + candidates++) + if (__evict_read_gen( + &cache->evict_queue[candidates]) > cutoff) + break; + cache->evict_candidates = candidates; + } } cache->evict_current = cache->evict_queue; @@ -1127,23 +1165,27 @@ retry: while (slot < max_entries && ret == 0) { __wt_spin_unlock(session, &conn->dhandle_lock); dhandle_locked = false; - __wt_spin_lock(session, &cache->evict_walk_lock); - /* - * Re-check the "no eviction" flag -- it is used to enforce - * exclusive access when a handle is being closed. + * Re-check the "no eviction" flag, used to enforce exclusive + * access when a handle is being closed. If not set, remember + * the file to visit first, next loop. 
+ * + * Only try to acquire the lock and simply continue if we fail; + * the lock is held while the thread turning off eviction clears + * the tree's current eviction point, and part of the process is + * waiting on this thread to acknowledge that action. */ - if (!F_ISSET(btree, WT_BTREE_NO_EVICTION)) { - /* Remember the file to visit first, next loop. */ - cache->evict_file_next = dhandle; - - WT_WITH_DHANDLE(session, dhandle, - ret = __evict_walk_file(session, &slot)); - WT_ASSERT(session, session->split_gen == 0); + if (!F_ISSET(btree, WT_BTREE_NO_EVICTION) && + !__wt_spin_trylock(session, &cache->evict_walk_lock)) { + if (!F_ISSET(btree, WT_BTREE_NO_EVICTION)) { + cache->evict_file_next = dhandle; + WT_WITH_DHANDLE(session, dhandle, + ret = __evict_walk_file(session, &slot)); + WT_ASSERT(session, session->split_gen == 0); + } + __wt_spin_unlock(session, &cache->evict_walk_lock); } - __wt_spin_unlock(session, &cache->evict_walk_lock); - /* * If we didn't find any candidates in the file, skip it next * time. @@ -1286,6 +1328,18 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp) if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU)) continue; + /* + * It's possible (but unlikely) to visit a page without a read + * generation, if we race with the read instantiating the page. + * Ignore those pages, but set the page's read generation here + * to ensure a bug doesn't somehow leave a page without a read + * generation. + */ + if (page->read_gen == WT_READGEN_NOTSET) { + __wt_cache_read_gen_new(session, page); + continue; + } + /* Pages we no longer need (clean or dirty), are found money. */ if (__wt_page_is_empty(page) || F_ISSET(session->dhandle, WT_DHANDLE_DEAD) || @@ -1311,13 +1365,6 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp) internal_pages >= (int)(evict - start) / 2) continue; - /* - * If this page has never been considered for eviction, set its - * read generation to somewhere in the middle of the LRU list. 
- */ - if (page->read_gen == WT_READGEN_NOTSET) - page->read_gen = __wt_cache_read_gen_new(session); - fast: /* If the page can't be evicted, give up. */ if (!__wt_page_can_evict(session, ref, NULL)) continue; @@ -1477,7 +1524,6 @@ __evict_page(WT_SESSION_IMPL *session, bool is_server) { WT_BTREE *btree; WT_DECL_RET; - WT_PAGE *page; WT_REF *ref; WT_RET(__evict_get_ref(session, is_server, &btree, &ref)); @@ -1506,9 +1552,7 @@ __evict_page(WT_SESSION_IMPL *session, bool is_server) * the page and some other thread may have evicted it by the time we * look at it. */ - page = ref->page; - if (page->read_gen != WT_READGEN_OLDEST) - page->read_gen = __wt_cache_read_gen_bump(session); + __wt_cache_read_gen_bump(session, ref->page); WT_WITH_BTREE(session, btree, ret = __wt_evict(session, ref, false)); diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h index ee495c52fc8..7cdf2bef43a 100644 --- a/src/third_party/wiredtiger/src/include/btmem.h +++ b/src/third_party/wiredtiger/src/include/btmem.h @@ -598,9 +598,14 @@ struct __wt_page { * read generation is incremented by the eviction server each time it * becomes active. To avoid incrementing a page's read generation too * frequently, it is set to a future point. + * + * Because low read generation values have special meaning, and there + * are places where we manipulate the value, use an initial value well + * outside of the special range. 
*/ #define WT_READGEN_NOTSET 0 #define WT_READGEN_OLDEST 1 +#define WT_READGEN_START_VALUE 100 #define WT_READGEN_STEP 100 uint64_t read_gen; diff --git a/src/third_party/wiredtiger/src/include/btree.h b/src/third_party/wiredtiger/src/include/btree.h index 703de0f2fc6..fd921677751 100644 --- a/src/third_party/wiredtiger/src/include/btree.h +++ b/src/third_party/wiredtiger/src/include/btree.h @@ -129,10 +129,11 @@ struct __wt_btree { uint64_t rec_max_txn; /* Maximum txn seen (clean trees) */ uint64_t write_gen; /* Write generation */ - WT_REF *evict_ref; /* Eviction thread's location */ - uint64_t evict_priority; /* Relative priority of cached pages */ - u_int evict_walk_period; /* Skip this many LRU walks */ - u_int evict_walk_skips; /* Number of walks skipped */ + WT_REF *evict_ref; /* Eviction thread's location */ + uint64_t evict_priority; /* Relative priority of cached pages */ + u_int evict_walk_period; /* Skip this many LRU walks */ + u_int evict_walk_skips; /* Number of walks skipped */ + u_int evict_disabled; /* Eviction disabled count */ volatile uint32_t evict_busy; /* Count of threads in eviction */ enum { diff --git a/src/third_party/wiredtiger/src/include/cache.h b/src/third_party/wiredtiger/src/include/cache.h index a3961d6043e..9184a2fe6ed 100644 --- a/src/third_party/wiredtiger/src/include/cache.h +++ b/src/third_party/wiredtiger/src/include/cache.h @@ -75,9 +75,9 @@ struct __wt_cache { /* * Read information. */ - uint64_t read_gen; /* Page read generation (LRU) */ - uint64_t read_gen_oldest; /* The oldest read generation that - eviction knows about */ + uint64_t read_gen; /* Current page read generation */ + uint64_t read_gen_oldest; /* Oldest read generation the eviction + * server saw in its last queue load */ /* * Eviction thread information. 
diff --git a/src/third_party/wiredtiger/src/include/cache.i b/src/third_party/wiredtiger/src/include/cache.i index ee13eee84c5..8cf7555e716 100644 --- a/src/third_party/wiredtiger/src/include/cache.i +++ b/src/third_party/wiredtiger/src/include/cache.i @@ -28,34 +28,43 @@ __wt_cache_read_gen_incr(WT_SESSION_IMPL *session) /* * __wt_cache_read_gen_bump -- - * Get the read generation to keep a page in memory. + * Update the page's read generation. */ -static inline uint64_t -__wt_cache_read_gen_bump(WT_SESSION_IMPL *session) +static inline void +__wt_cache_read_gen_bump(WT_SESSION_IMPL *session, WT_PAGE *page) { + /* Ignore pages set for forcible eviction. */ + if (page->read_gen == WT_READGEN_OLDEST) + return; + + /* Ignore pages already in the future. */ + if (page->read_gen > __wt_cache_read_gen(session)) + return; + /* - * We return read-generations from the future (where "the future" is - * measured by increments of the global read generation). The reason - * is because when acquiring a new hazard pointer for a page, we can - * check its read generation, and if the read generation isn't less - * than the current global generation, we don't bother updating the - * page. In other words, the goal is to avoid some number of updates - * immediately after each update we have to make. + * We set read-generations in the future (where "the future" is measured + * by increments of the global read generation). The reason is because + * when acquiring a new hazard pointer for a page, we can check its read + * generation, and if the read generation isn't less than the current + * global generation, we don't bother updating the page. In other + * words, the goal is to avoid some number of updates immediately after + * each update we have to make. */ - return (__wt_cache_read_gen(session) + WT_READGEN_STEP); + page->read_gen = __wt_cache_read_gen(session) + WT_READGEN_STEP; } /* * __wt_cache_read_gen_new -- * Get the read generation for a new page in memory. 
*/ -static inline uint64_t -__wt_cache_read_gen_new(WT_SESSION_IMPL *session) +static inline void +__wt_cache_read_gen_new(WT_SESSION_IMPL *session, WT_PAGE *page) { WT_CACHE *cache; cache = S2C(session)->cache; - return (__wt_cache_read_gen(session) + cache->read_gen_oldest) / 2; + page->read_gen = + (__wt_cache_read_gen(session) + cache->read_gen_oldest) / 2; } /* @@ -119,12 +128,11 @@ __wt_session_can_wait(WT_SESSION_IMPL *session) return (0); /* - * LSM sets the no-eviction flag when holding the LSM tree lock, - * in that case, or when holding the schema lock, we don't want to - * highjack the thread for eviction. + * LSM sets the no-eviction flag when holding the LSM tree lock, in that + * case, or when holding the schema lock, we don't want to highjack the + * thread for eviction. */ - if (F_ISSET(session, - WT_SESSION_NO_EVICTION | WT_SESSION_LOCKED_SCHEMA)) + if (F_ISSET(session, WT_SESSION_NO_EVICTION | WT_SESSION_LOCKED_SCHEMA)) return (0); return (1); @@ -224,11 +232,11 @@ __wt_cache_eviction_check(WT_SESSION_IMPL *session, bool busy, bool *didworkp) return (0); /* - * Threads operating on trees that cannot be evicted are ignored, - * mostly because they're not contributing to the problem. + * Threads operating on cache-resident trees are ignored because they're + * not contributing to the problem. */ btree = S2BT_SAFE(session); - if (btree != NULL && F_ISSET(btree, WT_BTREE_NO_EVICTION)) + if (btree != NULL && F_ISSET(btree, WT_BTREE_IN_MEMORY)) return (0); /* Check if eviction is needed. 
*/ diff --git a/src/third_party/wiredtiger/src/include/config.h b/src/third_party/wiredtiger/src/include/config.h index e63db0e76cf..48a255134af 100644 --- a/src/third_party/wiredtiger/src/include/config.h +++ b/src/third_party/wiredtiger/src/include/config.h @@ -85,13 +85,15 @@ struct __wt_config_parser_impl { #define WT_CONFIG_ENTRY_WT_SESSION_upgrade 33 #define WT_CONFIG_ENTRY_WT_SESSION_verify 34 #define WT_CONFIG_ENTRY_colgroup_meta 35 -#define WT_CONFIG_ENTRY_file_meta 36 -#define WT_CONFIG_ENTRY_index_meta 37 -#define WT_CONFIG_ENTRY_table_meta 38 -#define WT_CONFIG_ENTRY_wiredtiger_open 39 -#define WT_CONFIG_ENTRY_wiredtiger_open_all 40 -#define WT_CONFIG_ENTRY_wiredtiger_open_basecfg 41 -#define WT_CONFIG_ENTRY_wiredtiger_open_usercfg 42 +#define WT_CONFIG_ENTRY_file_config 36 +#define WT_CONFIG_ENTRY_file_meta 37 +#define WT_CONFIG_ENTRY_index_meta 38 +#define WT_CONFIG_ENTRY_lsm_meta 39 +#define WT_CONFIG_ENTRY_table_meta 40 +#define WT_CONFIG_ENTRY_wiredtiger_open 41 +#define WT_CONFIG_ENTRY_wiredtiger_open_all 42 +#define WT_CONFIG_ENTRY_wiredtiger_open_basecfg 43 +#define WT_CONFIG_ENTRY_wiredtiger_open_usercfg 44 /* * configuration section: END * DO NOT EDIT: automatically built by dist/flags.py. diff --git a/src/third_party/wiredtiger/src/include/cursor.h b/src/third_party/wiredtiger/src/include/cursor.h index 48db8b9ec23..4b35daf106e 100644 --- a/src/third_party/wiredtiger/src/include/cursor.h +++ b/src/third_party/wiredtiger/src/include/cursor.h @@ -213,10 +213,11 @@ struct __wt_cursor_btree { #define WT_CBT_NO_TXN 0x10 /* Non-transactional cursor (e.g. 
on a checkpoint) */ #define WT_CBT_SEARCH_SMALLEST 0x20 /* Row-store: small-key insert list */ +#define WT_CBT_VAR_ONPAGE_MATCH 0x40 /* Var-store: on-page recno match */ #define WT_CBT_POSITION_MASK /* Flags associated with position */ \ (WT_CBT_ITERATE_APPEND | WT_CBT_ITERATE_NEXT | WT_CBT_ITERATE_PREV | \ - WT_CBT_SEARCH_SMALLEST) + WT_CBT_SEARCH_SMALLEST | WT_CBT_VAR_ONPAGE_MATCH) uint8_t flags; }; @@ -287,8 +288,10 @@ struct __wt_cursor_join_iter { WT_SESSION_IMPL *session; WT_CURSOR_JOIN *cjoin; WT_CURSOR_JOIN_ENTRY *entry; - WT_CURSOR *cursor; - WT_ITEM *curkey; + WT_CURSOR *cursor; /* has null projection */ + WT_CURSOR *main; /* main table with projection */ + WT_ITEM *curkey; /* primary key */ + WT_ITEM idxkey; bool positioned; bool isequal; /* advancing means we're done */ }; @@ -303,6 +306,7 @@ struct __wt_cursor_join_endpoint { #define WT_CURJOIN_END_GT 0x04 /* include values > cursor */ #define WT_CURJOIN_END_GE (WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ) #define WT_CURJOIN_END_LE (WT_CURJOIN_END_LT | WT_CURJOIN_END_EQ) +#define WT_CURJOIN_END_OWN_CURSOR 0x08 /* must close cursor */ uint8_t flags; /* range for this endpoint */ }; #define WT_CURJOIN_END_RANGE(endp) \ diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index 55b0b8cd7ff..48c52d4a109 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -168,7 +168,7 @@ extern int __wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int closing); extern int __wt_split_reverse(WT_SESSION_IMPL *session, WT_REF *ref); extern int __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref); extern int __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst); -extern int __wt_cache_op(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, WT_CACHE_OP op); +extern int __wt_cache_op(WT_SESSION_IMPL *session, WT_CACHE_OP op); extern int __wt_upgrade(WT_SESSION_IMPL *session, const char *cfg[]); 
extern int __wt_verify(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_verify_dsk_image(WT_SESSION_IMPL *session, const char *tag, const WT_PAGE_HEADER *dsk, size_t size, bool empty_page_ok); @@ -297,6 +297,7 @@ extern int __wt_json_to_item(WT_SESSION_IMPL *session, const char *jstr, const c extern ssize_t __wt_json_strlen(const char *src, size_t srclen); extern int __wt_json_strncpy(char **pdst, size_t dstlen, const char *src, size_t srclen); extern int __wt_curlog_open(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR **cursorp); +extern int __wt_schema_create_final( WT_SESSION_IMPL *session, char *cfg_arg[], char **value_ret); extern int __wt_curmetadata_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp); extern void __wt_curstat_dsrc_final(WT_CURSOR_STAT *cst); extern int __wt_curstat_init(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *curjoin, const char *cfg[], WT_CURSOR_STAT *cst); @@ -341,7 +342,7 @@ extern void __wt_evict_list_clear_page(WT_SESSION_IMPL *session, WT_REF *ref); extern int __wt_evict_server_wake(WT_SESSION_IMPL *session); extern int __wt_evict_create(WT_SESSION_IMPL *session); extern int __wt_evict_destroy(WT_SESSION_IMPL *session); -extern int __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session, bool *evict_resetp); +extern int __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session); extern void __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session); extern int __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full); extern void __wt_evict_priority_set(WT_SESSION_IMPL *session, uint64_t v); @@ -364,7 +365,7 @@ extern int __wt_log_open(WT_SESSION_IMPL *session); extern int __wt_log_close(WT_SESSION_IMPL *session); extern int __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep); extern int __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags, int (*func)(WT_SESSION_IMPL *session, 
WT_ITEM *record, WT_LSN *lsnp, WT_LSN *next_lsnp, void *cookie, int firstrecord), void *cookie); -extern int __wt_log_force_write(WT_SESSION_IMPL *session, bool retry); +extern int __wt_log_force_write(WT_SESSION_IMPL *session, bool retry, bool *did_work); extern int __wt_log_write(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, uint32_t flags); extern int __wt_log_vprintf(WT_SESSION_IMPL *session, const char *fmt, va_list ap); extern int __wt_log_flush(WT_SESSION_IMPL *session, uint32_t flags); @@ -485,7 +486,9 @@ extern int __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **va extern int __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value); extern void __wt_abort(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); extern int __wt_calloc(WT_SESSION_IMPL *session, size_t number, size_t size, void *retp); +extern int __wt_malloc(WT_SESSION_IMPL *session, size_t bytes_to_allocate, void *retp); extern int __wt_realloc(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp); +extern int __wt_realloc_noclear(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp); extern int __wt_realloc_aligned(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp); extern int __wt_strndup(WT_SESSION_IMPL *session, const void *str, size_t len, void *retp); extern void __wt_free_int(WT_SESSION_IMPL *session, const void *p_arg); @@ -558,6 +561,17 @@ extern int __wt_struct_size(WT_SESSION_IMPL *session, size_t *sizep, const char extern int __wt_struct_pack(WT_SESSION_IMPL *session, void *buffer, size_t size, const char *fmt, ...); extern int __wt_struct_unpack(WT_SESSION_IMPL *session, const void *buffer, size_t size, const char *fmt, ...); extern int __wt_struct_repack(WT_SESSION_IMPL *session, const char *infmt, const char *outfmt, const WT_ITEM *inbuf, WT_ITEM *outbuf); +extern int 
__wt_ext_pack_start(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *format, void *buffer, size_t size, WT_PACK_STREAM **psp); +extern int __wt_ext_unpack_start(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *format, const void *buffer, size_t size, WT_PACK_STREAM **psp); +extern int __wt_ext_pack_close(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, size_t *usedp); +extern int __wt_ext_pack_item(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, WT_ITEM *item); +extern int __wt_ext_pack_int(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, int64_t i); +extern int __wt_ext_pack_str(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, const char *s); +extern int __wt_ext_pack_uint(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, uint64_t u); +extern int __wt_ext_unpack_item(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, WT_ITEM *item); +extern int __wt_ext_unpack_int(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, int64_t *ip); +extern int __wt_ext_unpack_str(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, const char **sp); +extern int __wt_ext_unpack_uint(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, uint64_t *up); extern int __wt_ovfl_discard_add(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell); extern void __wt_ovfl_discard_free(WT_SESSION_IMPL *session, WT_PAGE *page); extern int __wt_ovfl_reuse_search(WT_SESSION_IMPL *session, WT_PAGE *page, uint8_t **addrp, size_t *addr_sizep, const void *value, size_t value_size); @@ -576,7 +590,6 @@ extern int __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) extern int __wt_bulk_insert_fix( WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool deleted); extern int __wt_bulk_insert_fix_bitmap(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk); extern int __wt_bulk_insert_var( WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool deleted); -extern int __wt_schema_create_strip(WT_SESSION_IMPL *session, const char *v1, const char *v2, char **value_ret); extern int 
__wt_direct_io_size_check(WT_SESSION_IMPL *session, const char **cfg, const char *config_name, uint32_t *allocsizep); extern int __wt_schema_colgroup_source(WT_SESSION_IMPL *session, WT_TABLE *table, const char *cgname, const char *config, WT_ITEM *buf); extern int __wt_schema_index_source(WT_SESSION_IMPL *session, WT_TABLE *table, const char *idxname, const char *config, WT_ITEM *buf); @@ -637,6 +650,11 @@ extern int __wt_session_lock_checkpoint(WT_SESSION_IMPL *session, const char *ch extern int __wt_salvage(WT_SESSION_IMPL *session, const char *cfg[]); extern uint32_t __wt_cksum(const void *chunk, size_t len); extern void __wt_cksum_init(void); +extern int __wt_cond_auto_alloc( WT_SESSION_IMPL *session, const char *name, bool is_signalled, uint64_t min, uint64_t max, WT_CONDVAR **condp); +extern int __wt_cond_auto_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond); +extern int __wt_cond_auto_wait_signal( WT_SESSION_IMPL *session, WT_CONDVAR *cond, bool progress, bool *signalled); +extern int __wt_cond_auto_wait( WT_SESSION_IMPL *session, WT_CONDVAR *cond, bool progress); +extern int __wt_cond_auto_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp); extern int __wt_decrypt(WT_SESSION_IMPL *session, WT_ENCRYPTOR *encryptor, size_t skip, WT_ITEM *in, WT_ITEM *out); extern int __wt_encrypt(WT_SESSION_IMPL *session, WT_KEYED_ENCRYPTOR *kencryptor, size_t skip, WT_ITEM *in, WT_ITEM *out); extern void __wt_encrypt_size(WT_SESSION_IMPL *session, WT_KEYED_ENCRYPTOR *kencryptor, size_t incoming_size, size_t *sizep); @@ -736,7 +754,7 @@ extern void __wt_txn_destroy(WT_SESSION_IMPL *session); extern int __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_txn_global_destroy(WT_SESSION_IMPL *session); extern int __wt_checkpoint_name_ok(WT_SESSION_IMPL *session, const char *name, size_t len); -extern int __wt_checkpoint_list(WT_SESSION_IMPL *session, const char *cfg[]); +extern int __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const 
char *cfg[]); extern int __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_checkpoint_sync(WT_SESSION_IMPL *session, const char *cfg[]); diff --git a/src/third_party/wiredtiger/src/include/gcc.h b/src/third_party/wiredtiger/src/include/gcc.h index 6ccc0de3c03..ce6afdd6e9c 100644 --- a/src/third_party/wiredtiger/src/include/gcc.h +++ b/src/third_party/wiredtiger/src/include/gcc.h @@ -6,6 +6,7 @@ * See the file LICENSE for redistribution information. */ +#define WT_PTRDIFFT_FMT "td" /* ptrdiff_t format string */ #define WT_SIZET_FMT "zu" /* size_t format string */ /* Add GCC-specific attributes to types and function declarations. */ diff --git a/src/third_party/wiredtiger/src/include/lint.h b/src/third_party/wiredtiger/src/include/lint.h index f8b17022968..1b64186cbab 100644 --- a/src/third_party/wiredtiger/src/include/lint.h +++ b/src/third_party/wiredtiger/src/include/lint.h @@ -6,6 +6,7 @@ * See the file LICENSE for redistribution information. */ +#define WT_PTRDIFFT_FMT "td" /* ptrdiff_t format string */ #define WT_SIZET_FMT "zu" /* size_t format string */ #define WT_COMPILER_TYPE_ALIGN(x) diff --git a/src/third_party/wiredtiger/src/include/lsm.h b/src/third_party/wiredtiger/src/include/lsm.h index 7cb3ccc895d..444073087df 100644 --- a/src/third_party/wiredtiger/src/include/lsm.h +++ b/src/third_party/wiredtiger/src/include/lsm.h @@ -179,7 +179,7 @@ struct __wt_lsm_tree { int collator_owned; uint32_t refcnt; /* Number of users of the tree */ - uint8_t exclusive; /* Tree is locked exclusively */ + WT_SESSION_IMPL *excl_session; /* Session has exclusive lock */ #define LSM_TREE_MAX_QUEUE 100 uint32_t queue_ref; @@ -215,7 +215,7 @@ struct __wt_lsm_tree { size_t chunk_alloc; /* Space allocated for chunks */ uint32_t nchunks; /* Number of active chunks */ uint32_t last; /* Last allocated ID */ - int modified; /* Have there been updates? 
*/ + bool modified; /* Have there been updates? */ WT_LSM_CHUNK **old_chunks; /* Array of old LSM chunks */ size_t old_alloc; /* Space allocated for old chunks */ @@ -242,13 +242,18 @@ struct __wt_lsm_tree { int64_t lsm_lookup_no_bloom; int64_t lsm_merge_throttle; -#define WT_LSM_TREE_ACTIVE 0x01 /* Workers are active */ -#define WT_LSM_TREE_AGGRESSIVE_TIMER 0x02 /* Timer for merge aggression */ -#define WT_LSM_TREE_COMPACTING 0x04 /* Tree being compacted */ -#define WT_LSM_TREE_MERGES 0x08 /* Tree should run merges */ -#define WT_LSM_TREE_NEED_SWITCH 0x10 /* New chunk needs creating */ -#define WT_LSM_TREE_OPEN 0x20 /* The tree is open */ -#define WT_LSM_TREE_THROTTLE 0x40 /* Throttle updates */ + /* + * The tree is open for business. This used to be a flag, but it is + * susceptible to races. + */ + bool active; + +#define WT_LSM_TREE_AGGRESSIVE_TIMER 0x01 /* Timer for merge aggression */ +#define WT_LSM_TREE_COMPACTING 0x02 /* Tree being compacted */ +#define WT_LSM_TREE_MERGES 0x04 /* Tree should run merges */ +#define WT_LSM_TREE_NEED_SWITCH 0x08 /* New chunk needs creating */ +#define WT_LSM_TREE_OPEN 0x10 /* The tree is open */ +#define WT_LSM_TREE_THROTTLE 0x20 /* Throttle updates */ uint32_t flags; }; diff --git a/src/third_party/wiredtiger/src/include/misc.h b/src/third_party/wiredtiger/src/include/misc.h index 4d3ca758dc7..07d52c61eac 100644 --- a/src/third_party/wiredtiger/src/include/misc.h +++ b/src/third_party/wiredtiger/src/include/misc.h @@ -198,7 +198,7 @@ /* Check if a string matches a prefix. */ #define WT_PREFIX_MATCH(str, pfx) \ - (((const char *)str)[0] == ((const char *)pfx)[0] && \ + (((const char *)(str))[0] == ((const char *)pfx)[0] && \ strncmp((str), (pfx), strlen(pfx)) == 0) /* Check if a string matches a prefix, and move past it. 
*/ diff --git a/src/third_party/wiredtiger/src/include/msvc.h b/src/third_party/wiredtiger/src/include/msvc.h index 99260a44875..d5be5bd8c60 100644 --- a/src/third_party/wiredtiger/src/include/msvc.h +++ b/src/third_party/wiredtiger/src/include/msvc.h @@ -13,6 +13,7 @@ #define inline __inline +#define WT_PTRDIFFT_FMT "Id" /* ptrdiff_t format string */ #define WT_SIZET_FMT "Iu" /* size_t format string */ /* diff --git a/src/third_party/wiredtiger/src/include/mutex.h b/src/third_party/wiredtiger/src/include/mutex.h index f798bfb3ece..04679884930 100644 --- a/src/third_party/wiredtiger/src/include/mutex.h +++ b/src/third_party/wiredtiger/src/include/mutex.h @@ -20,6 +20,13 @@ struct __wt_condvar { int waiters; /* Numbers of waiters, or -1 if signalled with no waiters. */ + /* + * The following fields are only used for automatically adjusting + * condition variables. They could be in a separate structure. + */ + uint64_t min_wait; /* Minimum wait duration */ + uint64_t max_wait; /* Maximum wait duration */ + uint64_t prev_wait; /* Wait duration used last time */ }; /* diff --git a/src/third_party/wiredtiger/src/include/packing.i b/src/third_party/wiredtiger/src/include/packing.i index 784a55ef2ae..35b2ddc43db 100644 --- a/src/third_party/wiredtiger/src/include/packing.i +++ b/src/third_party/wiredtiger/src/include/packing.i @@ -677,8 +677,8 @@ __wt_struct_unpackv(WT_SESSION_IMPL *session, if (fmt[0] != '\0' && fmt[1] == '\0') { pv.type = fmt[0]; - if ((ret = __unpack_read(session, &pv, &p, size)) == 0) - WT_UNPACK_PUT(session, pv, ap); + WT_RET(__unpack_read(session, &pv, &p, size)); + WT_UNPACK_PUT(session, pv, ap); return (0); } diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h index 8bc6c37b53e..f9170dc1a79 100644 --- a/src/third_party/wiredtiger/src/include/stat.h +++ b/src/third_party/wiredtiger/src/include/stat.h @@ -299,6 +299,8 @@ struct __wt_connection_stats { int64_t cache_bytes_dirty; int64_t 
cache_pages_dirty; int64_t cache_eviction_clean; + int64_t cond_auto_wait_reset; + int64_t cond_auto_wait; int64_t file_open; int64_t memory_allocation; int64_t memory_free; @@ -337,6 +339,8 @@ struct __wt_connection_stats { int64_t log_bytes_written; int64_t log_zero_fills; int64_t log_flush; + int64_t log_force_write; + int64_t log_force_write_skip; int64_t log_compress_writes; int64_t log_compress_write_fails; int64_t log_compress_small; @@ -344,6 +348,7 @@ struct __wt_connection_stats { int64_t log_scans; int64_t log_scan_rereads; int64_t log_write_lsn; + int64_t log_write_lsn_skip; int64_t log_sync; int64_t log_sync_dir; int64_t log_writes; diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in index 0c314e0705f..1e263f22880 100644 --- a/src/third_party/wiredtiger/src/include/wiredtiger.in +++ b/src/third_party/wiredtiger/src/include/wiredtiger.in @@ -874,7 +874,7 @@ struct __wt_session { * updates). See @ref data_sources for more information. * <br> * @copydoc doc_cursor_types - * @param to_dup a cursor to duplicate + * @param to_dup a cursor to duplicate or gather statistics on * @configstart{WT_SESSION.open_cursor, see dist/api_data.py} * @config{append, append the value as a new record\, creating a new * record number key; valid only for cursors with record number keys., a @@ -3850,187 +3850,197 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_CACHE_PAGES_DIRTY 1070 /*! cache: unmodified pages evicted */ #define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1071 +/*! connection: auto adjusting condition resets */ +#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1072 +/*! connection: auto adjusting condition wait calls */ +#define WT_STAT_CONN_COND_AUTO_WAIT 1073 /*! connection: files currently open */ -#define WT_STAT_CONN_FILE_OPEN 1072 +#define WT_STAT_CONN_FILE_OPEN 1074 /*! 
connection: memory allocations */ -#define WT_STAT_CONN_MEMORY_ALLOCATION 1073 +#define WT_STAT_CONN_MEMORY_ALLOCATION 1075 /*! connection: memory frees */ -#define WT_STAT_CONN_MEMORY_FREE 1074 +#define WT_STAT_CONN_MEMORY_FREE 1076 /*! connection: memory re-allocations */ -#define WT_STAT_CONN_MEMORY_GROW 1075 +#define WT_STAT_CONN_MEMORY_GROW 1077 /*! connection: pthread mutex condition wait calls */ -#define WT_STAT_CONN_COND_WAIT 1076 +#define WT_STAT_CONN_COND_WAIT 1078 /*! connection: pthread mutex shared lock read-lock calls */ -#define WT_STAT_CONN_RWLOCK_READ 1077 +#define WT_STAT_CONN_RWLOCK_READ 1079 /*! connection: pthread mutex shared lock write-lock calls */ -#define WT_STAT_CONN_RWLOCK_WRITE 1078 +#define WT_STAT_CONN_RWLOCK_WRITE 1080 /*! connection: total read I/Os */ -#define WT_STAT_CONN_READ_IO 1079 +#define WT_STAT_CONN_READ_IO 1081 /*! connection: total write I/Os */ -#define WT_STAT_CONN_WRITE_IO 1080 +#define WT_STAT_CONN_WRITE_IO 1082 /*! cursor: cursor create calls */ -#define WT_STAT_CONN_CURSOR_CREATE 1081 +#define WT_STAT_CONN_CURSOR_CREATE 1083 /*! cursor: cursor insert calls */ -#define WT_STAT_CONN_CURSOR_INSERT 1082 +#define WT_STAT_CONN_CURSOR_INSERT 1084 /*! cursor: cursor next calls */ -#define WT_STAT_CONN_CURSOR_NEXT 1083 +#define WT_STAT_CONN_CURSOR_NEXT 1085 /*! cursor: cursor prev calls */ -#define WT_STAT_CONN_CURSOR_PREV 1084 +#define WT_STAT_CONN_CURSOR_PREV 1086 /*! cursor: cursor remove calls */ -#define WT_STAT_CONN_CURSOR_REMOVE 1085 +#define WT_STAT_CONN_CURSOR_REMOVE 1087 /*! cursor: cursor reset calls */ -#define WT_STAT_CONN_CURSOR_RESET 1086 +#define WT_STAT_CONN_CURSOR_RESET 1088 /*! cursor: cursor restarted searches */ -#define WT_STAT_CONN_CURSOR_RESTART 1087 +#define WT_STAT_CONN_CURSOR_RESTART 1089 /*! cursor: cursor search calls */ -#define WT_STAT_CONN_CURSOR_SEARCH 1088 +#define WT_STAT_CONN_CURSOR_SEARCH 1090 /*! 
cursor: cursor search near calls */ -#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1089 +#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1091 /*! cursor: cursor update calls */ -#define WT_STAT_CONN_CURSOR_UPDATE 1090 +#define WT_STAT_CONN_CURSOR_UPDATE 1092 /*! cursor: truncate calls */ -#define WT_STAT_CONN_CURSOR_TRUNCATE 1091 +#define WT_STAT_CONN_CURSOR_TRUNCATE 1093 /*! data-handle: connection data handles currently active */ -#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1092 +#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1094 /*! data-handle: connection sweep candidate became referenced */ -#define WT_STAT_CONN_DH_SWEEP_REF 1093 +#define WT_STAT_CONN_DH_SWEEP_REF 1095 /*! data-handle: connection sweep dhandles closed */ -#define WT_STAT_CONN_DH_SWEEP_CLOSE 1094 +#define WT_STAT_CONN_DH_SWEEP_CLOSE 1096 /*! data-handle: connection sweep dhandles removed from hash list */ -#define WT_STAT_CONN_DH_SWEEP_REMOVE 1095 +#define WT_STAT_CONN_DH_SWEEP_REMOVE 1097 /*! data-handle: connection sweep time-of-death sets */ -#define WT_STAT_CONN_DH_SWEEP_TOD 1096 +#define WT_STAT_CONN_DH_SWEEP_TOD 1098 /*! data-handle: connection sweeps */ -#define WT_STAT_CONN_DH_SWEEPS 1097 +#define WT_STAT_CONN_DH_SWEEPS 1099 /*! data-handle: session dhandles swept */ -#define WT_STAT_CONN_DH_SESSION_HANDLES 1098 +#define WT_STAT_CONN_DH_SESSION_HANDLES 1100 /*! data-handle: session sweep attempts */ -#define WT_STAT_CONN_DH_SESSION_SWEEPS 1099 +#define WT_STAT_CONN_DH_SESSION_SWEEPS 1101 /*! log: busy returns attempting to switch slots */ -#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1100 +#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1102 /*! log: consolidated slot closures */ -#define WT_STAT_CONN_LOG_SLOT_CLOSES 1101 +#define WT_STAT_CONN_LOG_SLOT_CLOSES 1103 /*! log: consolidated slot join races */ -#define WT_STAT_CONN_LOG_SLOT_RACES 1102 +#define WT_STAT_CONN_LOG_SLOT_RACES 1104 /*! 
log: consolidated slot join transitions */ -#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1103 +#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1105 /*! log: consolidated slot joins */ -#define WT_STAT_CONN_LOG_SLOT_JOINS 1104 +#define WT_STAT_CONN_LOG_SLOT_JOINS 1106 /*! log: consolidated slot unbuffered writes */ -#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1105 +#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1107 /*! log: log bytes of payload data */ -#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1106 +#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1108 /*! log: log bytes written */ -#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1107 +#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1109 /*! log: log files manually zero-filled */ -#define WT_STAT_CONN_LOG_ZERO_FILLS 1108 +#define WT_STAT_CONN_LOG_ZERO_FILLS 1110 /*! log: log flush operations */ -#define WT_STAT_CONN_LOG_FLUSH 1109 +#define WT_STAT_CONN_LOG_FLUSH 1111 +/*! log: log force write operations */ +#define WT_STAT_CONN_LOG_FORCE_WRITE 1112 +/*! log: log force write operations skipped */ +#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1113 /*! log: log records compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1110 +#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1114 /*! log: log records not compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1111 +#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1115 /*! log: log records too small to compress */ -#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1112 +#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1116 /*! log: log release advances write LSN */ -#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1113 +#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1117 /*! log: log scan operations */ -#define WT_STAT_CONN_LOG_SCANS 1114 +#define WT_STAT_CONN_LOG_SCANS 1118 /*! log: log scan records requiring two reads */ -#define WT_STAT_CONN_LOG_SCAN_REREADS 1115 +#define WT_STAT_CONN_LOG_SCAN_REREADS 1119 /*! log: log server thread advances write LSN */ -#define WT_STAT_CONN_LOG_WRITE_LSN 1116 +#define WT_STAT_CONN_LOG_WRITE_LSN 1120 +/*! 
log: log server thread write LSN walk skipped */ +#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1121 /*! log: log sync operations */ -#define WT_STAT_CONN_LOG_SYNC 1117 +#define WT_STAT_CONN_LOG_SYNC 1122 /*! log: log sync_dir operations */ -#define WT_STAT_CONN_LOG_SYNC_DIR 1118 +#define WT_STAT_CONN_LOG_SYNC_DIR 1123 /*! log: log write operations */ -#define WT_STAT_CONN_LOG_WRITES 1119 +#define WT_STAT_CONN_LOG_WRITES 1124 /*! log: logging bytes consolidated */ -#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1120 +#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1125 /*! log: maximum log file size */ -#define WT_STAT_CONN_LOG_MAX_FILESIZE 1121 +#define WT_STAT_CONN_LOG_MAX_FILESIZE 1126 /*! log: number of pre-allocated log files to create */ -#define WT_STAT_CONN_LOG_PREALLOC_MAX 1122 +#define WT_STAT_CONN_LOG_PREALLOC_MAX 1127 /*! log: pre-allocated log files not ready and missed */ -#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1123 +#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1128 /*! log: pre-allocated log files prepared */ -#define WT_STAT_CONN_LOG_PREALLOC_FILES 1124 +#define WT_STAT_CONN_LOG_PREALLOC_FILES 1129 /*! log: pre-allocated log files used */ -#define WT_STAT_CONN_LOG_PREALLOC_USED 1125 +#define WT_STAT_CONN_LOG_PREALLOC_USED 1130 /*! log: records processed by log scan */ -#define WT_STAT_CONN_LOG_SCAN_RECORDS 1126 +#define WT_STAT_CONN_LOG_SCAN_RECORDS 1131 /*! log: total in-memory size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_MEM 1127 +#define WT_STAT_CONN_LOG_COMPRESS_MEM 1132 /*! log: total log buffer size */ -#define WT_STAT_CONN_LOG_BUFFER_SIZE 1128 +#define WT_STAT_CONN_LOG_BUFFER_SIZE 1133 /*! log: total size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_LEN 1129 +#define WT_STAT_CONN_LOG_COMPRESS_LEN 1134 /*! log: written slots coalesced */ -#define WT_STAT_CONN_LOG_SLOT_COALESCED 1130 +#define WT_STAT_CONN_LOG_SLOT_COALESCED 1135 /*! 
log: yields waiting for previous log file close */ -#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1131 +#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1136 /*! reconciliation: fast-path pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1132 +#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1137 /*! reconciliation: page reconciliation calls */ -#define WT_STAT_CONN_REC_PAGES 1133 +#define WT_STAT_CONN_REC_PAGES 1138 /*! reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_CONN_REC_PAGES_EVICTION 1134 +#define WT_STAT_CONN_REC_PAGES_EVICTION 1139 /*! reconciliation: pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE 1135 +#define WT_STAT_CONN_REC_PAGE_DELETE 1140 /*! reconciliation: split bytes currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1136 +#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1141 /*! reconciliation: split objects currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1137 +#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1142 /*! session: open cursor count */ -#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1138 +#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1143 /*! session: open session count */ -#define WT_STAT_CONN_SESSION_OPEN 1139 +#define WT_STAT_CONN_SESSION_OPEN 1144 /*! thread-yield: page acquire busy blocked */ -#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1140 +#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1145 /*! thread-yield: page acquire eviction blocked */ -#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1141 +#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1146 /*! thread-yield: page acquire locked blocked */ -#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1142 +#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1147 /*! thread-yield: page acquire read blocked */ -#define WT_STAT_CONN_PAGE_READ_BLOCKED 1143 +#define WT_STAT_CONN_PAGE_READ_BLOCKED 1148 /*! thread-yield: page acquire time sleeping (usecs) */ -#define WT_STAT_CONN_PAGE_SLEEP 1144 +#define WT_STAT_CONN_PAGE_SLEEP 1149 /*! 
transaction: number of named snapshots created */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1145 +#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1150 /*! transaction: number of named snapshots dropped */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1146 +#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1151 /*! transaction: transaction begins */ -#define WT_STAT_CONN_TXN_BEGIN 1147 +#define WT_STAT_CONN_TXN_BEGIN 1152 /*! transaction: transaction checkpoint currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1148 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1153 /*! transaction: transaction checkpoint generation */ -#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1149 +#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1154 /*! transaction: transaction checkpoint max time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1150 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1155 /*! transaction: transaction checkpoint min time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1151 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1156 /*! transaction: transaction checkpoint most recent time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1152 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1157 /*! transaction: transaction checkpoint total time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1153 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1158 /*! transaction: transaction checkpoints */ -#define WT_STAT_CONN_TXN_CHECKPOINT 1154 +#define WT_STAT_CONN_TXN_CHECKPOINT 1159 /*! transaction: transaction failures due to cache overflow */ -#define WT_STAT_CONN_TXN_FAIL_CACHE 1155 +#define WT_STAT_CONN_TXN_FAIL_CACHE 1160 /*! transaction: transaction range of IDs currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_RANGE 1156 +#define WT_STAT_CONN_TXN_PINNED_RANGE 1161 /*! 
transaction: transaction range of IDs currently pinned by a checkpoint */ -#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1157 +#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1162 /*! transaction: transaction range of IDs currently pinned by named * snapshots */ -#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1158 +#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1163 /*! transaction: transaction sync calls */ -#define WT_STAT_CONN_TXN_SYNC 1159 +#define WT_STAT_CONN_TXN_SYNC 1164 /*! transaction: transactions committed */ -#define WT_STAT_CONN_TXN_COMMIT 1160 +#define WT_STAT_CONN_TXN_COMMIT 1165 /*! transaction: transactions rolled back */ -#define WT_STAT_CONN_TXN_ROLLBACK 1161 +#define WT_STAT_CONN_TXN_ROLLBACK 1166 /*! * @} diff --git a/src/third_party/wiredtiger/src/include/wiredtiger_ext.h b/src/third_party/wiredtiger/src/include/wiredtiger_ext.h index 0db876b56f3..7d97d97dcf5 100644 --- a/src/third_party/wiredtiger/src/include/wiredtiger_ext.h +++ b/src/third_party/wiredtiger/src/include/wiredtiger_ext.h @@ -268,8 +268,9 @@ struct __wt_extension_api { WT_SESSION *session, const char *key, const char *value); /*! - * Pack a structure into a buffer. - * See ::wiredtiger_struct_pack for details. + * Pack a structure into a buffer. Deprecated in favor of stream + * based pack and unpack API. See WT_EXTENSION_API::pack_start for + * details. * * @param wt_api the extension handle * @param session the session handle @@ -282,8 +283,8 @@ struct __wt_extension_api { void *buffer, size_t size, const char *format, ...); /*! - * Calculate the size required to pack a structure. - * See ::wiredtiger_struct_size for details. + * Calculate the size required to pack a structure. Deprecated in + * favor of stream based pack and unpack API. * * @param wt_api the extension handle * @param session the session handle @@ -296,8 +297,9 @@ struct __wt_extension_api { size_t *sizep, const char *format, ...); /*! - * Unpack a structure from a buffer. 
- * See ::wiredtiger_struct_unpack for details. + * Unpack a structure from a buffer. Deprecated in favor of stream + * based pack and unpack API. See WT_EXTENSION_API::unpack_start for + * details. * * @param wt_api the extension handle * @param session the session handle @@ -309,6 +311,130 @@ struct __wt_extension_api { int (*struct_unpack)(WT_EXTENSION_API *wt_api, WT_SESSION *session, const void *buffer, size_t size, const char *format, ...); + /* + * Streaming pack/unpack API. + */ + /*! + * Start a packing operation into a buffer. + * See ::wiredtiger_pack_start for details. + * + * @param session the session handle + * @param format the data format, see @ref packing + * @param buffer a pointer to memory to hold the packed data + * @param size the size of the buffer + * @param[out] psp the new packing stream handle + * @errors + */ + int (*pack_start)(WT_EXTENSION_API *wt_api, + WT_SESSION *session, const char *format, + void *buffer, size_t size, WT_PACK_STREAM **psp); + + /*! + * Start an unpacking operation from a buffer. + * See ::wiredtiger_unpack_start for details. + * + * @param session the session handle + * @param format the data format, see @ref packing + * @param buffer a pointer to memory holding the packed data + * @param size the size of the buffer + * @param[out] psp the new packing stream handle + * @errors + */ + int (*unpack_start)(WT_EXTENSION_API *wt_api, + WT_SESSION *session, const char *format, + const void *buffer, size_t size, WT_PACK_STREAM **psp); + + /*! + * Close a packing stream. + * + * @param ps the packing stream handle + * @param[out] usedp the number of bytes in the buffer used by the + * stream + * @errors + */ + int (*pack_close)(WT_EXTENSION_API *wt_api, + WT_PACK_STREAM *ps, size_t *usedp); + + /*! + * Pack an item into a packing stream. + * + * @param ps the packing stream handle + * @param item an item to pack + * @errors + */ + int (*pack_item)(WT_EXTENSION_API *wt_api, + WT_PACK_STREAM *ps, WT_ITEM *item); + + /*! 
+ * Pack a signed integer into a packing stream. + * + * @param ps the packing stream handle + * @param i a signed integer to pack + * @errors + */ + int (*pack_int)(WT_EXTENSION_API *wt_api, + WT_PACK_STREAM *ps, int64_t i); + + /*! + * Pack a string into a packing stream. + * + * @param ps the packing stream handle + * @param s a string to pack + * @errors + */ + int (*pack_str)(WT_EXTENSION_API *wt_api, + WT_PACK_STREAM *ps, const char *s); + + /*! + * Pack an unsigned integer into a packing stream. + * + * @param ps the packing stream handle + * @param u an unsigned integer to pack + * @errors + */ + int (*pack_uint)(WT_EXTENSION_API *wt_api, + WT_PACK_STREAM *ps, uint64_t u); + + /*! + * Unpack an item from a packing stream. + * + * @param ps the packing stream handle + * @param item an item to unpack + * @errors + */ + int (*unpack_item)(WT_EXTENSION_API *wt_api, + WT_PACK_STREAM *ps, WT_ITEM *item); + + /*! + * Unpack a signed integer from a packing stream. + * + * @param ps the packing stream handle + * @param[out] ip the unpacked signed integer + * @errors + */ + int (*unpack_int)(WT_EXTENSION_API *wt_api, + WT_PACK_STREAM *ps, int64_t *ip); + + /*! + * Unpack a string from a packing stream. + * + * @param ps the packing stream handle + * @param[out] sp the unpacked string + * @errors + */ + int (*unpack_str)(WT_EXTENSION_API *wt_api, + WT_PACK_STREAM *ps, const char **sp); + + /*! + * Unpack an unsigned integer from a packing stream. + * + * @param ps the packing stream handle + * @param[out] up the unpacked unsigned integer + * @errors + */ + int (*unpack_uint)(WT_EXTENSION_API *wt_api, + WT_PACK_STREAM *ps, uint64_t *up); + /*! * Return the current transaction ID. 
* diff --git a/src/third_party/wiredtiger/src/log/log.c b/src/third_party/wiredtiger/src/log/log.c index 03145d8408c..e41073299a8 100644 --- a/src/third_party/wiredtiger/src/log/log.c +++ b/src/third_party/wiredtiger/src/log/log.c @@ -29,7 +29,7 @@ __wt_log_ckpt(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn) log = conn->log; log->ckpt_lsn = *ckp_lsn; if (conn->log_cond != NULL) - WT_RET(__wt_cond_signal(session, conn->log_cond)); + WT_RET(__wt_cond_auto_signal(session, conn->log_cond)); return (0); } @@ -46,7 +46,7 @@ __wt_log_flush_lsn(WT_SESSION_IMPL *session, WT_LSN *lsn, bool start) conn = S2C(session); log = conn->log; - WT_RET(__wt_log_force_write(session, 1)); + WT_RET(__wt_log_force_write(session, 1, NULL)); WT_RET(__wt_log_wrlsn(session, NULL)); if (start) *lsn = log->write_start_lsn; @@ -118,9 +118,9 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn) */ if (log->sync_dir_lsn.l.file < min_lsn->l.file) { WT_ERR(__wt_verbose(session, WT_VERB_LOG, - "log_force_sync: sync directory %s to LSN %d/%lu", - log->log_dir_fh->name, - min_lsn->l.file, min_lsn->l.offset)); + "log_force_sync: sync directory %s to LSN %" PRIu32 + "/%" PRIu32, + log->log_dir_fh->name, min_lsn->l.file, min_lsn->l.offset)); WT_ERR(__wt_directory_sync_fh(session, log->log_dir_fh)); log->sync_dir_lsn = *min_lsn; WT_STAT_FAST_CONN_INCR(session, log_sync_dir); @@ -130,7 +130,7 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn) */ if (__wt_log_cmp(&log->sync_lsn, min_lsn) < 0) { WT_ERR(__wt_verbose(session, WT_VERB_LOG, - "log_force_sync: sync %s to LSN %d/%lu", + "log_force_sync: sync %s to LSN %" PRIu32 "/%" PRIu32, log->log_fh->name, min_lsn->l.file, min_lsn->l.offset)); WT_ERR(__wt_fsync(session, log->log_fh)); log->sync_lsn = *min_lsn; @@ -273,7 +273,7 @@ __wt_log_get_all_files(WT_SESSION_IMPL *session, * These may be files needed by backup. Force the current slot * to get written to the file. 
*/ - WT_RET(__wt_log_force_write(session, 1)); + WT_RET(__wt_log_force_write(session, 1, NULL)); WT_RET(__log_get_files(session, WT_LOG_FILENAME, &files, &count)); /* Filter out any files that are below the checkpoint LSN. */ @@ -697,7 +697,7 @@ __log_openfile(WT_SESSION_IMPL *session, WT_ERR_MSG(session, WT_ERROR, "unsupported WiredTiger file version: this build " " only supports major/minor versions up to %d/%d, " - " and the file is version %d/%d", + " and the file is version %" PRIu16 "/%" PRIu16, WT_LOG_MAJOR_VERSION, WT_LOG_MINOR_VERSION, desc->majorv, desc->minorv); } @@ -824,7 +824,7 @@ __log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created) if (create_log) { WT_STAT_FAST_CONN_INCR(session, log_prealloc_missed); if (conn->log_cond != NULL) - WT_RET(__wt_cond_signal( + WT_RET(__wt_cond_auto_signal( session, conn->log_cond)); } } @@ -1129,7 +1129,8 @@ __wt_log_open(WT_SESSION_IMPL *session) } log->fileid = lastlog; WT_ERR(__wt_verbose(session, WT_VERB_LOG, - "log_open: first log %d last log %d", firstlog, lastlog)); + "log_open: first log %" PRIu32 " last log %" PRIu32, + firstlog, lastlog)); if (firstlog == UINT32_MAX) { WT_ASSERT(session, logcount == 0); WT_INIT_LSN(&log->first_lsn); @@ -1251,10 +1252,8 @@ __log_has_hole(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, bool *hole) } } -err: if (buf != NULL) - __wt_free(session, buf); - if (zerobuf != NULL) - __wt_free(session, zerobuf); +err: __wt_free(session, buf); + __wt_free(session, zerobuf); return (ret); } @@ -1338,7 +1337,7 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep) */ if (F_ISSET(session, WT_SESSION_LOCKED_SLOT)) __wt_spin_unlock(session, &log->log_slot_lock); - WT_ERR(__wt_cond_signal(session, conn->log_wrlsn_cond)); + WT_ERR(__wt_cond_auto_signal(session, conn->log_wrlsn_cond)); if (++yield_count < WT_THOUSAND) __wt_yield(); else @@ -1395,7 +1394,8 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep) 
(log->sync_dir_lsn.l.file < sync_lsn.l.file)) { WT_ASSERT(session, log->log_dir_fh != NULL); WT_ERR(__wt_verbose(session, WT_VERB_LOG, - "log_release: sync directory %s to LSN %u/%lu", + "log_release: sync directory %s to LSN %" PRIu32 + "/%" PRIu32, log->log_dir_fh->name, sync_lsn.l.file, sync_lsn.l.offset)); WT_ERR(__wt_directory_sync_fh( @@ -1410,7 +1410,8 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep) if (F_ISSET(slot, WT_SLOT_SYNC) && __wt_log_cmp(&log->sync_lsn, &slot->slot_end_lsn) < 0) { WT_ERR(__wt_verbose(session, WT_VERB_LOG, - "log_release: sync log %s to LSN %u/%lu", + "log_release: sync log %s to LSN %" PRIu32 + "/%" PRIu32, log->log_fh->name, sync_lsn.l.file, sync_lsn.l.offset)); WT_STAT_FAST_CONN_INCR(session, log_sync); @@ -1477,7 +1478,7 @@ __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags, if (LF_ISSET(WT_LOGSCAN_RECOVER)) WT_RET(__wt_verbose(session, WT_VERB_LOG, - "__wt_log_scan truncating to %u/%u", + "__wt_log_scan truncating to %" PRIu32 "/%" PRIu32, log->trunc_lsn.l.file, log->trunc_lsn.l.offset)); if (log != NULL) { @@ -1758,14 +1759,25 @@ err: WT_STAT_FAST_CONN_INCR(session, log_scans); * Wrapper function that takes the lock. 
*/ int -__wt_log_force_write(WT_SESSION_IMPL *session, bool retry) +__wt_log_force_write(WT_SESSION_IMPL *session, bool retry, bool *did_work) { WT_LOG *log; WT_MYSLOT myslot; + uint32_t joined; log = S2C(session)->log; memset(&myslot, 0, sizeof(myslot)); + WT_STAT_FAST_CONN_INCR(session, log_force_write); + if (did_work != NULL) + *did_work = true; myslot.slot = log->active_slot; + joined = WT_LOG_SLOT_JOINED(log->active_slot->slot_state); + if (joined == 0) { + WT_STAT_FAST_CONN_INCR(session, log_force_write_skip); + if (did_work != NULL) + *did_work = false; + return (0); + } return (__wt_log_slot_switch(session, &myslot, retry, true)); } @@ -1998,10 +2010,10 @@ __log_write_internal(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, * XXX I've seen times when conditions are NULL. */ if (conn->log_cond != NULL) { - WT_ERR(__wt_cond_signal(session, conn->log_cond)); + WT_ERR(__wt_cond_auto_signal(session, conn->log_cond)); __wt_yield(); } else - WT_ERR(__wt_log_force_write(session, 1)); + WT_ERR(__wt_log_force_write(session, 1, NULL)); } if (LF_ISSET(WT_LOG_FLUSH)) { /* Wait for our writes to reach the OS */ @@ -2128,7 +2140,7 @@ __wt_log_flush(WT_SESSION_IMPL *session, uint32_t flags) WT_RET(__wt_log_flush_lsn(session, &lsn, false)); WT_RET(__wt_verbose(session, WT_VERB_LOG, - "log_flush: flags %d LSN %u/%lu", + "log_flush: flags %#" PRIx32 " LSN %" PRIu32 "/%" PRIu32, flags, lsn.l.file, lsn.l.offset)); /* * If the user wants write-no-sync, there is nothing more to do. diff --git a/src/third_party/wiredtiger/src/log/log_slot.c b/src/third_party/wiredtiger/src/log/log_slot.c index 2844516e78f..570d1c9ce48 100644 --- a/src/third_party/wiredtiger/src/log/log_slot.c +++ b/src/third_party/wiredtiger/src/log/log_slot.c @@ -253,7 +253,7 @@ __wt_log_slot_new(WT_SESSION_IMPL *session) /* * If we didn't find any free slots signal the worker thread. 
*/ - (void)__wt_cond_signal(session, conn->log_wrlsn_cond); + (void)__wt_cond_auto_signal(session, conn->log_wrlsn_cond); __wt_yield(); } /* NOTREACHED */ diff --git a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c index 0197b6481f4..e023b2b407e 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c @@ -1556,7 +1556,7 @@ __wt_clsm_open(WT_SESSION_IMPL *session, WT_ERR(ret); /* Make sure we have exclusive access if and only if we want it */ - WT_ASSERT(session, !bulk || lsm_tree->exclusive); + WT_ASSERT(session, !bulk || lsm_tree->excl_session != NULL); WT_ERR(__wt_calloc_one(session, &clsm)); diff --git a/src/third_party/wiredtiger/src/lsm/lsm_manager.c b/src/third_party/wiredtiger/src/lsm/lsm_manager.c index cf581475d2c..943a5894ab3 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_manager.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_manager.c @@ -390,7 +390,7 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session) F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST); dhandle_locked = true; TAILQ_FOREACH(lsm_tree, &S2C(session)->lsmqh, q) { - if (!F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE)) + if (!lsm_tree->active) continue; WT_ERR(__wt_epoch(session, &now)); pushms = lsm_tree->work_push_ts.tv_sec == 0 ? 0 : @@ -433,8 +433,10 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session) session, WT_LSM_WORK_BLOOM, 0, lsm_tree)); WT_ERR(__wt_verbose(session, WT_VERB_LSM_MANAGER, - "MGR %s: queue %d mod %d nchunks %d" - " flags 0x%x aggressive %d pushms %" PRIu64 + "MGR %s: queue %" PRIu32 " mod %d " + "nchunks %" PRIu32 + " flags %#" PRIx32 " aggressive %" PRIu32 + " pushms %" PRIu64 " fillms %" PRIu64, lsm_tree->name, lsm_tree->queue_ref, lsm_tree->modified, lsm_tree->nchunks, @@ -648,7 +650,7 @@ __wt_lsm_manager_push_entry(WT_SESSION_IMPL *session, * is checked. 
*/ (void)__wt_atomic_add32(&lsm_tree->queue_ref, 1); - if (!F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE)) { + if (!lsm_tree->active) { (void)__wt_atomic_sub32(&lsm_tree->queue_ref, 1); return (0); } diff --git a/src/third_party/wiredtiger/src/lsm/lsm_merge.c b/src/third_party/wiredtiger/src/lsm/lsm_merge.c index 29325066da7..6d907284546 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_merge.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_merge.c @@ -60,10 +60,11 @@ __lsm_merge_aggressive_update(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) { struct timespec now; uint64_t msec_since_last_merge, msec_to_create_merge; - u_int new_aggressive; + uint32_t new_aggressive; new_aggressive = 0; + WT_ASSERT(session, lsm_tree->merge_min != 0); /* * If the tree is open read-only or we are compacting, be very * aggressive. Otherwise, we can spend a long time waiting for merges @@ -124,8 +125,9 @@ __lsm_merge_aggressive_update(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) if (new_aggressive > lsm_tree->merge_aggressiveness) { WT_RET(__wt_verbose(session, WT_VERB_LSM, - "LSM merge %s got aggressive (old %u new %u), " - "merge_min %d, %u / %" PRIu64, + "LSM merge %s got aggressive " + "(old %" PRIu32 " new %" PRIu32 "), " + "merge_min %u, %" PRIu64 " / %" PRIu64, lsm_tree->name, lsm_tree->merge_aggressiveness, new_aggressive, lsm_tree->merge_min, msec_since_last_merge, lsm_tree->chunk_fill_ms)); @@ -410,7 +412,8 @@ __wt_lsm_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int id) start_chunk, end_chunk, dest_id, record_count, generation)); for (verb = start_chunk; verb <= end_chunk; verb++) WT_ERR(__wt_verbose(session, WT_VERB_LSM, - "Merging %s: Chunk[%u] id %u, gen: %" PRIu32 + "Merging %s: Chunk[%u] id %" PRIu32 + ", gen: %" PRIu32 ", size: %" PRIu64 ", records: %" PRIu64, lsm_tree->name, verb, lsm_tree->chunk[verb]->id, lsm_tree->chunk[verb]->generation, @@ -460,7 +463,7 @@ __wt_lsm_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int id) #define 
LSM_MERGE_CHECK_INTERVAL WT_THOUSAND for (insert_count = 0; (ret = src->next(src)) == 0; insert_count++) { if (insert_count % LSM_MERGE_CHECK_INTERVAL == 0) { - if (!F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE)) + if (!lsm_tree->active) WT_ERR(EINTR); WT_STAT_FAST_CONN_INCRV(session, diff --git a/src/third_party/wiredtiger/src/lsm/lsm_meta.c b/src/third_party/wiredtiger/src/lsm/lsm_meta.c index d76b2a48aa7..e19e2cd0126 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_meta.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_meta.c @@ -9,17 +9,17 @@ #include "wt_internal.h" /* - * __wt_lsm_meta_read -- - * Read the metadata for an LSM tree. + * __lsm_meta_read_v0 -- + * Read v0 of LSM metadata. */ -int -__wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) +static int +__lsm_meta_read_v0( + WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, const char *lsmconf) { WT_CONFIG cparser, lparser; WT_CONFIG_ITEM ck, cv, fileconf, lk, lv, metadata; WT_DECL_RET; WT_LSM_CHUNK *chunk; - char *lsmconfig; u_int nchunks; chunk = NULL; /* -Wconditional-uninitialized */ @@ -28,8 +28,7 @@ __wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) if (F_ISSET(S2C(session), WT_CONN_LSM_MERGE)) F_SET(lsm_tree, WT_LSM_TREE_MERGES); - WT_RET(__wt_metadata_search(session, lsm_tree->name, &lsmconfig)); - WT_ERR(__wt_config_init(session, &cparser, lsmconfig)); + WT_ERR(__wt_config_init(session, &cparser, lsmconf)); while ((ret = __wt_config_next(&cparser, &ck, &cv)) == 0) { if (WT_STRING_MATCH("key_format", ck.str, ck.len)) { __wt_free(session, lsm_tree->key_format); @@ -48,7 +47,7 @@ __wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) * from the file configuration. 
*/ WT_ERR(__wt_config_getones( - session, lsmconfig, "file_config", &fileconf)); + session, lsmconf, "file_config", &fileconf)); WT_CLEAR(metadata); WT_ERR_NOTFOUND_OK(__wt_config_subgets( session, &fileconf, "app_metadata", &metadata)); @@ -160,16 +159,292 @@ __wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) */ } WT_ERR_NOTFOUND_OK(ret); +err: return (ret); +} + +/* + * __lsm_meta_read_v1 -- + * Read v1 of LSM metadata. + */ +static int +__lsm_meta_read_v1( + WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, const char *lsmconf) +{ + WT_CONFIG lparser; + WT_CONFIG_ITEM cv, lk, lv, metadata; + WT_DECL_ITEM(buf); + WT_DECL_RET; + WT_LSM_CHUNK *chunk; + const char *file_cfg[] = { + WT_CONFIG_BASE(session, file_config), NULL, NULL, NULL }; + char *fileconf; + u_int nchunks; + + chunk = NULL; /* -Wconditional-uninitialized */ + + WT_ERR(__wt_config_getones(session, lsmconf, "key_format", &cv)); + WT_ERR(__wt_strndup(session, cv.str, cv.len, &lsm_tree->key_format)); + WT_ERR(__wt_config_getones(session, lsmconf, "value_format", &cv)); + WT_ERR(__wt_strndup(session, cv.str, cv.len, &lsm_tree->value_format)); + + WT_ERR(__wt_config_getones(session, lsmconf, "collator", &cv)); + if (cv.len != 0 && !WT_STRING_MATCH("none", cv.str, cv.len)) { + /* Extract the application-supplied metadata (if any). */ + WT_CLEAR(metadata); + WT_ERR_NOTFOUND_OK(__wt_config_getones( + session, lsmconf, "app_metadata", &metadata)); + WT_ERR(__wt_collator_config(session, lsm_tree->name, + &cv, &metadata, + &lsm_tree->collator, &lsm_tree->collator_owned)); + WT_ERR(__wt_strndup(session, + cv.str, cv.len, &lsm_tree->collator_name)); + } + + WT_ERR(__wt_config_getones(session, lsmconf, "lsm.auto_throttle", &cv)); + if (cv.val) + F_SET(lsm_tree, WT_LSM_TREE_THROTTLE); + else + F_CLR(lsm_tree, WT_LSM_TREE_THROTTLE); + + WT_ERR(__wt_config_getones(session, lsmconf, "lsm.bloom", &cv)); + FLD_SET(lsm_tree->bloom, + (cv.val == 0 ? 
WT_LSM_BLOOM_OFF : WT_LSM_BLOOM_MERGED)); + WT_ERR(__wt_config_getones(session, lsmconf, "lsm.bloom_oldest", &cv)); + if (cv.val != 0) + FLD_SET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST); + + if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF) && + FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST)) + WT_ERR_MSG(session, EINVAL, + "Bloom filters can only be created on newest and oldest " + "chunks if bloom filters are enabled"); + + WT_ERR(__wt_config_getones( + session, lsmconf, "lsm.bloom_bit_count", &cv)); + lsm_tree->bloom_bit_count = (uint32_t)cv.val; + WT_ERR(__wt_config_getones(session, lsmconf, "lsm.bloom_config", &cv)); + /* Don't include the brackets. */ + if (cv.type == WT_CONFIG_ITEM_STRUCT) { + cv.str++; + cv.len -= 2; + } + WT_ERR(__wt_config_check(session, + WT_CONFIG_REF(session, WT_SESSION_create), cv.str, cv.len)); + WT_ERR(__wt_strndup(session, cv.str, cv.len, &lsm_tree->bloom_config)); + WT_ERR(__wt_config_getones( + session, lsmconf, "lsm.bloom_hash_count", &cv)); + lsm_tree->bloom_hash_count = (uint32_t)cv.val; + + WT_ERR(__wt_config_getones( + session, lsmconf, "lsm.chunk_count_limit", &cv)); + lsm_tree->chunk_count_limit = (uint32_t)cv.val; + if (cv.val == 0) + F_SET(lsm_tree, WT_LSM_TREE_MERGES); + else + F_CLR(lsm_tree, WT_LSM_TREE_MERGES); + WT_ERR(__wt_config_getones(session, lsmconf, "lsm.chunk_max", &cv)); + lsm_tree->chunk_max = (uint64_t)cv.val; + WT_ERR(__wt_config_getones(session, lsmconf, "lsm.chunk_size", &cv)); + lsm_tree->chunk_size = (uint64_t)cv.val; + + if (lsm_tree->chunk_size > lsm_tree->chunk_max) + WT_ERR_MSG(session, EINVAL, + "Chunk size (chunk_size) must be smaller than or equal to " + "the maximum chunk size (chunk_max)"); + + WT_ERR(__wt_config_getones(session, lsmconf, "lsm.merge_max", &cv)); + lsm_tree->merge_max = (uint32_t)cv.val; + WT_ERR(__wt_config_getones(session, lsmconf, "lsm.merge_min", &cv)); + lsm_tree->merge_min = (uint32_t)cv.val; + + if (lsm_tree->merge_min > lsm_tree->merge_max) + WT_ERR_MSG(session, EINVAL, 
+ "LSM merge_min must be less than or equal to merge_max"); + + WT_ERR(__wt_config_getones(session, lsmconf, "last", &cv)); + lsm_tree->last = (u_int)cv.val; + WT_ERR(__wt_config_getones(session, lsmconf, "chunks", &cv)); + WT_ERR(__wt_config_subinit(session, &lparser, &cv)); + for (nchunks = 0; (ret = + __wt_config_next(&lparser, &lk, &lv)) == 0; ) { + if (WT_STRING_MATCH("id", lk.str, lk.len)) { + WT_ERR(__wt_realloc_def(session, + &lsm_tree->chunk_alloc, + nchunks + 1, &lsm_tree->chunk)); + WT_ERR(__wt_calloc_one(session, &chunk)); + lsm_tree->chunk[nchunks++] = chunk; + chunk->id = (uint32_t)lv.val; + WT_ERR(__wt_lsm_tree_chunk_name(session, + lsm_tree, chunk->id, &chunk->uri)); + F_SET(chunk, + WT_LSM_CHUNK_ONDISK | + WT_LSM_CHUNK_STABLE); + } else if (WT_STRING_MATCH("bloom", lk.str, lk.len)) { + WT_ERR(__wt_lsm_tree_bloom_name( + session, lsm_tree, chunk->id, &chunk->bloom_uri)); + F_SET(chunk, WT_LSM_CHUNK_BLOOM); + continue; + } else if (WT_STRING_MATCH("chunk_size", lk.str, lk.len)) { + chunk->size = (uint64_t)lv.val; + continue; + } else if (WT_STRING_MATCH("count", lk.str, lk.len)) { + chunk->count = (uint64_t)lv.val; + continue; + } else if (WT_STRING_MATCH("generation", lk.str, lk.len)) { + chunk->generation = (uint32_t)lv.val; + continue; + } + } + WT_ERR_NOTFOUND_OK(ret); + lsm_tree->nchunks = nchunks; + + WT_ERR(__wt_config_getones(session, lsmconf, "old_chunks", &cv)); + WT_ERR(__wt_config_subinit(session, &lparser, &cv)); + for (nchunks = 0; (ret = + __wt_config_next(&lparser, &lk, &lv)) == 0; ) { + if (WT_STRING_MATCH("bloom", lk.str, lk.len)) { + WT_ERR(__wt_strndup(session, + lv.str, lv.len, &chunk->bloom_uri)); + F_SET(chunk, WT_LSM_CHUNK_BLOOM); + continue; + } + WT_ERR(__wt_realloc_def(session, + &lsm_tree->old_alloc, nchunks + 1, + &lsm_tree->old_chunks)); + WT_ERR(__wt_calloc_one(session, &chunk)); + lsm_tree->old_chunks[nchunks++] = chunk; + WT_ERR(__wt_strndup(session, + lk.str, lk.len, &chunk->uri)); + F_SET(chunk, 
WT_LSM_CHUNK_ONDISK); + } + WT_ERR_NOTFOUND_OK(ret); + lsm_tree->nold_chunks = nchunks; + + /* + * Set up the config for each chunk. + * + * Make the memory_page_max double the chunk size, so application + * threads don't immediately try to force evict the chunk when the + * worker thread clears the NO_EVICTION flag. + */ + file_cfg[1] = lsmconf; + WT_ERR(__wt_scr_alloc(session, 0, &buf)); + WT_ERR(__wt_buf_fmt(session, buf, + "key_format=u,value_format=u,memory_page_max=%" PRIu64, + 2 * lsm_tree->chunk_max)); + file_cfg[2] = buf->data; + WT_ERR(__wt_config_collapse(session, file_cfg, &fileconf)); + lsm_tree->file_config = fileconf; + + /* + * Ignore any other values: the metadata entry might have been + * created by a future release, with unknown options. + */ +err: __wt_scr_free(session, &buf); + return (ret); +} + +/* + * __lsm_meta_upgrade_v1 -- + * Upgrade to v1 of LSM metadata. + */ +static int +__lsm_meta_upgrade_v1(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) +{ + WT_DECL_ITEM(buf); + WT_DECL_RET; + const char *new_cfg[] = { + WT_CONFIG_BASE(session, lsm_meta), NULL, NULL, NULL }; + + /* Include the custom config that used to be embedded in file_config. */ + new_cfg[1] = lsm_tree->file_config; + + WT_ERR(__wt_scr_alloc(session, 0, &buf)); + WT_ERR(__wt_buf_fmt(session, buf, + "key_format=%s,value_format=%s", + lsm_tree->key_format, lsm_tree->value_format)); + + WT_ERR(__wt_buf_catfmt(session, buf, ",collator=%s", + lsm_tree->collator_name != NULL ? 
lsm_tree->collator_name : "")); + + WT_ERR(__wt_buf_catfmt(session, buf, ",lsm=(")); + + WT_ERR(__wt_buf_catfmt(session, buf, "auto_throttle=%d", + F_ISSET(lsm_tree, WT_LSM_TREE_THROTTLE))); + + WT_ERR(__wt_buf_catfmt(session, buf, ",bloom=%d", + FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_MERGED))); + WT_ERR(__wt_buf_catfmt(session, buf, ",bloom_oldest=%d", + FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST))); + WT_ERR(__wt_buf_catfmt(session, buf, ",bloom_bit_count=%" PRIu32, + lsm_tree->bloom_bit_count)); + if (lsm_tree->bloom_config != NULL && + strlen(lsm_tree->bloom_config) > 0) + WT_ERR(__wt_buf_catfmt(session, buf, ",bloom_config=(%s)", + lsm_tree->bloom_config)); + else + WT_ERR(__wt_buf_catfmt(session, buf, ",bloom_config=")); + WT_ERR(__wt_buf_catfmt(session, buf, ",bloom_hash_count=%" PRIu32, + lsm_tree->bloom_hash_count)); + + WT_ERR(__wt_buf_catfmt(session, buf, ",chunk_count_limit=%" PRIu32, + lsm_tree->chunk_count_limit)); + WT_ERR(__wt_buf_catfmt(session, buf, ",chunk_max=%" PRIu64, + lsm_tree->chunk_max)); + WT_ERR(__wt_buf_catfmt(session, buf, ",merge_max=%" PRIu32, + lsm_tree->merge_max)); + WT_ERR(__wt_buf_catfmt(session, buf, ",merge_min=%" PRIu32, + lsm_tree->merge_min)); + + WT_ERR(__wt_buf_catfmt(session, buf, ")")); + + new_cfg[2] = buf->data; + WT_ERR(__wt_config_merge(session, new_cfg, NULL, &lsm_tree->config)); + +err: __wt_scr_free(session, &buf); + return (ret); +} +/* + * __wt_lsm_meta_read -- + * Read the metadata for an LSM tree. + */ +int +__wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) +{ + WT_CONFIG_ITEM cval; + WT_DECL_RET; + char *lsmconf; + bool upgrade; + + /* LSM trees inherit the merge setting from the connection. 
*/ + if (F_ISSET(S2C(session), WT_CONN_LSM_MERGE)) + F_SET(lsm_tree, WT_LSM_TREE_MERGES); + + WT_RET(__wt_metadata_search(session, lsm_tree->name, &lsmconf)); + upgrade = false; + ret = __wt_config_getones(session, lsmconf, "file_config", &cval); + if (ret == 0) { + ret = __lsm_meta_read_v0(session, lsm_tree, lsmconf); + __wt_free(session, lsmconf); + WT_RET(ret); + upgrade = true; + } else if (ret == WT_NOTFOUND) { + lsm_tree->config = lsmconf; + ret = 0; + WT_RET(__lsm_meta_read_v1(session, lsm_tree, lsmconf)); + } /* - * If the default merge_min was not overridden, calculate it now. We - * do this here so that trees created before merge_min was added get a - * sane value. + * If the default merge_min was not overridden, calculate it now. */ if (lsm_tree->merge_min < 2) lsm_tree->merge_min = WT_MAX(2, lsm_tree->merge_max / 2); - -err: __wt_free(session, lsmconfig); + /* + * If needed, upgrade the configuration. We need to do this after + * we have fixed the merge_min value. + */ + if (upgrade) + WT_RET(__lsm_meta_upgrade_v1(session, lsm_tree)); return (ret); } @@ -184,32 +459,15 @@ __wt_lsm_meta_write(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_DECL_RET; WT_LSM_CHUNK *chunk; u_int i; + const char *new_cfg[] = { NULL, NULL, NULL }; + char *new_metadata; bool first; + new_metadata = NULL; + WT_RET(__wt_scr_alloc(session, 0, &buf)); - WT_ERR(__wt_buf_fmt(session, buf, - "key_format=%s,value_format=%s,bloom_config=(%s),file_config=(%s)", - lsm_tree->key_format, lsm_tree->value_format, - lsm_tree->bloom_config, lsm_tree->file_config)); - if (lsm_tree->collator_name != NULL) - WT_ERR(__wt_buf_catfmt( - session, buf, ",collator=%s", lsm_tree->collator_name)); WT_ERR(__wt_buf_catfmt(session, buf, - ",last=%" PRIu32 - ",chunk_count_limit=%" PRIu32 - ",chunk_max=%" PRIu64 - ",chunk_size=%" PRIu64 - ",auto_throttle=%" PRIu32 - ",merge_max=%" PRIu32 - ",merge_min=%" PRIu32 - ",bloom=%" PRIu32 - ",bloom_bit_count=%" PRIu32 - ",bloom_hash_count=%" PRIu32, - 
lsm_tree->last, lsm_tree->chunk_count_limit, - lsm_tree->chunk_max, lsm_tree->chunk_size, - F_ISSET(lsm_tree, WT_LSM_TREE_THROTTLE) ? 1 : 0, - lsm_tree->merge_max, lsm_tree->merge_min, lsm_tree->bloom, - lsm_tree->bloom_bit_count, lsm_tree->bloom_hash_count)); + ",last=%" PRIu32, lsm_tree->last)); WT_ERR(__wt_buf_catfmt(session, buf, ",chunks=[")); for (i = 0; i < lsm_tree->nchunks; i++) { chunk = lsm_tree->chunk[i]; @@ -243,9 +501,15 @@ __wt_lsm_meta_write(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) session, buf, ",bloom=\"%s\"", chunk->bloom_uri)); } WT_ERR(__wt_buf_catfmt(session, buf, "]")); - ret = __wt_metadata_update(session, lsm_tree->name, buf->data); + + /* Update the existing configuration with the new values. */ + new_cfg[0] = lsm_tree->config; + new_cfg[1] = buf->data; + WT_ERR(__wt_config_collapse(session, new_cfg, &new_metadata)); + ret = __wt_metadata_update(session, lsm_tree->name, new_metadata); WT_ERR(ret); err: __wt_scr_free(session, &buf); + __wt_free(session, new_metadata); return (ret); } diff --git a/src/third_party/wiredtiger/src/lsm/lsm_tree.c b/src/third_party/wiredtiger/src/lsm/lsm_tree.c index 7c188bf3dc7..cb1ddf22f84 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_tree.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_tree.c @@ -27,6 +27,7 @@ __lsm_tree_discard(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool final) WT_UNUSED(final); /* Only used in diagnostic builds */ + WT_ASSERT(session, !lsm_tree->active); /* * The work unit queue should be empty, but it's worth checking * since work units use a different locking scheme to regular tree @@ -85,19 +86,27 @@ __lsm_tree_discard(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool final) * Close an LSM tree structure. */ static int -__lsm_tree_close(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) +__lsm_tree_close(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool final) { WT_DECL_RET; int i; - /* Stop any active merges. 
*/ - F_CLR(lsm_tree, WT_LSM_TREE_ACTIVE); + /* + * Stop any new work units being added. The barrier is necessary + * because we rely on the state change being visible before checking + * the tree queue state. + */ + lsm_tree->active = false; + WT_READ_BARRIER(); /* - * Wait for all LSM operations and work units that were in flight to - * finish. + * Wait for all LSM operations to drain. If WiredTiger is shutting + * down also wait for the tree reference count to go to zero, otherwise + * we know a user is holding a reference to the tree, so exclusive + * access is not available. */ - for (i = 0; lsm_tree->refcnt > 1 || lsm_tree->queue_ref > 0; ++i) { + for (i = 0; + lsm_tree->queue_ref > 0 || (final && lsm_tree->refcnt > 1); ++i) { /* * Remove any work units from the manager queues. Do this step * repeatedly in case a work unit was in the process of being @@ -114,11 +123,14 @@ __lsm_tree_close(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) if (i % WT_THOUSAND == 0) { WT_WITHOUT_LOCKS(session, ret = __wt_lsm_manager_clear_tree(session, lsm_tree)); - WT_RET(ret); + WT_ERR(ret); } __wt_yield(); } return (0); + +err: lsm_tree->active = true; + return (ret); } /* @@ -142,7 +154,7 @@ __wt_lsm_tree_close_all(WT_SESSION_IMPL *session) * is unconditional. 
*/ (void)__wt_atomic_add32(&lsm_tree->refcnt, 1); - WT_TRET(__lsm_tree_close(session, lsm_tree)); + WT_TRET(__lsm_tree_close(session, lsm_tree, true)); WT_TRET(__lsm_tree_discard(session, lsm_tree, true)); } @@ -157,9 +169,12 @@ static int __lsm_tree_set_name(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, const char *uri) { - if (lsm_tree->name != NULL) - __wt_free(session, lsm_tree->name); - WT_RET(__wt_strdup(session, uri, &lsm_tree->name)); + void *p; + + WT_RET(__wt_strdup(session, uri, &p)); + + __wt_free(session, lsm_tree->name); + lsm_tree->name = p; lsm_tree->filename = lsm_tree->name + strlen("lsm:"); return (0); } @@ -306,15 +321,15 @@ int __wt_lsm_tree_create(WT_SESSION_IMPL *session, const char *uri, bool exclusive, const char *config) { - WT_CONFIG_ITEM cval; - WT_DECL_ITEM(buf); WT_DECL_RET; WT_LSM_TREE *lsm_tree; const char *cfg[] = - { WT_CONFIG_BASE(session, WT_SESSION_create), config, NULL }; - char *tmpconfig; + { WT_CONFIG_BASE(session, lsm_meta), config, NULL }; + const char *metadata; - /* If the tree is open, it already exists. */ + metadata = NULL; + + /* If the tree can be opened, it already exists. */ WT_WITH_HANDLE_LIST_LOCK(session, ret = __wt_lsm_tree_get(session, uri, false, &lsm_tree)); if (ret == 0) { @@ -323,128 +338,9 @@ __wt_lsm_tree_create(WT_SESSION_IMPL *session, } WT_RET_NOTFOUND_OK(ret); - /* - * If the tree has metadata, it already exists. - * - * !!! - * Use a local variable: we don't care what the existing configuration - * is, but we don't want to overwrite the real config. - */ - if (__wt_metadata_search(session, uri, &tmpconfig) == 0) { - __wt_free(session, tmpconfig); - return (exclusive ? EEXIST : 0); - } - WT_RET_NOTFOUND_OK(ret); - - /* In-memory configurations don't make sense for LSM. 
*/ - if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY)) - WT_RET_MSG(session, EINVAL, - "LSM trees not supported by in-memory configurations"); - - WT_RET(__wt_config_gets(session, cfg, "key_format", &cval)); - if (WT_STRING_MATCH("r", cval.str, cval.len)) - WT_RET_MSG(session, EINVAL, - "LSM trees cannot be configured as column stores"); - - WT_RET(__wt_calloc_one(session, &lsm_tree)); - - WT_ERR(__lsm_tree_set_name(session, lsm_tree, uri)); - - WT_ERR(__wt_config_gets(session, cfg, "key_format", &cval)); - WT_ERR(__wt_strndup( - session, cval.str, cval.len, &lsm_tree->key_format)); - WT_ERR(__wt_config_gets(session, cfg, "value_format", &cval)); - WT_ERR(__wt_strndup( - session, cval.str, cval.len, &lsm_tree->value_format)); - - WT_ERR(__wt_config_gets_none(session, cfg, "collator", &cval)); - WT_ERR(__wt_strndup( - session, cval.str, cval.len, &lsm_tree->collator_name)); - - WT_ERR(__wt_config_gets(session, cfg, "cache_resident", &cval)); - if (cval.val != 0) - WT_ERR_MSG(session, EINVAL, - "The cache_resident flag is not compatible with LSM"); - - WT_ERR(__wt_config_gets(session, cfg, "lsm.auto_throttle", &cval)); - if (cval.val) - F_SET(lsm_tree, WT_LSM_TREE_THROTTLE); - else - F_CLR(lsm_tree, WT_LSM_TREE_THROTTLE); - WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom", &cval)); - FLD_SET(lsm_tree->bloom, - (cval.val == 0 ? 
WT_LSM_BLOOM_OFF : WT_LSM_BLOOM_MERGED)); - WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_oldest", &cval)); - if (cval.val != 0) - FLD_SET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST); - - if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF) && - FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST)) - WT_ERR_MSG(session, EINVAL, - "Bloom filters can only be created on newest and oldest " - "chunks if bloom filters are enabled"); - - WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_config", &cval)); - if (cval.type == WT_CONFIG_ITEM_STRUCT) { - cval.str++; - cval.len -= 2; - } - WT_ERR(__wt_config_check(session, - WT_CONFIG_REF(session, WT_SESSION_create), cval.str, cval.len)); - WT_ERR(__wt_strndup( - session, cval.str, cval.len, &lsm_tree->bloom_config)); - - WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_bit_count", &cval)); - lsm_tree->bloom_bit_count = (uint32_t)cval.val; - WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_hash_count", &cval)); - lsm_tree->bloom_hash_count = (uint32_t)cval.val; - WT_ERR(__wt_config_gets(session, cfg, "lsm.chunk_count_limit", &cval)); - lsm_tree->chunk_count_limit = (uint32_t)cval.val; - if (cval.val == 0) - F_SET(lsm_tree, WT_LSM_TREE_MERGES); - else - F_CLR(lsm_tree, WT_LSM_TREE_MERGES); - WT_ERR(__wt_config_gets(session, cfg, "lsm.chunk_max", &cval)); - lsm_tree->chunk_max = (uint64_t)cval.val; - WT_ERR(__wt_config_gets(session, cfg, "lsm.chunk_size", &cval)); - lsm_tree->chunk_size = (uint64_t)cval.val; - if (lsm_tree->chunk_size > lsm_tree->chunk_max) - WT_ERR_MSG(session, EINVAL, - "Chunk size (chunk_size) must be smaller than or equal to " - "the maximum chunk size (chunk_max)"); - WT_ERR(__wt_config_gets(session, cfg, "lsm.merge_max", &cval)); - lsm_tree->merge_max = (uint32_t)cval.val; - WT_ERR(__wt_config_gets(session, cfg, "lsm.merge_min", &cval)); - lsm_tree->merge_min = (uint32_t)cval.val; - if (lsm_tree->merge_min > lsm_tree->merge_max) - WT_ERR_MSG(session, EINVAL, - "LSM merge_min must be less than or equal to merge_max"); - 
if (!F_ISSET(S2C(session), WT_CONN_READONLY)) { - /* - * Set up the config for each chunk. - * - * Make the memory_page_max double the chunk size, so - * application threads don't immediately try to force evict - * the chunk when the worker thread clears the NO_EVICTION flag. - */ - WT_ERR(__wt_scr_alloc(session, 0, &buf)); - WT_ERR(__wt_buf_fmt(session, buf, - "%s,key_format=u,value_format=u,memory_page_max=%" PRIu64, - config, 2 * lsm_tree->chunk_max)); - WT_ERR(__wt_strndup( - session, buf->data, buf->size, &lsm_tree->file_config)); - - /* Create the first chunk and flush the metadata. */ - WT_ERR(__wt_lsm_meta_write(session, lsm_tree)); - - /* Discard our partially populated handle. */ - ret = __lsm_tree_discard(session, lsm_tree, false); - lsm_tree = NULL; - } else { - F_CLR(lsm_tree, WT_LSM_TREE_MERGES); - FLD_SET(lsm_tree->bloom, WT_LSM_BLOOM_OFF); - FLD_CLR(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST); + WT_ERR(__wt_config_merge(session, cfg, NULL, &metadata)); + WT_ERR(__wt_metadata_insert(session, uri, metadata)); } /* @@ -452,16 +348,12 @@ __wt_lsm_tree_create(WT_SESSION_IMPL *session, * error: the returned handle is NULL on error, and the metadata * tracking macros handle cleaning up on failure. */ - if (ret == 0) - WT_WITH_HANDLE_LIST_LOCK(session, - ret = __lsm_tree_open(session, uri, true, &lsm_tree)); + WT_WITH_HANDLE_LIST_LOCK(session, + ret = __lsm_tree_open(session, uri, true, &lsm_tree)); if (ret == 0) __wt_lsm_tree_release(session, lsm_tree); - if (0) { -err: WT_TRET(__lsm_tree_discard(session, lsm_tree, false)); - } - __wt_scr_free(session, &buf); +err: __wt_free(session, metadata); return (ret); } @@ -483,27 +375,26 @@ __lsm_tree_find(WT_SESSION_IMPL *session, /* See if the tree is already open. */ TAILQ_FOREACH(lsm_tree, &S2C(session)->lsmqh, q) if (strcmp(uri, lsm_tree->name) == 0) { - /* - * Short circuit if the handle is already held - * exclusively or exclusive access is requested and - * there are references held. 
- */ - if ((exclusive && lsm_tree->refcnt > 0) || - lsm_tree->exclusive) - return (EBUSY); - if (exclusive) { /* * Make sure we win the race to switch on the * exclusive flag. */ - if (!__wt_atomic_cas8( - &lsm_tree->exclusive, 0, 1)) + if (!__wt_atomic_cas_ptr( + &lsm_tree->excl_session, NULL, session)) return (EBUSY); - /* Make sure there are no readers */ - if (!__wt_atomic_cas32( - &lsm_tree->refcnt, 0, 1)) { - lsm_tree->exclusive = 0; + + /* + * Drain the work queue before checking for + * open cursors - otherwise we can generate + * spurious busy returns. + */ + (void)__wt_atomic_add32(&lsm_tree->refcnt, 1); + if (__lsm_tree_close( + session, lsm_tree, false) != 0 || + lsm_tree->refcnt != 1) { + __wt_lsm_tree_release( + session, lsm_tree); return (EBUSY); } } else { @@ -513,11 +404,11 @@ __lsm_tree_find(WT_SESSION_IMPL *session, * We got a reference, check if an exclusive * lock beat us to it. */ - if (lsm_tree->exclusive) { + if (lsm_tree->excl_session != NULL) { WT_ASSERT(session, lsm_tree->refcnt > 0); - (void)__wt_atomic_sub32( - &lsm_tree->refcnt, 1); + __wt_lsm_tree_release( + session, lsm_tree); return (EBUSY); } } @@ -609,7 +500,7 @@ __lsm_tree_open(WT_SESSION_IMPL *session, * with getting handles exclusive. */ lsm_tree->refcnt = 1; - lsm_tree->exclusive = exclusive ? 1 : 0; + lsm_tree->excl_session = exclusive ? session : NULL; lsm_tree->queue_ref = 0; /* Set a flush timestamp as a baseline. */ @@ -617,7 +508,9 @@ __lsm_tree_open(WT_SESSION_IMPL *session, /* Now the tree is setup, make it visible to others. */ TAILQ_INSERT_HEAD(&S2C(session)->lsmqh, lsm_tree, q); - F_SET(lsm_tree, WT_LSM_TREE_ACTIVE | WT_LSM_TREE_OPEN); + if (!exclusive) + lsm_tree->active = true; + F_SET(lsm_tree, WT_LSM_TREE_OPEN); *treep = lsm_tree; @@ -644,7 +537,7 @@ __wt_lsm_tree_get(WT_SESSION_IMPL *session, ret = __lsm_tree_open(session, uri, exclusive, treep); WT_ASSERT(session, ret != 0 || - (exclusive ? 
1 : 0) == (*treep)->exclusive); + (*treep)->excl_session == (exclusive ? session : NULL)); return (ret); } @@ -656,8 +549,11 @@ void __wt_lsm_tree_release(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) { WT_ASSERT(session, lsm_tree->refcnt > 0); - if (lsm_tree->exclusive) - lsm_tree->exclusive = 0; + if (lsm_tree->excl_session == session) { + /* We cleared the active flag when getting exclusive access. */ + lsm_tree->active = true; + lsm_tree->excl_session = NULL; + } (void)__wt_atomic_sub32(&lsm_tree->refcnt, 1); } @@ -874,7 +770,7 @@ __wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) F_CLR(lsm_tree, WT_LSM_TREE_NEED_SWITCH); ++lsm_tree->dsk_gen; - lsm_tree->modified = 1; + lsm_tree->modified = true; /* * Set the switch transaction in the previous chunk unless this is @@ -970,9 +866,7 @@ __wt_lsm_tree_drop( WT_WITH_HANDLE_LIST_LOCK(session, ret = __wt_lsm_tree_get(session, name, true, &lsm_tree)); WT_RET(ret); - - /* Shut down the LSM worker. */ - WT_ERR(__lsm_tree_close(session, lsm_tree)); + WT_ASSERT(session, !lsm_tree->active); /* Prevent any new opens. */ WT_ERR(__wt_lsm_tree_writelock(session, lsm_tree)); @@ -1001,6 +895,7 @@ __wt_lsm_tree_drop( WT_ERR(__wt_lsm_tree_writeunlock(session, lsm_tree)); ret = __wt_metadata_remove(session, name); + WT_ASSERT(session, !lsm_tree->active); err: if (locked) WT_TRET(__wt_lsm_tree_writeunlock(session, lsm_tree)); WT_WITH_HANDLE_LIST_LOCK(session, @@ -1033,9 +928,6 @@ __wt_lsm_tree_rename(WT_SESSION_IMPL *session, ret = __wt_lsm_tree_get(session, olduri, true, &lsm_tree)); WT_RET(ret); - /* Shut down the LSM worker. */ - WT_ERR(__lsm_tree_close(session, lsm_tree)); - /* Prevent any new opens. 
*/ WT_ERR(__wt_lsm_tree_writelock(session, lsm_tree)); locked = true; @@ -1073,8 +965,8 @@ __wt_lsm_tree_rename(WT_SESSION_IMPL *session, err: if (locked) WT_TRET(__wt_lsm_tree_writeunlock(session, lsm_tree)); - if (old != NULL) - __wt_free(session, old); + __wt_free(session, old); + /* * Discard this LSM tree structure. The first operation on the renamed * tree will create a new one. @@ -1108,9 +1000,6 @@ __wt_lsm_tree_truncate( ret = __wt_lsm_tree_get(session, name, true, &lsm_tree)); WT_RET(ret); - /* Shut down the LSM worker. */ - WT_ERR(__lsm_tree_close(session, lsm_tree)); - /* Prevent any new opens. */ WT_ERR(__wt_lsm_tree_writelock(session, lsm_tree)); locked = true; @@ -1314,8 +1203,8 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp) if (chunk != NULL) { WT_ERR(__wt_verbose(session, WT_VERB_LSM, "Compact force flush %s flags 0x%" PRIx32 - " chunk %u flags 0x%" - PRIx32, name, lsm_tree->flags, chunk->id, chunk->flags)); + " chunk %" PRIu32 " flags 0x%" PRIx32, + name, lsm_tree->flags, chunk->id, chunk->flags)); flushing = true; /* * Make sure the in-memory chunk gets flushed do not push a @@ -1337,7 +1226,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp) } /* Wait for the work unit queues to drain. */ - while (F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE)) { + while (lsm_tree->active) { /* * The flush flag is cleared when the chunk has been flushed. * Continue to push forced flushes until the chunk is on disk. @@ -1348,7 +1237,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp) if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) { WT_ERR(__wt_verbose(session, WT_VERB_LSM, - "Compact flush done %s chunk %u. " + "Compact flush done %s chunk %" PRIu32 ". 
" "Start compacting progress %" PRIu64, name, chunk->id, lsm_tree->merge_progressing)); @@ -1359,7 +1248,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp) progress = lsm_tree->merge_progressing; } else { WT_ERR(__wt_verbose(session, WT_VERB_LSM, - "Compact flush retry %s chunk %u", + "Compact flush retry %s chunk %" PRIu32, name, chunk->id)); WT_ERR(__wt_lsm_manager_push_entry(session, WT_LSM_WORK_FLUSH, WT_LSM_WORK_FORCE, @@ -1419,7 +1308,6 @@ err: __wt_lsm_tree_release(session, lsm_tree); return (ret); - } /* diff --git a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c index 7723818f607..87771e2cb6c 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c @@ -29,7 +29,7 @@ __lsm_copy_chunks(WT_SESSION_IMPL *session, cookie->nchunks = 0; WT_RET(__wt_lsm_tree_readlock(session, lsm_tree)); - if (!F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE)) + if (!lsm_tree->active) return (__wt_lsm_tree_readunlock(session, lsm_tree)); /* Take a copy of the current state of the LSM tree. */ @@ -72,14 +72,14 @@ __wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session, { WT_DECL_RET; WT_LSM_CHUNK *chunk, *evict_chunk, *flush_chunk; - u_int i; + uint32_t i; *chunkp = NULL; chunk = evict_chunk = flush_chunk = NULL; WT_ASSERT(session, lsm_tree->queue_ref > 0); WT_RET(__wt_lsm_tree_readlock(session, lsm_tree)); - if (!F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE) || lsm_tree->nchunks == 0) + if (!lsm_tree->active || lsm_tree->nchunks == 0) return (__wt_lsm_tree_readunlock(session, lsm_tree)); /* Search for a chunk to evict and/or a chunk to flush. */ @@ -118,7 +118,7 @@ __wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session, if (chunk != NULL) { WT_ERR(__wt_verbose(session, WT_VERB_LSM, - "Flush%s: return chunk %u of %u: %s", + "Flush%s: return chunk %" PRIu32 " of %" PRIu32 ": %s", force ? 
" w/ force" : "", i, lsm_tree->nchunks, chunk->uri)); @@ -322,7 +322,7 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, */ saved_isolation = session->txn.isolation; session->txn.isolation = WT_ISO_READ_UNCOMMITTED; - ret = __wt_cache_op(session, NULL, WT_SYNC_WRITE_LEAVES); + ret = __wt_cache_op(session, WT_SYNC_WRITE_LEAVES); session->txn.isolation = saved_isolation; WT_TRET(__wt_session_release_btree(session)); } diff --git a/src/third_party/wiredtiger/src/lsm/lsm_worker.c b/src/third_party/wiredtiger/src/lsm/lsm_worker.c index 7562cb1cae3..0874da8db13 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_worker.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_worker.c @@ -20,7 +20,7 @@ int __wt_lsm_worker_start(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args) { WT_RET(__wt_verbose(session, WT_VERB_LSM_MANAGER, - "Start LSM worker %d type 0x%x", args->id, args->type)); + "Start LSM worker %u type %#" PRIx32, args->id, args->type)); return (__wt_thread_create(session, &args->tid, __lsm_worker, args)); } @@ -59,9 +59,8 @@ __lsm_worker_general_op( */ if (chunk != NULL) { WT_ERR(__wt_verbose(session, WT_VERB_LSM, - "Flush%s chunk %d %s", - force ? " w/ force" : "", - chunk->id, chunk->uri)); + "Flush%s chunk %" PRIu32 " %s", + force ? " w/ force" : "", chunk->id, chunk->uri)); ret = __wt_lsm_checkpoint_chunk( session, entry->lsm_tree, chunk); WT_ASSERT(session, chunk->refcnt > 0); @@ -140,7 +139,7 @@ __lsm_worker(void *arg) if (ret == WT_NOTFOUND) { F_CLR(entry->lsm_tree, WT_LSM_TREE_COMPACTING); ret = 0; - } else if (ret == EBUSY) + } else if (ret == EBUSY || ret == EINTR) ret = 0; /* Paranoia: clear session state. 
*/ @@ -164,7 +163,7 @@ __lsm_worker(void *arg) if (ret != 0) { err: __wt_lsm_manager_free_work_unit(session, entry); WT_PANIC_MSG(session, ret, - "Error in LSM worker thread %d", cookie->id); + "Error in LSM worker thread %u", cookie->id); } return (WT_THREAD_RET_VALUE); } diff --git a/src/third_party/wiredtiger/src/meta/meta_ckpt.c b/src/third_party/wiredtiger/src/meta/meta_ckpt.c index df4cd2cb4d6..0a864432daf 100644 --- a/src/third_party/wiredtiger/src/meta/meta_ckpt.c +++ b/src/third_party/wiredtiger/src/meta/meta_ckpt.c @@ -212,8 +212,7 @@ __ckpt_last_name( if (found && a.val < found) continue; - if (*namep != NULL) - __wt_free(session, *namep); + __wt_free(session, *namep); WT_ERR(__wt_strndup(session, k.str, k.len, namep)); found = a.val; } @@ -221,7 +220,7 @@ __ckpt_last_name( ret = WT_NOTFOUND; if (0) { -err: __wt_free(session, namep); +err: __wt_free(session, *namep); } return (ret); } diff --git a/src/third_party/wiredtiger/src/meta/meta_table.c b/src/third_party/wiredtiger/src/meta/meta_table.c index 61cc009c983..e5f2727b5b6 100644 --- a/src/third_party/wiredtiger/src/meta/meta_table.c +++ b/src/third_party/wiredtiger/src/meta/meta_table.c @@ -67,18 +67,16 @@ __wt_metadata_cursor_open( btree = ((WT_CURSOR_BTREE *)(*cursorp))->btree; /* - * Set special flags for the metadata file: eviction (the metadata file - * is in-memory and never evicted), logging (the metadata file is always - * logged if possible). + * Special settings for metadata: skew eviction so metadata almost + * always stays in cache and make sure metadata is logged if possible. * - * Test flags before setting them so updates can't race in subsequent - * opens (the first update is safe because it's single-threaded from + * Test before setting so updates can't race in subsequent opens (the + * first update is safe because it's single-threaded from * wiredtiger_open). 
*/ - if (!F_ISSET(btree, WT_BTREE_IN_MEMORY)) - F_SET(btree, WT_BTREE_IN_MEMORY); - if (!F_ISSET(btree, WT_BTREE_NO_EVICTION)) - F_SET(btree, WT_BTREE_NO_EVICTION); + if (btree->evict_priority == 0) + WT_WITH_BTREE(session, btree, + __wt_evict_priority_set(session, WT_EVICT_INT_SKEW)); if (F_ISSET(btree, WT_BTREE_NO_LOGGING)) F_CLR(btree, WT_BTREE_NO_LOGGING); diff --git a/src/third_party/wiredtiger/src/os_posix/os_alloc.c b/src/third_party/wiredtiger/src/os_posix/os_alloc.c index 3876f9a1afe..cfc7b80450e 100644 --- a/src/third_party/wiredtiger/src/os_posix/os_alloc.c +++ b/src/third_party/wiredtiger/src/os_posix/os_alloc.c @@ -18,22 +18,13 @@ #include <gperftools/tcmalloc.h> #define calloc tc_calloc +#define malloc tc_malloc #define realloc tc_realloc #define posix_memalign tc_posix_memalign #define free tc_free #endif /* - * There's no malloc interface, WiredTiger never calls malloc. - * - * The problem is an application might allocate memory, write secret stuff in - * it, free the memory, then WiredTiger allocates the memory and uses it for a - * file page or log record, then writes it to disk, without having overwritten - * it fully. That results in the secret stuff being protected by WiredTiger's - * permission mechanisms, potentially inappropriate for the secret stuff. - */ - -/* * __wt_calloc -- * ANSI calloc function. */ @@ -67,12 +58,46 @@ __wt_calloc(WT_SESSION_IMPL *session, size_t number, size_t size, void *retp) } /* - * __wt_realloc -- - * ANSI realloc function. + * __wt_malloc -- + * ANSI malloc function. */ int -__wt_realloc(WT_SESSION_IMPL *session, - size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp) +__wt_malloc(WT_SESSION_IMPL *session, size_t bytes_to_allocate, void *retp) +{ + void *p; + + /* + * Defensive: if our caller doesn't handle errors correctly, ensure a + * free won't fail. + */ + *(void **)retp = NULL; + + /* + * !!! + * This function MUST handle a NULL WT_SESSION_IMPL handle. 
+ */ + WT_ASSERT(session, bytes_to_allocate != 0); + + if (session != NULL) + WT_STAT_FAST_CONN_INCR(session, memory_allocation); + + if ((p = malloc(bytes_to_allocate)) == NULL) + WT_RET_MSG(session, __wt_errno(), + "memory allocation of %" WT_SIZET_FMT " bytes failed", + bytes_to_allocate); + + *(void **)retp = p; + return (0); +} + +/* + * __realloc_func -- + * ANSI realloc function. + */ +static int +__realloc_func(WT_SESSION_IMPL *session, + size_t *bytes_allocated_ret, size_t bytes_to_allocate, bool clear_memory, + void *retp) { void *p; size_t bytes_allocated; @@ -107,15 +132,12 @@ __wt_realloc(WT_SESSION_IMPL *session, bytes_to_allocate); /* - * Clear the allocated memory -- an application might: allocate memory, - * write secret stuff into it, free the memory, then we re-allocate the - * memory and use it for a file page or log record, and then write it to - * disk. That would result in the secret stuff being protected by the - * WiredTiger permission mechanisms, potentially inappropriate for the - * secret stuff. + * Clear the allocated memory, parts of WiredTiger depend on allocated + * memory being cleared. */ - memset((uint8_t *) - p + bytes_allocated, 0, bytes_to_allocate - bytes_allocated); + if (clear_memory) + memset((uint8_t *)p + bytes_allocated, + 0, bytes_to_allocate - bytes_allocated); /* Update caller's bytes allocated value. */ if (bytes_allocated_ret != NULL) @@ -126,9 +148,33 @@ __wt_realloc(WT_SESSION_IMPL *session, } /* + * __wt_realloc -- + * WiredTiger's realloc API. + */ +int +__wt_realloc(WT_SESSION_IMPL *session, + size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp) +{ + return (__realloc_func( + session, bytes_allocated_ret, bytes_to_allocate, true, retp)); +} + +/* + * __wt_realloc_noclear -- + * WiredTiger's realloc API, not clearing allocated memory. 
+ */ +int +__wt_realloc_noclear(WT_SESSION_IMPL *session, + size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp) +{ + return (__realloc_func( + session, bytes_allocated_ret, bytes_to_allocate, false, retp)); +} + +/* * __wt_realloc_aligned -- * ANSI realloc function that aligns to buffer boundaries, configured with - * the "buffer_alignment" key to wiredtiger_open. + * the "buffer_alignment" key to wiredtiger_open. */ int __wt_realloc_aligned(WT_SESSION_IMPL *session, @@ -184,10 +230,6 @@ __wt_realloc_aligned(WT_SESSION_IMPL *session, __wt_free(session, p); p = newp; - /* Clear the allocated memory (see above). */ - memset((uint8_t *)p + bytes_allocated, 0, - bytes_to_allocate - bytes_allocated); - /* Update caller's bytes allocated value. */ if (bytes_allocated_ret != NULL) *bytes_allocated_ret = bytes_to_allocate; @@ -200,11 +242,11 @@ __wt_realloc_aligned(WT_SESSION_IMPL *session, * If there is no posix_memalign function, or no alignment configured, * fall back to realloc. * - * Windows note: Visual C CRT memalign does not match Posix behavior - * and would also double each allocation so it is bad for memory use + * Windows note: Visual C CRT memalign does not match POSIX behavior + * and would also double each allocation so it is bad for memory use. */ - return (__wt_realloc( - session, bytes_allocated_ret, bytes_to_allocate, retp)); + return (__realloc_func( + session, bytes_allocated_ret, bytes_to_allocate, false, retp)); } /* @@ -221,13 +263,14 @@ __wt_strndup(WT_SESSION_IMPL *session, const void *str, size_t len, void *retp) return (0); } - WT_RET(__wt_calloc(session, len + 1, 1, &p)); + WT_RET(__wt_malloc(session, len + 1, &p)); /* * Don't change this to strncpy, we rely on this function to duplicate * "strings" that contain nul bytes. 
*/ memcpy(p, str, len); + ((uint8_t *)p)[len] = '\0'; *(void **)retp = p; return (0); diff --git a/src/third_party/wiredtiger/src/os_posix/os_stdio.c b/src/third_party/wiredtiger/src/os_posix/os_stdio.c index 7ab107eda1e..65a0f40a659 100644 --- a/src/third_party/wiredtiger/src/os_posix/os_stdio.c +++ b/src/third_party/wiredtiger/src/os_posix/os_stdio.c @@ -46,8 +46,7 @@ __wt_fopen(WT_SESSION_IMPL *session, if (*fpp == NULL) ret = __wt_errno(); - if (pathbuf != NULL) - __wt_free(session, pathbuf); + __wt_free(session, pathbuf); if (ret == 0) return (0); diff --git a/src/third_party/wiredtiger/src/packing/pack_stream.c b/src/third_party/wiredtiger/src/packing/pack_stream.c index 98da5b405c3..1393eb9a9c1 100644 --- a/src/third_party/wiredtiger/src/packing/pack_stream.c +++ b/src/third_party/wiredtiger/src/packing/pack_stream.c @@ -65,8 +65,7 @@ wiredtiger_pack_close(WT_PACK_STREAM *ps, size_t *usedp) if (usedp != NULL) *usedp = WT_PTRDIFF(ps->p, ps->start); - if (ps != NULL) - __wt_free(ps->pack.session, ps); + __wt_free(ps->pack.session, ps); return (0); } @@ -327,3 +326,139 @@ wiredtiger_unpack_uint(WT_PACK_STREAM *ps, uint64_t *up) } return (0); } + +/* + * __wt_ext_pack_start -- + * WT_EXTENSION.pack_start method. 
+ */ +int +__wt_ext_pack_start(WT_EXTENSION_API *wt_api, + WT_SESSION *wt_session, const char *format, + void *buffer, size_t size, WT_PACK_STREAM **psp) +{ + WT_CONNECTION_IMPL *conn; + + conn = (WT_CONNECTION_IMPL *)wt_api->conn; + if (wt_session == NULL) + wt_session = (WT_SESSION *)conn->default_session; + return (wiredtiger_pack_start(wt_session, format, buffer, size, psp)); +} + +/* + * __wt_ext_unpack_start -- + * WT_EXTENSION.unpack_start + */ +int +__wt_ext_unpack_start(WT_EXTENSION_API *wt_api, + WT_SESSION *wt_session, const char *format, + const void *buffer, size_t size, WT_PACK_STREAM **psp) +{ + WT_CONNECTION_IMPL *conn; + + conn = (WT_CONNECTION_IMPL *)wt_api->conn; + if (wt_session == NULL) + wt_session = (WT_SESSION *)conn->default_session; + return (wiredtiger_unpack_start(wt_session, format, buffer, size, psp)); +} + +/* + * __wt_ext_pack_close -- + * WT_EXTENSION.pack_close + */ +int +__wt_ext_pack_close(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, size_t *usedp) +{ + WT_UNUSED(wt_api); + return (wiredtiger_pack_close(ps, usedp)); +} + +/* + * __wt_ext_pack_item -- + * WT_EXTENSION.pack_item + */ +int +__wt_ext_pack_item(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, WT_ITEM *item) +{ + WT_UNUSED(wt_api); + return (wiredtiger_pack_item(ps, item)); +} + +/* + * __wt_ext_pack_int -- + * WT_EXTENSION.pack_int + */ +int +__wt_ext_pack_int(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, int64_t i) +{ + WT_UNUSED(wt_api); + return (wiredtiger_pack_int(ps, i)); +} + +/* + * __wt_ext_pack_str -- + * WT_EXTENSION.pack_str + */ +int +__wt_ext_pack_str(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, const char *s) +{ + WT_UNUSED(wt_api); + return (wiredtiger_pack_str(ps, s)); +} + +/* + * __wt_ext_pack_uint -- + * WT_EXTENSION.pack_uint + */ +int +__wt_ext_pack_uint(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, uint64_t u) +{ + WT_UNUSED(wt_api); + return (wiredtiger_pack_uint(ps, u)); +} + +/* + * __wt_ext_unpack_item -- + * WT_EXTENSION.unpack_item + */ 
+int +__wt_ext_unpack_item(WT_EXTENSION_API *wt_api, + WT_PACK_STREAM *ps, WT_ITEM *item) +{ + WT_UNUSED(wt_api); + return (wiredtiger_unpack_item(ps, item)); +} + +/* + * __wt_ext_unpack_int -- + * WT_EXTENSION.unpack_int + */ +int +__wt_ext_unpack_int(WT_EXTENSION_API *wt_api, + WT_PACK_STREAM *ps, int64_t *ip) +{ + WT_UNUSED(wt_api); + return (wiredtiger_unpack_int(ps, ip)); +} + +/* + * __wt_ext_unpack_str -- + * WT_EXTENSION.unpack_str + */ +int +__wt_ext_unpack_str(WT_EXTENSION_API *wt_api, + WT_PACK_STREAM *ps, const char **sp) +{ + WT_UNUSED(wt_api); + return (wiredtiger_unpack_str(ps, sp)); +} + +/* + * __wt_ext_unpack_uint -- + * WT_EXTENSION.unpack_uint + */ +int +__wt_ext_unpack_uint(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, uint64_t *up) +{ + WT_UNUSED(wt_api); + return (wiredtiger_unpack_uint(ps, up)); +} diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c index f245ff5d921..a69f335c9b3 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_write.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c @@ -1960,12 +1960,21 @@ __rec_split_init(WT_SESSION_IMPL *session, WT_RET(__wt_buf_init(session, &r->disk_image, corrected_page_size)); /* - * Clear the disk page's header and block-manager space, set the page - * type (the type doesn't change, and setting it later would require - * additional code in a few different places). + * Clear the disk page header to ensure all of it is initialized, even + * the unused fields. + * + * In the case of fixed-length column-store, clear the entire buffer: + * fixed-length column-store sets bits in bytes, where the bytes are + * assumed to initially be 0. + */ + memset(r->disk_image.mem, 0, page->type == WT_PAGE_COL_FIX ? + corrected_page_size : WT_PAGE_HEADER_SIZE); + + /* + * Set the page type (the type doesn't change, and setting it later + * would require additional code in a few different places). 
*/ dsk = r->disk_image.mem; - memset(dsk, 0, WT_PAGE_HEADER_BYTE_SIZE(btree)); dsk->type = page->type; /* @@ -3026,13 +3035,13 @@ __rec_split_fixup(WT_SESSION_IMPL *session, WT_RECONCILE *r) * The data isn't laid out on a page boundary or nul padded; copy it to * a clean, aligned, padded buffer before writing it. * - * Allocate a scratch buffer to hold the new disk image. Copy the - * WT_PAGE_HEADER header onto the scratch buffer, most of the header - * information remains unchanged between the pages. + * Allocate a scratch buffer to hold the new disk image. Copy the disk + * page's header and block-manager space into the scratch buffer, most + * of the header information remains unchanged between the pages. */ WT_RET(__wt_scr_alloc(session, r->disk_image.memsize, &tmp)); dsk = tmp->mem; - memcpy(dsk, r->disk_image.mem, WT_PAGE_HEADER_SIZE); + memcpy(dsk, r->disk_image.mem, WT_PAGE_HEADER_BYTE_SIZE(btree)); /* * For each split chunk we've created, update the disk image and copy diff --git a/src/third_party/wiredtiger/src/schema/schema_create.c b/src/third_party/wiredtiger/src/schema/schema_create.c index 9b3b76b62de..756f1fdcc6c 100644 --- a/src/third_party/wiredtiger/src/schema/schema_create.c +++ b/src/third_party/wiredtiger/src/schema/schema_create.c @@ -9,22 +9,6 @@ #include "wt_internal.h" /* - * __wt_schema_create_strip -- - * Discard any configuration information from a schema entry that is not - * applicable to an session.create call, here for the wt dump command utility, - * which only wants to dump the schema information needed for load. - */ -int -__wt_schema_create_strip(WT_SESSION_IMPL *session, - const char *v1, const char *v2, char **value_ret) -{ - const char *cfg[] = - { WT_CONFIG_BASE(session, WT_SESSION_create), v1, v2, NULL }; - - return (__wt_config_collapse(session, cfg, value_ret)); -} - -/* * __wt_direct_io_size_check -- * Return a size from the configuration, complaining if it's insufficient * for direct I/O. 
diff --git a/src/third_party/wiredtiger/src/schema/schema_open.c b/src/third_party/wiredtiger/src/schema/schema_open.c index 49318f80959..e7ce4e42498 100644 --- a/src/third_party/wiredtiger/src/schema/schema_open.c +++ b/src/third_party/wiredtiger/src/schema/schema_open.c @@ -109,8 +109,7 @@ __wt_schema_open_colgroups(WT_SESSION_IMPL *session, WT_TABLE *table) err: __wt_scr_free(session, &buf); __wt_schema_destroy_colgroup(session, &colgroup); - if (cgconfig != NULL) - __wt_free(session, cgconfig); + __wt_free(session, cgconfig); return (ret); } diff --git a/src/third_party/wiredtiger/src/schema/schema_plan.c b/src/third_party/wiredtiger/src/schema/schema_plan.c index 612a2d2d192..12a1aa9c22f 100644 --- a/src/third_party/wiredtiger/src/schema/schema_plan.c +++ b/src/third_party/wiredtiger/src/schema/schema_plan.c @@ -212,7 +212,7 @@ __wt_struct_plan(WT_SESSION_IMPL *session, WT_TABLE *table, WT_ASSERT(session, !value_only || coltype == WT_PROJ_VALUE); WT_RET(__wt_buf_catfmt( - session, plan, "%d%c", cg, coltype)); + session, plan, "%u%c", cg, coltype)); /* * Set the current column group and column @@ -226,7 +226,7 @@ __wt_struct_plan(WT_SESSION_IMPL *session, WT_TABLE *table, if (current_col < col) { if (col - current_col > 1) WT_RET(__wt_buf_catfmt(session, - plan, "%d", col - current_col)); + plan, "%u", col - current_col)); WT_RET(__wt_buf_catfmt(session, plan, "%c", WT_PROJ_SKIP)); } @@ -375,8 +375,8 @@ __wt_struct_reformat(WT_SESSION_IMPL *session, WT_TABLE *table, pv.type = 'u'; if (pv.havesize) - WT_RET(__wt_buf_catfmt( - session, format, "%d%c", (int)pv.size, pv.type)); + WT_RET(__wt_buf_catfmt(session, + format, "%" PRIu32 "%c", pv.size, pv.type)); else WT_RET(__wt_buf_catfmt(session, format, "%c", pv.type)); } while (have_next); @@ -399,8 +399,8 @@ __wt_struct_truncate(WT_SESSION_IMPL *session, while (ncols-- > 0) { WT_RET(__pack_next(&pack, &pv)); if (pv.havesize) - WT_RET(__wt_buf_catfmt( - session, format, "%d%c", (int)pv.size, pv.type)); + 
WT_RET(__wt_buf_catfmt(session, + format, "%" PRIu32 "%c", pv.size, pv.type)); else WT_RET(__wt_buf_catfmt(session, format, "%c", pv.type)); } diff --git a/src/third_party/wiredtiger/src/schema/schema_truncate.c b/src/third_party/wiredtiger/src/schema/schema_truncate.c index e7752b60ca4..d9a798b6ed8 100644 --- a/src/third_party/wiredtiger/src/schema/schema_truncate.c +++ b/src/third_party/wiredtiger/src/schema/schema_truncate.c @@ -131,22 +131,19 @@ int __wt_schema_range_truncate( WT_SESSION_IMPL *session, WT_CURSOR *start, WT_CURSOR *stop) { - WT_CURSOR *cursor; WT_DATA_SOURCE *dsrc; WT_DECL_RET; const char *uri; - cursor = (start != NULL) ? start : stop; - uri = cursor->internal_uri; + uri = start->internal_uri; if (WT_PREFIX_MATCH(uri, "file:")) { - if (start != NULL) - WT_CURSOR_NEEDKEY(start); + WT_CURSOR_NEEDKEY(start); if (stop != NULL) WT_CURSOR_NEEDKEY(stop); - WT_WITH_BTREE(session, ((WT_CURSOR_BTREE *)cursor)->btree, + WT_WITH_BTREE(session, ((WT_CURSOR_BTREE *)start)->btree, ret = __wt_btcur_range_truncate( - (WT_CURSOR_BTREE *)start, (WT_CURSOR_BTREE *)stop)); + (WT_CURSOR_BTREE *)start, (WT_CURSOR_BTREE *)stop)); } else if (WT_PREFIX_MATCH(uri, "table:")) ret = __wt_table_range_truncate( (WT_CURSOR_TABLE *)start, (WT_CURSOR_TABLE *)stop); diff --git a/src/third_party/wiredtiger/src/schema/schema_worker.c b/src/third_party/wiredtiger/src/schema/schema_worker.c index e60a7107786..52be76bb7a5 100644 --- a/src/third_party/wiredtiger/src/schema/schema_worker.c +++ b/src/third_party/wiredtiger/src/schema/schema_worker.c @@ -126,7 +126,7 @@ __wt_schema_worker(WT_SESSION_IMPL *session, dsrc, wt_session, uri, (WT_CONFIG_ARG *)cfg)); else if (file_func == __wt_checkpoint) ; - else if (file_func == __wt_checkpoint_list) + else if (file_func == __wt_checkpoint_get_handles) ; else if (file_func == __wt_checkpoint_sync) ; diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c index 
2414229681b..bb496494234 100644 --- a/src/third_party/wiredtiger/src/session/session_api.c +++ b/src/third_party/wiredtiger/src/session/session_api.c @@ -40,7 +40,8 @@ __wt_session_reset_cursors(WT_SESSION_IMPL *session, bool free_buffers) /* Stop when there are no positioned cursors. */ if (session->ncursors == 0) break; - WT_TRET(cursor->reset(cursor)); + if (!F_ISSET(cursor, WT_CURSTD_JOINED)) + WT_TRET(cursor->reset(cursor)); /* Optionally, free the cursor buffers */ if (free_buffers) { __wt_buf_free(session, &cursor->key); @@ -492,10 +493,13 @@ __session_create(WT_SESSION *wt_session, const char *uri, const char *config) /* * We can't disallow type entirely, a configuration string might * innocently include it, for example, a dump/load pair. If the - * URI type prefix and the type are the same, let it go. + * underlying type is "file", it's OK ("file" is the underlying + * type for every type); if the URI type prefix and the type are + * the same, let it go. */ if ((ret = __wt_config_getones(session, config, "type", &cval)) == 0 && + !WT_STRING_MATCH("file", cval.str, cval.len) && (strncmp(uri, cval.str, cval.len) != 0 || uri[cval.len] != ':')) WT_ERR_MSG(session, EINVAL, @@ -1597,7 +1601,7 @@ __open_session(WT_CONNECTION_IMPL *conn, if (i == conn->session_size) WT_ERR_MSG(session, ENOMEM, "only configured to support %" PRIu32 " sessions" - " (including %d additional internal sessions)", + " (including %" PRIu32 " additional internal sessions)", conn->session_size, WT_EXTRA_INTERNAL_SESSIONS); /* diff --git a/src/third_party/wiredtiger/src/session/session_dhandle.c b/src/third_party/wiredtiger/src/session/session_dhandle.c index 242d9ac5cc4..ddf4d3dfa33 100644 --- a/src/third_party/wiredtiger/src/session/session_dhandle.c +++ b/src/third_party/wiredtiger/src/session/session_dhandle.c @@ -577,7 +577,7 @@ __wt_session_lock_checkpoint(WT_SESSION_IMPL *session, const char *checkpoint) * files, since changes to the underlying file are visible to the in * memory 
pages. */ - WT_ERR(__wt_cache_op(session, NULL, WT_SYNC_DISCARD)); + WT_ERR(__wt_cache_op(session, WT_SYNC_DISCARD)); /* * We lock checkpoint handles that we are overwriting, so the handle diff --git a/src/third_party/wiredtiger/src/support/cond_auto.c b/src/third_party/wiredtiger/src/support/cond_auto.c new file mode 100644 index 00000000000..ec95622f333 --- /dev/null +++ b/src/third_party/wiredtiger/src/support/cond_auto.c @@ -0,0 +1,136 @@ +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "wt_internal.h" + +/* + * This is an implementation of condition variables that automatically adjust + * the wait time depending on whether the wake is resulting in useful work. 
+ */ + +/* + * __wt_cond_auto_alloc -- + * Allocate and initialize an automatically adjusting condition variable. + */ +int +__wt_cond_auto_alloc( + WT_SESSION_IMPL *session, const char *name, + bool is_signalled, uint64_t min, uint64_t max, WT_CONDVAR **condp) +{ + WT_CONDVAR *cond; + + WT_RET(__wt_cond_alloc(session, name, is_signalled, condp)); + cond = *condp; + + cond->min_wait = min; + cond->max_wait = max; + cond->prev_wait = min; + + return (0); +} + +/* + * __wt_cond_auto_signal -- + * Signal a condition variable. + */ +int +__wt_cond_auto_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond) +{ + + WT_ASSERT(session, cond->min_wait != 0); + return (__wt_cond_signal(session, cond)); +} + +/* + * __wt_cond_auto_wait_signal -- + * Wait on a mutex, optionally timing out. If we get it before the time + * out period expires, let the caller know. + * TODO: Can this version of the API be removed, now that we have the + * auto adjusting condition variables? + */ +int +__wt_cond_auto_wait_signal( + WT_SESSION_IMPL *session, WT_CONDVAR *cond, bool progress, bool *signalled) +{ + uint64_t delta; + + /* + * Catch cases where this function is called with a condition variable + * that was initialized non-auto. + */ + WT_ASSERT(session, cond->min_wait != 0); + + WT_STAT_FAST_CONN_INCR(session, cond_auto_wait); + if (progress) + cond->prev_wait = cond->min_wait; + else { + delta = WT_MAX(1, (cond->max_wait - cond->min_wait) / 10); + cond->prev_wait = WT_MIN( + cond->max_wait, cond->prev_wait + delta); + } + + WT_RET(__wt_cond_wait_signal( + session, cond, cond->prev_wait, signalled)); + + if (progress || *signalled) + WT_STAT_FAST_CONN_INCR(session, cond_auto_wait_reset); + if (*signalled) + cond->prev_wait = cond->min_wait; + + return (0); +} + +/* + * __wt_cond_auto_wait -- + * Wait on a mutex, optionally timing out. If we get it before the time + * out period expires, let the caller know. 
+ */ +int +__wt_cond_auto_wait( + WT_SESSION_IMPL *session, WT_CONDVAR *cond, bool progress) +{ + bool signalled; + + /* + * Call the signal version so the wait period is reset if the + * condition is woken explicitly. + */ + WT_RET(__wt_cond_auto_wait_signal(session, cond, progress, &signalled)); + + return (0); +} + +/* + * __wt_cond_auto_destroy -- + * Destroy a condition variable. + */ +int +__wt_cond_auto_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp) +{ + return (__wt_cond_destroy(session, condp)); +} diff --git a/src/third_party/wiredtiger/src/support/huffman.c b/src/third_party/wiredtiger/src/support/huffman.c index edd0bc9f648..1e1aaeab5b5 100644 --- a/src/third_party/wiredtiger/src/support/huffman.c +++ b/src/third_party/wiredtiger/src/support/huffman.c @@ -492,11 +492,12 @@ __wt_huffman_open(WT_SESSION_IMPL *session, uint8_t symbol; uint32_t weighted_length; - printf("leaf depth %" PRIu16 "..%" PRIu16 ", memory use: " - "codes %u# * %uB + code2symbol %u# * %uB\n", + printf("leaf depth %" PRIu16 "..%" PRIu16 + ", memory use: codes %u# * %" WT_SIZET_FMT + "B + code2symbol %u# * %" WT_SIZET_FMT "B\n", huffman->min_depth, huffman->max_depth, - huffman->numSymbols, (u_int)sizeof(WT_HUFFMAN_CODE), - 1U << huffman->max_depth, (u_int)sizeof(uint16_t)); + huffman->numSymbols, sizeof(WT_HUFFMAN_CODE), + 1U << huffman->max_depth, sizeof(uint16_t)); /* * measure quality of computed Huffman codes, for different max bit diff --git a/src/third_party/wiredtiger/src/support/power8/crc32.S b/src/third_party/wiredtiger/src/support/power8/crc32.S index 3ef2928aaa1..c0b81143f07 100644 --- a/src/third_party/wiredtiger/src/support/power8/crc32.S +++ b/src/third_party/wiredtiger/src/support/power8/crc32.S @@ -65,14 +65,13 @@ #define off96 r30 #define off112 r31 -#define const1 v25 -#define const2 v26 +#define const1 v24 +#define const2 v25 -#define byteswap v27 -#define mask_32bit v28 -#define mask_64bit v29 -#define zeroes v30 -#define ones v31 +#define byteswap v26 
+#define mask_32bit v27 +#define mask_64bit v28 +#define zeroes v29 #ifdef BYTESWAP_DATA #define VPERM(A, B, C, D) vperm A, B, C, D @@ -90,31 +89,6 @@ FUNC_START(__crc32_vpmsum) std r26,-48(r1) std r25,-56(r1) - li r31, -256 - stvx v20, r31, r1 - li r31, -240 - stvx v21, r31, r1 - li r31, -224 - stvx v22, r31, r1 - li r31, -208 - stvx v23, r31, r1 - li r31, -192 - stvx v24, r31, r1 - li r31, -176 - stvx v25, r31, r1 - li r31, -160 - stvx v26, r31, r1 - li r31, -144 - stvx v27, r31, r1 - li r31, -128 - stvx v28, r31, r1 - li r31, -112 - stvx v29, r31, r1 - li r31, -96 - stvx v30, r31, r1 - li r31, -80 - stvx v31, r31, r1 - li off16,16 li off32,32 li off48,48 @@ -124,13 +98,28 @@ FUNC_START(__crc32_vpmsum) li off112,112 li r0,0 + /* Enough room for saving 10 non volatile VMX registers */ + subi r6,r1,56+10*16 + subi r7,r1,56+2*16 + + stvx v20,0,r6 + stvx v21,off16,r6 + stvx v22,off32,r6 + stvx v23,off48,r6 + stvx v24,off64,r6 + stvx v25,off80,r6 + stvx v26,off96,r6 + stvx v27,off112,r6 + stvx v28,0,r7 + stvx v29,off16,r7 + mr r10,r3 vxor zeroes,zeroes,zeroes - vspltisw ones,-1 + vspltisw v0,-1 - vsldoi mask_32bit,zeroes,ones,4 - vsldoi mask_64bit,zeroes,ones,8 + vsldoi mask_32bit,zeroes,v0,4 + vsldoi mask_64bit,zeroes,v0,8 /* Get the initial value into v8 */ vxor v8,v8,v8 @@ -596,30 +585,20 @@ FUNC_START(__crc32_vpmsum) /* Get it into r3 */ MFVRD(r3, v0) - li r31, -256 - lvx v20, r31, r1 - li r31, -240 - lvx v21, r31, r1 - li r31, -224 - lvx v22, r31, r1 - li r31, -208 - lvx v23, r31, r1 - li r31, -192 - lvx v24, r31, r1 - li r31, -176 - lvx v25, r31, r1 - li r31, -160 - lvx v26, r31, r1 - li r31, -144 - lvx v27, r31, r1 - li r31, -128 - lvx v28, r31, r1 - li r31, -112 - lvx v29, r31, r1 - li r31, -96 - lvx v30, r31, r1 - li r31, -80 - lvx v31, r31, r1 +.Lout: + subi r6,r1,56+10*16 + subi r7,r1,56+2*16 + + lvx v20,0,r6 + lvx v21,off16,r6 + lvx v22,off32,r6 + lvx v23,off48,r6 + lvx v24,off64,r6 + lvx v25,off80,r6 + lvx v26,off96,r6 + lvx v27,off112,r6 + lvx v28,0,r7 + 
lvx v29,off16,r7 ld r31,-8(r1) ld r30,-16(r1) @@ -786,6 +765,7 @@ FUNC_START(__crc32_vpmsum) .Lzero: mr r3,r10 - blr + b .Lout + FUNC_END(__crc32_vpmsum) #endif diff --git a/src/third_party/wiredtiger/src/support/scratch.c b/src/third_party/wiredtiger/src/support/scratch.c index 94020ba2621..aea98dc49ef 100644 --- a/src/third_party/wiredtiger/src/support/scratch.c +++ b/src/third_party/wiredtiger/src/support/scratch.c @@ -45,7 +45,7 @@ __wt_buf_grow_worker(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) WT_RET(__wt_realloc_aligned( session, &buf->memsize, size, &buf->mem)); else - WT_RET(__wt_realloc( + WT_RET(__wt_realloc_noclear( session, &buf->memsize, size, &buf->mem)); } diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c index 0df38bfe6b0..2a826eda962 100644 --- a/src/third_party/wiredtiger/src/support/stat.c +++ b/src/third_party/wiredtiger/src/support/stat.c @@ -581,6 +581,8 @@ static const char * const __stats_connection_desc[] = { "cache: tracked dirty bytes in the cache", "cache: tracked dirty pages in the cache", "cache: unmodified pages evicted", + "connection: auto adjusting condition resets", + "connection: auto adjusting condition wait calls", "connection: files currently open", "connection: memory allocations", "connection: memory frees", @@ -619,6 +621,8 @@ static const char * const __stats_connection_desc[] = { "log: log bytes written", "log: log files manually zero-filled", "log: log flush operations", + "log: log force write operations", + "log: log force write operations skipped", "log: log records compressed", "log: log records not compressed", "log: log records too small to compress", @@ -626,6 +630,7 @@ static const char * const __stats_connection_desc[] = { "log: log scan operations", "log: log scan records requiring two reads", "log: log server thread advances write LSN", + "log: log server thread write LSN walk skipped", "log: log sync operations", "log: log sync_dir 
operations", "log: log write operations", @@ -773,6 +778,8 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) /* not clearing cache_bytes_dirty */ /* not clearing cache_pages_dirty */ stats->cache_eviction_clean = 0; + stats->cond_auto_wait_reset = 0; + stats->cond_auto_wait = 0; /* not clearing file_open */ stats->memory_allocation = 0; stats->memory_free = 0; @@ -811,6 +818,8 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->log_bytes_written = 0; stats->log_zero_fills = 0; stats->log_flush = 0; + stats->log_force_write = 0; + stats->log_force_write_skip = 0; stats->log_compress_writes = 0; stats->log_compress_write_fails = 0; stats->log_compress_small = 0; @@ -818,6 +827,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->log_scans = 0; stats->log_scan_rereads = 0; stats->log_write_lsn = 0; + stats->log_write_lsn_skip = 0; stats->log_sync = 0; stats->log_sync_dir = 0; stats->log_writes = 0; @@ -974,6 +984,8 @@ __wt_stat_connection_aggregate( to->cache_bytes_dirty += WT_STAT_READ(from, cache_bytes_dirty); to->cache_pages_dirty += WT_STAT_READ(from, cache_pages_dirty); to->cache_eviction_clean += WT_STAT_READ(from, cache_eviction_clean); + to->cond_auto_wait_reset += WT_STAT_READ(from, cond_auto_wait_reset); + to->cond_auto_wait += WT_STAT_READ(from, cond_auto_wait); to->file_open += WT_STAT_READ(from, file_open); to->memory_allocation += WT_STAT_READ(from, memory_allocation); to->memory_free += WT_STAT_READ(from, memory_free); @@ -1012,6 +1024,8 @@ __wt_stat_connection_aggregate( to->log_bytes_written += WT_STAT_READ(from, log_bytes_written); to->log_zero_fills += WT_STAT_READ(from, log_zero_fills); to->log_flush += WT_STAT_READ(from, log_flush); + to->log_force_write += WT_STAT_READ(from, log_force_write); + to->log_force_write_skip += WT_STAT_READ(from, log_force_write_skip); to->log_compress_writes += WT_STAT_READ(from, log_compress_writes); to->log_compress_write_fails += WT_STAT_READ(from, 
log_compress_write_fails); @@ -1021,6 +1035,7 @@ __wt_stat_connection_aggregate( to->log_scans += WT_STAT_READ(from, log_scans); to->log_scan_rereads += WT_STAT_READ(from, log_scan_rereads); to->log_write_lsn += WT_STAT_READ(from, log_write_lsn); + to->log_write_lsn_skip += WT_STAT_READ(from, log_write_lsn_skip); to->log_sync += WT_STAT_READ(from, log_sync); to->log_sync_dir += WT_STAT_READ(from, log_sync_dir); to->log_writes += WT_STAT_READ(from, log_writes); diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c index e8fd8c0c119..7a768a8fe20 100644 --- a/src/third_party/wiredtiger/src/txn/txn.c +++ b/src/third_party/wiredtiger/src/txn/txn.c @@ -344,7 +344,7 @@ retry: current_id - oldest_id > 10000 && oldest_session != NULL) { (void)__wt_verbose(session, WT_VERB_TRANSACTION, "old snapshot %" PRIu64 - " pinned in session %d [%s]" + " pinned in session %" PRIu32 " [%s]" " with snap_min %" PRIu64 "\n", oldest_id, oldest_session->id, oldest_session->lastop, diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c index 4bb8ccdc6f0..1eebc9e9d04 100644 --- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c +++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c @@ -8,6 +8,10 @@ #include "wt_internal.h" +static int __checkpoint_lock_tree( + WT_SESSION_IMPL *, bool, bool, const char *[]); +static int __checkpoint_tree_helper(WT_SESSION_IMPL *, const char *[]); + /* * __wt_checkpoint_name_ok -- * Complain if the checkpoint name isn't acceptable. @@ -224,11 +228,11 @@ __checkpoint_data_source(WT_SESSION_IMPL *session, const char *cfg[]) } /* - * __wt_checkpoint_list -- + * __wt_checkpoint_get_handles -- * Get a list of handles to flush. 
*/ int -__wt_checkpoint_list(WT_SESSION_IMPL *session, const char *cfg[]) +__wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) { WT_DECL_RET; const char *name; @@ -254,6 +258,13 @@ __wt_checkpoint_list(WT_SESSION_IMPL *session, const char *cfg[]) if ((ret = __wt_session_get_btree(session, name, NULL, NULL, 0)) != 0) return (ret == EBUSY ? 0 : ret); + WT_SAVE_DHANDLE(session, + ret = __checkpoint_lock_tree(session, true, true, cfg)); + if (ret != 0) { + WT_TRET(__wt_session_release_btree(session)); + return (ret); + } + session->ckpt_handle[session->ckpt_handle_next++] = session->dhandle; return (0); } @@ -267,7 +278,7 @@ __checkpoint_write_leaves(WT_SESSION_IMPL *session, const char *cfg[]) { WT_UNUSED(cfg); - return (__wt_cache_op(session, NULL, WT_SYNC_WRITE_LEAVES)); + return (__wt_cache_op(session, WT_SYNC_WRITE_LEAVES)); } /* @@ -371,15 +382,20 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) /* Configure logging only if doing a full checkpoint. */ logging = FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED); + /* Keep track of handles acquired for locking. */ + WT_ERR(__wt_meta_track_on(session)); + tracking = true; + /* * Get a list of handles we want to flush; this may pull closed objects * into the session cache, but we're going to do that eventually anyway. */ + WT_ASSERT(session, session->ckpt_handle_next == 0); WT_WITH_SCHEMA_LOCK(session, ret, WT_WITH_TABLE_LOCK(session, ret, WT_WITH_HANDLE_LIST_LOCK(session, ret = __checkpoint_apply_all( - session, cfg, __wt_checkpoint_list, NULL)))); + session, cfg, __wt_checkpoint_get_handles, NULL)))); WT_ERR(ret); /* @@ -408,12 +424,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) * completion. Do it after flushing the pages to give the * asynchronous flush as much time as possible before we wait. */ - if (F_ISSET(conn, WT_CONN_CKPT_SYNC)) - WT_ERR(__checkpoint_apply(session, cfg, __wt_checkpoint_sync)); - - /* Start the checkpoint for real. 
*/ - WT_ERR(__wt_meta_track_on(session)); - tracking = true; + WT_ERR(__checkpoint_apply(session, cfg, __wt_checkpoint_sync)); /* Tell logging that we are about to start a database checkpoint. */ if (full && logging) @@ -427,6 +438,8 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_ERR(__wt_epoch(session, &start)); /* + * Start the checkpoint for real. + * * Bump the global checkpoint generation, used to figure out whether * checkpoint has visited a tree. There is no need for this to be * atomic: it is only written while holding the checkpoint lock. @@ -490,7 +503,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_ERR(__wt_txn_checkpoint_log( session, full, WT_TXN_LOG_CKPT_START, NULL)); - WT_ERR(__checkpoint_apply(session, cfg, __wt_checkpoint)); + WT_ERR(__checkpoint_apply(session, cfg, __checkpoint_tree_helper)); /* * Clear the dhandle so the visibility check doesn't get confused about @@ -509,8 +522,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) * Checkpoints have to hit disk (it would be reasonable to configure for * lazy checkpoints, but we don't support them yet). */ - if (F_ISSET(conn, WT_CONN_CKPT_SYNC)) - WT_ERR(__checkpoint_apply(session, cfg, __wt_checkpoint_sync)); + WT_ERR(__checkpoint_apply(session, cfg, __wt_checkpoint_sync)); WT_ERR(__checkpoint_verbose_track(session, "sync completed", &verb_timer)); @@ -754,14 +766,13 @@ __drop_to(WT_CKPT *ckptbase, const char *name, size_t len) } /* - * __checkpoint_worker -- - * Checkpoint a tree. + * __checkpoint_lock_tree -- + * Acquire the locks required to checkpoint a tree. 
*/ static int -__checkpoint_worker(WT_SESSION_IMPL *session, - const char *cfg[], bool is_checkpoint, bool need_tracking) +__checkpoint_lock_tree(WT_SESSION_IMPL *session, + bool is_checkpoint, bool need_tracking, const char *cfg[]) { - WT_BM *bm; WT_BTREE *btree; WT_CKPT *ckpt, *ckptbase; WT_CONFIG dropconf; @@ -769,19 +780,15 @@ __checkpoint_worker(WT_SESSION_IMPL *session, WT_CONNECTION_IMPL *conn; WT_DATA_HANDLE *dhandle; WT_DECL_RET; - WT_LSN ckptlsn; - int deleted, was_modified; - bool fake_ckpt, force, hot_backup_locked; - const char *name; char *name_alloc; + const char *name; + bool hot_backup_locked; btree = S2BT(session); - bm = btree->bm; conn = S2C(session); ckpt = ckptbase = NULL; dhandle = session->dhandle; - was_modified = btree->modified; - fake_ckpt = hot_backup_locked = false; + hot_backup_locked = false; name_alloc = NULL; /* @@ -800,15 +807,6 @@ __checkpoint_worker(WT_SESSION_IMPL *session, WT_ASSERT(session, !need_tracking || WT_IS_METADATA(session, dhandle) || WT_META_TRACKING(session)); - /* - * Set the checkpoint LSN to the maximum LSN so that if logging is - * disabled, recovery will never roll old changes forward over the - * non-logged changes in this checkpoint. If logging is enabled, a - * real checkpoint LSN will be assigned later for this checkpoint and - * overwrite this. - */ - WT_MAX_LSN(&ckptlsn); - /* Get the list of checkpoints for this file. */ WT_RET(__wt_meta_ckptlist_get(session, dhandle->name, &ckptbase)); @@ -859,74 +857,15 @@ __checkpoint_worker(WT_SESSION_IMPL *session, /* Drop checkpoints with the same name as the one we're taking. */ __drop(ckptbase, name, strlen(name)); - /* - * Check for clean objects not requiring a checkpoint. - * - * If we're closing a handle, and the object is clean, we can skip the - * checkpoint, whatever checkpoints we have are sufficient. 
(We might - * not have any checkpoints if the object was never modified, and that's - * OK: the object creation code doesn't mark the tree modified so we can - * skip newly created trees here.) - * - * If the application repeatedly checkpoints an object (imagine hourly - * checkpoints using the same explicit or internal name), there's no - * reason to repeat the checkpoint for clean objects. The test is if - * the only checkpoint we're deleting is the last one in the list and - * it has the same name as the checkpoint we're about to take, skip the - * work. (We can't skip checkpoints that delete more than the last - * checkpoint because deleting those checkpoints might free up space in - * the file.) This means an application toggling between two (or more) - * checkpoint names will repeatedly take empty checkpoints, but that's - * not likely enough to make detection worthwhile. - * - * Checkpoint read-only objects otherwise: the application must be able - * to open the checkpoint in a cursor after taking any checkpoint, which - * means it must exist. - */ - force = false; - F_CLR(btree, WT_BTREE_SKIP_CKPT); - if (!btree->modified && cfg != NULL) { - ret = __wt_config_gets(session, cfg, "force", &cval); - if (ret != 0 && ret != WT_NOTFOUND) - WT_ERR(ret); - if (ret == 0 && cval.val != 0) - force = true; - } - if (!btree->modified && !force) { - if (!is_checkpoint) - goto nockpt; - - deleted = 0; - WT_CKPT_FOREACH(ckptbase, ckpt) - if (F_ISSET(ckpt, WT_CKPT_DELETE)) - ++deleted; - /* - * Complicated test: if the last checkpoint in the object has - * the same name as the checkpoint we're taking (correcting for - * internal checkpoint names with their generational suffix - * numbers), we can skip the checkpoint, there's nothing to do. - * The exception is if we're deleting two or more checkpoints: - * then we may save space. 
- */ - if (ckpt > ckptbase && - (strcmp(name, (ckpt - 1)->name) == 0 || - (WT_PREFIX_MATCH(name, WT_CHECKPOINT) && - WT_PREFIX_MATCH((ckpt - 1)->name, WT_CHECKPOINT))) && - deleted < 2) { -nockpt: F_SET(btree, WT_BTREE_SKIP_CKPT); - WT_PUBLISH(btree->checkpoint_gen, - S2C(session)->txn_global.checkpoint_gen); - WT_STAT_FAST_DATA_SET(session, - btree_checkpoint_generation, - btree->checkpoint_gen); - goto done; - } - } - /* Add a new checkpoint entry at the end of the list. */ WT_CKPT_FOREACH(ckptbase, ckpt) ; WT_ERR(__wt_strdup(session, name, &ckpt->name)); + /* + * We are now done with the local use of the name. Free the local + * allocation, if needed. + */ + __wt_free(session, name_alloc); F_SET(ckpt, WT_CKPT_ADD); /* @@ -1007,32 +946,128 @@ nockpt: F_SET(btree, WT_BTREE_SKIP_CKPT); * copy instead of forcing checkpoints on clean objects to associate * names with checkpoints. */ - if (is_checkpoint) - switch (F_MASK(btree, WT_BTREE_SPECIAL_FLAGS)) { - case 0: - break; - case WT_BTREE_BULK: - /* - * The only checkpoints a bulk-loaded file should have - * are fake ones we created without the underlying block - * manager. I'm leaving this code here because it's a - * cheap test and a nasty race. 
- */ - WT_CKPT_FOREACH(ckptbase, ckpt) - if (!F_ISSET(ckpt, WT_CKPT_ADD | WT_CKPT_FAKE)) - WT_ERR_MSG(session, ret, - "block-manager checkpoint found " - "for a bulk-loaded file"); - fake_ckpt = true; - goto fake; - case WT_BTREE_REBALANCE: - case WT_BTREE_SALVAGE: - case WT_BTREE_UPGRADE: - case WT_BTREE_VERIFY: - WT_ERR_MSG(session, EINVAL, - "checkpoints are blocked during rebalance, " - "salvage, upgrade or verify operations"); + WT_ASSERT(session, + !is_checkpoint || !F_ISSET(btree, WT_BTREE_SPECIAL_FLAGS)); + + hot_backup_locked = false; + WT_ERR(__wt_readunlock(session, conn->hot_backup_lock)); + + WT_ASSERT(session, btree->ckpt == NULL); + btree->ckpt = ckptbase; + + return (0); + +err: if (hot_backup_locked) + WT_TRET(__wt_readunlock(session, conn->hot_backup_lock)); + + __wt_meta_ckptlist_free(session, ckptbase); + __wt_free(session, name_alloc); + + return (ret); +} + +/* + * __checkpoint_tree -- + * Checkpoint a single tree. + * Assumes all necessary locks have been acquired by the caller. + */ +static int +__checkpoint_tree( + WT_SESSION_IMPL *session, bool is_checkpoint, const char *cfg[]) +{ + WT_BM *bm; + WT_BTREE *btree; + WT_CKPT *ckpt, *ckptbase; + WT_CONFIG_ITEM cval; + WT_CONNECTION_IMPL *conn; + WT_DATA_HANDLE *dhandle; + WT_DECL_RET; + WT_LSN ckptlsn; + const char *name; + int deleted, was_modified; + bool fake_ckpt, force; + + btree = S2BT(session); + bm = btree->bm; + ckptbase = btree->ckpt; + conn = S2C(session); + dhandle = session->dhandle; + fake_ckpt = false; + was_modified = btree->modified; + + /* + * Set the checkpoint LSN to the maximum LSN so that if logging is + * disabled, recovery will never roll old changes forward over the + * non-logged changes in this checkpoint. If logging is enabled, a + * real checkpoint LSN will be assigned for this checkpoint and + * overwrite this. + */ + WT_MAX_LSN(&ckptlsn); + + /* + * Check for clean objects not requiring a checkpoint. 
+ * + * If we're closing a handle, and the object is clean, we can skip the + * checkpoint, whatever checkpoints we have are sufficient. (We might + * not have any checkpoints if the object was never modified, and that's + * OK: the object creation code doesn't mark the tree modified so we can + * skip newly created trees here.) + * + * If the application repeatedly checkpoints an object (imagine hourly + * checkpoints using the same explicit or internal name), there's no + * reason to repeat the checkpoint for clean objects. The test is if + * the only checkpoint we're deleting is the last one in the list and + * it has the same name as the checkpoint we're about to take, skip the + * work. (We can't skip checkpoints that delete more than the last + * checkpoint because deleting those checkpoints might free up space in + * the file.) This means an application toggling between two (or more) + * checkpoint names will repeatedly take empty checkpoints, but that's + * not likely enough to make detection worthwhile. + * + * Checkpoint read-only objects otherwise: the application must be able + * to open the checkpoint in a cursor after taking any checkpoint, which + * means it must exist. + */ + force = false; + F_CLR(btree, WT_BTREE_SKIP_CKPT); + if (!btree->modified && cfg != NULL) { + ret = __wt_config_gets(session, cfg, "force", &cval); + if (ret != 0 && ret != WT_NOTFOUND) + WT_ERR(ret); + if (ret == 0 && cval.val != 0) + force = true; + } + if (!btree->modified && !force) { + if (!is_checkpoint) + goto nockpt; + + deleted = 0; + WT_CKPT_FOREACH(ckptbase, ckpt) + if (F_ISSET(ckpt, WT_CKPT_DELETE)) + ++deleted; + /* + * Complicated test: if the tree is clean and last two + * checkpoints have the same name (correcting for internal + * checkpoint names with their generational suffix numbers), we + * can skip the checkpoint, there's nothing to do. The + * exception is if we're deleting two or more checkpoints: then + * we may save space. 
+ */ + name = (ckpt - 1)->name; + if (ckpt > ckptbase + 1 && deleted < 2 && + (strcmp(name, (ckpt - 2)->name) == 0 || + (WT_PREFIX_MATCH(name, WT_CHECKPOINT) && + WT_PREFIX_MATCH((ckpt - 2)->name, WT_CHECKPOINT)))) { +nockpt: F_SET(btree, WT_BTREE_SKIP_CKPT); + WT_PUBLISH(btree->checkpoint_gen, + S2C(session)->txn_global.checkpoint_gen); + WT_STAT_FAST_DATA_SET(session, + btree_checkpoint_generation, + btree->checkpoint_gen); + ret = 0; + goto err; } + } /* * If an object has never been used (in other words, if it could become @@ -1086,9 +1121,9 @@ nockpt: F_SET(btree, WT_BTREE_SKIP_CKPT); /* Flush the file from the cache, creating the checkpoint. */ if (is_checkpoint) - WT_ERR(__wt_cache_op(session, ckptbase, WT_SYNC_CHECKPOINT)); + WT_ERR(__wt_cache_op(session, WT_SYNC_CHECKPOINT)); else - WT_ERR(__wt_cache_op(session, ckptbase, WT_SYNC_CLOSE)); + WT_ERR(__wt_cache_op(session, WT_SYNC_CLOSE)); /* * All blocks being written have been written; set the object's write @@ -1120,9 +1155,8 @@ fake: /* * sync the file here or we could roll forward the metadata in * recovery and open a checkpoint that isn't yet durable. */ - if (F_ISSET(conn, WT_CONN_CKPT_SYNC) && - (WT_IS_METADATA(session, dhandle) || - !F_ISSET(&session->txn, WT_TXN_RUNNING))) + if (WT_IS_METADATA(session, dhandle) || + !F_ISSET(&session->txn, WT_TXN_RUNNING)) WT_ERR(__wt_checkpoint_sync(session, NULL)); WT_ERR(__wt_meta_ckptlist_set( @@ -1147,7 +1181,6 @@ fake: /* WT_ERR(__wt_txn_checkpoint_log( session, false, WT_TXN_LOG_CKPT_STOP, NULL)); -done: err: /* * If the checkpoint didn't complete successfully, make sure the * tree is marked dirty. 
@@ -1155,30 +1188,42 @@ err: /* if (ret != 0 && !btree->modified && was_modified) btree->modified = 1; - if (hot_backup_locked) - WT_TRET(__wt_readunlock(session, conn->hot_backup_lock)); - __wt_meta_ckptlist_free(session, ckptbase); - __wt_free(session, name_alloc); + btree->ckpt = NULL; return (ret); } /* + * __checkpoint_tree_helper -- + * Checkpoint a tree (suitable for use in *_apply functions). + */ +static int +__checkpoint_tree_helper(WT_SESSION_IMPL *session, const char *cfg[]) +{ + return (__checkpoint_tree(session, true, cfg)); +} + +/* * __wt_checkpoint -- * Checkpoint a file. */ int __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) { + WT_DECL_RET; + /* Should not be called with a checkpoint handle. */ WT_ASSERT(session, session->dhandle->checkpoint == NULL); - /* Should be holding the schema lock. */ + /* We must hold the metadata lock if checkpointing the metadata. */ WT_ASSERT(session, !WT_IS_METADATA(session, session->dhandle) || F_ISSET(session, WT_SESSION_LOCKED_METADATA)); - return (__checkpoint_worker(session, cfg, true, true)); + WT_SAVE_DHANDLE(session, + ret = __checkpoint_lock_tree(session, true, true, cfg)); + WT_RET(ret); + return (__checkpoint_tree(session, true, cfg)); } /* @@ -1197,8 +1242,9 @@ __wt_checkpoint_sync(WT_SESSION_IMPL *session, const char *cfg[]) /* Should not be called with a checkpoint handle. */ WT_ASSERT(session, session->dhandle->checkpoint == NULL); - /* Should have an underlying block manager reference. */ - WT_ASSERT(session, bm != NULL); + /* Unnecessary if checkpoint_sync has been configured "off". 
*/ + if (!F_ISSET(S2C(session), WT_CONN_CKPT_SYNC)) + return (0); return (bm->sync(bm, session, false)); } @@ -1227,7 +1273,7 @@ __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final) if (F_ISSET(btree, WT_BTREE_NO_CHECKPOINT)) F_SET(session->dhandle, WT_DHANDLE_DEAD); if (F_ISSET(session->dhandle, WT_DHANDLE_DEAD)) - return (__wt_cache_op(session, NULL, WT_SYNC_DISCARD)); + return (__wt_cache_op(session, WT_SYNC_DISCARD)); /* * If closing an unmodified file, check that no update is required @@ -1236,7 +1282,7 @@ __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final) if (!btree->modified && !bulk) { __wt_txn_update_oldest(session, true); return (__wt_txn_visible_all(session, btree->rec_max_txn) ? - __wt_cache_op(session, NULL, WT_SYNC_DISCARD) : EBUSY); + __wt_cache_op(session, WT_SYNC_DISCARD) : EBUSY); } /* @@ -1250,10 +1296,14 @@ __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final) if (need_tracking) WT_RET(__wt_meta_track_on(session)); - WT_TRET(__checkpoint_worker(session, NULL, false, need_tracking)); + WT_SAVE_DHANDLE(session, + ret = __checkpoint_lock_tree(session, false, need_tracking, NULL)); + WT_ASSERT(session, ret == 0); + if (ret == 0) + ret = __checkpoint_tree(session, false, NULL); if (need_tracking) - WT_RET(__wt_meta_track_off(session, true, ret != 0)); + WT_TRET(__wt_meta_track_off(session, true, ret != 0)); return (ret); } diff --git a/src/third_party/wiredtiger/src/txn/txn_recover.c b/src/third_party/wiredtiger/src/txn/txn_recover.c index f41691bbc3b..1ea4dba1152 100644 --- a/src/third_party/wiredtiger/src/txn/txn_recover.c +++ b/src/third_party/wiredtiger/src/txn/txn_recover.c @@ -88,11 +88,11 @@ __recovery_cursor(WT_SESSION_IMPL *session, WT_RECOVERY *r, * Helper to a cursor if this operation is to be applied during recovery. 
*/ #define GET_RECOVERY_CURSOR(session, r, lsnp, fileid, cp) \ - WT_ERR(__recovery_cursor( \ - (session), (r), (lsnp), (fileid), false, (cp))); \ - WT_ERR(__wt_verbose((session), WT_VERB_RECOVERY, \ - "%s op %d to file %d at LSN %u/%u", \ - (cursor == NULL) ? "Skipping" : "Applying", \ + WT_ERR(__recovery_cursor(session, r, lsnp, fileid, false, cp)); \ + WT_ERR(__wt_verbose(session, WT_VERB_RECOVERY, \ + "%s op %" PRIu32 " to file %" PRIu32 " at LSN %" PRIu32 \ + "/%" PRIu32, \ + cursor == NULL ? "Skipping" : "Applying", \ optype, fileid, lsnp->l.file, lsnp->l.offset)); \ if (cursor == NULL) \ break @@ -334,7 +334,7 @@ __recovery_setup_file(WT_RECOVERY *r, const char *uri, const char *config) r->files[fileid].ckpt_lsn = lsn; WT_RET(__wt_verbose(r->session, WT_VERB_RECOVERY, - "Recovering %s with id %u @ (%" PRIu32 ", %" PRIu32 ")", + "Recovering %s with id %" PRIu32 " @ (%" PRIu32 ", %" PRIu32 ")", uri, fileid, lsn.l.file, lsn.l.offset)); return (0); @@ -496,7 +496,7 @@ __wt_txn_recover(WT_SESSION_IMPL *session) */ r.metadata_only = false; WT_ERR(__wt_verbose(session, WT_VERB_RECOVERY, - "Main recovery loop: starting at %u/%u", + "Main recovery loop: starting at %" PRIu32 "/%" PRIu32, r.ckpt_lsn.l.file, r.ckpt_lsn.l.offset)); WT_ERR(__wt_log_needs_recovery(session, &r.ckpt_lsn, &needs_rec)); /* diff --git a/src/third_party/wiredtiger/src/utilities/util_dump.c b/src/third_party/wiredtiger/src/utilities/util_dump.c index ca761a52d8a..aedd9168fbd 100644 --- a/src/third_party/wiredtiger/src/utilities/util_dump.c +++ b/src/third_party/wiredtiger/src/utilities/util_dump.c @@ -22,10 +22,10 @@ static int dump_prefix(WT_SESSION *, bool); static int dump_record(WT_CURSOR *, bool, bool); static int dump_suffix(WT_SESSION *); static int dump_table_config(WT_SESSION *, WT_CURSOR *, const char *); -static int dump_table_config_type( +static int dump_table_config_complex( WT_SESSION *, WT_CURSOR *, WT_CURSOR *, const char *, const char *); static int dup_json_string(const char *, 
char **); -static int print_config(WT_SESSION *, const char *, const char *, const char *); +static int print_config(WT_SESSION *, const char *, char *[]); static int usage(void); int @@ -150,9 +150,9 @@ dump_config(WT_SESSION *session, const char *uri, bool hex) /* Open a metadata cursor. */ if ((ret = session->open_cursor( - session, "metadata:create", NULL, NULL, &cursor)) != 0) { + session, "metadata:", NULL, NULL, &cursor)) != 0) { fprintf(stderr, "%s: %s: session.open_cursor: %s\n", progname, - "metadata:create", session->strerror(session, ret)); + "metadata:", session->strerror(session, ret)); return (1); } /* @@ -352,12 +352,23 @@ match: if ((ret = cursor->get_key(cursor, &key)) != 0) static int dump_json_table_config(WT_SESSION *session, const char *uri) { + WT_CONFIG_ITEM cval; WT_CURSOR *cursor; WT_DECL_RET; + size_t len; int tret; - char *value; + const char *name, *value; + char *p; + + p = NULL; + + /* Get the table name. */ + if ((name = strchr(uri, ':')) == NULL) { + fprintf(stderr, "%s: %s: corrupted uri\n", progname, uri); + return (1); + } + ++name; - /* Dump the config. */ /* Open a metadata cursor. */ if ((ret = session->open_cursor( session, "metadata:create", NULL, NULL, &cursor)) != 0) { @@ -368,12 +379,41 @@ dump_json_table_config(WT_SESSION *session, const char *uri) } /* - * Search for the object itself, to make sure it - * exists, and get its config string. This where we - * find out a table object doesn't exist, use a simple - * error message. + * Search for the object itself, just to make sure it exists, we don't + * want to output a header if the user entered the wrong name. This is + * where we find out a table doesn't exist, use a simple error message. + * + * Workaround for WiredTiger "simple" table handling. Simple tables + * have column-group entries, but they aren't listed in the metadata's + * table entry. 
Figure out if it's a simple table and in that case, + * retrieve the column-group entry and use the value from its "source" + * file. */ - cursor->set_key(cursor, uri); + if (WT_PREFIX_MATCH(uri, "table:")) { + len = strlen("colgroup:") + strlen(name) + 1; + if ((p = malloc(len)) == NULL) + return (util_err(session, errno, NULL)); + (void)snprintf(p, len, "colgroup:%s", name); + cursor->set_key(cursor, p); + if ((ret = cursor->search(cursor)) == 0) { + if ((ret = cursor->get_value(cursor, &value)) != 0) + return (util_cerr(cursor, "get_value", ret)); + if ((ret = __wt_config_getones( + (WT_SESSION_IMPL *)session, + value, "source", &cval)) != 0) + return (util_err( + session, ret, "%s: source entry", p)); + free(p); + len = cval.len + 10; + if ((p = malloc(len)) == NULL) + return (util_err(session, errno, NULL)); + (void)snprintf(p, len, "%.*s", (int)cval.len, cval.str); + cursor->set_key(cursor, p); + } else + cursor->set_key(cursor, uri); + } else + cursor->set_key(cursor, uri); + if ((ret = cursor->search(cursor)) == 0) { if ((ret = cursor->get_value(cursor, &value)) != 0) ret = util_cerr(cursor, "get_value", ret); @@ -381,8 +421,7 @@ dump_json_table_config(WT_SESSION *session, const char *uri) session, cursor, uri, value) != 0) ret = 1; } else if (ret == WT_NOTFOUND) - ret = util_err( - session, 0, "%s: No such object exists", uri); + ret = util_err(session, 0, "%s: No such object exists", uri); else ret = util_err(session, ret, "%s", uri); @@ -392,6 +431,7 @@ dump_json_table_config(WT_SESSION *session, const char *uri) ret = tret; } + free(p); return (ret); } @@ -414,10 +454,17 @@ dump_json_table_end(WT_SESSION *session) static int dump_table_config(WT_SESSION *session, WT_CURSOR *cursor, const char *uri) { + WT_CONFIG_ITEM cval; WT_CURSOR *srch; WT_DECL_RET; + size_t len; int tret; - const char *key, *name, *value; + bool complex_table; + const char *name, *v; + char *p, **cfg, *_cfg[4] = {NULL, NULL, NULL, NULL}; + + p = NULL; + cfg = &_cfg[3]; /* Get the 
table name. */ if ((name = strchr(uri, ':')) == NULL) { @@ -427,59 +474,111 @@ dump_table_config(WT_SESSION *session, WT_CURSOR *cursor, const char *uri) ++name; /* - * Dump out the config information: first, dump the uri entry itself - * (requires a lookup). + * Dump out the config information: first, dump the uri entry itself, + * it overrides all subsequent configurations. */ cursor->set_key(cursor, uri); if ((ret = cursor->search(cursor)) != 0) return (util_cerr(cursor, "search", ret)); - if ((ret = cursor->get_key(cursor, &key)) != 0) - return (util_cerr(cursor, "get_key", ret)); - if ((ret = cursor->get_value(cursor, &value)) != 0) + if ((ret = cursor->get_value(cursor, &v)) != 0) return (util_cerr(cursor, "get_value", ret)); - if (print_config(session, key, value, NULL) != 0) - return (1); + if ((*--cfg = strdup(v)) == NULL) + return (util_err(session, errno, NULL)); /* - * The underlying table configuration function needs a second cursor: - * open one before calling it, it makes error handling hugely simpler. + * Workaround for WiredTiger "simple" table handling. Simple tables + * have column-group entries, but they aren't listed in the metadata's + * table entry, and the name is different from other column-groups. + * Figure out if it's a simple table and in that case, retrieve the + * column-group's configuration value and the column-group's "source" + * entry, where the column-group entry overrides the source's. 
*/ - if ((ret = - session->open_cursor(session, NULL, cursor, NULL, &srch)) != 0) - return (util_cerr(cursor, "open_cursor", ret)); + complex_table = false; + if (WT_PREFIX_MATCH(uri, "table:")) { + len = strlen("colgroup:") + strlen(name) + 1; + if ((p = malloc(len)) == NULL) + return (util_err(session, errno, NULL)); + (void)snprintf(p, len, "colgroup:%s", name); + cursor->set_key(cursor, p); + if ((ret = cursor->search(cursor)) == 0) { + if ((ret = cursor->get_value(cursor, &v)) != 0) + return (util_cerr(cursor, "get_value", ret)); + if ((*--cfg = strdup(v)) == NULL) + return (util_err(session, errno, NULL)); + if ((ret =__wt_config_getones( + (WT_SESSION_IMPL *)session, + *cfg, "source", &cval)) != 0) + return (util_err( + session, ret, "%s: source entry", p)); + free(p); + len = cval.len + 10; + if ((p = malloc(len)) == NULL) + return (util_err(session, errno, NULL)); + (void)snprintf(p, len, "%.*s", (int)cval.len, cval.str); + cursor->set_key(cursor, p); + if ((ret = cursor->search(cursor)) != 0) + return (util_cerr(cursor, "search", ret)); + if ((ret = cursor->get_value(cursor, &v)) != 0) + return (util_cerr(cursor, "get_value", ret)); + if ((*--cfg = strdup(v)) == NULL) + return (util_err(session, errno, NULL)); + } else + complex_table = true; + } - if ((ret = dump_table_config_type( - session, cursor, srch, name, "colgroup:")) == 0) - ret = dump_table_config_type( - session, cursor, srch, name, "index:"); + if (print_config(session, uri, cfg) != 0) + return (1); - if ((tret = srch->close(srch)) != 0) { - tret = util_cerr(cursor, "close", tret); - if (ret == 0) - ret = tret; + if (complex_table) { + /* + * The underlying table configuration function needs a second + * cursor: open one before calling it, it makes error handling + * hugely simpler. 
+ */ + if ((ret = session->open_cursor( + session, "metadata:", NULL, NULL, &srch)) != 0) + return (util_cerr(cursor, "open_cursor", ret)); + + if ((ret = dump_table_config_complex( + session, cursor, srch, name, "colgroup:")) == 0) + ret = dump_table_config_complex( + session, cursor, srch, name, "index:"); + + if ((tret = srch->close(srch)) != 0) { + tret = util_cerr(cursor, "close", tret); + if (ret == 0) + ret = tret; + } } + free(p); + free(_cfg[0]); + free(_cfg[1]); + free(_cfg[2]); return (ret); } /* - * dump_table_config_type -- + * dump_table_config_complex -- * Dump the column groups or indices for a table. */ static int -dump_table_config_type(WT_SESSION *session, +dump_table_config_complex(WT_SESSION *session, WT_CURSOR *cursor, WT_CURSOR *srch, const char *name, const char *entry) { WT_CONFIG_ITEM cval; WT_DECL_RET; - const char *key, *skip, *value, *value_source; + const char *key; + size_t len; int exact; - char *p; + const char *v; + char *p, *cfg[3] = {NULL, NULL, NULL}; /* * Search the file looking for column group and index key/value pairs: * for each one, look up the related source information and append it - * to the base record. + * to the base record, where the column group and index configuration + * overrides the source configuration. */ cursor->set_key(cursor, entry); if ((ret = cursor->search_near(cursor, &exact)) != 0) { @@ -497,27 +596,32 @@ match: if ((ret = cursor->get_key(cursor, &key)) != 0) if (!WT_PREFIX_MATCH(key, entry)) return (0); - /* Check for a table name match. */ - skip = key + strlen(entry); - if (strncmp( - skip, name, strlen(name)) != 0 || skip[strlen(name)] != ':') + /* + * Check for a table name match. This test will match "simple" + * table column-groups as well as the more complex ones, but + * the previous version of the test was wrong and we're only + * in this function in the case of complex tables. + */ + if (!WT_PREFIX_MATCH(key + strlen(entry), name)) continue; /* Get the value. 
*/ - if ((ret = cursor->get_value(cursor, &value)) != 0) + if ((ret = cursor->get_value(cursor, &v)) != 0) return (util_cerr(cursor, "get_value", ret)); + if ((cfg[1] = strdup(v)) == NULL) + return (util_err(session, errno, NULL)); /* Crack it and get the underlying source. */ if ((ret = __wt_config_getones( - (WT_SESSION_IMPL *)session, value, "source", &cval)) != 0) + (WT_SESSION_IMPL *)session, cfg[1], "source", &cval)) != 0) return ( util_err(session, ret, "%s: source entry", key)); /* Nul-terminate the source entry. */ - if ((p = malloc(cval.len + 10)) == NULL) + len = cval.len + 10; + if ((p = malloc(len)) == NULL) return (util_err(session, errno, NULL)); - (void)strncpy(p, cval.str, cval.len); - p[cval.len] = '\0'; + (void)snprintf(p, len, "%.*s", (int)cval.len, cval.str); srch->set_key(srch, p); if ((ret = srch->search(srch)) != 0) ret = util_err(session, ret, "%s: %s", key, p); @@ -526,16 +630,22 @@ match: if ((ret = cursor->get_key(cursor, &key)) != 0) return (1); /* Get the source's value. */ - if ((ret = srch->get_value(srch, &value_source)) != 0) + if ((ret = srch->get_value(srch, &v)) != 0) return (util_cerr(cursor, "get_value", ret)); + if ((cfg[0] = strdup(v)) == NULL) + return (util_err(session, errno, NULL)); /* * The dumped configuration string is the original key plus the - * source's configuration. + * source's configuration, where the values of the original key + * override any source configurations of the same name. */ - if (print_config(session, key, value, value_source) != 0) + if (print_config(session, key, cfg) != 0) return (util_err(session, EIO, NULL)); } + free(cfg[0]); + free(cfg[1]); + if (ret == 0 || ret == WT_NOTFOUND) return (0); return (util_cerr(cursor, "next", ret)); @@ -649,27 +759,21 @@ dup_json_string(const char *str, char **result) * Output a key/value URI pair by combining v1 and v2. 
*/ static int -print_config(WT_SESSION *session, - const char *key, const char *v1, const char *v2) +print_config(WT_SESSION *session, const char *key, char *cfg[]) { WT_DECL_RET; char *value_ret; - const char *cfg[] = { v1, v2, NULL }; /* - * The underlying call will stop if the first string is NULL -- check - * here and swap in that case. + * We have all of the object configuration, but don't have the default + * session.create configuration. Have the underlying library add in the + * defaults and collapse it all into one load configuration string. */ - if (cfg[0] == NULL) { - cfg[0] = cfg[1]; - cfg[1] = NULL; - } - - if ((ret = __wt_config_collapse( + if ((ret = __wt_schema_create_final( (WT_SESSION_IMPL *)session, cfg, &value_ret)) != 0) return (util_err(session, ret, NULL)); ret = printf("%s\n%s\n", key, value_ret); - free((char *)value_ret); + free(value_ret); if (ret < 0) return (util_err(session, EIO, NULL)); return (0); diff --git a/src/third_party/wiredtiger/test/bloom/test_bloom.c b/src/third_party/wiredtiger/test/bloom/test_bloom.c index 183dc3d2d42..f95bc7faaf9 100644 --- a/src/third_party/wiredtiger/test/bloom/test_bloom.c +++ b/src/third_party/wiredtiger/test/bloom/test_bloom.c @@ -160,7 +160,7 @@ run(void) for (i = 0; i < g.c_ops; i++) { item.data = g.entries[i]; if ((ret = __wt_bloom_insert(bloomp, &item)) != 0) - testutil_die(ret, "__wt_bloom_insert: %d", i); + testutil_die(ret, "__wt_bloom_insert: %" PRIu32, i); } testutil_check(__wt_bloom_finalize(bloomp)); @@ -168,7 +168,8 @@ run(void) for (i = 0; i < g.c_ops; i++) { item.data = g.entries[i]; if ((ret = __wt_bloom_get(bloomp, &item)) != 0) { - fprintf(stderr, "get failed at record: %d\n", i); + fprintf(stderr, + "get failed at record: %" PRIu32 "\n", i); testutil_die(ret, "__wt_bloom_get"); } } @@ -201,7 +202,8 @@ run(void) testutil_die(ret, "__wt_bloom_get"); } free((void *)item.data); - printf("Out of %d ops, got %d false positives, %.4f%%\n", + printf( + "Out of %" PRIu32 " ops, got %" 
PRIu32 " false positives, %.4f%%\n", g.c_ops, fp, 100.0 * fp/g.c_ops); testutil_check(__wt_bloom_drop(bloomp, NULL)); } diff --git a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c index 0f28a86b675..c5524b3c63e 100644 --- a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c +++ b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c @@ -136,7 +136,7 @@ main(int argc, char *argv[]) printf("%s: process %" PRIu64 "\n", g.progname, (uint64_t)getpid()); for (cnt = 1; (runs == 0 || cnt <= runs) && g.status == 0; ++cnt) { - printf(" %d: %u workers, %u tables\n", + printf(" %d: %d workers, %d tables\n", cnt, g.nworkers, g.ntables); (void)cleanup(); /* Clean up previous runs */ diff --git a/src/third_party/wiredtiger/test/cursor_order/cursor_order.c b/src/third_party/wiredtiger/test/cursor_order/cursor_order.c index 68d2f092c60..d8cfc0c1421 100644 --- a/src/third_party/wiredtiger/test/cursor_order/cursor_order.c +++ b/src/third_party/wiredtiger/test/cursor_order/cursor_order.c @@ -154,8 +154,10 @@ main(int argc, char *argv[]) printf("%s: process %" PRIu64 "\n", progname, (uint64_t)getpid()); for (cnt = 1; runs == 0 || cnt <= runs; ++cnt) { - printf(" %d: %u reverse scanners, %u writers\n", cnt, - (int)cfg->reverse_scanners, (int)cfg->append_inserters); + printf( + " %d: %" PRIu64 + " reverse scanners, %" PRIu64 " writers\n", + cnt, cfg->reverse_scanners, cfg->append_inserters); shutdown(); /* Clean up previous runs */ diff --git a/src/third_party/wiredtiger/test/fops/file.c b/src/third_party/wiredtiger/test/fops/file.c index 4cd92e7b590..ea15f1ee80d 100644 --- a/src/third_party/wiredtiger/test/fops/file.c +++ b/src/third_party/wiredtiger/test/fops/file.c @@ -147,7 +147,7 @@ obj_create_unique(int force) /* Generate a unique object name. 
*/ if ((ret = pthread_rwlock_wrlock(&single)) != 0) testutil_die(ret, "pthread_rwlock_wrlock single"); - (void)snprintf(new_uri, sizeof(new_uri), "%s.%d", uri, ++uid); + (void)snprintf(new_uri, sizeof(new_uri), "%s.%u", uri, ++uid); if ((ret = pthread_rwlock_unlock(&single)) != 0) testutil_die(ret, "pthread_rwlock_unlock single"); diff --git a/src/third_party/wiredtiger/test/fops/fops.c b/src/third_party/wiredtiger/test/fops/fops.c index fbc9d9c6048..3333ff16858 100644 --- a/src/third_party/wiredtiger/test/fops/fops.c +++ b/src/third_party/wiredtiger/test/fops/fops.c @@ -109,7 +109,7 @@ fop(void *arg) __wt_random_init(&rnd); for (i = 0; i < nops; ++i, __wt_yield()) - switch (__wt_random(&rnd) % 9) { + switch (__wt_random(&rnd) % 10) { case 0: ++s->bulk; obj_bulk(); diff --git a/src/third_party/wiredtiger/test/format/backup.c b/src/third_party/wiredtiger/test/format/backup.c index 56657940514..2b1463bd0e3 100644 --- a/src/third_party/wiredtiger/test/format/backup.c +++ b/src/third_party/wiredtiger/test/format/backup.c @@ -67,6 +67,13 @@ copy_file(const char *name) "cp %s/%s %s/%s", g.home, name, g.home_backup, name); testutil_checkfmt(system(cmd), "backup copy: %s", cmd); free(cmd); + + len = strlen(g.home) + strlen(g.home_backup2) + strlen(name) * 2 + 20; + cmd = dmalloc(len); + (void)snprintf(cmd, len, + "cp %s/%s %s/%s", g.home, name, g.home_backup2, name); + testutil_checkfmt(system(cmd), "backup copy: %s", cmd); + free(cmd); } /* diff --git a/src/third_party/wiredtiger/test/format/format.h b/src/third_party/wiredtiger/test/format/format.h index c54fd061736..a129c5395fd 100644 --- a/src/third_party/wiredtiger/test/format/format.h +++ b/src/third_party/wiredtiger/test/format/format.h @@ -109,6 +109,7 @@ typedef struct { char *home; /* Home directory */ char *home_backup; /* Hot-backup directory */ + char *home_backup2; /* Saved Hot-backup directory */ char *home_backup_init; /* Initialize backup command */ char *home_bdb; /* BDB directory */ char *home_config; /* 
Run CONFIG file path */ diff --git a/src/third_party/wiredtiger/test/format/util.c b/src/third_party/wiredtiger/test/format/util.c index 347b2ea1db3..2e4c869366c 100644 --- a/src/third_party/wiredtiger/test/format/util.c +++ b/src/third_party/wiredtiger/test/format/util.c @@ -310,6 +310,10 @@ path_setup(const char *home) g.home_backup = dmalloc(len); snprintf(g.home_backup, len, "%s/%s", g.home, "BACKUP"); + len = strlen(g.home) + strlen("BACKUP2") + 2; + g.home_backup2 = dmalloc(len); + snprintf(g.home_backup2, len, "%s/%s", g.home, "BACKUP2"); + /* BDB directory. */ len = strlen(g.home) + strlen("bdb") + 2; g.home_bdb = dmalloc(len); @@ -340,13 +344,15 @@ path_setup(const char *home) /* Backup directory initialize command, remove and re-create it. */ #undef CMD #ifdef _WIN32 -#define CMD "del /s /q >:nul && mkdir %s" +#define CMD "del /s /q >:nul && mkdir %s %s" #else -#define CMD "rm -rf %s && mkdir %s" +#define CMD "rm -rf %s %s && mkdir %s %s" #endif - len = strlen(g.home_backup) * 2 + strlen(CMD) + 1; + len = strlen(g.home_backup) * 2 + + strlen(g.home_backup2) * 2 + strlen(CMD) + 1; g.home_backup_init = dmalloc(len); - snprintf(g.home_backup_init, len, CMD, g.home_backup, g.home_backup); + snprintf(g.home_backup_init, len, CMD, g.home_backup, g.home_backup2, + g.home_backup, g.home_backup2); /* * Salvage command, save the interesting files so we can replay the diff --git a/src/third_party/wiredtiger/test/format/wts.c b/src/third_party/wiredtiger/test/format/wts.c index a0e57dc2bee..81e484296e2 100644 --- a/src/third_party/wiredtiger/test/format/wts.c +++ b/src/third_party/wiredtiger/test/format/wts.c @@ -53,7 +53,8 @@ compressor(uint32_t compress_flag) default: break; } - testutil_die(EINVAL, "illegal compression flag: 0x%x", compress_flag); + testutil_die(EINVAL, + "illegal compression flag: %#" PRIx32, compress_flag); } /* @@ -71,7 +72,8 @@ encryptor(uint32_t encrypt_flag) default: break; } - testutil_die(EINVAL, "illegal encryption flag: 0x%x", 
encrypt_flag); + testutil_die(EINVAL, + "illegal encryption flag: %#" PRIx32, encrypt_flag); } static int @@ -313,7 +315,7 @@ wts_create(void) p += snprintf(p, REMAIN(p, end), "key_format=%s," "allocation_size=512,%s" - "internal_page_max=%d,leaf_page_max=%d", + "internal_page_max=%" PRIu32 ",leaf_page_max=%" PRIu32, (g.type == ROW) ? "u" : "r", g.c_firstfit ? "block_allocation=first," : "", maxintlpage, maxleafpage); @@ -325,15 +327,15 @@ wts_create(void) maxintlkey = mmrand(NULL, maxintlpage / 50, maxintlpage / 40); if (maxintlkey > 20) p += snprintf(p, REMAIN(p, end), - ",internal_key_max=%d", maxintlkey); + ",internal_key_max=%" PRIu32, maxintlkey); maxleafkey = mmrand(NULL, maxleafpage / 50, maxleafpage / 40); if (maxleafkey > 20) p += snprintf(p, REMAIN(p, end), - ",leaf_key_max=%d", maxleafkey); + ",leaf_key_max=%" PRIu32, maxleafkey); maxleafvalue = mmrand(NULL, maxleafpage * 10, maxleafpage / 40); if (maxleafvalue > 40 && maxleafvalue < 100 * 1024) p += snprintf(p, REMAIN(p, end), - ",leaf_value_max=%d", maxleafvalue); + ",leaf_value_max=%" PRIu32, maxleafvalue); switch (g.type) { case FIX: @@ -361,7 +363,7 @@ wts_create(void) ",huffman_value=english"); if (g.c_dictionary) p += snprintf(p, REMAIN(p, end), - ",dictionary=%d", mmrand(NULL, 123, 517)); + ",dictionary=%" PRIu32, mmrand(NULL, 123, 517)); break; } diff --git a/src/third_party/wiredtiger/test/manydbs/Makefile.am b/src/third_party/wiredtiger/test/manydbs/Makefile.am new file mode 100644 index 00000000000..53559b25243 --- /dev/null +++ b/src/third_party/wiredtiger/test/manydbs/Makefile.am @@ -0,0 +1,13 @@ +AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include \ + -I$(top_srcdir)/test/utility + +noinst_PROGRAMS = t +t_SOURCES = manydbs.c +t_LDADD = $(top_builddir)/libwiredtiger.la +t_LDFLAGS = -static + +# Run this during a "make check" smoke test. 
+TESTS = smoke.sh + +clean-local: + rm -rf WiredTiger* *.core __* diff --git a/src/third_party/wiredtiger/test/manydbs/manydbs.c b/src/third_party/wiredtiger/test/manydbs/manydbs.c new file mode 100644 index 00000000000..1d3412a7b06 --- /dev/null +++ b/src/third_party/wiredtiger/test/manydbs/manydbs.c @@ -0,0 +1,264 @@ +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <sys/wait.h> +#include <errno.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#ifndef _WIN32 +#include <unistd.h> +#endif + +#include <wiredtiger.h> + +#include "test_util.i" + +#define HOME_SIZE 512 +#define HOME_BASE "WT_HOME" +static char home[HOME_SIZE]; /* Base home directory */ +static char hometmp[HOME_SIZE]; /* Each conn home directory */ +static const char *progname; /* Program name */ +static const char * const uri = "table:main"; + +#define WTOPEN_CFG_COMMON \ + "create,log=(file_max=10M,archive=false,enabled)," \ + "statistics=(fast),statistics_log=(wait=5)," +#define WT_CONFIG0 \ + WTOPEN_CFG_COMMON \ + "transaction_sync=(enabled=false)" +#define WT_CONFIG1 \ + WTOPEN_CFG_COMMON \ + "transaction_sync=(enabled,method=none)" +#define WT_CONFIG2 \ + WTOPEN_CFG_COMMON \ + "transaction_sync=(enabled,method=fsync)" + +#define MAX_DBS 10 +#define MAX_IDLE_TIME 30 +#define IDLE_INCR 5 + +#define MAX_KV 100 +#define MAX_VAL 128 + +static void +usage(void) +{ + fprintf(stderr, + "usage: %s [-I] [-D maxdbs] [-h dir]\n", progname); + exit(EXIT_FAILURE); +} + +extern int __wt_optind; +extern char *__wt_optarg; + +void (*custom_die)(void) = NULL; + +WT_CONNECTION **connections = NULL; +WT_CURSOR **cursors = NULL; +WT_RAND_STATE rnd; +WT_SESSION **sessions = NULL; + +static int +get_stat(WT_SESSION *stat_session, int stat_field, uint64_t *valuep) +{ + WT_CURSOR *statc; + const char *desc, *pvalue; + int ret; + + testutil_check(stat_session->open_cursor(stat_session, + "statistics:", NULL, NULL, &statc)); + statc->set_key(statc, stat_field); + if ((ret = statc->search(statc)) != 0) + return (ret); + + ret = statc->get_value(statc, &desc, &pvalue, valuep); + testutil_check(statc->close(statc)); + return (ret); +} + +static int +run_ops(int dbs) +{ + WT_ITEM data; + int db_set, i, key; + uint32_t db; + uint8_t buf[MAX_VAL]; + + memset(buf, 0, sizeof(buf)); + for (i = 0; i < MAX_VAL; ++i) + buf[i] = 
(uint8_t)__wt_random(&rnd); + data.data = buf; + /* + * Write a small amount of data into a random subset of the databases. + */ + db_set = dbs / 4; + for (i = 0; i < db_set; ++i) { + db = __wt_random(&rnd) % (uint32_t)dbs; + printf("Write to database %" PRIu32 "\n", db); + for (key = 0; key < MAX_KV; ++key) { + data.size = __wt_random(&rnd) % MAX_VAL; + cursors[db]->set_key(cursors[db], key); + cursors[db]->set_value(cursors[db], &data); + testutil_check(cursors[db]->insert(cursors[db])); + } + } + return (0); +} + +int +main(int argc, char *argv[]) +{ + uint64_t cond_reset, cond_wait; + uint64_t *cond_reset_orig; + int cfg, ch, dbs, i; + bool idle; + const char *working_dir, *wt_cfg; + char cmd[128]; + + if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) + progname = argv[0]; + else + ++progname; + dbs = MAX_DBS; + working_dir = HOME_BASE; + idle = false; + while ((ch = __wt_getopt(progname, argc, argv, "D:h:I")) != EOF) + switch (ch) { + case 'D': + dbs = atoi(__wt_optarg); + break; + case 'h': + working_dir = __wt_optarg; + break; + case 'I': + idle = true; + break; + default: + usage(); + } + argc -= __wt_optind; + argv += __wt_optind; + if (argc != 0) + usage(); + + /* + * Allocate arrays for connection handles, sessions, statistics + * cursors and, if needed, data cursors. + */ + if ((connections = calloc( + (size_t)dbs, sizeof(WT_CONNECTION *))) == NULL) + testutil_die(ENOMEM, "connection array malloc"); + if ((sessions = calloc( + (size_t)dbs, sizeof(WT_SESSION *))) == NULL) + testutil_die(ENOMEM, "session array malloc"); + if ((cond_reset_orig = calloc((size_t)dbs, sizeof(uint64_t))) == NULL) + testutil_die(ENOMEM, "orig stat malloc"); + if (!idle && ((cursors = calloc( + (size_t)dbs, sizeof(WT_CURSOR *))) == NULL)) + testutil_die(ENOMEM, "cursor array malloc"); + memset(cmd, 0, sizeof(cmd)); + /* + * Set up all the directory names. 
+ */ + testutil_work_dir_from_path(home, HOME_SIZE, working_dir); + testutil_make_work_dir(home); + __wt_random_init(&rnd); + for (i = 0; i < dbs; ++i) { + snprintf(hometmp, HOME_SIZE, "%s/%s.%d", home, HOME_BASE, i); + testutil_make_work_dir(hometmp); + /* + * Open each database. Rotate different configurations + * among them. Open a session and statistics cursor. + * If writing data, create the table and open a data cursor. + */ + cfg = i % 3; + if (cfg == 0) + wt_cfg = WT_CONFIG0; + else if (cfg == 1) + wt_cfg = WT_CONFIG1; + else + wt_cfg = WT_CONFIG2; + testutil_check(wiredtiger_open( + hometmp, NULL, wt_cfg, &connections[i])); + testutil_check(connections[i]->open_session(connections[i], + NULL, NULL, &sessions[i])); + if (!idle) { + testutil_check(sessions[i]->create(sessions[i], + uri, "key_format=Q,value_format=u")); + testutil_check(sessions[i]->open_cursor(sessions[i], + uri, NULL, NULL, &cursors[i])); + } + } + + sleep(10); + + /* + * Record original reset setting. There could have been some + * activity during the creation period. + */ + for (i = 0; i < dbs; ++i) + testutil_check(get_stat(sessions[i], + WT_STAT_CONN_COND_AUTO_WAIT_RESET, &cond_reset_orig[i])); + for (i = 0; i < MAX_IDLE_TIME; i += IDLE_INCR) { + if (!idle) + testutil_check(run_ops(dbs)); + printf("Sleep %d (%d of %d)\n", IDLE_INCR, i, MAX_IDLE_TIME); + sleep(IDLE_INCR); + } + for (i = 0; i < dbs; ++i) { + testutil_check(get_stat(sessions[i], + WT_STAT_CONN_COND_AUTO_WAIT_RESET, &cond_reset)); + testutil_check(get_stat(sessions[i], + WT_STAT_CONN_COND_AUTO_WAIT, &cond_wait)); + /* + * On an idle workload there should be no resets of condition + * variables during the idle period. Even with a light + * workload, resets should not be very common. We look for 5%. 
+ */ + if (idle && cond_reset != cond_reset_orig[i]) + testutil_die(ERANGE, + "condition reset on idle connection %d of %" PRIu64, + i, cond_reset); + if (!idle && cond_reset > cond_wait / 20) + testutil_die(ERANGE, "connection %d condition reset %" + PRIu64 " exceeds 5%% of %" PRIu64, + i, cond_reset, cond_wait); + testutil_check(connections[i]->close(connections[i], NULL)); + } + + /* Cleanup allocated memory. */ + free(connections); + free(sessions); + free(cond_reset_orig); + if (!idle) + free(cursors); + + return (EXIT_SUCCESS); +} diff --git a/src/third_party/wiredtiger/test/manydbs/smoke.sh b/src/third_party/wiredtiger/test/manydbs/smoke.sh new file mode 100755 index 00000000000..c0e2976f154 --- /dev/null +++ b/src/third_party/wiredtiger/test/manydbs/smoke.sh @@ -0,0 +1,18 @@ +#!/bin/sh + +set -e + +# Smoke-test format as part of running "make check". +# Run with: +# 1. The defaults +# 2. Set idle flag to turn off operations. +# 3. More dbs. +# +echo "manydbs: default with operations turned on" +$TEST_WRAPPER ./t +echo "manydbs: totally idle databases" +$TEST_WRAPPER ./t -I +echo "manydbs: 40 databases with operations" +$TEST_WRAPPER ./t -D 40 +echo "manydbs: 40 idle databases" +$TEST_WRAPPER ./t -I -D 40 diff --git a/src/third_party/wiredtiger/test/readonly/Makefile.am b/src/third_party/wiredtiger/test/readonly/Makefile.am index 384e197a1f8..3abcd2386a1 100644 --- a/src/third_party/wiredtiger/test/readonly/Makefile.am +++ b/src/third_party/wiredtiger/test/readonly/Makefile.am @@ -10,4 +10,4 @@ t_LDFLAGS = -static TESTS = smoke.sh clean-local: - rm -rf WiredTiger* *.core __* + rm -rf WT_RD* WiredTiger* *.core __* diff --git a/src/third_party/wiredtiger/test/readonly/readonly.c b/src/third_party/wiredtiger/test/readonly/readonly.c index 100ccbf81b7..41400da2605 100644 --- a/src/third_party/wiredtiger/test/readonly/readonly.c +++ b/src/third_party/wiredtiger/test/readonly/readonly.c @@ -42,9 +42,13 @@ #define HOME_SIZE 512 static char home[HOME_SIZE]; /* 
Program working dir lock file */ -static char home_wr[HOME_SIZE]; /* Writable dir copy no lock file */ -static char home_rd[HOME_SIZE]; /* Read-only dir */ -static char home_rd2[HOME_SIZE]; /* Read-only dir no lock file */ +#define HOME_WR_SUFFIX ".WRNOLOCK" /* Writable dir copy no lock file */ +static char home_wr[HOME_SIZE + sizeof(HOME_WR_SUFFIX)]; +#define HOME_RD_SUFFIX ".RD" /* Read-only dir */ +static char home_rd[HOME_SIZE + sizeof(HOME_RD_SUFFIX)]; +#define HOME_RD2_SUFFIX ".RDNOLOCK" /* Read-only dir no lock file */ +static char home_rd2[HOME_SIZE + sizeof(HOME_RD2_SUFFIX)]; + static const char *progname; /* Program name */ static const char *saved_argv0; /* Program command */ static const char * const uri = "table:main"; @@ -87,13 +91,14 @@ run_child(const char *homedir, int op, int expect) cfg = ENV_CONFIG_RD; else cfg = ENV_CONFIG_WR; - ret = wiredtiger_open(homedir, NULL, cfg, &conn); - if (expect == EXPECT_SUCCESS && ret != 0) - testutil_die(ret, "wiredtiger_open success err"); - if (expect == EXPECT_ERR) { - if (ret == 0) + if ((ret = wiredtiger_open(homedir, NULL, cfg, &conn)) == 0) { + if (expect == EXPECT_ERR) + testutil_die( + ret, "wiredtiger_open expected error, succeeded"); + } else { + if (expect == EXPECT_SUCCESS) testutil_die( - ret, "wiredtiger_open expected err succeeded"); + ret, "wiredtiger_open expected success, error"); /* * If we expect an error and got one, we're done. */ @@ -207,17 +212,14 @@ main(int argc, char *argv[]) if (argc != 0) usage(); - memset(buf, 0, sizeof(buf)); /* * Set up all the directory names. 
*/ - testutil_work_dir_from_path(home, 512, working_dir); - strncpy(home_wr, home, HOME_SIZE); - strcat(home_wr, ".WRNOLOCK"); - strncpy(home_rd, home, HOME_SIZE); - strcat(home_rd, ".RD"); - strncpy(home_rd2, home, HOME_SIZE); - strcat(home_rd2, ".RDNOLOCK"); + testutil_work_dir_from_path(home, sizeof(home), working_dir); + (void)snprintf(home_wr, sizeof(home_wr), "%s%s", home, HOME_WR_SUFFIX); + (void)snprintf(home_rd, sizeof(home_rd), "%s%s", home, HOME_RD_SUFFIX); + (void)snprintf( + home_rd2, sizeof(home_rd2), "%s%s", home, HOME_RD2_SUFFIX); if (!child) { testutil_make_work_dir(home); testutil_make_work_dir(home_wr); @@ -260,6 +262,7 @@ main(int argc, char *argv[]) /* * Write data into the table and then cleanly shut down connection. */ + memset(buf, 0, sizeof(buf)); data.data = buf; data.size = MAX_VAL; for (i = 0; i < MAX_KV; ++i) { @@ -329,7 +332,8 @@ main(int argc, char *argv[]) * the child even though it should not be. So use 'system' to spawn * an entirely new process. */ - (void)snprintf(cmd, sizeof(cmd), "%s -R", saved_argv0); + (void)snprintf( + cmd, sizeof(cmd), "%s -h %s -R", saved_argv0, working_dir); if ((status = system(cmd)) < 0) testutil_die(status, "system"); /* @@ -341,7 +345,8 @@ main(int argc, char *argv[]) /* * Scenario 2. Run child with writable config. */ - (void)snprintf(cmd, sizeof(cmd), "%s -W", saved_argv0); + (void)snprintf( + cmd, sizeof(cmd), "%s -h %s -W", saved_argv0, working_dir); if ((status = system(cmd)) < 0) testutil_die(status, "system"); @@ -362,7 +367,8 @@ main(int argc, char *argv[]) /* * Scenario 3. Child read-only. */ - (void)snprintf(cmd, sizeof(cmd), "%s -R", saved_argv0); + (void)snprintf( + cmd, sizeof(cmd), "%s -h %s -R", saved_argv0, working_dir); if ((status = system(cmd)) < 0) testutil_die(status, "system"); if (WEXITSTATUS(status) != 0) @@ -371,7 +377,8 @@ main(int argc, char *argv[]) /* * Scenario 4. Run child with writable config. 
*/ - (void)snprintf(cmd, sizeof(cmd), "%s -W", saved_argv0); + (void)snprintf( + cmd, sizeof(cmd), "%s -h %s -W", saved_argv0, working_dir); if ((status = system(cmd)) < 0) testutil_die(status, "system"); if (WEXITSTATUS(status) != 0) diff --git a/src/third_party/wiredtiger/test/recovery/random-abort.c b/src/third_party/wiredtiger/test/recovery/random-abort.c index c9cc10d2db3..f9c3ed28814 100644 --- a/src/third_party/wiredtiger/test/recovery/random-abort.c +++ b/src/third_party/wiredtiger/test/recovery/random-abort.c @@ -249,9 +249,10 @@ main(int argc, char *argv[]) if ((ret = conn->close(conn, NULL)) != 0) testutil_die(ret, "WT_CONNECTION:close"); if (absent) { - printf("%u record(s) absent from %u\n", absent, count); + printf("%" PRIu32 " record(s) absent from %" PRIu32 "\n", + absent, count); return (EXIT_FAILURE); } - printf("%u records verified\n", count); + printf("%" PRIu32 " records verified\n", count); return (EXIT_SUCCESS); } diff --git a/src/third_party/wiredtiger/test/recovery/truncated-log.c b/src/third_party/wiredtiger/test/recovery/truncated-log.c index 23269e99d35..67fdb932c27 100644 --- a/src/third_party/wiredtiger/test/recovery/truncated-log.c +++ b/src/third_party/wiredtiger/test/recovery/truncated-log.c @@ -156,14 +156,16 @@ fill_db(void) "%" PRIu32 " %" PRIu32 "\n", save_lsn.l.offset, i - 1) == -1) testutil_die(errno, "fprintf"); - if (fclose(fp) != 0) - testutil_die(errno, "fclose"); - abort(); + break; } } first = false; } } + if (fclose(fp) != 0) + testutil_die(errno, "fclose"); + abort(); + /* NOTREACHED */ } extern int __wt_optind; @@ -243,8 +245,10 @@ main(int argc, char *argv[]) * The offset is the beginning of the last record. Truncate to * the middle of that last record (i.e. ahead of that offset). 
*/ + if (offset > UINT64_MAX - V_SIZE) + testutil_die(ERANGE, "offset"); new_offset = offset + V_SIZE; - printf("Parent: Truncate to %u\n", (uint32_t)new_offset); + printf("Parent: Truncate to %" PRIu64 "\n", new_offset); if ((ret = truncate(LOG_FILE_1, (wt_off_t)new_offset)) != 0) testutil_die(errno, "truncate"); @@ -267,9 +271,10 @@ main(int argc, char *argv[]) if ((ret = conn->close(conn, NULL)) != 0) testutil_die(ret, "WT_CONNECTION:close"); if (count > max_key) { - printf("expected %u records found %u\n", max_key, count); + printf("expected %" PRIu32 " records found %" PRIu32 "\n", + max_key, count); return (EXIT_FAILURE); } - printf("%u records verified\n", count); + printf("%" PRIu32 " records verified\n", count); return (EXIT_SUCCESS); } diff --git a/src/third_party/wiredtiger/test/suite/helper.py b/src/third_party/wiredtiger/test/suite/helper.py index 3c460e23d08..f85d708880f 100644 --- a/src/third_party/wiredtiger/test/suite/helper.py +++ b/src/third_party/wiredtiger/test/suite/helper.py @@ -107,7 +107,10 @@ def copy_wiredtiger_home(olddir, newdir, aligned=True): for fname in os.listdir(olddir): fullname = os.path.join(olddir, fname) # Skip lock file, on Windows it is locked. - if os.path.isfile(fullname) and "WiredTiger.lock" not in fullname: + # Skip temporary log files. + if os.path.isfile(fullname) and "WiredTiger.lock" not in fullname and \ + "WiredTigerTmplog" not in fullname and \ + "WiredTigerPreplog" not in fullname: # Use a dd command that does not align on a block boundary. 
if aligned: shutil.copy(fullname, newdir) @@ -196,31 +199,36 @@ def complex_populate_index_count(): # config: prefix of the session.create configuration string # rows: entries to insert def complex_populate(self, uri, config, rows): - complex_populate_type(self, uri, config, rows, '') + complex_populate_type(self, uri, config, '', rows, '') +def complex_populate_cgconfig(self, uri, config, rows): + complex_populate_type(self, uri, config, config, rows, '') def complex_populate_lsm(self, uri, config, rows): - complex_populate_type(self, uri, config, rows, 'type=lsm') -def complex_populate_type(self, uri, config, rows, type): + complex_populate_type(self, uri, config, '', rows, 'type=lsm') +def complex_populate_cgconfig_lsm(self, uri, config, rows): + complex_populate_type(self, uri, config, config, rows, 'type=lsm') +def complex_populate_type(self, uri, config, cgconfig, rows, type): self.session.create(uri, config + ',value_format=SiSS,' + 'columns=(record,column2,column3,column4,column5),' + 'colgroups=(cgroup1,cgroup2,cgroup3,cgroup4,cgroup5,cgroup6)') cgname = 'colgroup:' + uri.split(":")[1] - self.session.create(cgname + ':cgroup1', 'columns=(column2)' + ',' + type) - self.session.create(cgname + ':cgroup2', 'columns=(column3)' + ',' + type) - self.session.create(cgname + ':cgroup3', 'columns=(column4)' + ',' + type) + cgcfg = ',' + cgconfig + ',' + type + self.session.create(cgname + ':cgroup1', 'columns=(column2)' + ',' + cgcfg) + self.session.create(cgname + ':cgroup2', 'columns=(column3)' + ',' + cgcfg) + self.session.create(cgname + ':cgroup3', 'columns=(column4)' + ',' + cgcfg) self.session.create( - cgname + ':cgroup4', 'columns=(column2,column3)' + ',' + type) + cgname + ':cgroup4', 'columns=(column2,column3)' + ',' + cgcfg) self.session.create( - cgname + ':cgroup5', 'columns=(column3,column4)' + ',' + type) + cgname + ':cgroup5', 'columns=(column3,column4)' + ',' + cgcfg) self.session.create( - cgname + ':cgroup6', 'columns=(column2,column4,column5)' 
+ ',' + type) + cgname + ':cgroup6', 'columns=(column2,column4,column5)' + ',' + cgcfg) indxname = 'index:' + uri.split(":")[1] - self.session.create(indxname + ':indx1', 'columns=(column2)' + ',' + type) - self.session.create(indxname + ':indx2', 'columns=(column3)' + ',' + type) - self.session.create(indxname + ':indx3', 'columns=(column4)' + ',' + type) + self.session.create(indxname + ':indx1', 'columns=(column2)' + ',' + cgcfg) + self.session.create(indxname + ':indx2', 'columns=(column3)' + ',' + cgcfg) + self.session.create(indxname + ':indx3', 'columns=(column4)' + ',' + cgcfg) self.session.create( - indxname + ':indx4', 'columns=(column2,column4)' + ',' + type) + indxname + ':indx4', 'columns=(column2,column4)' + ',' + cgcfg) cursor = self.session.open_cursor(uri, None) for i in range(1, rows + 1): cursor[key_populate(cursor, i)] = \ @@ -228,9 +236,9 @@ def complex_populate_type(self, uri, config, rows, type): cursor.close() # add some indices after populating self.session.create( - indxname + ':indx5', 'columns=(column3,column5)' + ',' + type) + indxname + ':indx5', 'columns=(column3,column5)' + ',' + cgcfg) self.session.create( - indxname + ':indx6', 'columns=(column3,column5,column4)' + ',' + type) + indxname + ':indx6', 'columns=(column3,column5,column4)' + ',' + cgcfg) def complex_populate_colgroup_name(self, uri, i): return 'colgroup:' + uri.split(":")[1] + ':cgroup' + str(i + 1) diff --git a/src/third_party/wiredtiger/test/suite/test_bug008.py b/src/third_party/wiredtiger/test/suite/test_bug008.py index 8f0526d9cef..0243887e258 100644 --- a/src/third_party/wiredtiger/test/suite/test_bug008.py +++ b/src/third_party/wiredtiger/test/suite/test_bug008.py @@ -33,65 +33,208 @@ import wiredtiger, wttest from helper import simple_populate, key_populate, value_populate from wtscenario import check_scenarios -# Tests for invisible updates. +# Test search/search-near operations, including invisible values and keys +# past the end of the table. 
class test_bug008(wttest.WiredTigerTestCase): + uri = 'file:test_bug008' # This is a btree layer test. scenarios = check_scenarios([ - ('fix', dict(fmt='key_format=r,value_format=8t', empty=1)), - ('row', dict(fmt='key_format=S', empty=0)), - ('var', dict(fmt='key_format=r', empty=0)) + ('fix', dict(fmt='key_format=r,value_format=8t', empty=1, colvar=0)), + ('row', dict(fmt='key_format=S', empty=0, colvar=0)), + ('var', dict(fmt='key_format=r', empty=0, colvar=1)) ]) + # Verify cursor search and search-near operations in an empty table. + def test_search_empty(self): + # Create the object and open a cursor. + self.session.create(self.uri, self.fmt) + cursor = self.session.open_cursor(self.uri, None) + + # Search for a record past the end of the table, which should fail. + cursor.set_key(key_populate(cursor, 100)) + self.assertEqual(cursor.search(), wiredtiger.WT_NOTFOUND) + + # Search-near for a record past the end of the table, which should fail. + cursor.set_key(key_populate(cursor, 100)) + self.assertEqual(cursor.search_near(), wiredtiger.WT_NOTFOUND) + + # Verify cursor search and search-near operations at and past the end of + # a file, with a set of on-page visible records. + def test_search_eot(self): + # Populate the tree and reopen the connection, forcing it to disk + # and moving the records to an on-page format. + simple_populate(self, self.uri, self.fmt, 100) + self.reopen_conn() + + # Open a cursor. + cursor = self.session.open_cursor(self.uri, None) + + # Search for a record at the end of the table, which should succeed. + cursor.set_key(key_populate(cursor, 100)) + self.assertEqual(cursor.search(), 0) + self.assertEqual(cursor.get_key(), key_populate(cursor, 100)) + self.assertEqual(cursor.get_value(), value_populate(cursor, 100)) + + # Search-near for a record at the end of the table, which should + # succeed, returning the last record. 
+ cursor.set_key(key_populate(cursor, 100)) + self.assertEqual(cursor.search_near(), 0) + self.assertEqual(cursor.get_key(), key_populate(cursor, 100)) + self.assertEqual(cursor.get_value(), value_populate(cursor, 100)) + + # Search for a record past the end of the table, which should fail. + cursor.set_key(key_populate(cursor, 200)) + self.assertEqual(cursor.search(), wiredtiger.WT_NOTFOUND) + + # Search-near for a record past the end of the table, which should + # succeed, returning the last record. + cursor.set_key(key_populate(cursor, 200)) + self.assertEqual(cursor.search_near(), -1) + self.assertEqual(cursor.get_key(), key_populate(cursor, 100)) + self.assertEqual(cursor.get_value(), value_populate(cursor, 100)) + + # Verify cursor search-near operations before and after a set of + # column-store duplicates. + def test_search_duplicate(self): + if self.colvar == 0: + return + + # Populate the tree. + simple_populate(self, self.uri, self.fmt, 105) + + # Set up deleted records before and after a set of duplicate records, + # and make sure search/search-near returns the correct record. + cursor = self.session.open_cursor(self.uri, None) + for i in range(20, 100): + cursor[key_populate(cursor, i)] = '=== IDENTICAL VALUE ===' + for i in range(15, 25): + cursor.set_key(key_populate(cursor, i)) + self.assertEqual(cursor.remove(), 0) + for i in range(95, 106): + cursor.set_key(key_populate(cursor, i)) + self.assertEqual(cursor.remove(), 0) + cursor.close() + + # Reopen the connection, forcing it to disk and moving the records to + # an on-page format. + self.reopen_conn() + + # Open a cursor. + cursor = self.session.open_cursor(self.uri, None) + + # Search-near for a record in the deleted set before the duplicate set, + # which should succeed, returning the first record in the duplicate set. 
+ cursor.set_key(key_populate(cursor, 18)) + self.assertEqual(cursor.search_near(), 1) + self.assertEqual(cursor.get_key(), key_populate(cursor, 25)) + + # Search-near for a record in the deleted set after the duplicate set, + # which should succeed, returning the last record in the duplicate set. + cursor.set_key(key_populate(cursor, 98)) + self.assertEqual(cursor.search_near(), -1) + self.assertEqual(cursor.get_key(), key_populate(cursor, 94)) + # Verify cursor search and search-near operations on a file with a set of # on-page visible records, and a set of insert-list invisible records. def test_search_invisible_one(self): - uri = 'file:test_bug008' # This is a btree layer test. + # Populate the tree. + simple_populate(self, self.uri, self.fmt, 100) - # Populate the tree and reopen the connection, forcing it to disk - # and moving the records to an on-page format. - simple_populate(self, uri, self.fmt, 100) + # Delete a range of records. + for i in range(5, 10): + cursor = self.session.open_cursor(self.uri, None) + cursor.set_key(key_populate(cursor, i)) + self.assertEqual(cursor.remove(), 0) + + # Reopen the connection, forcing it to disk and moving the records to + # an on-page format. self.reopen_conn() - # Begin a transaction, and add some additional records. + # Add updates to the existing records (in both the deleted an undeleted + # range), as well as some new records after the end. Put the updates in + # a separate transaction so they're invisible to another cursor. 
self.session.begin_transaction() - cursor = self.session.open_cursor(uri, None) + cursor = self.session.open_cursor(self.uri, None) + for i in range(5, 10): + cursor[key_populate(cursor, i)] = value_populate(cursor, i + 1000) + for i in range(30, 40): + cursor[key_populate(cursor, i)] = value_populate(cursor, i + 1000) for i in range(100, 140): - cursor[key_populate(cursor, i)] = value_populate(cursor, i) + cursor[key_populate(cursor, i)] = value_populate(cursor, i + 1000) # Open a separate session and cursor. s = self.conn.open_session() - cursor = s.open_cursor(uri, None) + cursor = s.open_cursor(self.uri, None) - # Search for an invisible record. - cursor.set_key(key_populate(cursor, 130)) - if self.empty: - # Invisible updates to fixed-length column-store objects are - # invisible to the reader, but the fact that they exist past - # the end of the initial records causes the instantiation of - # empty records: confirm successful return of an empty row. - cursor.search() - self.assertEqual(cursor.get_key(), 130) - self.assertEqual(cursor.get_value(), 0) - else: - # Otherwise, we should not find any matching records. - self.assertEqual(cursor.search(), wiredtiger.WT_NOTFOUND) + # Search for an existing record in the deleted range, should not find + # it. + for i in range(5, 10): + cursor.set_key(key_populate(cursor, i)) + if self.empty: + # Fixed-length column-store rows always exist. + self.assertEqual(cursor.search(), 0) + self.assertEqual(cursor.get_key(), i) + self.assertEqual(cursor.get_value(), 0) + else: + self.assertEqual(cursor.search(), wiredtiger.WT_NOTFOUND) - # Search-near for an invisible record, which should succeed, returning - # the last visible record. 
- cursor.set_key(key_populate(cursor, 130)) - cursor.search_near() - if self.empty: - # Invisible updates to fixed-length column-store objects are - # invisible to the reader, but the fact that they exist past - # the end of the initial records causes the instantiation of - # empty records: confirm successful return of an empty row. - cursor.search() - self.assertEqual(cursor.get_key(), 130) - self.assertEqual(cursor.get_value(), 0) - else: - # Otherwise, we should find the closest record for which we can see - # the value. - self.assertEqual(cursor.get_key(), key_populate(cursor, 100)) - self.assertEqual(cursor.get_value(), value_populate(cursor, 100)) + # Search for an existing record in the updated range, should see the + # original value. + for i in range(30, 40): + cursor.set_key(key_populate(cursor, i)) + self.assertEqual(cursor.search(), 0) + self.assertEqual(cursor.get_key(), key_populate(cursor, i)) + + # Search for a added record, should not find it. + for i in range(120, 130): + cursor.set_key(key_populate(cursor, i)) + if self.empty: + # Invisible updates to fixed-length column-store objects are + # invisible to the reader, but the fact that they exist past + # the end of the initial records causes the instantiation of + # empty records: confirm successful return of an empty row. + self.assertEqual(cursor.search(), 0) + self.assertEqual(cursor.get_key(), i) + self.assertEqual(cursor.get_value(), 0) + else: + # Otherwise, we should not find any matching records. + self.assertEqual(cursor.search(), wiredtiger.WT_NOTFOUND) + + # Search-near for an existing record in the deleted range, should find + # the next largest record. (This depends on the implementation behavior + # which currently includes a bias to prefix search.) + for i in range(5, 10): + cursor.set_key(key_populate(cursor, i)) + if self.empty: + # Fixed-length column-store rows always exist. 
+ self.assertEqual(cursor.search_near(), 0) + self.assertEqual(cursor.get_key(), i) + self.assertEqual(cursor.get_value(), 0) + else: + self.assertEqual(cursor.search_near(), 1) + self.assertEqual(cursor.get_key(), key_populate(cursor, 10)) + + # Search-near for an existing record in the updated range, should see + # the original value. + for i in range(30, 40): + cursor.set_key(key_populate(cursor, i)) + self.assertEqual(cursor.search_near(), 0) + self.assertEqual(cursor.get_key(), key_populate(cursor, i)) + + # Search-near for an added record, should find the previous largest + # record. + for i in range(120, 130): + cursor.set_key(key_populate(cursor, i)) + if self.empty: + # Invisible updates to fixed-length column-store objects are + # invisible to the reader, but the fact that they exist past + # the end of the initial records causes the instantiation of + # empty records: confirm successful return of an empty row. + self.assertEqual(cursor.search_near(), 0) + self.assertEqual(cursor.get_key(), i) + self.assertEqual(cursor.get_value(), 0) + else: + self.assertEqual(cursor.search_near(), -1) + self.assertEqual(cursor.get_key(), key_populate(cursor, 100)) # Verify cursor search and search-near operations on a file with a set of # on-page visible records, a set of insert-list visible records, and a set @@ -101,28 +244,26 @@ class test_bug008(wttest.WiredTigerTestCase): # fallback happens, whether the correct position is in the page slots or # the insert list.) def test_search_invisible_two(self): - uri = 'file:test_bug008' # This is a btree layer test. - # Populate the tree and reopen the connection, forcing it to disk # and moving the records to an on-page format. - simple_populate(self, uri, self.fmt, 100) + simple_populate(self, self.uri, self.fmt, 100) self.reopen_conn() # Add some additional visible records. 
- cursor = self.session.open_cursor(uri, None) + cursor = self.session.open_cursor(self.uri, None) for i in range(100, 120): cursor[key_populate(cursor, i)] = value_populate(cursor, i) cursor.close() # Begin a transaction, and add some additional records. self.session.begin_transaction() - cursor = self.session.open_cursor(uri, None) + cursor = self.session.open_cursor(self.uri, None) for i in range(120, 140): cursor[key_populate(cursor, i)] = value_populate(cursor, i) # Open a separate session and cursor. s = self.conn.open_session() - cursor = s.open_cursor(uri, None) + cursor = s.open_cursor(self.uri, None) # Search for an invisible record. cursor.set_key(key_populate(cursor, 130)) diff --git a/src/third_party/wiredtiger/test/suite/test_checkpoint01.py b/src/third_party/wiredtiger/test/suite/test_checkpoint01.py index 9955944f73d..6e1ad7814ed 100644 --- a/src/third_party/wiredtiger/test/suite/test_checkpoint01.py +++ b/src/third_party/wiredtiger/test/suite/test_checkpoint01.py @@ -185,7 +185,7 @@ class test_checkpoint_cursor(wttest.WiredTigerTestCase): # Check dropping all checkpoints fails. msg = '/checkpoints cannot be dropped/' self.assertRaisesWithMessage(wiredtiger.WiredTigerError, - lambda: self.session.checkpoint("name=checkpoint-2"), msg) + lambda: self.session.checkpoint("force,name=checkpoint-2"), msg) self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: self.session.checkpoint("drop=(checkpoint-2)"), msg) self.assertRaisesWithMessage(wiredtiger.WiredTigerError, diff --git a/src/third_party/wiredtiger/test/suite/test_collator.py b/src/third_party/wiredtiger/test/suite/test_collator.py new file mode 100644 index 00000000000..34b5c20247f --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_collator.py @@ -0,0 +1,161 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. 
+# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +import os +import wiredtiger, wttest, run +from wtscenario import check_scenarios, number_scenarios + +# test_collator.py +# Test indices using a custom extractor and collator. +class test_collator(wttest.WiredTigerTestCase): + """ + Test indices with a custom extractor to create an index, + with our own collator. + Our set of rows looks like a multiplication table: + row '0': '0,0,0,0' + row '1': '0,1,2,3' + row '2': '0,2,4,6' + with the twist that entries are mod 100. So, looking further: + row '40': '0,40,80,20' + + Each column is placed into its own index. Our collator reverses + the values. + """ + nentries = 100 + nindices = 4 + + # Return the wiredtiger_open extension argument for a shared library. 
+ def extensionArg(self, exts): + extfiles = [] + for ext in exts: + (dirname, name, libname) = ext + if name != None and name != 'none': + testdir = os.path.dirname(__file__) + extdir = os.path.join(run.wt_builddir, 'ext', dirname) + extfile = os.path.join( + extdir, name, '.libs', 'libwiredtiger_' + libname + '.so') + if not os.path.exists(extfile): + self.skipTest('extension "' + extfile + '" not built') + if not extfile in extfiles: + extfiles.append(extfile) + if len(extfiles) == 0: + return '' + else: + return ',extensions=["' + '","'.join(extfiles) + '"]' + + # Override WiredTigerTestCase, we have extensions. + def setUpConnectionOpen(self, dir): + extarg = self.extensionArg([('extractors', 'csv', 'csv_extractor'), + ('collators', 'revint', 'revint_collator')]) + connarg = 'create,error_prefix="{0}: ",{1}'.format( + self.shortid(), extarg) + conn = self.wiredtiger_open(dir, connarg) + self.pr(`conn`) + return conn + + def create_indices(self): + # Create self.nindices index files, each with a column from the CSV + for i in range(0, self.nindices): + si = str(i) + self.session.create('index:collator:x' + si, + 'key_format=i,columns=(key),' + + 'collator=revint,' + + 'extractor=csv,app_metadata={"format" : "i",' + + '"field" : "' + si + '"}') + + def drop_indices(self): + for i in range(0, self.nindices): + self.session.drop("index:collator:x" + str(i)) + + def csv(self, s, i): + return s.split(',')[i] + + def expected_main_value(self, i): + return ','.join([str((i*j)%100) for j in range(0, self.nindices)]) + + # We split the population into two phases + # (in anticipation of future tests that create + # indices between the two population steps). 
+ def populate(self): + cursor = self.session.open_cursor('table:collator', None, None) + for i in range(0, self.nentries): + cursor[i] = self.expected_main_value(i) + cursor.close() + + def check_entries(self): + cursor = self.session.open_cursor('table:collator', None, None) + icursor = [] + for i in range(0, self.nindices): + icursor.append(self.session.open_cursor('index:collator:x' + str(i), + None, None)) + i = 0 + for primkey, value in cursor: + # Check main table + expect = self.expected_main_value(i) + self.assertEqual(i, primkey) + self.assertEqual(value, expect) + for idx in range(0, self.nindices): + c = icursor[idx] + indexkey = (i*idx)%100 + c.set_key(indexkey) + self.assertEqual(c.search(), 0) + value = c.get_value() + key = c.get_key() + while value != expect and key == indexkey and \ + self.csv(value, idx) == self.csv(expect, idx): + self.assertEqual(0, c.next()) + value = c.get_value() + key = c.get_key() + self.assertEqual(value, expect) + i += 1 + self.assertEqual(self.nentries, i) + for i in range(0, self.nindices): + c = icursor[i] + c.reset() + expected = set(range(0, self.nentries)) + for key, val in c: + primkey = int(val.split(',')[1]) + expected.remove(primkey) + self.assertEquals(0, len(expected)) + c.close() + + def test_index(self): + self.session.create("table:collator", "key_format=i,value_format=S," + "columns=(primarykey,value)") + self.create_indices() + self.populate() + self.check_entries() + + # Drop and recreate all indices, everything should be there. 
+ self.drop_indices() + self.create_indices() + self.check_entries() + + +if __name__ == '__main__': + wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_drop.py b/src/third_party/wiredtiger/test/suite/test_drop.py index 5663b85d661..52ea7251ab5 100644 --- a/src/third_party/wiredtiger/test/suite/test_drop.py +++ b/src/third_party/wiredtiger/test/suite/test_drop.py @@ -41,12 +41,11 @@ class test_drop(wttest.WiredTigerTestCase): scenarios = check_scenarios([ ('file', dict(uri='file:')), ('table', dict(uri='table:')), - #Not yet: drop failing with an open cursor needs handle locking - #('table-lsm', dict(uri='table:', extra_config=',type=lsm')), + ('table-lsm', dict(uri='table:', extra_config=',type=lsm')), ]) # Populate an object, remove it and confirm it no longer exists. - def drop(self, populate, with_cursor, close_session, drop_index): + def drop(self, populate, with_cursor, reopen, drop_index): uri = self.uri + self.name populate(self, uri, 'key_format=S' + self.extra_config, 10) @@ -57,7 +56,7 @@ class test_drop(wttest.WiredTigerTestCase): lambda: self.session.drop(uri, None)) cursor.close() - if close_session: + if reopen: self.reopen_conn() if drop_index: @@ -73,17 +72,17 @@ class test_drop(wttest.WiredTigerTestCase): # Try all combinations except dropping the index, the simple # case has no indices. for with_cursor in [False, True]: - for close_session in [False, True]: - self.drop(simple_populate, with_cursor, close_session, False) + for reopen in [False, True]: + self.drop(simple_populate, with_cursor, reopen, False) # A complex, multi-file table object. # Try all test combinations. if self.uri == "table:": for with_cursor in [False, True]: - for close_session in [False, True]: + for reopen in [False, True]: for drop_index in [False, True]: self.drop(complex_populate, with_cursor, - close_session, drop_index) + reopen, drop_index) # Test drop of a non-existent object: force succeeds, without force fails. 
def test_drop_dne(self): diff --git a/src/third_party/wiredtiger/test/suite/test_drop02.py b/src/third_party/wiredtiger/test/suite/test_drop02.py new file mode 100644 index 00000000000..677ba3866b2 --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_drop02.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +import wiredtiger, wttest +from helper import simple_populate + +# test_drop02.py +# Test dropping an LSM tree on first open. There was a bug where this +# would cause an assertion failure: WT-2501 +class test_drop02(wttest.WiredTigerTestCase): + name = 'test_drop02' + + # Populate an object, remove it and confirm it no longer exists. 
+ def test_drop(self): + uri = 'lsm:' + self.name + simple_populate(self, uri, 'key_format=S', 100000) + self.reopen_conn() + + self.session.drop(uri, None) + +if __name__ == '__main__': + wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_dump.py b/src/third_party/wiredtiger/test/suite/test_dump.py index c850d1b5d3f..fc1422155e2 100644 --- a/src/third_party/wiredtiger/test/suite/test_dump.py +++ b/src/third_party/wiredtiger/test/suite/test_dump.py @@ -29,8 +29,8 @@ import os import wiredtiger, wttest from helper import \ - complex_populate, complex_populate_check_cursor,\ - simple_populate, simple_populate_check_cursor + complex_populate, complex_populate_check, \ + simple_populate, simple_populate_check from suite_subprocess import suite_subprocess from wtscenario import multiply_scenarios, number_scenarios @@ -54,15 +54,24 @@ class test_dump(wttest.WiredTigerTestCase, suite_subprocess): ('string', dict(keyfmt='S')) ] types = [ - ('file', dict(type='file:', + ('file', dict(uri='file:', config='', lsm=False, populate=simple_populate, - populate_check=simple_populate_check_cursor)), - ('table-simple', dict(type='table:', + populate_check=simple_populate_check)), + ('lsm', dict(uri='lsm:', config='', lsm=True, populate=simple_populate, - populate_check=simple_populate_check_cursor)), - ('table-complex', dict(type='table:', + populate_check=simple_populate_check)), + ('table-simple', dict(uri='table:', config='', lsm=False, + populate=simple_populate, + populate_check=simple_populate_check)), + ('table-simple-lsm', dict(uri='table:', config='type=lsm', lsm=True, + populate=simple_populate, + populate_check=simple_populate_check)), + ('table-complex', dict(uri='table:', config='', lsm=False, + populate=complex_populate, + populate_check=complex_populate_check)), + ('table-complex-lsm', dict(uri='table:', config='type=lsm', lsm=True, populate=complex_populate, - populate_check=complex_populate_check_cursor)) + populate_check=complex_populate_check)) ] 
scenarios = number_scenarios( multiply_scenarios('.', types, keyfmt, dumpfmt)) @@ -94,9 +103,14 @@ class test_dump(wttest.WiredTigerTestCase, suite_subprocess): # Dump, re-load and do a content comparison. def test_dump(self): + # LSM and column-store isn't a valid combination. + if self.lsm and self.keyfmt == 'r': + return + # Create the object. - uri = self.type + self.name - self.populate(self, uri, 'key_format=' + self.keyfmt, self.nentries) + uri = self.uri + self.name + self.populate(self, uri, + self.config + ',key_format=' + self.keyfmt, self.nentries) # Dump the object. os.mkdir(self.dir) @@ -108,11 +122,17 @@ class test_dump(wttest.WiredTigerTestCase, suite_subprocess): # Re-load the object. self.runWt(['-h', self.dir, 'load', '-f', 'dump.out']) - # Check the contents + # Check the database contents + self.runWt(['list'], outfilename='list.out') + self.runWt(['-h', self.dir, 'list'], outfilename='list.out.new') + s1 = set(open('list.out').read().split()) + s2 = set(open('list.out.new').read().split()) + self.assertEqual(not s1.symmetric_difference(s2), True) + + # Check the object's contents conn = self.wiredtiger_open(self.dir) session = conn.open_session() - cursor = session.open_cursor(uri, None, None) - self.populate_check(self, cursor, self.nentries) + self.populate_check(self, uri, self.nentries) conn.close() # Re-load the object again. @@ -121,8 +141,7 @@ class test_dump(wttest.WiredTigerTestCase, suite_subprocess): # Check the contents, they shouldn't have changed. conn = self.wiredtiger_open(self.dir) session = conn.open_session() - cursor = session.open_cursor(uri, None, None) - self.populate_check(self, cursor, self.nentries) + self.populate_check(self, uri, self.nentries) conn.close() # Re-load the object again, but confirm -n (no overwrite) fails. 
@@ -130,7 +149,7 @@ class test_dump(wttest.WiredTigerTestCase, suite_subprocess): 'load', '-n', '-f', 'dump.out'], errfilename='errfile.out') self.check_non_empty_file('errfile.out') - # If there is are indices, dump one of them and check the output. + # If there are indices, dump one of them and check the output. if self.populate == complex_populate: indexuri = 'index:' + self.name + ':indx1' hexopt = ['-x'] if self.hex == 1 else [] diff --git a/src/third_party/wiredtiger/test/suite/test_join01.py b/src/third_party/wiredtiger/test/suite/test_join01.py index 539a3a3ae57..4aa2bc6e269 100644 --- a/src/third_party/wiredtiger/test/suite/test_join01.py +++ b/src/third_party/wiredtiger/test/suite/test_join01.py @@ -74,8 +74,18 @@ class test_join01(wttest.WiredTigerTestCase): # the join cursor and iterating again. def stats(self, jc, which): statcur = self.session.open_cursor('statistics:join', jc, None) - self.check_stats(statcur, 0, 'join: index:join01:index1: ' + - 'bloom filter false positives') + # pick a stat we always expect to see + statdesc = 'bloom filter false positives' + expectstats = [ + 'join: index:join01:index1: ' + statdesc, + 'join: index:join01:index2: ' + statdesc ] + if self.ref == 'index': + expectstats.append('join: index:join01:index0: ' + statdesc) + else: + expectstats.append('join: table:join01: ' + statdesc) + self.check_stats(statcur, expectstats) + statcur.reset() + self.check_stats(statcur, expectstats) statcur.close() def statstr_to_int(self, str): @@ -86,16 +96,14 @@ class test_join01(wttest.WiredTigerTestCase): parts = str.rpartition('(') return int(parts[2].rstrip(')')) - # string should appear with a minimum value of least "min". - def check_stats(self, statcursor, min, lookfor): + # All of the expect strings should appear + def check_stats(self, statcursor, expectstats): stringclass = ''.__class__ intclass = (0).__class__ # Reset the cursor, we're called multiple times. 
statcursor.reset() - found = False - foundval = 0 self.printVerbose(3, 'statistics:') for id, desc, valstr, val in statcursor: self.assertEqual(type(desc), stringclass) @@ -104,12 +112,11 @@ class test_join01(wttest.WiredTigerTestCase): self.assertEqual(val, self.statstr_to_int(valstr)) self.printVerbose(3, ' stat: \'' + desc + '\', \'' + valstr + '\', ' + str(val)) - if desc == lookfor: - found = True - foundval = val + if desc in expectstats: + expectstats.remove(desc) - self.assertTrue(found, 'in stats, did not see: ' + lookfor) - self.assertTrue(foundval >= min) + self.assertTrue(len(expectstats) == 0, + 'missing expected values in stats: ' + str(expectstats)) # Common function for testing the most basic functionality # of joins @@ -141,7 +148,8 @@ class test_join01(wttest.WiredTigerTestCase): # and examine primary keys 2,5,8,...,95,98,1,4,7,...,94,97. jc = self.session.open_cursor('join:table:join01' + proj_suffix, None, None) - c2 = self.session.open_cursor('index:join01:index2', None, None) + # Adding a projection to a reference cursor should be allowed. + c2 = self.session.open_cursor('index:join01:index2(v1)', None, None) c2.set_key(99) # skips all entries w/ primary key divisible by three self.assertEquals(0, c2.search()) self.session.join(jc, c2, 'compare=gt') @@ -159,12 +167,12 @@ class test_join01(wttest.WiredTigerTestCase): # Then select all numbers whose reverse string representation # is in '20' < x < '40'. 
- c1a = self.session.open_cursor('index:join01:index1', None, None) + c1a = self.session.open_cursor('index:join01:index1(v1)', None, None) c1a.set_key('21') self.assertEquals(0, c1a.search()) self.session.join(jc, c1a, 'compare=gt' + joincfg1) - c1b = self.session.open_cursor('index:join01:index1', None, None) + c1b = self.session.open_cursor('index:join01:index1(v1)', None, None) c1b.set_key('41') self.assertEquals(0, c1b.search()) self.session.join(jc, c1b, 'compare=lt' + joincfg1) diff --git a/src/third_party/wiredtiger/test/suite/test_join02.py b/src/third_party/wiredtiger/test/suite/test_join02.py index d122de8a0eb..a691c499cf6 100644 --- a/src/third_party/wiredtiger/test/suite/test_join02.py +++ b/src/third_party/wiredtiger/test/suite/test_join02.py @@ -179,15 +179,16 @@ class test_join02(wttest.WiredTigerTestCase): c.close() # Use the primary table in one of the joins. + # Use various projections, which should not matter for ref cursors c0a = self.session.open_cursor('table:join02', None, None) - c0b = self.session.open_cursor('table:join02', None, None) - c1a = self.session.open_cursor('index:join02:index1', None, None) + c0b = self.session.open_cursor('table:join02(v4)', None, None) + c1a = self.session.open_cursor('index:join02:index1(v0)', None, None) c1b = self.session.open_cursor('index:join02:index1', None, None) c2a = self.session.open_cursor('index:join02:index2', None, None) c2b = self.session.open_cursor('index:join02:index2', None, None) - c3a = self.session.open_cursor('index:join02:index3', None, None) - c3b = self.session.open_cursor('index:join02:index3', None, None) - c4a = self.session.open_cursor('index:join02:index4', None, None) + c3a = self.session.open_cursor('index:join02:index3(v4)', None, None) + c3b = self.session.open_cursor('index:join02:index3(v0)', None, None) + c4a = self.session.open_cursor('index:join02:index4(v1)', None, None) # Attach extra properties to each cursor. 
For cursors that # may appear on the 'left' side of a range CA < x < CB, diff --git a/src/third_party/wiredtiger/test/suite/test_join05.py b/src/third_party/wiredtiger/test/suite/test_join05.py new file mode 100644 index 00000000000..ef2be4c6460 --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_join05.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
+ +import wiredtiger, wttest +from wtscenario import check_scenarios, multiply_scenarios, number_scenarios + +# test_join05.py +# Tests based on JIRA reports +class test_join05(wttest.WiredTigerTestCase): + + # test join having the first index just be lt/le + def test_wt_2384(self): + self.session.create("table:test_2384", + "key_format=i,value_format=i,columns=(k,v)") + self.session.create("index:test_2384:index", "columns=(v)") + cursor = self.session.open_cursor("table:test_2384", None, None) + cursor[1] = 11 + cursor[2] = 12 + cursor[3] = 13 + cursor.close() + + cursor = self.session.open_cursor("index:test_2384:index", None, None) + cursor.set_key(13) + self.assertEquals(cursor.search(), 0) + + jcursor = self.session.open_cursor("join:table:test_2384", None, None) + self.session.join(jcursor, cursor, "compare=lt") + + nr_found = 0 + while jcursor.next() == 0: + [k] = jcursor.get_keys() + [v] = jcursor.get_values() + #self.tty("jcursor: k=" + str(k) + ", v=" + str(v)) + nr_found += 1 + + self.assertEquals(nr_found, 2) + jcursor.close() + cursor.close() + +if __name__ == '__main__': + wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_join06.py b/src/third_party/wiredtiger/test/suite/test_join06.py new file mode 100644 index 00000000000..9af6f93792f --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_join06.py @@ -0,0 +1,158 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. 
We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +import os +import wiredtiger, wttest, run +from wtscenario import check_scenarios, multiply_scenarios, number_scenarios + +# test_join06.py +# Join operations +# Joins with a read-uncommitted +class test_join06(wttest.WiredTigerTestCase): + nentries = 1000 + + isoscen = [ + ('isolation_read_uncommitted', dict(uncommitted=True)), + ('isolation_default', dict(uncommitted=False)) + ] + + bloomscen = [ + ('bloom', dict(bloom=True)), + ('nobloom', dict(bloom=False)) + ] + + scenarios = number_scenarios(multiply_scenarios('.', isoscen, bloomscen)) + + def gen_values(self, i): + s = str(i) # 345 => "345" + f = s[0:1] + s[0:1] + s[0:1] # 345 => "333" + return [s, f] + + def gen_values2(self, i): + s = str(i) # 345 => "345" + l = s[-1:] + s[-1:] + s[-1:] # 345 => "555" + return [s, l] + + def populate(self, s, gen_values): + c = s.open_cursor('table:join06', None, None) + for i in range(0, self.nentries): + c.set_key(i) + c.set_value(*gen_values(i)) + c.insert() + c.close() + + # Common function for testing the most basic functionality + # of joins + def test_join(self): + self.session.create('table:join06', + 'columns=(k,v0,v1),key_format=i,value_format=SS') + self.session.create('index:join06:index0','columns=(v0)') + 
self.session.create('index:join06:index1','columns=(v1)') + + self.populate(self.session, self.gen_values) + + # TODO: needed? + #self.reopen_conn() + + if self.uncommitted: + self.session.begin_transaction('isolation=read-uncommitted') + + jc = self.session.open_cursor('join:table:join06', None, None) + c0 = self.session.open_cursor('index:join06:index0', None, None) + c0.set_key('520') + self.assertEquals(0, c0.search()) + self.session.join(jc, c0, 'compare=ge') + + joinconfig = 'compare=eq' + if self.bloom: + joinconfig += ',strategy=bloom,count=1000' + c1 = self.session.open_cursor('index:join06:index1', None, None) + c1.set_key('555') + self.assertEquals(0, c1.search()) + self.session.join(jc, c1, joinconfig) + + if self.uncommitted and self.bloom: + # Make sure that read-uncommitted with Bloom is not allowed. + # This is detected on the first next() operation. + msg = '/cannot be used with read-uncommitted/' + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: jc.next(), msg) + return + + # Changes made in another session may or may not be visible to us, + # depending on the isolation level. + if self.uncommitted: + # isolation level is read-uncommitted, so we will see + # additions deletions made in our other session. + mbr = set(range(525,1000,10)) | set(range(55,100,10)) | set([520]) + else: + # default isolation level, so we should see a consistent + # set at the time we begin iteration. 
+ mbr = set(range(520,600)) | set(range(53,60)) + + altered = False + + while jc.next() == 0: + [k] = jc.get_keys() + [v0,v1] = jc.get_values() + #self.tty('GOT: ' + str(k) + ': ' + str(jc.get_values())) + if altered and self.uncommitted: + self.assertEquals(self.gen_values2(k), [v0, v1]) + else: + self.assertEquals(self.gen_values(k), [v0, v1]) + if not k in mbr: + self.tty('**** ERROR: result ' + str(k) + ' is not in: ' + + str(mbr)) + self.assertTrue(k in mbr) + mbr.remove(k) + + # In another session, we remove entries for keys ending in 6, + # and add entries for keys ending in 5. Depending on the + # isolation level for the transaction, these changes may or + # may not be visible for the original session. + if not altered: + s = self.conn.open_session(None) + s.begin_transaction(None) + self.populate(s, self.gen_values2) + s.commit_transaction() + s.close() + altered = True + + if len(mbr) != 0: + self.tty('**** ERROR: did not see these: ' + str(mbr)) + self.assertEquals(0, len(mbr)) + + jc.close() + c1.close() + c0.close() + if self.uncommitted: + self.session.commit_transaction() + self.session.drop('table:join06') + +if __name__ == '__main__': + wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_lsm03.py b/src/third_party/wiredtiger/test/suite/test_lsm03.py new file mode 100644 index 00000000000..448d864c646 --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_lsm03.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. 
+# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +import wiredtiger, wtscenario, wttest +from helper import simple_populate + +# test_lsm03.py +# Check to make sure that LSM schema operations don't get EBUSY when +# there are no user operations active. +class test_lsm03(wttest.WiredTigerTestCase): + name = 'test_lsm03' + + # Use small pages so we generate some internal layout + # Setup LSM so multiple chunks are present + config = 'key_format=S,allocation_size=512,internal_page_max=512' + \ + ',leaf_page_max=1k,lsm=(chunk_size=512k,merge_min=10)' + + # Populate an object then drop it. + def test_lsm_drop_active(self): + uri = 'lsm:' + self.name + simple_populate(self, uri, self.config, 10000) + + # Force to disk + self.reopen_conn() + + # An open cursors should cause failure. 
+ cursor = self.session.open_cursor(uri, None, None) + self.assertRaises(wiredtiger.WiredTigerError, + lambda: self.session.drop(uri, None)) + cursor.close() + + # Add enough records that a merge should be running + simple_populate(self, uri, self.config, 50000) + # The drop should succeed even when LSM work units are active + self.session.drop(uri) diff --git a/src/third_party/wiredtiger/test/suite/test_rebalance.py b/src/third_party/wiredtiger/test/suite/test_rebalance.py index 80cce6ed514..f2167e864c9 100644 --- a/src/third_party/wiredtiger/test/suite/test_rebalance.py +++ b/src/third_party/wiredtiger/test/suite/test_rebalance.py @@ -59,7 +59,7 @@ class test_rebalance(wttest.WiredTigerTestCase): if with_cursor: cursor = self.session.open_cursor(uri, None, None) self.assertRaises(wiredtiger.WiredTigerError, - lambda: self.session.drop(uri, None)) + lambda: self.session.rebalance(uri, None)) cursor.close() self.session.rebalance(uri, None) diff --git a/src/third_party/wiredtiger/test/suite/test_schema07.py b/src/third_party/wiredtiger/test/suite/test_schema07.py new file mode 100644 index 00000000000..ac397c6e1a1 --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_schema07.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. 
We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +import wiredtiger, wttest + +# test_schema07.py +# Test that long-running tests don't fill the cache with metadata +class test_schema07(wttest.WiredTigerTestCase): + tablename = 'table:test_schema07' + + def conn_config(self, dir): + return 'cache_size=10MB' + + @wttest.longtest("Creating many tables shouldn't fill the cache") + def test_many_tables(self): + s = self.session + # We have a 10MB cache, metadata is (well) over 512B per table, + # if we can create 20K tables, something must be cleaning up. 
+ for i in xrange(20000): + uri = '%s-%06d' % (self.tablename, i) + s.create(uri) + c = s.open_cursor(uri) + # This will block if the metadata fills the cache + c["key"] = "value" + c.close() + self.session.drop(uri) + +if __name__ == '__main__': + wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_sweep01.py b/src/third_party/wiredtiger/test/suite/test_sweep01.py index f996dbfa06d..bccd2bce012 100644 --- a/src/third_party/wiredtiger/test/suite/test_sweep01.py +++ b/src/third_party/wiredtiger/test/suite/test_sweep01.py @@ -40,7 +40,7 @@ import wttest class test_sweep01(wttest.WiredTigerTestCase, suite_subprocess): tablebase = 'test_sweep01' uri = 'table:' + tablebase - numfiles = 50 + numfiles = 30 numkv = 1000 conn_config = 'file_manager=(close_handle_minimum=0,' + \ 'close_idle_time=6,close_scan_interval=2),' + \ @@ -87,7 +87,7 @@ class test_sweep01(wttest.WiredTigerTestCase, suite_subprocess): # # We've configured checkpoints to run every 5 seconds, sweep server to # run every 2 seconds and idle time to be 6 seconds. It should take - # about 8 seconds for a handle to be closed. Sleep for 12 seconds to be + # about 8 seconds for a handle to be closed. Sleep for double to be # safe. # uri = '%s.test' % self.uri @@ -105,13 +105,24 @@ class test_sweep01(wttest.WiredTigerTestCase, suite_subprocess): c = self.session.open_cursor(uri, None) k = 0 sleep = 0 - while sleep < 12: + max = 60 + final_nfile = 4 + while sleep < max: self.session.checkpoint() k = k+1 c[k] = 1 sleep += 2 time.sleep(2) + # Give slow machines time to process files. 
+ stat_cursor = self.session.open_cursor('statistics:', None, None) + this_nfile = stat_cursor[stat.conn.file_open][2] + stat_cursor.close() + self.pr("==== loop " + str(sleep)) + self.pr("this_nfile " + str(this_nfile)) + if this_nfile == final_nfile: + break c.close() + self.pr("Sweep loop took " + str(sleep)) stat_cursor = self.session.open_cursor('statistics:', None, None) close2 = stat_cursor[stat.conn.dh_sweep_close][2] @@ -177,7 +188,7 @@ class test_sweep01(wttest.WiredTigerTestCase, suite_subprocess): self.assertEqual(nfile2 < nfile1, True) # The only files that should be left are the metadata, the lookaside # file, the lock file, and the active file. - if (nfile2 != 4): + if (nfile2 != final_nfile): print "close1: " + str(close1) + " close2: " + str(close2) print "remove1: " + str(remove1) + " remove2: " + str(remove2) print "sweep1: " + str(sweep1) + " sweep2: " + str(sweep2) @@ -186,7 +197,7 @@ class test_sweep01(wttest.WiredTigerTestCase, suite_subprocess): print "tod1: " + str(tod1) + " tod2: " + str(tod2) print "ref1: " + str(ref1) + " ref2: " + str(ref2) print "XX2: nfile1: " + str(nfile1) + " nfile2: " + str(nfile2) - self.assertEqual(nfile2 == 4, True) + self.assertEqual(nfile2 == final_nfile, True) if __name__ == '__main__': wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_util13.py b/src/third_party/wiredtiger/test/suite/test_util13.py new file mode 100644 index 00000000000..222f42cd7f1 --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_util13.py @@ -0,0 +1,188 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. 
+# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +import os, re, string +from suite_subprocess import suite_subprocess +import itertools, wiredtiger, wttest + +from helper import complex_populate_cgconfig, complex_populate_cgconfig_lsm +from helper import simple_populate +from helper import complex_populate_check, simple_populate_check +from wtscenario import multiply_scenarios, number_scenarios + +# test_util13.py +# Utilities: wt dump, as well as the dump cursor +# Test that dump and load retain table configuration information. +# +class test_util13(wttest.WiredTigerTestCase, suite_subprocess): + """ + Test wt dump. We check for specific output and preservation of + non-default table create parameters. + """ + + pfx = 'test_util13' + nentries = 100 + dir = "dump_dir" + # + # Select table configuration settings that are not the default. 
+ # + types = [ + ('file-simple', dict(uri='file:' + pfx, pop=simple_populate, + populate_check=simple_populate_check, + table_config='prefix_compression_min=3', cfg='')), + ('lsm-simple', dict(uri='lsm:' + pfx, pop=simple_populate, + populate_check=simple_populate_check, + table_config='lsm=(bloom_bit_count=29)', + cfg='bloom_bit_count=29')), + ('table-simple', dict(uri='table:' + pfx, pop=simple_populate, + populate_check=simple_populate_check, + table_config='split_pct=50', cfg='')), + ('table-complex', + dict(uri='table:' + pfx, pop=complex_populate_cgconfig, + populate_check=complex_populate_check, + table_config='allocation_size=512B', cfg='')), + ('table-complex-lsm', + dict(uri='table:' + pfx, pop=complex_populate_cgconfig_lsm, + populate_check=complex_populate_check, + table_config='lsm=(merge_max=5)', + cfg='merge_max=5')), + ] + + scenarios = number_scenarios(multiply_scenarios('.', types)) + + def compare_config(self, expected_cfg, actual_cfg): + # Replace '(' characters so configuration groups don't break parsing. + # If we ever want to look for config groups this will need to change. + #print "compare_config Actual config " + #print actual_cfg + #print "compare_config Expected config " + #print expected_cfg + cfg_orig = actual_cfg + if self.pop != simple_populate: + # + # If we have a complex config, strip out the colgroups and + # columns from the config. Doing so allows us to keep the + # split commands below usable because those two items don't + # have assignments in them. + # + nocolgrp = re.sub("colgroups=\((.+?)\),", '', actual_cfg) + cfg_orig = re.sub("columns=\((.+?)\),", '', nocolgrp) + + #print "Using original config " + #print cfg_orig + da = dict(kv.split('=') for kv in + cfg_orig.strip().replace('(',',').split(',')) + dx = dict(kv.split('=') for kv in + expected_cfg.strip().replace('(',',').split(',')) + + # Check that all items in our expected config subset are in + # the actual configuration and they match. 
+ match = all(item in da.items() for item in dx.items()) + if match == False: + print "MISMATCH:" + print "Original dict: " + print da + print "Expected config: " + print dx + return match + + def compare_files(self, expect_subset, dump_out): + inheader = isconfig = False + for l1, l2 in zip(open(expect_subset, "rb"), open(dump_out, "rb")): + if isconfig: + if not self.compare_config(l1, l2): + return False + if inheader: + # This works because the expected subset has a format + # of URI and config lines alternating. + isconfig = not isconfig + if l1.strip() == 'Header': + inheader = True + if l1.strip() == 'Data': + break + return True + + def load_recheck(self, expect_subset, dump_out): + newdump = "newdump.out" + os.mkdir(self.dir) + self.runWt(['-h', self.dir, 'load', '-f', dump_out]) + # Check the contents + conn = self.wiredtiger_open(self.dir) + session = conn.open_session() + cursor = session.open_cursor(self.uri, None, None) + self.populate_check + conn.close() + dumpargs = ["-h"] + dumpargs.append(self.dir) + dumpargs.append("dump") + dumpargs.append(self.uri) + self.runWt(dumpargs, outfilename=newdump) + + self.assertTrue(self.compare_files(expect_subset, newdump)) + return True + + def test_dump_config(self): + # The number of btree_entries reported is influenced by the + # number of column groups and indices. Each insert will have + # a multiplied effect. + self.pop(self, self.uri, + 'key_format=S,value_format=S,' + self.table_config, self.nentries) + + ver = wiredtiger.wiredtiger_version() + verstring = str(ver[1]) + '.' + str(ver[2]) + '.' + str(ver[3]) + expectfile="expect.out" + with open(expectfile, "w") as expectout: + # Note: this output is sensitive to the precise output format + # generated by wt dump. If this is likely to change, we should + # make this test more accommodating. 
+ expectout.write( + 'WiredTiger Dump (WiredTiger Version ' + verstring + ')\n') + expectout.write('Format=print\n') + expectout.write('Header\n') + expectout.write(self.uri + '\n') + # Check the config on the colgroup itself for complex tables. + if self.pop != simple_populate: + expectout.write('key_format=S\n') + expectout.write('colgroup:' + self.pfx + ':cgroup1\n') + if self.cfg == '': + expectout.write(self.table_config + '\n') + else: + expectout.write(self.cfg + '\n') + expectout.write('Data\n') + + self.pr('calling dump') + outfile="dump.out" + dumpargs = ["dump"] + dumpargs.append(self.uri) + self.runWt(dumpargs, outfilename=outfile) + + self.assertTrue(self.compare_files(expectfile, outfile)) + self.assertTrue(self.load_recheck(expectfile, outfile)) + +if __name__ == '__main__': + wttest.run() diff --git a/src/third_party/wiredtiger/tools/wtstats/stat_data.py b/src/third_party/wiredtiger/tools/wtstats/stat_data.py index f181aeb09b4..c75e4f194dd 100644 --- a/src/third_party/wiredtiger/tools/wtstats/stat_data.py +++ b/src/third_party/wiredtiger/tools/wtstats/stat_data.py @@ -1,6 +1,7 @@ # DO NOT EDIT: automatically built by dist/stat.py. */ no_scale_per_second_list = [ + 'async: current work queue length', 'async: maximum work queue length', 'cache: bytes currently in the cache', 'cache: eviction currently operating in aggressive mode', @@ -36,6 +37,7 @@ no_scale_per_second_list = [ 'transaction: transaction range of IDs currently pinned by named snapshots', 'block-manager: checkpoint size', 'block-manager: file allocation unit size', + 'block-manager: file bytes available for reuse', 'block-manager: file magic number', 'block-manager: file major version number', 'block-manager: file size in bytes', |