diff options
author | Luke Chen <luke.chen@mongodb.com> | 2018-12-05 16:00:08 +1100 |
---|---|---|
committer | Luke Chen <luke.chen@mongodb.com> | 2018-12-05 16:24:35 +1100 |
commit | b74f8750d92f1dc31469261d8fbfe359ae759c29 (patch) | |
tree | 8c15b6c91170239c551f11a1e316fdb057bd83c0 | |
parent | 51ada5ef6dafa2ca29d329edb0a93fc1a91e2ad0 (diff) | |
download | mongo-b74f8750d92f1dc31469261d8fbfe359ae759c29.tar.gz |
Import wiredtiger: fcb59a43a44222716ddae6d94d45cdfd36b915f7 from branch mongodb-4.2
ref: 74aa2f92a9..fcb59a43a4
for: 4.1.7
WT-4192 Remove WiredTiger raw compression support
WT-4319 Improvements to csuite tests
WT-4331 Further extend max wait time for test_bug019.py
WT-4393 Document cursor behaviour for read committed isolation
WT-4410 Split 'unit-test' task to reduce Evergreen Ubuntu build variant runtime
WT-4417 Make os_cache_max and os_cache_dirty_max reconfigurable
WT-4421 Add a way to calculate modify operations
WT-4434 Modify zstd compression level from 3 to 6
WT-4442 Add the ability to duplicate a backup cursor
WT-4455 test_wt4156_metadata_salvage with HAVE_ATTACH fails on zSeries
WT-4457 Add a maximum of dirty system buffers for the logging subsystem
WT-4463 Reduce runtime for csuite handle locks testing
WT-4464 In debug output row-store internal page keys may not format correctly
WT-4469 Coverity #105148: redundant test
66 files changed, 1715 insertions, 1785 deletions
diff --git a/src/third_party/wiredtiger/bench/workgen/runner/compress_ratio.py b/src/third_party/wiredtiger/bench/workgen/runner/compress_ratio.py index 2c5552bfa5d..44320d75e29 100644 --- a/src/third_party/wiredtiger/bench/workgen/runner/compress_ratio.py +++ b/src/third_party/wiredtiger/bench/workgen/runner/compress_ratio.py @@ -82,11 +82,12 @@ conn_config="create,cache_size=2GB,session_max=1000,eviction=(threads_min=4,thre table_config="allocation_size=4k,memory_page_max=10MB,prefix_compression=false,split_pct=90,leaf_page_max=32k,internal_page_max=16k,type=file" compression_opts = { "none" : "block_compressor=none", - "zlib_noraw" : "block_compressor=zlib-noraw", - "zlib_noraw_onepage" : "block_compressor=zlib-noraw,memory_page_image_max=32k", - "zlib_noraw_tenpage" : "block_compressor=zlib-noraw,memory_page_image_max=320k", - "zlib_raw" : "block_compressor=zlib", + "lz4" : "block_compressor=lz4", + "snappy" : "block_compressor=snappy", + "zlib" : "block_compressor=zlib", + "zlib_onepage" : "block_compressor=zlib,memory_page_image_max=32k", + "zlib_tenpage" : "block_compressor=zlib,memory_page_image_max=320k", + "zstd" : "block_compressor=zstd" } #conn_config += extensions_config(['compressors/snappy']) conn = wiredtiger_open("WT_TEST", conn_config) diff --git a/src/third_party/wiredtiger/build_win/wiredtiger.def b/src/third_party/wiredtiger/build_win/wiredtiger.def index 79fa84a11e0..71c52bd81af 100644 --- a/src/third_party/wiredtiger/build_win/wiredtiger.def +++ b/src/third_party/wiredtiger/build_win/wiredtiger.def @@ -1,5 +1,6 @@ LIBRARY WIREDTIGER EXPORTS + wiredtiger_calc_modify wiredtiger_config_parser_open wiredtiger_config_validate wiredtiger_crc32c_func diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py index 08a322a66e7..13d47d72d07 100644 --- a/src/third_party/wiredtiger/dist/api_data.py +++ b/src/third_party/wiredtiger/dist/api_data.py @@ -153,10 +153,6 @@ file_runtime_config = common_runtime_config + 
[ option leads to an advisory call to an appropriate operating system API where available''', choices=['none', 'random', 'sequential']), - Config('cache_resident', 'false', r''' - do not ever evict the object's pages from cache. Not compatible with - LSM tables; see @ref tuning_cache_resident for more information''', - type='boolean'), Config('assert', '', r''' enable enhanced checking. ''', type='category', subconfig= [ @@ -173,6 +169,10 @@ file_runtime_config = common_runtime_config + [ if mixed read use is allowed.''', choices=['always','never','none']) ], undoc=True), + Config('cache_resident', 'false', r''' + do not ever evict the object's pages from cache. Not compatible with + LSM tables; see @ref tuning_cache_resident for more information''', + type='boolean'), Config('log', '', r''' the transaction log configuration for this object. Only valid if log is enabled in ::wiredtiger_open''', @@ -181,6 +181,17 @@ file_runtime_config = common_runtime_config + [ if false, this object has checkpoint-level durability''', type='boolean'), ]), + Config('os_cache_max', '0', r''' + maximum system buffer cache usage, in bytes. If non-zero, evict + object blocks from the system buffer cache after that many bytes + from this object are read or written into the buffer cache''', + min=0), + Config('os_cache_dirty_max', '0', r''' + maximum dirty system buffer cache usage, in bytes. If non-zero, + schedule writes for dirty blocks belonging to this object in the + system buffer cache after that many bytes from this object are + written into the buffer cache''', + min=0), ] # Per-file configuration @@ -318,17 +329,6 @@ file_config = format_meta + file_runtime_config + [ for pages to be temporarily larger than this value. This setting is ignored for LSM trees, see \c chunk_size''', min='512B', max='10TB'), - Config('os_cache_max', '0', r''' - maximum system buffer cache usage, in bytes. 
If non-zero, evict - object blocks from the system buffer cache after that many bytes - from this object are read or written into the buffer cache''', - min=0), - Config('os_cache_dirty_max', '0', r''' - maximum dirty system buffer cache usage, in bytes. If non-zero, - schedule writes for dirty blocks belonging to this object in the - system buffer cache after that many bytes from this object are - written into the buffer cache''', - min=0), Config('prefix_compression', 'false', r''' configure prefix compression on row-store leaf pages''', type='boolean'), @@ -676,6 +676,13 @@ log_configuration_common = [ Config('archive', 'true', r''' automatically archive unneeded log files''', type='boolean'), + Config('os_cache_dirty_pct', '0', r''' + maximum dirty system buffer cache usage, as a percentage of the + log's \c file_max. If non-zero, schedule writes for dirty blocks + belonging to the log in the system buffer cache after that percentage + of the log has been written into the buffer cache without an + intervening file sync.''', + min='0', max='100'), Config('prealloc', 'true', r''' pre-allocate log files''', type='boolean'), diff --git a/src/third_party/wiredtiger/dist/filelist b/src/third_party/wiredtiger/dist/filelist index 79590313b89..1bbeeb3c7a3 100644 --- a/src/third_party/wiredtiger/dist/filelist +++ b/src/third_party/wiredtiger/dist/filelist @@ -62,6 +62,7 @@ src/config/config_collapse.c src/config/config_def.c src/config/config_ext.c src/config/config_upgrade.c +src/conn/api_calc_modify.c src/conn/api_strerror.c src/conn/api_version.c src/conn/conn_api.c diff --git a/src/third_party/wiredtiger/dist/s_export.list b/src/third_party/wiredtiger/dist/s_export.list index e49fa113d96..e85bf62517d 100644 --- a/src/third_party/wiredtiger/dist/s_export.list +++ b/src/third_party/wiredtiger/dist/s_export.list @@ -1,4 +1,5 @@ # List of OK external symbols. 
+wiredtiger_calc_modify wiredtiger_config_parser_open wiredtiger_config_validate wiredtiger_crc32c_func diff --git a/src/third_party/wiredtiger/dist/s_funcs.list b/src/third_party/wiredtiger/dist/s_funcs.list index 95c568a19ff..bcedc19a459 100644 --- a/src/third_party/wiredtiger/dist/s_funcs.list +++ b/src/third_party/wiredtiger/dist/s_funcs.list @@ -33,6 +33,7 @@ __wt_stat_join_aggregate __wt_stat_join_clear_all __wt_stream_set_no_buffer __wt_try_readlock +wiredtiger_calc_modify wiredtiger_config_parser_open wiredtiger_config_validate wiredtiger_pack_int diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok index 247ad261085..20befb6bf97 100644 --- a/src/third_party/wiredtiger/dist/s_string.ok +++ b/src/third_party/wiredtiger/dist/s_string.ok @@ -528,6 +528,7 @@ bzDecompressInit bzalloc bzfree bzip +calc call's calloc cas @@ -952,6 +953,7 @@ marshalled maxCLevel maxcpu maxdbs +maxdiff mbll mbss mem @@ -999,9 +1001,11 @@ needkey needvalue negint nentries +nentriesp newbar newfile newuri +newv nextprev nfilename nhex @@ -1030,6 +1034,7 @@ numSymbols numbare offpage ok +oldv ondisk onint online @@ -1217,6 +1222,7 @@ subgets subinit sublicense subone +suboptimal subtest subtree sunique diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py index e4e56194040..f2c61bea4b1 100644 --- a/src/third_party/wiredtiger/dist/stat_data.py +++ b/src/third_party/wiredtiger/dist/stat_data.py @@ -690,9 +690,6 @@ dsrc_stats = [ ########################################## # Compression statistics ########################################## - CompressStat('compress_raw_fail', 'raw compression call failed, no additional data available'), - CompressStat('compress_raw_fail_temporary', 'raw compression call failed, additional data available'), - CompressStat('compress_raw_ok', 'raw compression call succeeded'), CompressStat('compress_read', 'compressed pages read'), 
CompressStat('compress_write', 'compressed pages written'), CompressStat('compress_write_fail', 'page written failed to compress'), diff --git a/src/third_party/wiredtiger/examples/c/ex_all.c b/src/third_party/wiredtiger/examples/c/ex_all.c index fe6bf86804b..8a5efca4ab3 100644 --- a/src/third_party/wiredtiger/examples/c/ex_all.c +++ b/src/third_party/wiredtiger/examples/c/ex_all.c @@ -1352,6 +1352,26 @@ main(int argc, char *argv[]) } { + /*! [Calculate a modify operation] */ + WT_MODIFY mod[3]; + int nmod = 3; + WT_ITEM prev, newv; + prev.data = "the quick brown fox jumped over the lazy dog. " \ + "THE QUICK BROWN FOX JUMPED OVER THE LAZY DOG. " \ + "the quick brown fox jumped over the lazy dog. " \ + "THE QUICK BROWN FOX JUMPED OVER THE LAZY DOG. "; + prev.size = strlen(prev.data); + newv.data = "A quick brown fox jumped over the lazy dog. " \ + "THE QUICK BROWN FOX JUMPED OVER THE LAZY DOG. " \ + "then a quick brown fox jumped over the lazy dog. " \ + "THE QUICK BROWN FOX JUMPED OVER THE LAZY DOG. " \ + "then what?"; + newv.size = strlen(newv.data); + error_check(wiredtiger_calc_modify(NULL, &prev, &newv, 20, mod, &nmod)); + /*! [Calculate a modify operation] */ + } + + { const char *buffer = "some string"; size_t len = strlen(buffer); /*! [Checksum a buffer] */ diff --git a/src/third_party/wiredtiger/ext/compressors/lz4/lz4_compress.c b/src/third_party/wiredtiger/ext/compressors/lz4/lz4_compress.c index dc90500dcdb..a024e328ea7 100644 --- a/src/third_party/wiredtiger/ext/compressors/lz4/lz4_compress.c +++ b/src/third_party/wiredtiger/ext/compressors/lz4/lz4_compress.c @@ -62,6 +62,10 @@ typedef struct { * decompressed, not the number of bytes decompressed; store that value in the * destination buffer as well. * + * (Since raw compression has been removed from WiredTiger, the lz4 compression + * code no longer calls LZ4_compress_destSize. Some support remains to support + * existing compressed objects.) 
+ * * Use fixed-size, 4B values (WiredTiger never writes buffers larger than 4GB). * * The unused field is available for a mode flag if one is needed in the future, @@ -210,6 +214,9 @@ lz4_decompress(WT_COMPRESSOR *compressor, WT_SESSION *session, * other words, our caller doesn't know how many bytes will result from * decompression, likely hasn't provided us a large enough buffer, and * we have to allocate a scratch buffer. + * + * Even though raw compression has been removed from WiredTiger, this + * code remains for backward compatibility with existing objects. */ if (dst_len < prefix.uncompressed_len) { if ((dst_tmp = wt_api->scr_alloc( @@ -238,102 +245,6 @@ lz4_decompress(WT_COMPRESSOR *compressor, WT_SESSION *session, } /* - * lz4_find_slot -- - * Find the slot containing the target offset (binary search). - */ -static inline uint32_t -lz4_find_slot(int target_arg, uint32_t *offsets, uint32_t slots) -{ - uint32_t base, indx, limit, target; - - indx = 1; /* -Wuninitialized */ - - target = (uint32_t)target_arg; /* Type conversion */ - - /* Fast check if we consumed it all, it's a likely result. */ - if (target >= offsets[slots]) - return (slots); - - /* - * Figure out which slot we got to: binary search. Note the test of - * offset (slot + 1), that's (end-byte + 1) for slot. - */ - for (base = 0, limit = slots; limit != 0; limit >>= 1) { - indx = base + (limit >> 1); - if (target > offsets[indx + 1]) { - base = indx + 1; - --limit; - } - } - - return (indx); -} - -/* - * lz4_compress_raw -- - * Pack records into a specified on-disk page size. 
- */ -static int -lz4_compress_raw(WT_COMPRESSOR *compressor, WT_SESSION *session, - size_t page_max, int split_pct, size_t extra, - uint8_t *src, uint32_t *offsets, uint32_t slots, - uint8_t *dst, size_t dst_len, int final, - size_t *result_lenp, uint32_t *result_slotsp) -{ - LZ4_PREFIX prefix; - uint32_t slot; - int lz4_len, sourceSize, targetDestSize; - - (void)compressor; /* Unused parameters */ - (void)session; - (void)split_pct; - (void)final; - - /* - * Set the source and target sizes. The target size is complicated: we - * don't want to exceed the smaller of the maximum page size or the - * destination buffer length, and in both cases we have to take into - * account the space for our overhead and the extra bytes required by - * our caller. - */ - sourceSize = (int)offsets[slots]; - targetDestSize = (int)(page_max < dst_len ? page_max : dst_len); - targetDestSize -= (int)(sizeof(LZ4_PREFIX) + extra); - - /* Compress, starting after the prefix bytes. */ - lz4_len = LZ4_compress_destSize((const char *)src, - (char *)dst + sizeof(LZ4_PREFIX), &sourceSize, targetDestSize); - - /* - * If compression succeeded and the compressed length is smaller than - * the original size, return success. - */ - if (lz4_len != 0) { - /* Find the first slot we didn't compress. */ - slot = lz4_find_slot(sourceSize, offsets, slots); - - if ((size_t)lz4_len + sizeof(LZ4_PREFIX) < offsets[slot]) { - prefix.compressed_len = (uint32_t)lz4_len; - prefix.uncompressed_len = (uint32_t)sourceSize; - prefix.useful_len = offsets[slot]; - prefix.unused = 0; -#ifdef WORDS_BIGENDIAN - lz4_prefix_swap(&prefix); -#endif - memcpy(dst, &prefix, sizeof(LZ4_PREFIX)); - - *result_slotsp = slot; - *result_lenp = (size_t)lz4_len + sizeof(LZ4_PREFIX); - return (0); - } - } - - *result_slotsp = 0; - *result_lenp = 1; - return (0); -} - -/* * lz4_pre_size -- * WiredTiger LZ4 destination buffer sizing for compression. 
*/ @@ -372,20 +283,15 @@ lz4_terminate(WT_COMPRESSOR *compressor, WT_SESSION *session) * Add a LZ4 compressor. */ static int -lz_add_compressor(WT_CONNECTION *connection, bool raw, const char *name) +lz_add_compressor(WT_CONNECTION *connection, const char *name) { LZ4_COMPRESSOR *lz4_compressor; int ret; - /* - * There are two almost identical LZ4 compressors: one using raw - * compression to target a specific block size, and one without. - */ if ((lz4_compressor = calloc(1, sizeof(LZ4_COMPRESSOR))) == NULL) return (errno); lz4_compressor->compressor.compress = lz4_compress; - lz4_compressor->compressor.compress_raw = raw ? lz4_compress_raw : NULL; lz4_compressor->compressor.decompress = lz4_decompress; lz4_compressor->compressor.pre_size = lz4_pre_size; lz4_compressor->compressor.terminate = lz4_terminate; @@ -416,9 +322,11 @@ lz4_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config) (void)config; /* Unused parameters */ - if ((ret = lz_add_compressor(connection, true, "lz4")) != 0) + if ((ret = lz_add_compressor(connection, "lz4")) != 0) return (ret); - if ((ret = lz_add_compressor(connection, false, "lz4-noraw")) != 0) + + /* Raw compression API backward compatibility. */ + if ((ret = lz_add_compressor(connection, "lz4-noraw")) != 0) return (ret); return (0); } diff --git a/src/third_party/wiredtiger/ext/compressors/nop/nop_compress.c b/src/third_party/wiredtiger/ext/compressors/nop/nop_compress.c index 586f6c8831b..3a5baaedc79 100644 --- a/src/third_party/wiredtiger/ext/compressors/nop/nop_compress.c +++ b/src/third_party/wiredtiger/ext/compressors/nop/nop_compress.c @@ -170,7 +170,6 @@ wiredtiger_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config) * Heap memory (not static), because it can support multiple databases. 
*/ nop_compressor->compressor.compress = nop_compress; - nop_compressor->compressor.compress_raw = NULL; nop_compressor->compressor.decompress = nop_decompress; nop_compressor->compressor.pre_size = nop_pre_size; nop_compressor->compressor.terminate = nop_terminate; diff --git a/src/third_party/wiredtiger/ext/compressors/snappy/snappy_compress.c b/src/third_party/wiredtiger/ext/compressors/snappy/snappy_compress.c index 03a17d28a1b..f369a0c2965 100644 --- a/src/third_party/wiredtiger/ext/compressors/snappy/snappy_compress.c +++ b/src/third_party/wiredtiger/ext/compressors/snappy/snappy_compress.c @@ -260,7 +260,6 @@ snappy_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config) return (errno); snappy_compressor->compressor.compress = snappy_compression; - snappy_compressor->compressor.compress_raw = NULL; snappy_compressor->compressor.decompress = snappy_decompression; snappy_compressor->compressor.pre_size = snappy_pre_size; snappy_compressor->compressor.terminate = snappy_terminate; diff --git a/src/third_party/wiredtiger/ext/compressors/zlib/zlib_compress.c b/src/third_party/wiredtiger/ext/compressors/zlib/zlib_compress.c index d5c0d0fb318..dd9cfdeb88b 100644 --- a/src/third_party/wiredtiger/ext/compressors/zlib/zlib_compress.c +++ b/src/third_party/wiredtiger/ext/compressors/zlib/zlib_compress.c @@ -198,239 +198,6 @@ zlib_decompress(WT_COMPRESSOR *compressor, WT_SESSION *session, } /* - * zlib_find_slot -- - * Find the slot containing the target offset (binary search). 
- */ -static inline uint32_t -zlib_find_slot(uint64_t target, uint32_t *offsets, uint32_t slots) -{ - uint32_t base, indx, limit; - - indx = 1; - - /* Figure out which slot we got to: binary search */ - if (target >= offsets[slots]) - indx = slots; - else if (target > offsets[1]) - for (base = 2, limit = slots - base; limit != 0; limit >>= 1) { - indx = base + (limit >> 1); - if (target < offsets[indx]) - continue; - base = indx + 1; - --limit; - } - - return (indx); -} - -/* - * zlib_compress_raw -- - * Pack records into a specified on-disk page size. - */ -static int -zlib_compress_raw(WT_COMPRESSOR *compressor, WT_SESSION *session, - size_t page_max, int split_pct, size_t extra, - uint8_t *src, uint32_t *offsets, uint32_t slots, - uint8_t *dst, size_t dst_len, int final, - size_t *result_lenp, uint32_t *result_slotsp) -{ - ZLIB_COMPRESSOR *zlib_compressor; - ZLIB_OPAQUE opaque; - z_stream *best_zs, *last_zs, _last_zs, *zs, _zs; - uint32_t curr_slot, last_slot, zlib_reserved; - bool increase_reserve; - int ret, tret; - - (void)split_pct; /* Unused parameters */ - (void)final; - - zlib_compressor = (ZLIB_COMPRESSOR *)compressor; - - /* - * Experimentally derived, reserve this many bytes for zlib to finish - * up a buffer. If this isn't sufficient, we don't fail but we will be - * inefficient. - */ -#define WT_ZLIB_RESERVED 24 -#define WT_ZLIB_RESERVED_MAX 48 - zlib_reserved = WT_ZLIB_RESERVED; - - if (0) { -retry: /* If we reached our maximum reserve, quit. 
*/ - if (zlib_reserved == WT_ZLIB_RESERVED_MAX) - return (0); - zlib_reserved = WT_ZLIB_RESERVED_MAX; - } - - best_zs = last_zs = NULL; - last_slot = 0; - increase_reserve = false; - ret = 0; - - zs = &_zs; - memset(zs, 0, sizeof(*zs)); - zs->zalloc = zalloc; - zs->zfree = zfree; - opaque.compressor = compressor; - opaque.session = session; - zs->opaque = &opaque; - - if ((ret = deflateInit(zs, zlib_compressor->zlib_level)) != Z_OK) - return (zlib_error(compressor, session, "deflateInit", ret)); - - zs->next_in = src; - zs->next_out = dst; - - /* - * Set the target size. The target size is complicated: we don't want - * to exceed the smaller of the maximum page size or the destination - * buffer length, and in both cases we have to take into account the - * space required by zlib to finish up the buffer and the extra bytes - * required by our caller. - */ - zs->avail_out = (uint32_t)(page_max < dst_len ? page_max : dst_len); - zs->avail_out -= (uint32_t)(zlib_reserved + extra); - - /* - * Strategy: take the available output size and compress that much - * input. Continue until there is no input small enough or the - * compression fails to fit. - */ - for (;;) { - /* Find the next slot we will try to compress up to. */ - curr_slot = zlib_find_slot( - zs->total_in + zs->avail_out, offsets, slots); - if (curr_slot > last_slot) { - zs->avail_in = offsets[curr_slot] - offsets[last_slot]; - while (zs->avail_in > 0 && zs->avail_out > 0) - if ((ret = deflate(zs, Z_SYNC_FLUSH)) != Z_OK) { - ret = zlib_error(compressor, - session, "deflate", ret); - goto err; - } - } - - /* - * We didn't do a deflate, or it didn't work: use the last saved - * position (if any). - */ - if (curr_slot <= last_slot || zs->avail_in > 0) { - best_zs = last_zs; - break; - } - - /* - * If there's more compression to do, save a snapshot and keep - * going, otherwise, use the current compression. - */ - last_slot = curr_slot; - if (zs->avail_out > 0) { - /* Discard any previously saved snapshot. 
*/ - if (last_zs != NULL) { - ret = deflateEnd(last_zs); - last_zs = NULL; - if (ret != Z_OK && ret != Z_DATA_ERROR) { - ret = zlib_error(compressor, - session, "deflateEnd", ret); - goto err; - } - } - last_zs = &_last_zs; - if ((ret = deflateCopy(last_zs, zs)) != Z_OK) { - last_zs = NULL; - ret = zlib_error( - compressor, session, "deflateCopy", ret); - goto err; - } - continue; - } - - best_zs = zs; - break; - } - - if (last_slot > 0 && best_zs != NULL) { - /* Add the reserved bytes and try to finish the compression. */ - best_zs->avail_out += zlib_reserved; - ret = deflate(best_zs, Z_FINISH); - - /* - * If the end marker didn't fit with the default value, try - * again with a maximum value; if that doesn't work, report we - * got no work done, WiredTiger will compress the (possibly - * large) page image using ordinary compression instead. - */ - if (ret == Z_OK || ret == Z_BUF_ERROR) { - last_slot = 0; - increase_reserve = true; - } else if (ret != Z_STREAM_END) { - ret = zlib_error( - compressor, session, "deflate end block", ret); - goto err; - } - ret = 0; - } - -err: if ((tret = deflateEnd(zs)) != Z_OK && tret != Z_DATA_ERROR) - ret = zlib_error(compressor, session, "deflateEnd", tret); - if (last_zs != NULL && - (tret = deflateEnd(last_zs)) != Z_OK && tret != Z_DATA_ERROR) - ret = zlib_error(compressor, session, "deflateEnd", tret); - - if (ret == 0 && last_slot > 0) { - *result_slotsp = last_slot; - *result_lenp = (size_t)best_zs->total_out; - } else { - /* We didn't manage to compress anything. */ - *result_slotsp = 0; - *result_lenp = 1; - - if (increase_reserve) - goto retry; - } - -#if 0 - /* Decompress the result and confirm it matches the original source. 
*/ - if (ret == 0 && last_slot > 0) { - WT_EXTENSION_API *wt_api; - void *decomp; - size_t result_len; - - wt_api = ((ZLIB_COMPRESSOR *)compressor)->wt_api; - - if ((decomp = zalloc( - &opaque, 1, (uint32_t)best_zs->total_in + 100)) == NULL) { - (void)wt_api->err_printf(wt_api, session, - "zlib_compress_raw: zalloc failure"); - return (ENOMEM); - } - if ((ret = zlib_decompress( - compressor, session, dst, (size_t)best_zs->total_out, - decomp, (size_t)best_zs->total_in + 100, &result_len)) == 0) - if (memcmp(src, decomp, result_len) != 0) { - (void)wt_api->err_printf(wt_api, session, - "zlib_compress_raw: " - "deflate compare with original source"); - return (WT_ERROR); - } - zfree(&opaque, decomp); - } -#endif - -#if 0 - if (ret == 0 && last_slot > 0) - fprintf(stderr, - "zlib_compress_raw (%s): page_max %" PRIuMAX ", slots %" - PRIu32 ", take %" PRIu32 ": %" PRIu32 " -> %" PRIuMAX "\n", - final ? "final" : "not final", (uintmax_t)page_max, - slots, last_slot, offsets[last_slot], - (uintmax_t)*result_lenp); -#endif - - return (ret); -} - -/* * zlib_terminate -- * WiredTiger zlib compression termination. */ @@ -448,22 +215,15 @@ zlib_terminate(WT_COMPRESSOR *compressor, WT_SESSION *session) * Add a zlib compressor. */ static int -zlib_add_compressor( - WT_CONNECTION *connection, bool raw, const char *name, int zlib_level) +zlib_add_compressor(WT_CONNECTION *connection, const char *name, int zlib_level) { ZLIB_COMPRESSOR *zlib_compressor; int ret; - /* - * There are two almost identical zlib compressors: one using raw - * compression to target a specific block size, and one without. - */ if ((zlib_compressor = calloc(1, sizeof(ZLIB_COMPRESSOR))) == NULL) return (errno); zlib_compressor->compressor.compress = zlib_compress; - zlib_compressor->compressor.compress_raw = raw ? 
- zlib_compress_raw : NULL; zlib_compressor->compressor.decompress = zlib_decompress; zlib_compressor->compressor.pre_size = NULL; zlib_compressor->compressor.terminate = zlib_terminate; @@ -542,11 +302,12 @@ zlib_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config) if ((ret = zlib_init_config(connection, config, &zlib_level)) != 0) return (ret); - if ((ret = zlib_add_compressor( - connection, true, "zlib", zlib_level)) != 0) + if ((ret = zlib_add_compressor(connection, "zlib", zlib_level)) != 0) return (ret); + + /* Raw compression API backward compatibility. */ if ((ret = zlib_add_compressor( - connection, false, "zlib-noraw", zlib_level)) != 0) + connection, "zlib-noraw", zlib_level)) != 0) return (ret); return (0); } diff --git a/src/third_party/wiredtiger/ext/compressors/zstd/zstd_compress.c b/src/third_party/wiredtiger/ext/compressors/zstd/zstd_compress.c index 40a872f92e2..210bc4b30df 100644 --- a/src/third_party/wiredtiger/ext/compressors/zstd/zstd_compress.c +++ b/src/third_party/wiredtiger/ext/compressors/zstd/zstd_compress.c @@ -296,7 +296,7 @@ zstd_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config) * ratio). In other words, position zstd as a zlib replacement, having * similar compression at much higher compression/decompression speeds. 
*/ - compression_level = 3; + compression_level = 6; if ((ret = zstd_init_config(connection, config, &compression_level)) != 0) return (ret); @@ -305,7 +305,6 @@ zstd_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config) return (errno); zstd_compressor->compressor.compress = zstd_compress; - zstd_compressor->compressor.compress_raw = NULL; zstd_compressor->compressor.decompress = zstd_decompress; zstd_compressor->compressor.pre_size = zstd_pre_size; zstd_compressor->compressor.terminate = zstd_terminate; diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index 7ace999a697..c727e51820e 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -1,5 +1,5 @@ { - "commit": "74aa2f92a95596196d8ff131cdf015850613c893", + "commit": "fcb59a43a44222716ddae6d94d45cdfd36b915f7", "github": "wiredtiger/wiredtiger.git", "vendor": "wiredtiger", "branch": "mongodb-4.2" diff --git a/src/third_party/wiredtiger/lang/java/wiredtiger.i b/src/third_party/wiredtiger/lang/java/wiredtiger.i index dfac0e9ec8d..5ef1658a322 100644 --- a/src/third_party/wiredtiger/lang/java/wiredtiger.i +++ b/src/third_party/wiredtiger/lang/java/wiredtiger.i @@ -357,6 +357,7 @@ WT_CLASS(struct __wt_async_op, WT_ASYNC_OP, op) %ignore __wt_modify::position; %ignore __wt_modify::size; %ignore __wt_cursor::modify; +%ignore wiredtiger_calc_modify; %ignore __wt_cursor::compare(WT_CURSOR *, WT_CURSOR *, int *); %rename (compare_wrap) __wt_cursor::compare; diff --git a/src/third_party/wiredtiger/lang/python/wiredtiger.i b/src/third_party/wiredtiger/lang/python/wiredtiger.i index 9ba6bd15c78..47ed727f3a8 100644 --- a/src/third_party/wiredtiger/lang/python/wiredtiger.i +++ b/src/third_party/wiredtiger/lang/python/wiredtiger.i @@ -151,6 +151,41 @@ from packing import pack, unpack } } +%typemap(in,numinputs=1) (WT_MODIFY *entries, int *nentriesp) (WT_MODIFY *mod, int nentries) { + nentries = (int) PyLong_AsLong($input); + if 
(__wt_calloc_def(NULL, (size_t)nentries, &mod) != 0) + SWIG_exception_fail(SWIG_MemoryError, "WT calloc failed"); + $1 = mod; + $2 = &nentries; +} + +%typemap(argout) (WT_MODIFY *entries, int *nentriesp) { + int i; + $result = PyList_New(*$2); + for (i = 0; i < *$2; i++) { + PyObject *o = SWIG_NewPointerObj(Py_None, SWIGTYPE_p___wt_modify, 0); + PyObject_SetAttrString(o, "data", PyString_FromStringAndSize( + $1[i].data.data, $1[i].data.size)); + PyObject_SetAttrString(o, "offset", + PyInt_FromLong($1[i].offset)); + PyObject_SetAttrString(o, "size", + PyInt_FromLong($1[i].size)); + PyList_SetItem($result, i, o); + } +} + +%typemap(in) const WT_ITEM * (WT_ITEM val, long sz) { + if (PyString_AsStringAndSize($input, &val.data, &sz) < 0) + SWIG_exception_fail(SWIG_AttributeError, + "bad string value for WT_ITEM"); + val.size = (size_t)sz; + $1 = &val; +} + +%typemap(freearg) (WT_MODIFY *entries, int *nentriesp) { + __wt_free(NULL, $1); +} + %typemap(in) WT_MODIFY * (int len, WT_MODIFY *modarray, int i) { len = PyList_Size($input); /* @@ -209,7 +244,7 @@ from packing import pack, unpack } %typemap(freearg) WT_MODIFY * { - /* The WT_MODIFY arg is in position 2. Is there a better way? */ + /* The WT_MODIFY arg is in position 2. Is there a better way? */ WT_MODIFY *modarray = modarray2; size_t i, len; @@ -980,6 +1015,9 @@ typedef int int_void; self.data = data self.offset = offset self.size = size + + def __repr__(self): + return 'Modify(\'%s\', %d, %d)' % (self.data, self.offset, self.size) %} }; @@ -1008,6 +1046,7 @@ int diagnostic_build() { #endif } %} + int diagnostic_build(); /* Remove / rename parts of the C API that we don't want in Python. 
*/ diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c index bf7ad49ccc3..e413dd7ca72 100644 --- a/src/third_party/wiredtiger/src/btree/bt_handle.c +++ b/src/third_party/wiredtiger/src/btree/bt_handle.c @@ -459,8 +459,8 @@ __btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt) * Don't do compression adjustment for fixed-size column store, the * leaf page sizes don't change. (We could adjust internal pages but not * internal pages, but that seems an unlikely use case.) - * XXX - * Don't do compression adjustment of snappy-compressed blocks. + * XXX + * Don't do compression adjustment of snappy-compressed blocks. */ btree->intlpage_compadjust = false; btree->maxintlpage_precomp = btree->maxintlpage; diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c index 1bda9a62e25..1095fe18560 100644 --- a/src/third_party/wiredtiger/src/config/config_def.c +++ b/src/third_party/wiredtiger/src/config/config_def.c @@ -95,6 +95,9 @@ static const WT_CONFIG_CHECK static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure_log_subconfigs[] = { { "archive", "boolean", NULL, NULL, NULL, 0 }, + { "os_cache_dirty_pct", "int", + NULL, "min=0,max=100", + NULL, 0 }, { "prealloc", "boolean", NULL, NULL, NULL, 0 }, { "zero_fill", "boolean", NULL, NULL, NULL, 0 }, { NULL, NULL, NULL, NULL, NULL, 0 } @@ -169,7 +172,7 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = { confchk_wiredtiger_open_file_manager_subconfigs, 3 }, { "log", "category", NULL, NULL, - confchk_WT_CONNECTION_reconfigure_log_subconfigs, 3 }, + confchk_WT_CONNECTION_reconfigure_log_subconfigs, 4 }, { "lsm_manager", "category", NULL, NULL, confchk_wiredtiger_open_lsm_manager_subconfigs, 2 }, @@ -250,6 +253,8 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_alter[] = { { "log", "category", NULL, NULL, confchk_WT_SESSION_create_log_subconfigs, 1 }, + { "os_cache_dirty_max", "int", 
NULL, "min=0", NULL, 0 }, + { "os_cache_max", "int", NULL, "min=0", NULL, 0 }, { NULL, NULL, NULL, NULL, NULL, 0 } }; @@ -785,6 +790,9 @@ static const WT_CONFIG_CHECK { "compressor", "string", NULL, NULL, NULL, 0 }, { "enabled", "boolean", NULL, NULL, NULL, 0 }, { "file_max", "int", NULL, "min=100KB,max=2GB", NULL, 0 }, + { "os_cache_dirty_pct", "int", + NULL, "min=0,max=100", + NULL, 0 }, { "path", "string", NULL, NULL, NULL, 0 }, { "prealloc", "boolean", NULL, NULL, NULL, 0 }, { "recover", "string", @@ -868,7 +876,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = { { "in_memory", "boolean", NULL, NULL, NULL, 0 }, { "log", "category", NULL, NULL, - confchk_wiredtiger_open_log_subconfigs, 8 }, + confchk_wiredtiger_open_log_subconfigs, 9 }, { "lsm_manager", "category", NULL, NULL, confchk_wiredtiger_open_lsm_manager_subconfigs, 2 }, @@ -974,7 +982,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = { { "in_memory", "boolean", NULL, NULL, NULL, 0 }, { "log", "category", NULL, NULL, - confchk_wiredtiger_open_log_subconfigs, 8 }, + confchk_wiredtiger_open_log_subconfigs, 9 }, { "lsm_manager", "category", NULL, NULL, confchk_wiredtiger_open_lsm_manager_subconfigs, 2 }, @@ -1077,7 +1085,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = { { "hazard_max", "int", NULL, "min=15", NULL, 0 }, { "log", "category", NULL, NULL, - confchk_wiredtiger_open_log_subconfigs, 8 }, + confchk_wiredtiger_open_log_subconfigs, 9 }, { "lsm_manager", "category", NULL, NULL, confchk_wiredtiger_open_lsm_manager_subconfigs, 2 }, @@ -1178,7 +1186,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = { { "hazard_max", "int", NULL, "min=15", NULL, 0 }, { "log", "category", NULL, NULL, - confchk_wiredtiger_open_log_subconfigs, 8 }, + confchk_wiredtiger_open_log_subconfigs, 9 }, { "lsm_manager", "category", NULL, NULL, confchk_wiredtiger_open_lsm_manager_subconfigs, 2 }, @@ -1284,9 +1292,10 @@ static const WT_CONFIG_ENTRY config_entries[] = 
{ "eviction_checkpoint_target=1,eviction_dirty_target=5," "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95" ",file_manager=(close_handle_minimum=250,close_idle_time=30," - "close_scan_interval=10),log=(archive=true,prealloc=true," - "zero_fill=false),lsm_manager=(merge=true,worker_thread_max=4)," - "lsm_merge=true,operation_tracking=(enabled=false,path=\".\")," + "close_scan_interval=10),log=(archive=true,os_cache_dirty_pct=0," + "prealloc=true,zero_fill=false),lsm_manager=(merge=true," + "worker_thread_max=4),lsm_merge=true," + "operation_tracking=(enabled=false,path=\".\")," "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB)," "statistics=none,statistics_log=(json=false,on_close=false," "sources=,timestamp=\"%b %d %H:%M:%S\",wait=0)," @@ -1317,8 +1326,9 @@ static const WT_CONFIG_ENTRY config_entries[] = { { "WT_SESSION.alter", "access_pattern_hint=none,app_metadata=," "assert=(commit_timestamp=none,read_timestamp=none)," - "cache_resident=false,exclusive_refreshed=true,log=(enabled=true)", - confchk_WT_SESSION_alter, 6 + "cache_resident=false,exclusive_refreshed=true,log=(enabled=true)" + ",os_cache_dirty_max=0,os_cache_max=0", + confchk_WT_SESSION_alter, 8 }, { "WT_SESSION.begin_transaction", "ignore_prepare=false,isolation=,name=,priority=0,read_timestamp=" @@ -1536,10 +1546,10 @@ static const WT_CONFIG_ENTRY config_entries[] = { "extensions=,file_extend=,file_manager=(close_handle_minimum=250," "close_idle_time=30,close_scan_interval=10),hazard_max=1000," "in_memory=false,log=(archive=true,compressor=,enabled=false," - "file_max=100MB,path=\".\",prealloc=true,recover=on," - "zero_fill=false),lsm_manager=(merge=true,worker_thread_max=4)," - "lsm_merge=true,mmap=true,multiprocess=false," - "operation_tracking=(enabled=false,path=\".\"),readonly=false," + "file_max=100MB,os_cache_dirty_pct=0,path=\".\",prealloc=true," + "recover=on,zero_fill=false),lsm_manager=(merge=true," + 
"worker_thread_max=4),lsm_merge=true,mmap=true,multiprocess=false" + ",operation_tracking=(enabled=false,path=\".\"),readonly=false," "salvage=false,session_max=100,session_scratch_max=2MB," "session_table_cache=true,shared_cache=(chunk=10MB,name=,quota=0," "reserve=0,size=500MB),statistics=none,statistics_log=(json=false" @@ -1563,10 +1573,10 @@ static const WT_CONFIG_ENTRY config_entries[] = { "extensions=,file_extend=,file_manager=(close_handle_minimum=250," "close_idle_time=30,close_scan_interval=10),hazard_max=1000," "in_memory=false,log=(archive=true,compressor=,enabled=false," - "file_max=100MB,path=\".\",prealloc=true,recover=on," - "zero_fill=false),lsm_manager=(merge=true,worker_thread_max=4)," - "lsm_merge=true,mmap=true,multiprocess=false," - "operation_tracking=(enabled=false,path=\".\"),readonly=false," + "file_max=100MB,os_cache_dirty_pct=0,path=\".\",prealloc=true," + "recover=on,zero_fill=false),lsm_manager=(merge=true," + "worker_thread_max=4),lsm_merge=true,mmap=true,multiprocess=false" + ",operation_tracking=(enabled=false,path=\".\"),readonly=false," "salvage=false,session_max=100,session_scratch_max=2MB," "session_table_cache=true,shared_cache=(chunk=10MB,name=,quota=0," "reserve=0,size=500MB),statistics=none,statistics_log=(json=false" @@ -1589,16 +1599,16 @@ static const WT_CONFIG_ENTRY config_entries[] = { ",extensions=,file_extend=,file_manager=(close_handle_minimum=250" ",close_idle_time=30,close_scan_interval=10),hazard_max=1000," "log=(archive=true,compressor=,enabled=false,file_max=100MB," - "path=\".\",prealloc=true,recover=on,zero_fill=false)," - "lsm_manager=(merge=true,worker_thread_max=4),lsm_merge=true," - "mmap=true,multiprocess=false,operation_tracking=(enabled=false," - "path=\".\"),readonly=false,salvage=false,session_max=100," - "session_scratch_max=2MB,session_table_cache=true," - "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB)," - "statistics=none,statistics_log=(json=false,on_close=false," - 
"path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0)," - "timing_stress_for_test=,transaction_sync=(enabled=false," - "method=fsync),verbose=,version=(major=0,minor=0),write_through=", + "os_cache_dirty_pct=0,path=\".\",prealloc=true,recover=on," + "zero_fill=false),lsm_manager=(merge=true,worker_thread_max=4)," + "lsm_merge=true,mmap=true,multiprocess=false," + "operation_tracking=(enabled=false,path=\".\"),readonly=false," + "salvage=false,session_max=100,session_scratch_max=2MB," + "session_table_cache=true,shared_cache=(chunk=10MB,name=,quota=0," + "reserve=0,size=500MB),statistics=none,statistics_log=(json=false" + ",on_close=false,path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\"" + ",wait=0),timing_stress_for_test=,transaction_sync=(enabled=false" + ",method=fsync),verbose=,version=(major=0,minor=0),write_through=", confchk_wiredtiger_open_basecfg, 42 }, { "wiredtiger_open_usercfg", @@ -1614,16 +1624,16 @@ static const WT_CONFIG_ENTRY config_entries[] = { ",extensions=,file_extend=,file_manager=(close_handle_minimum=250" ",close_idle_time=30,close_scan_interval=10),hazard_max=1000," "log=(archive=true,compressor=,enabled=false,file_max=100MB," - "path=\".\",prealloc=true,recover=on,zero_fill=false)," - "lsm_manager=(merge=true,worker_thread_max=4),lsm_merge=true," - "mmap=true,multiprocess=false,operation_tracking=(enabled=false," - "path=\".\"),readonly=false,salvage=false,session_max=100," - "session_scratch_max=2MB,session_table_cache=true," - "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB)," - "statistics=none,statistics_log=(json=false,on_close=false," - "path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0)," - "timing_stress_for_test=,transaction_sync=(enabled=false," - "method=fsync),verbose=,write_through=", + "os_cache_dirty_pct=0,path=\".\",prealloc=true,recover=on," + "zero_fill=false),lsm_manager=(merge=true,worker_thread_max=4)," + "lsm_merge=true,mmap=true,multiprocess=false," + 
"operation_tracking=(enabled=false,path=\".\"),readonly=false," + "salvage=false,session_max=100,session_scratch_max=2MB," + "session_table_cache=true,shared_cache=(chunk=10MB,name=,quota=0," + "reserve=0,size=500MB),statistics=none,statistics_log=(json=false" + ",on_close=false,path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\"" + ",wait=0),timing_stress_for_test=,transaction_sync=(enabled=false" + ",method=fsync),verbose=,write_through=", confchk_wiredtiger_open_usercfg, 41 }, { NULL, NULL, NULL, 0 } diff --git a/src/third_party/wiredtiger/src/conn/api_calc_modify.c b/src/third_party/wiredtiger/src/conn/api_calc_modify.c new file mode 100644 index 00000000000..6de88644f79 --- /dev/null +++ b/src/third_party/wiredtiger/src/conn/api_calc_modify.c @@ -0,0 +1,193 @@ +/*- + * Copyright (c) 2014-2018 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +#define WT_CM_BLOCKSIZE 8 +#define WT_CM_MINMATCH 64 +#define WT_CM_STARTGAP (WT_CM_BLOCKSIZE / 2) + +typedef struct { + WT_SESSION_IMPL *session; + + const uint8_t *s1, *e1; /* Start / end of pre-image. */ + const uint8_t *s2, *e2; /* Start / end of after-image. */ + + const uint8_t *used1, *used2; /* Used up to here. */ + + size_t maxdiff; + int maxentries; +} WT_CM_STATE; + +typedef struct { + const uint8_t *m1, *m2; + size_t len; +} WT_CM_MATCH; + +/* + * __cm_add_modify -- + * Add a modify operation to the list of entries. + * + * Fails if all entries are used or the maximum bytes of difference is + * exceeded. 
+ */ +static int +__cm_add_modify(WT_CM_STATE *cms, const uint8_t *p2, + const uint8_t *m1, const uint8_t *m2, WT_MODIFY *entries, int *nentriesp) +{ + WT_MODIFY *mod; + size_t len1, len2; + + WT_ASSERT(cms->session, m1 >= cms->used1 && m2 >= cms->used2); + + len1 = (size_t)(m1 - cms->used1); + len2 = (size_t)(m2 - cms->used2); + + if (*nentriesp >= cms->maxentries || len2 > cms->maxdiff) + return (WT_NOTFOUND); + + mod = entries + (*nentriesp)++; + mod->offset = (size_t)(p2 - cms->s2); + mod->size = len1; + mod->data.data = p2; + mod->data.size = len2; + cms->maxdiff -= len2; + + return (0); +} + +/* + * __cm_extend -- + * Given a potential match size, extend to find the complete match. + */ +static void +__cm_extend(WT_CM_STATE *cms, + const uint8_t *m1, const uint8_t *m2, WT_CM_MATCH *match) +{ + const uint8_t *p1, *p2; + + /* Step past the end and before the beginning of the matching block. */ + for (p1 = m1, p2 = m2; + p1 < cms->e1 && p2 < cms->e2 && *p1 == *p2; + p1++, p2++) + ; + + for (; m1 >= cms->used1 && m2 >= cms->used2 && *m1 == *m2; + m1--, m2--) + ; + + match->m1 = m1 + 1; + match->m2 = m2 + 1; + match->len = p1 > m1 ? (size_t)((p1 - m1) - 1) : 0; +} + +/* + * __cm_fingerprint -- + * Calculate an integral "fingerprint" of a block of bytes. + */ +static inline uint64_t +__cm_fingerprint(const uint8_t *p) +{ + uint64_t h; + + WT_STATIC_ASSERT(sizeof(h) <= WT_CM_BLOCKSIZE); + memcpy(&h, p, WT_CM_BLOCKSIZE); + return (h); +} + +/* + * wiredtiger_calc_modify -- + * Calculate a set of WT_MODIFY operations to represent an update. 
+ */ +int +wiredtiger_calc_modify(WT_SESSION *wt_session, + const WT_ITEM *oldv, const WT_ITEM *newv, + size_t maxdiff, WT_MODIFY *entries, int *nentriesp) +{ + WT_CM_MATCH match; + WT_CM_STATE cms; + size_t gap, i; + uint64_t h, hend, hstart; + const uint8_t *p1, *p2; + bool start; + + if (oldv->size < WT_CM_MINMATCH || newv->size < WT_CM_MINMATCH) + return (WT_NOTFOUND); + + cms.session = (WT_SESSION_IMPL *)wt_session; + + cms.s1 = cms.used1 = oldv->data; + cms.e1 = cms.s1 + oldv->size; + cms.s2 = cms.used2 = newv->data; + cms.e2 = cms.s2 + newv->size; + cms.maxdiff = maxdiff; + cms.maxentries = *nentriesp; + *nentriesp = 0; + + /* Ignore matches at the beginning / end. */ + __cm_extend(&cms, cms.s1, cms.s2, &match); + cms.used1 += match.len; + cms.used2 += match.len; + if (cms.used1 < cms.e1 && cms.used2 < cms.e2) { + __cm_extend(&cms, cms.e1 - 1, cms.e2 - 1, &match); + cms.e1 -= match.len; + cms.e2 -= match.len; + } + + if (cms.used1 + WT_CM_BLOCKSIZE >= cms.e1 || + cms.used2 + WT_CM_BLOCKSIZE >= cms.e2) + goto end; + + /* + * Walk through the post-image, maintaining start / end markers + * separated by a gap in the pre-image. If the current point in the + * post-image matches either marker, try to extend the match to find a + * (large) range of matching bytes. If the end of the range is reached + * in the post-image without finding a good match, double the size of + * the gap, update the markers and keep trying. + */ + h = hstart = hend = 0; + i = gap = 0; + for (p1 = cms.used1, p2 = cms.used2, start = true; + p1 + WT_CM_BLOCKSIZE <= cms.e1 && p2 + WT_CM_BLOCKSIZE <= cms.e2; + p2++, i++) { + if (start || i == gap) { + p1 += gap; + gap = start ? WT_CM_STARTGAP : gap * 2; + if (p1 + gap + WT_CM_BLOCKSIZE >= cms.e1) + break; + if (gap > maxdiff) + return (WT_NOTFOUND); + hstart = start ? 
__cm_fingerprint(p1) : hend; + hend = __cm_fingerprint(p1 + gap); + start = false; + i = 0; + } + h = __cm_fingerprint(p2); + match.len = 0; + if (h == hstart) + __cm_extend(&cms, p1, p2, &match); + else if (h == hend) + __cm_extend(&cms, p1 + gap, p2, &match); + + if (match.len < WT_CM_MINMATCH) + continue; + + WT_RET(__cm_add_modify(&cms, cms.used2, match.m1, match.m2, + entries, nentriesp)); + cms.used1 = p1 = match.m1 + match.len; + cms.used2 = p2 = match.m2 + match.len; + start = true; + } + +end: if (cms.used1 < cms.e1 || cms.used2 < cms.e2) + WT_RET(__cm_add_modify(&cms, cms.used2, cms.e1, cms.e2, + entries, nentriesp)); + + return (0); +} diff --git a/src/third_party/wiredtiger/src/conn/conn_log.c b/src/third_party/wiredtiger/src/conn/conn_log.c index dc6e4a82509..f04407d2e10 100644 --- a/src/third_party/wiredtiger/src/conn/conn_log.c +++ b/src/third_party/wiredtiger/src/conn/conn_log.c @@ -289,6 +289,10 @@ __logmgr_config( WT_STAT_CONN_SET(session, log_max_filesize, conn->log_file_max); } + WT_RET(__wt_config_gets(session, cfg, "log.os_cache_dirty_pct", &cval)); + if (cval.val != 0) + conn->log_dirty_max = (conn->log_file_max * cval.val) / 100; + /* * If pre-allocation is configured, set the initial number to a few. * We'll adapt as load dictates. 
diff --git a/src/third_party/wiredtiger/src/cursor/cur_backup.c b/src/third_party/wiredtiger/src/cursor/cur_backup.c index 32021a6f7e2..ebb2e30a4dd 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_backup.c +++ b/src/third_party/wiredtiger/src/cursor/cur_backup.c @@ -13,7 +13,7 @@ static int __backup_list_append( WT_SESSION_IMPL *, WT_CURSOR_BACKUP *, const char *); static int __backup_list_uri_append(WT_SESSION_IMPL *, const char *, bool *); static int __backup_start( - WT_SESSION_IMPL *, WT_CURSOR_BACKUP *, const char *[]); + WT_SESSION_IMPL *, WT_CURSOR_BACKUP *, WT_CURSOR *, const char *[]); static int __backup_stop(WT_SESSION_IMPL *, WT_CURSOR_BACKUP *); static int __backup_uri(WT_SESSION_IMPL *, const char *[], bool *, bool *); @@ -66,6 +66,23 @@ err: API_END_RET(session, ret); } /* + * __backup_free -- + * Free list resources for a backup cursor. + */ +static void +__backup_free(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb) +{ + int i; + + if (cb->list != NULL) { + for (i = 0; cb->list[i] != NULL; ++i) + __wt_free(session, cb->list[i]); + __wt_free(session, cb->list); + } + +} + +/* * __curbackup_close -- * WT_CURSOR->close method for the backup cursor type. */ @@ -89,7 +106,13 @@ err: * discarded when the cursor is closed), because that cursor will never * not be responsible for cleanup. */ - if (F_ISSET(cb, WT_CURBACKUP_LOCKER)) + if (F_ISSET(cb, WT_CURBACKUP_DUP)) { + __backup_free(session, cb); + /* Make sure the original backup cursor is still open. */ + WT_ASSERT(session, F_ISSET(session, WT_SESSION_BACKUP_CURSOR)); + F_CLR(session, WT_SESSION_BACKUP_DUP); + F_CLR(cb, WT_CURBACKUP_DUP); + } else if (F_ISSET(cb, WT_CURBACKUP_LOCKER)) WT_TRET(__backup_stop(session, cb)); __wt_cursor_close(cursor); @@ -103,8 +126,8 @@ err: * WT_SESSION->open_cursor method for the backup cursor type. 
*/ int -__wt_curbackup_open(WT_SESSION_IMPL *session, - const char *uri, const char *cfg[], WT_CURSOR **cursorp) +__wt_curbackup_open(WT_SESSION_IMPL *session, const char *uri, + WT_CURSOR *other, const char *cfg[], WT_CURSOR **cursorp) { WT_CURSOR_STATIC_INIT(iface, __wt_cursor_get_key, /* get-key */ @@ -148,7 +171,7 @@ __wt_curbackup_open(WT_SESSION_IMPL *session, */ WT_WITH_CHECKPOINT_LOCK(session, WT_WITH_SCHEMA_LOCK(session, - ret = __backup_start(session, cb, cfg))); + ret = __backup_start(session, cb, other, cfg))); WT_ERR(ret); WT_ERR(__wt_cursor_init(cursor, uri, NULL, cfg, cursorp)); @@ -193,8 +216,8 @@ err: WT_TRET(__wt_fs_directory_list_free(session, &logfiles, logcount)); * Start a backup. */ static int -__backup_start( - WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb, const char *cfg[]) +__backup_start(WT_SESSION_IMPL *session, + WT_CURSOR_BACKUP *cb, WT_CURSOR *other, const char *cfg[]) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; @@ -216,48 +239,70 @@ __backup_start( * Single thread hot backups: we're holding the schema lock, so we * know we'll serialize with other attempts to start a hot backup. */ - if (conn->hot_backup) + if (conn->hot_backup && other == NULL) WT_RET_MSG( session, EINVAL, "there is already a backup cursor open"); - /* - * The hot backup copy is done outside of WiredTiger, which means file - * blocks can't be freed and re-allocated until the backup completes. - * The checkpoint code checks the backup flag, and if a backup cursor - * is open checkpoints aren't discarded. We release the lock as soon - * as we've set the flag, we don't want to block checkpoints, we just - * want to make sure no checkpoints are deleted. The checkpoint code - * holds the lock until it's finished the checkpoint, otherwise we - * could start a hot backup that would race with an already-started - * checkpoint. - * - * We are holding the checkpoint and schema locks so schema operations - * will not see the backup file list until it is complete and valid. 
- */ - __wt_writelock(session, &conn->hot_backup_lock); - conn->hot_backup = true; - conn->hot_backup_list = NULL; - __wt_writeunlock(session, &conn->hot_backup_lock); + if (F_ISSET(session, WT_SESSION_BACKUP_DUP) && other != NULL) + WT_RET_MSG(session, EINVAL, + "there is already a duplicate backup cursor open"); - /* We're the lock holder, we own cleanup. */ - F_SET(cb, WT_CURBACKUP_LOCKER); + if (other == NULL) { + /* + * The hot backup copy is done outside of WiredTiger, which + * means file blocks can't be freed and re-allocated until the + * backup completes. The checkpoint code checks the backup flag, + * and if a backup cursor is open checkpoints aren't discarded. + * We release the lock as soon as we've set the flag, we don't + * want to block checkpoints, we just want to make sure no + * checkpoints are deleted. The checkpoint code holds the lock + * until it's finished the checkpoint, otherwise we could start + * a hot backup that would race with an already-started + * checkpoint. + * + * We are holding the checkpoint and schema locks so schema + * operations will not see the backup file list until it is + * complete and valid. + */ + __wt_writelock(session, &conn->hot_backup_lock); + conn->hot_backup = true; + conn->hot_backup_list = NULL; + __wt_writeunlock(session, &conn->hot_backup_lock); + + /* We're the lock holder, we own cleanup. */ + F_SET(cb, WT_CURBACKUP_LOCKER); + + /* + * Create a temporary backup file. This must be opened before + * generating the list of targets in backup_uri. This file will + * later be renamed to the correct name depending on whether or + * not we're doing an incremental backup. We need a temp file + * so that if we fail or crash while filling it, the existence + * of a partial file doesn't confuse restarting in the source + * database. + */ + WT_ERR(__wt_fopen(session, WT_BACKUP_TMP, + WT_FS_OPEN_CREATE, WT_STREAM_WRITE, &cb->bfs)); + } - /* - * Create a temporary backup file. 
This must be opened before - * generating the list of targets in backup_uri. This file will - * later be renamed to the correct name depending on whether or not - * we're doing an incremental backup. We need a temp file so that if - * we fail or crash while filling it, the existence of a partial file - * doesn't confuse restarting in the source database. - */ - WT_ERR(__wt_fopen(session, WT_BACKUP_TMP, - WT_FS_OPEN_CREATE, WT_STREAM_WRITE, &cb->bfs)); /* * If targets were specified, add them to the list. Otherwise it is a * full backup, add all database objects and log files to the list. */ target_list = false; WT_ERR(__backup_uri(session, cfg, &target_list, &log_only)); + /* + * For a duplicate cursor, all the work is done in backup_uri. The only + * usage accepted is "target=("log:")" so error if not log only. + */ + if (other != NULL) { + if (!log_only) + WT_ERR_MSG(session, EINVAL, + "duplicate backup cursor must be for logs only."); + F_SET(cb, WT_CURBACKUP_DUP); + F_SET(session, WT_SESSION_BACKUP_DUP); + goto done; + } if (!target_list) { /* * It's important to first gather the log files to be copied @@ -282,9 +327,15 @@ __backup_start( /* Add the hot backup and standard WiredTiger files to the list. */ if (log_only) { /* - * We also open an incremental backup source file so that we - * can detect a crash with an incremental backup existing in - * the source directory versus an improper destination. + * If this is not a duplicate cursor, using the log target is an + * incremental backup. If this is a duplicate cursor then using + * the log target on an existing backup cursor means this cursor + * returns the current list of log files. That list was set up + * when parsing the URI so we don't have anything to do here. + * + * We also open an incremental backup source file so that we can + * detect a crash with an incremental backup existing in the + * source directory versus an improper destination. 
*/ dest = WT_INCREMENTAL_BACKUP; WT_ERR(__wt_fopen(session, WT_INCREMENTAL_SRC, @@ -306,7 +357,8 @@ __backup_start( } err: /* Close the hot backup file. */ - WT_TRET(__wt_fclose(session, &cb->bfs)); + if (cb->bfs != NULL) + WT_TRET(__wt_fclose(session, &cb->bfs)); if (srcfs != NULL) WT_TRET(__wt_fclose(session, &srcfs)); if (ret == 0) { @@ -315,8 +367,10 @@ err: /* Close the hot backup file. */ __wt_writelock(session, &conn->hot_backup_lock); conn->hot_backup_list = cb->list; __wt_writeunlock(session, &conn->hot_backup_lock); + F_SET(session, WT_SESSION_BACKUP_CURSOR); } +done: return (ret); } @@ -329,27 +383,26 @@ __backup_stop(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; - int i; conn = S2C(session); /* Release all btree names held by the backup. */ + WT_ASSERT(session, !F_ISSET(cb, WT_CURBACKUP_DUP)); + /* If it's not a dup backup cursor, make sure one isn't open. */ + WT_ASSERT(session, !F_ISSET(session, WT_SESSION_BACKUP_DUP)); __wt_writelock(session, &conn->hot_backup_lock); conn->hot_backup_list = NULL; __wt_writeunlock(session, &conn->hot_backup_lock); - if (cb->list != NULL) { - for (i = 0; cb->list[i] != NULL; ++i) - __wt_free(session, cb->list[i]); - __wt_free(session, cb->list); - } + __backup_free(session, cb); /* Remove any backup specific file. */ WT_TRET(__wt_backup_file_remove(session)); - /* Checkpoint deletion can proceed, as can the next hot backup. */ + /* Checkpoint deletion and next hot backup can proceed. 
*/ __wt_writelock(session, &conn->hot_backup_lock); conn->hot_backup = false; __wt_writeunlock(session, &conn->hot_backup_lock); + F_CLR(session, WT_SESSION_BACKUP_CURSOR); return (ret); } diff --git a/src/third_party/wiredtiger/src/docs/Doxyfile b/src/third_party/wiredtiger/src/docs/Doxyfile index 60b6c4690b0..567a8cb92fb 100644 --- a/src/third_party/wiredtiger/src/docs/Doxyfile +++ b/src/third_party/wiredtiger/src/docs/Doxyfile @@ -210,7 +210,7 @@ ALIASES = \ configempty{2}="@param config configuration string, see @ref config_strings. No values currently permitted." \ configend=" </table>" \ configstart{2}="@param config configuration string, see @ref config_strings. Permitted values:\n <table>@hrow{Name,Effect,Values}" \ -"ebusy_errors=@returns zero on success, EBUSY if the object is not available for exclusive access, and a non-zero error code on failure. See @ref error_handling \"Error handling\" for details." \ + ebusy_errors="@returns zero on success, EBUSY if the object is not available for exclusive access, and a non-zero error code on failure. See @ref error_handling \"Error handling\" for details." \ errors="@returns zero on success and a non-zero error code on failure. See @ref error_handling \"Error handling\" for details." \ exclusive="This method requires exclusive access to the specified data source(s). If any cursors are open with the specified name(s) or a data source is otherwise in use, the call will fail and return \c EBUSY.\n\n" \ ex_ref{1}="@ref \1 \"\1\"" \ diff --git a/src/third_party/wiredtiger/src/docs/compression.dox b/src/third_party/wiredtiger/src/docs/compression.dox index 74bed5c6f68..147c4fe2936 100644 --- a/src/third_party/wiredtiger/src/docs/compression.dox +++ b/src/third_party/wiredtiger/src/docs/compression.dox @@ -121,7 +121,7 @@ an extension. 
For example, with the WiredTiger library installed in @snippet ex_all.c Configure zstd extension -The default compression level for the zstd compression is 3; compression +The default compression level for the zstd compression is 6; compression can be configured to other levels using the additional configuration argument \c compression_level. diff --git a/src/third_party/wiredtiger/src/docs/cursors.dox b/src/third_party/wiredtiger/src/docs/cursors.dox index b6271951f91..6244f02514a 100644 --- a/src/third_party/wiredtiger/src/docs/cursors.dox +++ b/src/third_party/wiredtiger/src/docs/cursors.dox @@ -63,7 +63,8 @@ At \c read-committed (the default) or \c snapshot isolation levels, committed changes from concurrent transactions become visible when no cursor is positioned. In other words, at these isolation levels, all cursors in a session read from a stable snapshot while any cursor in the -session remains positioned. +session remains positioned. A call to WT_CURSOR::next or WT_CURSOR::prev +on a positioned cursor will not update the snapshot. Cursor positions survive transaction boundaries, unless a transaction is rolled back. 
When a transaction is rolled-back either implicitly diff --git a/src/third_party/wiredtiger/src/docs/spell.ok b/src/third_party/wiredtiger/src/docs/spell.ok index d5347709248..b336b5ab17a 100644 --- a/src/third_party/wiredtiger/src/docs/spell.ok +++ b/src/third_party/wiredtiger/src/docs/spell.ok @@ -152,6 +152,7 @@ colgroups combinatorial command's comparator +compressibility cond config configurign @@ -355,7 +356,6 @@ nolock nolocking nommap nop -noraw nosql nosync notgranted @@ -382,9 +382,9 @@ perf petabyte pget php +pid plantuml png -pid posix pre prepends @@ -465,7 +465,6 @@ substring superset sys sz -tid tRuE tablename tcl @@ -473,6 +472,7 @@ tcmalloc td th thang +tid timestamp timestamps todo diff --git a/src/third_party/wiredtiger/src/docs/tune-page-size-and-comp.dox b/src/third_party/wiredtiger/src/docs/tune-page-size-and-comp.dox index 96b0fda2333..9659d389bf0 100644 --- a/src/third_party/wiredtiger/src/docs/tune-page-size-and-comp.dox +++ b/src/third_party/wiredtiger/src/docs/tune-page-size-and-comp.dox @@ -107,7 +107,7 @@ trade-off between frequency of exclusive access to the pages (for reconciliation or splitting pages into smaller pages) versus the duration that the exclusive access is required. - Configuration: -\n Specified as memory_page_max configuration option to WT_SESSION::create(). An +\n Specified as memory_page_max configuration option to WT_SESSION::create. An example of such a configuration string is as follows: <pre> @@ -141,7 +141,7 @@ number of keys that can be stored in an internal page, which is internal_page_max divided by key size. Applications should choose an appropriate internal_page_max size that avoids the B-Tree from getting too deep. - Configuration: -\n Specified as internal_page_max configuration option to WT_SESSION::create(). +\n Specified as internal_page_max configuration option to WT_SESSION::create. 
An example of such a configuration string is as follows: <pre> @@ -167,7 +167,7 @@ increase leaf_page_max to transfer more data per I/O. - Applications focused on read/write amplification might decrease the page size to better match the underlying storage block size. - Configuration: -\n Specified as leaf_page_max configuration option to WT_SESSION::create(). An +\n Specified as leaf_page_max configuration option to WT_SESSION::create. An example of such a configuration string is as follows: <pre> @@ -193,7 +193,7 @@ on the platform (4KB for most common server platforms) 18,000 bytes requires 5 allocation units and wastes about 2KB of space. If the allocation size is 16KB, the same overflow item would waste more than 10KB. - Configuration: -\n Specified as allocation_size configuration option to WT_SESSION::create(). An +\n Specified as allocation_size configuration option to WT_SESSION::create. An example of such a configuration string is as follows: <pre> @@ -260,7 +260,7 @@ size will be ignored when the larger keys and values are being written, and a larger page will be created as necessary. - Configuration: \n Specified as internal_key_max, leaf_key_max and leaf_value_max configuration -options to WT_SESSION::create(). An example of configuration string for a large +options to WT_SESSION::create. An example of configuration string for a large leaf overflow value: <pre> @@ -287,7 +287,7 @@ pages grow to 1MB over and over. The default value for split_pct is 75%, intended to keep large pages relatively large, while still giving split pages room to grow. - Configuration: -\n Specified as split_pct configuration option to WT_SESSION::create(). An +\n Specified as split_pct configuration option to WT_SESSION::create. 
An example of such a configuration string is as follows: <pre> @@ -297,11 +297,11 @@ example of such a configuration string is as follows: @section compression_considerations Compression considerations WiredTiger compresses data at several stages to preserve memory and disk space. Applications can configure these different compression algorithms to tailor -their requirements between memory, disk and CPU consumption. Compression +their requirements between memory, disk and CPU consumption. Compression algorithms other than block compression work by modifying how the keys and -values are represented, and hence reduce data size in-memory and on-disk. Block -compression on the other hand compress the data in its binary representation -while saving it on the disk. +values are represented, and will reduce data size both in-memory and on-disk. +Block compression compresses the data in its binary representation while +saving it on the disk, and so only reduces the data size on-disk. Configuring compression may change application throughput. For example, in applications using solid-state drives (where I/O is less expensive), turning @@ -310,102 +310,90 @@ applications where I/O costs are more expensive, turning on compression may increase application performance by reducing the overall number of I/O operations. -WiredTiger uses some internal algorithms to compress the amount of data stored -that are not configurable, but always on. For example, run-length reduces the -size requirement by storing sequential, duplicate values in the store only a -single time (with an associated count). +WiredTiger uses some internal algorithms to compress the amount of data +stored that are not configurable, but always on. For example, run-length +encoding reduces in-memory and on-disk size requirements by storing +sequential, duplicate values in a column-store object only a single time. 
Different compression options available with WiredTiger: - - Key-prefix - - Reduces the size requirement by storing any identical key prefix only once -per page. The cost is additional CPU and memory when operating on the in-memory -tree. Specifically, reverse sequential cursor movement (but not forward) through -a prefix-compressed page or the random lookup of a key/value pair will allocate -sufficient memory to hold some number of uncompressed keys. So, for example, if -key prefix compression only saves a small number of bytes per key, the -additional memory cost of instantiating the uncompressed key may mean prefix -compression is not worthwhile. Further, in cases where the on-disk cost is the -primary concern, block compression may mean prefix compression is less useful. - - Configuration: -\n Specified as prefix_compression configuration option to -WT_SESSION::create(). Applications may limit the use of prefix compression by -configuring the minimum number of bytes that must be gained before prefix -compression is used with prefix_compression_min configuration option. An example -of such a configuration string is as follows: - +- Key-prefix +\n +Reduces the size requirement by storing any identical key prefix +only once per page. The cost is additional CPU and memory when operating on +the in-memory tree. Specifically, reverse sequential cursor movement (but not +forward) through a prefix-compressed page or the random lookup of a key/value +pair will allocate sufficient memory to hold some number of uncompressed keys. +So, for example, if key prefix compression only saves a small number of bytes +per key, the additional memory cost of instantiating the uncompressed key may +mean prefix compression is not worthwhile. Further, in cases where the on-disk +cost is the primary concern, block compression may mean prefix compression is +less useful. 
+\n\n +Key-prefix configuration: +\n +Specified using \c the prefix_compression configuration option to +WT_SESSION::create. Applications may limit the use of prefix compression +by configuring the minimum number of bytes that must be gained before +prefix compression is used with the \c prefix_compression_min configuration +option. An example of such a configuration string is as follows: <pre> - "key_format=S,value_format=S,prefix_compression=true,prefix_compression_min=7" +"key_format=S,value_format=S,prefix_compression=true,prefix_compression_min=7" </pre> - - Dictionary - - Reduces the size requirement by storing any identical value only once per -page. - - Configuration: -\n Specified as dictionary configuration configuration option to -WT_SESSION::create(), which specifies the maximum number of unique values +- Dictionary +\n +Reduces the size requirement by storing any identical value only once per page. +\n\n +Dictionary configuration: +\n +Specified using the \c dictionary configuration configuration option to +WT_SESSION::create, which specifies the maximum number of unique values remembered in the B-Tree row-store leaf page value dictionary. An example of such a configuration string is as follows: - <pre> - "key_format=S,value_format=S,dictionary=1000" +"key_format=S,value_format=S,dictionary=1000" </pre> - - Huffman - - Reduces the size requirement by compressing individual key/value items, and -can be separately configured either or both keys and values. The additional CPU -cost of Huffman encoding can be high, and should be considered. (See Huffman -Encoding for details.) - - Configuration: -\n Specified as huffman_key and/or huffman_value configuration option to -WT_SESSION::create(). These options can take values of "english" (to use a -built-in English language frequency table), "utf8<file>" or "utf16<file>" (to -use a custom utf8 or utf16 symbol frequency table file). 
An example of such a -configuration string is as follows: - +- Huffman +\n +Reduces the size requirement by compressing individual key/value items, and +can be separately configured either or both keys and values. The additional +CPU cost of Huffman encoding can be high, and should be considered. (See @ref +huffman for details.) +\n\n +Huffman configuration: +\n +Specified using the \c huffman_key and \c huffman_value configuration +options to WT_SESSION::create. These options can take values of "english" +(to use a built-in English language frequency table), "utf8<file>" or +"utf16<file>" (to use a custom UTF-8 or UTF-16 symbol frequency table file). +An example of such a configuration string is as follows: <pre> - "key_format=S,value_format=S,huffman_key=english,huffman_value=english" +"key_format=S,value_format=S,huffman_key=english,huffman_value=english" </pre> - - Block Compression - - Reduces the size requirement of on-disk objects by compressing blocks of +- Block Compression +\n +Reduces the size requirement of on-disk objects by compressing blocks of the backing object's file. The additional CPU cost of block compression can be high, and should be considered. When block compression has been configured, configured page sizes will not match the actual size of the page on disk. - - WiredTiger provides two methods of compressing your data when using block -compression: the raw and noraw methods. These methods change how WiredTiger -works to fit data into the blocks that are stored on disk. Applications needing -to write specific sized blocks may want to consider implementing a -WT_COMPRESSOR::compress_raw function. - - Noraw compression: -\n A fixed amount of data is given to the compression system, then turned into -a compressed block of data. The amount of data chosen to compress is the data -needed to fill the uncompressed block. 
Thus when compressed, the block will be -smaller than the normal data size and the sizes written to disk will often vary -depending on how compressible the data being stored is. Algorithms using noraw -compression include zlib-noraw, lz4-noraw and snappy. -Noraw compression is better suited for workloads with random access patterns -because each block will tend to be smaller and require less work to read and -decompress. - - Raw compression: -\n WiredTiger's raw compression takes advantage of compressors that provide a -streaming compression API. Using the streaming API WiredTiger will try to fit as -much data as possible into one block. This means that blocks created with raw -compression should be of similar size. Using a streaming compression method -should also make for less overhead in compression, as the setup and initial work -for compressing is done fewer times compared to the amount of data stored. -Algorithms using raw compression include zlib, lz4. -Compared to noraw, raw compression provides more compression while using more -CPU. Raw compression may provide a performance advantage in workloads where data -is accessed sequentially. That is because more data is generally packed into -each block on disk. - - Configuration: -\n Specified as the block_compressor configuration option to -WT_SESSION::create(). If WiredTiger has builtin support for "lz4", "snappy", -"zlib" or "zstd" compression, these names are available as the value to the -option. An example of such a configuration string is as follows: - +\n +When block compression is configured, chunks of data are given to the +compression system, then returned as a compressed block of data. The amount of +data chosen to compress is based on previous compression results. When +compressed, the size written to disk will vary depending on the compressibility +of the stored data. +\n\n +Block compression configuration: +\n +Specified using the \c block_compressor configuration option to +WT_SESSION::create. 
If WiredTiger was built with support for "lz4", "snappy", +"zlib" or "zstd" compression, these names are available as the value to +the configuration option. An example of such a configuration string is as +follows: <pre> - "key_format=S,value_format=S,block_compressor=snappy" +"key_format=S,value_format=S,block_compressor=snappy" </pre> See @ref compression for further information on how to configure and enable diff --git a/src/third_party/wiredtiger/src/docs/upgrading.dox b/src/third_party/wiredtiger/src/docs/upgrading.dox index 3428e2781d9..5895650ca5e 100644 --- a/src/third_party/wiredtiger/src/docs/upgrading.dox +++ b/src/third_party/wiredtiger/src/docs/upgrading.dox @@ -4,6 +4,17 @@ @section version_311 Upgrading to Version 3.1.1 <dl> +<dt>Raw compression</dt> +<dd> +Support for "raw compression" has been removed in the 3.1.1 release. Only +applications configuring their own compressors will require modification, +those applications should remove their initialization of the \c WT_COMPRESSOR +structure's \c WT_COMPRESSOR::compress_raw field. Applications configuring +the "lz4" or "zlib" compressors (the existing WiredTiger compressors that +supported raw compression), will work without change, but applications may +see different compression ratios from previous releases. +</dd> + <dt>WiredTiger timestamps</dt> <dd> In previous releases of WiredTiger, it was possible to disable timestamp @@ -11,6 +22,7 @@ support as well as to configure a timestamp size different from the 64-bit default, using the <code>--with-timestamp-size=X</code> configuration option. That is no longer the case, in the 3.1.1 release, timestamps are always configured, and are always 64-bit unsigned integers. 
+</dd> <dt>WT_CURSOR::modify transaction requirements</dt> <dd> diff --git a/src/third_party/wiredtiger/src/include/cell.i b/src/third_party/wiredtiger/src/include/cell.i index 214d13b0206..cea27e1b26f 100644 --- a/src/third_party/wiredtiger/src/include/cell.i +++ b/src/third_party/wiredtiger/src/include/cell.i @@ -818,29 +818,3 @@ __wt_page_cell_data_ref(WT_SESSION_IMPL *session, { return (__cell_data_ref(session, page, page->type, unpack, store)); } - -/* - * __wt_cell_data_copy -- - * Copy the data from an unpacked cell into a buffer. - */ -static inline int -__wt_cell_data_copy(WT_SESSION_IMPL *session, - int page_type, WT_CELL_UNPACK *unpack, WT_ITEM *store) -{ - /* - * We have routines to both copy and reference a cell's information. In - * most cases, all we need is a reference and we prefer that, especially - * when returning key/value items. In a few we need a real copy: call - * the standard reference function and get a reference. In some cases, - * a copy will be made (for example, when reading an overflow item from - * the underlying object. If that happens, we're done, otherwise make - * a copy. - * - * We don't require two versions of this function, no callers need to - * handle WT_CELL_VALUE_OVFL_RM cells. 
- */ - WT_RET(__wt_dsk_cell_data_ref(session, page_type, unpack, store)); - if (!WT_DATA_IN_ITEM(store)) - WT_RET(__wt_buf_set(session, store, store->data, store->size)); - return (0); -} diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h index bb96b8d18b0..5fb0cee2b91 100644 --- a/src/third_party/wiredtiger/src/include/connection.h +++ b/src/third_party/wiredtiger/src/include/connection.h @@ -341,6 +341,7 @@ struct __wt_connection_impl { WT_LOG *log; /* Logging structure */ WT_COMPRESSOR *log_compressor;/* Logging compressor */ uint32_t log_cursors; /* Log cursor count */ + wt_off_t log_dirty_max; /* Log dirty system cache max size */ wt_off_t log_file_max; /* Log file max size */ const char *log_path; /* Logging path format */ uint32_t log_prealloc; /* Log file pre-allocation */ diff --git a/src/third_party/wiredtiger/src/include/cursor.h b/src/third_party/wiredtiger/src/include/cursor.h index 55a4cdb7a13..bac2a1d7a20 100644 --- a/src/third_party/wiredtiger/src/include/cursor.h +++ b/src/third_party/wiredtiger/src/include/cursor.h @@ -80,7 +80,8 @@ struct __wt_cursor_backup { size_t list_next; /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_CURBACKUP_LOCKER 0x1u /* Hot-backup started */ +#define WT_CURBACKUP_DUP 0x1u /* Duplicated backup cursor */ +#define WT_CURBACKUP_LOCKER 0x2u /* Hot-backup started */ /* AUTOMATIC FLAG VALUE GENERATION STOP */ uint8_t flags; }; diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index ca7f0597f88..d4a72bc340b 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -298,7 +298,7 @@ extern int __wt_statlog_destroy(WT_SESSION_IMPL *session, bool is_close) WT_GCC_ extern int __wt_sweep_config(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_sweep_create(WT_SESSION_IMPL 
*session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_sweep_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_curbackup_open(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curbackup_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *other, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_backup_file_remove(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_curbulk_init(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool bitmap, bool skip_sort_check) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_curconfig_open(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); diff --git a/src/third_party/wiredtiger/src/include/log.h b/src/third_party/wiredtiger/src/include/log.h index b75f375dbee..389a33db8be 100644 --- a/src/third_party/wiredtiger/src/include/log.h +++ b/src/third_party/wiredtiger/src/include/log.h @@ -211,10 +211,11 @@ struct __wt_logslot { WT_ITEM slot_buf; /* Buffer for grouped writes */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_SLOT_CLOSEFH 0x1u /* Close old fh on release */ -#define WT_SLOT_FLUSH 0x2u /* Wait for write */ -#define WT_SLOT_SYNC 0x4u /* Needs sync on release */ -#define WT_SLOT_SYNC_DIR 0x8u /* Directory sync on release */ +#define WT_SLOT_CLOSEFH 0x01u /* Close old fh on release */ +#define WT_SLOT_FLUSH 0x02u /* Wait for write */ +#define WT_SLOT_SYNC 0x04u /* Needs sync on release */ +#define WT_SLOT_SYNC_DIR 0x08u /* Directory sync on release */ +#define WT_SLOT_SYNC_DIRTY 0x10u /* Sync system buffers on release */ /* AUTOMATIC FLAG VALUE GENERATION STOP */ uint32_t flags; WT_CACHE_LINE_PAD_END @@ -222,6 +223,11 @@ struct __wt_logslot { #define 
WT_SLOT_INIT_FLAGS 0 +#define WT_SLOT_SYNC_FLAGS \ + (WT_SLOT_SYNC | \ + WT_SLOT_SYNC_DIR | \ + WT_SLOT_SYNC_DIRTY) + #define WT_WITH_SLOT_LOCK(session, log, op) do { \ WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_SLOT)); \ WT_WITH_LOCK_WAIT(session, \ @@ -267,6 +273,7 @@ struct __wt_log { WT_LSN alloc_lsn; /* Next LSN for allocation */ WT_LSN bg_sync_lsn; /* Latest background sync LSN */ WT_LSN ckpt_lsn; /* Last checkpoint LSN */ + WT_LSN dirty_lsn; /* LSN of last non-synced write */ WT_LSN first_lsn; /* First LSN */ WT_LSN sync_dir_lsn; /* LSN of the last directory sync */ WT_LSN sync_lsn; /* LSN of the last sync */ diff --git a/src/third_party/wiredtiger/src/include/session.h b/src/third_party/wiredtiger/src/include/session.h index 092f2259edd..294ca1503a5 100644 --- a/src/third_party/wiredtiger/src/include/session.h +++ b/src/third_party/wiredtiger/src/include/session.h @@ -163,30 +163,32 @@ struct __wt_session_impl { u_int stat_bucket; /* Statistics bucket offset */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_SESSION_CACHE_CURSORS 0x000001u -#define WT_SESSION_CAN_WAIT 0x000002u -#define WT_SESSION_IGNORE_CACHE_SIZE 0x000004u -#define WT_SESSION_INTERNAL 0x000008u -#define WT_SESSION_LOCKED_CHECKPOINT 0x000010u -#define WT_SESSION_LOCKED_HANDLE_LIST_READ 0x000020u -#define WT_SESSION_LOCKED_HANDLE_LIST_WRITE 0x000040u -#define WT_SESSION_LOCKED_METADATA 0x000080u -#define WT_SESSION_LOCKED_PASS 0x000100u -#define WT_SESSION_LOCKED_SCHEMA 0x000200u -#define WT_SESSION_LOCKED_SLOT 0x000400u -#define WT_SESSION_LOCKED_TABLE_READ 0x000800u -#define WT_SESSION_LOCKED_TABLE_WRITE 0x001000u -#define WT_SESSION_LOCKED_TURTLE 0x002000u -#define WT_SESSION_LOGGING_INMEM 0x004000u -#define WT_SESSION_LOOKASIDE_CURSOR 0x008000u -#define WT_SESSION_NO_DATA_HANDLES 0x010000u -#define WT_SESSION_NO_LOGGING 0x020000u -#define WT_SESSION_NO_RECONCILE 0x040000u -#define WT_SESSION_NO_SCHEMA_LOCK 0x080000u -#define WT_SESSION_QUIET_CORRUPT_FILE 0x100000u 
-#define WT_SESSION_READ_WONT_NEED 0x200000u -#define WT_SESSION_SCHEMA_TXN 0x400000u -#define WT_SESSION_SERVER_ASYNC 0x800000u +#define WT_SESSION_BACKUP_CURSOR 0x0000001u +#define WT_SESSION_BACKUP_DUP 0x0000002u +#define WT_SESSION_CACHE_CURSORS 0x0000004u +#define WT_SESSION_CAN_WAIT 0x0000008u +#define WT_SESSION_IGNORE_CACHE_SIZE 0x0000010u +#define WT_SESSION_INTERNAL 0x0000020u +#define WT_SESSION_LOCKED_CHECKPOINT 0x0000040u +#define WT_SESSION_LOCKED_HANDLE_LIST_READ 0x0000080u +#define WT_SESSION_LOCKED_HANDLE_LIST_WRITE 0x0000100u +#define WT_SESSION_LOCKED_METADATA 0x0000200u +#define WT_SESSION_LOCKED_PASS 0x0000400u +#define WT_SESSION_LOCKED_SCHEMA 0x0000800u +#define WT_SESSION_LOCKED_SLOT 0x0001000u +#define WT_SESSION_LOCKED_TABLE_READ 0x0002000u +#define WT_SESSION_LOCKED_TABLE_WRITE 0x0004000u +#define WT_SESSION_LOCKED_TURTLE 0x0008000u +#define WT_SESSION_LOGGING_INMEM 0x0010000u +#define WT_SESSION_LOOKASIDE_CURSOR 0x0020000u +#define WT_SESSION_NO_DATA_HANDLES 0x0040000u +#define WT_SESSION_NO_LOGGING 0x0080000u +#define WT_SESSION_NO_RECONCILE 0x0100000u +#define WT_SESSION_NO_SCHEMA_LOCK 0x0200000u +#define WT_SESSION_QUIET_CORRUPT_FILE 0x0400000u +#define WT_SESSION_READ_WONT_NEED 0x0800000u +#define WT_SESSION_SCHEMA_TXN 0x1000000u +#define WT_SESSION_SERVER_ASYNC 0x2000000u /* AUTOMATIC FLAG VALUE GENERATION STOP */ uint32_t flags; diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h index 3714d03d1ce..9ac1e6c619a 100644 --- a/src/third_party/wiredtiger/src/include/stat.h +++ b/src/third_party/wiredtiger/src/include/stat.h @@ -791,9 +791,6 @@ struct __wt_dsrc_stats { int64_t compress_write; int64_t compress_write_fail; int64_t compress_write_too_small; - int64_t compress_raw_fail_temporary; - int64_t compress_raw_fail; - int64_t compress_raw_ok; int64_t cursor_insert_bulk; int64_t cursor_cache; int64_t cursor_create; diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in 
b/src/third_party/wiredtiger/src/include/wiredtiger.in index f04ad40693e..bf96e953ec1 100644 --- a/src/third_party/wiredtiger/src/include/wiredtiger.in +++ b/src/third_party/wiredtiger/src/include/wiredtiger.in @@ -1168,6 +1168,15 @@ struct __wt_session { * @config{ enabled, if false\, this object has * checkpoint-level durability., a boolean flag; default \c true.} * @config{ ),,} + * @config{os_cache_dirty_max, maximum dirty system buffer cache usage\, + * in bytes. If non-zero\, schedule writes for dirty blocks belonging + * to this object in the system buffer cache after that many bytes from + * this object are written into the buffer cache., an integer greater + * than or equal to 0; default \c 0.} + * @config{os_cache_max, maximum system buffer cache usage\, in bytes. + * If non-zero\, evict object blocks from the system buffer cache after + * that many bytes from this object are read or written into the buffer + * cache., an integer greater than or equal to 0; default \c 0.} * @configend * @errors */ @@ -2281,6 +2290,12 @@ struct __wt_connection { * configuration options defined below.} * @config{ archive, automatically archive * unneeded log files., a boolean flag; default \c true.} + * @config{ os_cache_dirty_pct, maximum dirty + * system buffer cache usage\, as a percentage of the log's \c file_max. 
+ * If non-zero\, schedule writes for dirty blocks belonging to the log + * in the system buffer cache after that percentage of the log has been + * written into the buffer cache without an intervening file sync., an + * integer between 0 and 100; default \c 0.} * @config{ prealloc, pre-allocate log files., a * boolean flag; default \c true.} * @config{ zero_fill, manually write zeroes into @@ -2938,9 +2953,15 @@ struct __wt_connection { * logging subsystem., a boolean flag; default \c false.} * @config{ file_max, the maximum size of log files., an * integer between 100KB and 2GB; default \c 100MB.} - * @config{ path, the name of a directory into which log - * files are written. The directory must already exist. If the value is not an - * absolute path\, the path is relative to the database home (see @ref + * @config{ os_cache_dirty_pct, maximum dirty system + * buffer cache usage\, as a percentage of the log's \c file_max. If non-zero\, + * schedule writes for dirty blocks belonging to the log in the system buffer + * cache after that percentage of the log has been written into the buffer cache + * without an intervening file sync., an integer between 0 and 100; default \c + * 0.} + * @config{ path, the name of a directory into which + * log files are written. The directory must already exist. If the value is + * not an absolute path\, the path is relative to the database home (see @ref * absolute_path for more information)., a string; default \c ".".} * @config{ prealloc, pre-allocate log files., a boolean * flag; default \c true.} @@ -3612,10 +3633,43 @@ struct __wt_config_parser { uint32_t (*wiredtiger_crc32c_func(void))(const void *, size_t) WT_ATTRIBUTE_LIBRARY_VISIBLE; -/*! @} */ #endif /* !defined(SWIG) */ /*! + * Calculate a set of WT_MODIFY operations to represent an update. + * This call will calculate a set of modifications to an old value that produce + * the new value. 
If more modifications are required than fit in the array + * passed in by the caller, or if more bytes have changed than the \c maxdiff + * parameter, the call will fail. The matching algorithm is approximate, so it + * may fail and return WT_NOTFOUND if a matching set of WT_MODIFY operations + * is not found. + * + * The \c maxdiff parameter bounds how much work will be done searching for a + * match: to ensure a match is found, it may need to be set larger than actual + * number of bytes that differ between the old and new values. In particular, + * repeated patterns of bytes in the values can lead to suboptimal matching, + * and matching ranges less than 64 bytes long will not be detected. + * + * If the call succeeds, the WT_MODIFY operations will point into \c newv, + * which must remain valid until WT_CURSOR::modify is called. + * + * @snippet ex_all.c Calculate a modify operation + * + * @param session the current WiredTiger session (may be NULL) + * @param oldv old value + * @param newv new value + * @param maxdiff maximum bytes difference + * @param[out] entries array of modifications producing the new value + * @param[in,out] nentriesp size of \c entries array passed in, + * set to the number of entries used + * @errors + */ +int wiredtiger_calc_modify(WT_SESSION *session, + const WT_ITEM *oldv, const WT_ITEM *newv, + size_t maxdiff, WT_MODIFY *entries, int *nentriesp) + WT_ATTRIBUTE_LIBRARY_VISIBLE; + +/*! * Get version information. * * @snippet ex_all.c Get the WiredTiger library version #1 @@ -3629,6 +3683,8 @@ uint32_t (*wiredtiger_crc32c_func(void))(const void *, size_t) const char *wiredtiger_version(int *majorp, int *minorp, int *patchp) WT_ATTRIBUTE_LIBRARY_VISIBLE; +/*! @} */ + /******************************************* * Error returns *******************************************/ @@ -3851,121 +3907,6 @@ struct __wt_compressor { size_t *result_lenp, int *compression_failed); /*! - * Callback to compress a list of byte strings. 
- * - * WT_COMPRESSOR::compress_raw gives applications fine-grained control - * over disk block size when writing row-store or variable-length - * column-store pages. Where this level of control is not required by - * the underlying storage device, set the WT_COMPRESSOR::compress_raw - * callback to \c NULL and WiredTiger will internally split each page - * into blocks, each block then compressed by WT_COMPRESSOR::compress. - * - * WT_COMPRESSOR::compress_raw takes a source buffer and an array of - * 0-based offsets of byte strings in that buffer. The callback then - * encodes none, some or all of the byte strings and copies the encoded - * representation into a destination buffer. The callback returns the - * number of byte strings encoded and the bytes needed for the encoded - * representation. The encoded representation has header information - * prepended and is written as a block to the underlying file object. - * - * On entry, \c page_max is the configured maximum size for objects of - * this type. (This value is provided for convenience, and will be - * either the \c internal_page_max or \c leaf_page_max value specified - * to WT_SESSION::create when the object was created.) - * - * On entry, \c split_pct is the configured Btree page split size for - * this object. (This value is provided for convenience, and will be - * the \c split_pct value specified to WT_SESSION::create when the - * object was created.) - * - * On entry, \c extra is a count of additional bytes that will be added - * to the encoded representation before it is written. In other words, - * if the target write size is 8KB, the returned encoded representation - * should be less than or equal to (8KB - \c extra). The method does - * not need to skip bytes in the destination buffer based on \c extra, - * the method should only use \c extra to decide how many bytes to store - * into the destination buffer for its ideal block size. 
- * - * On entry, \c src points to the source buffer; \c offsets is an array - * of \c slots 0-based offsets into \c src, where each offset is the - * start of a byte string, except for the last offset, which is the - * offset of the first byte past the end of the last byte string. (In - * other words, <code>offsets[0]</code> will be 0, the offset of the - * first byte of the first byte string in \c src, and - * <code>offsets[slots]</code> is the total length of all of the byte - * strings in the \c src buffer.) - * - * On entry, \c dst points to the destination buffer with a length - * of \c dst_len. If the WT_COMPRESSOR::pre_size method is specified, - * the destination buffer will be at least the size returned by that - * method; otherwise, the destination buffer will be at least as large - * as the length of the data to compress. - * - * After successful completion, the callback should return \c 0, and - * set \c result_slotsp to the number of byte strings encoded and - * \c result_lenp to the bytes needed for the encoded representation. - * - * There is no requirement the callback encode any or all of the byte - * strings passed by WiredTiger. If the callback does not encode any - * of the byte strings and compression should not be retried, the - * callback should set \c result_slotsp to 0. - * - * If the callback does not encode any of the byte strings and - * compression should be retried with additional byte strings, the - * callback must return \c EAGAIN. In that case, WiredTiger will - * accumulate more rows and repeat the call. - * - * If there are no more rows to accumulate or the callback indicates - * that it cannot be retried, WiredTiger writes the remaining rows - * using \c WT_COMPRESSOR::compress. - * - * On entry, \c final is zero if there are more rows to be written as - * part of this page (if there will be additional data provided to the - * callback), and non-zero if there are no more rows to be written as - * part of this page. 
If \c final is set and the callback fails to - * encode any rows, WiredTiger writes the remaining rows without further - * calls to the callback. If \c final is set and the callback encodes - * any number of rows, WiredTiger continues to call the callback until - * all of the rows are encoded or the callback fails to encode any rows. - * - * The WT_COMPRESSOR::compress_raw callback is intended for applications - * wanting to create disk blocks in specific sizes. - * WT_COMPRESSOR::compress_raw is not a replacement for - * WT_COMPRESSOR::compress: objects which WT_COMPRESSOR::compress_raw - * cannot handle (for example, overflow key or value items), or which - * WT_COMPRESSOR::compress_raw chooses not to compress for any reason - * (for example, if WT_COMPRESSOR::compress_raw callback chooses not to - * compress a small number of rows, but the page being written has no - * more rows to accumulate), will be passed to WT_COMPRESSOR::compress. - * - * The WT_COMPRESSOR::compress_raw callback is only called for objects - * where it is applicable, that is, for row-store and variable-length - * column-store objects, where both row-store key prefix compression - * and row-store and variable-length column-store dictionary compression - * are \b not configured. When WT_COMPRESSOR::compress_raw is not - * applicable, the WT_COMPRESSOR::compress callback is used instead. 
- * - * @param[in] page_max the configured maximum page size for this object - * @param[in] split_pct the configured page split size for this object - * @param[in] extra the count of the additional bytes - * @param[in] src the data to compress - * @param[in] offsets the byte offsets of the byte strings in src - * @param[in] slots the number of entries in offsets - * @param[in] dst the destination buffer - * @param[in] dst_len the length of the destination buffer - * @param[in] final non-zero if there are no more rows to accumulate - * @param[out] result_lenp the length of the compressed data - * @param[out] result_slotsp the number of byte offsets taken - * @returns zero for success, non-zero to indicate an error. - */ - int (*compress_raw)(WT_COMPRESSOR *compressor, WT_SESSION *session, - size_t page_max, int split_pct, size_t extra, - uint8_t *src, uint32_t *offsets, uint32_t slots, - uint8_t *dst, size_t dst_len, - int final, - size_t *result_lenp, uint32_t *result_slotsp); - - /*! * Callback to decompress a chunk of data. * * WT_COMPRESSOR::decompress takes a source buffer and a destination @@ -6095,89 +6036,83 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_DSRC_COMPRESS_WRITE_FAIL 2102 /*! compression: page written was too small to compress */ #define WT_STAT_DSRC_COMPRESS_WRITE_TOO_SMALL 2103 -/*! compression: raw compression call failed, additional data available */ -#define WT_STAT_DSRC_COMPRESS_RAW_FAIL_TEMPORARY 2104 -/*! compression: raw compression call failed, no additional data available */ -#define WT_STAT_DSRC_COMPRESS_RAW_FAIL 2105 -/*! compression: raw compression call succeeded */ -#define WT_STAT_DSRC_COMPRESS_RAW_OK 2106 /*! cursor: bulk-loaded cursor-insert calls */ -#define WT_STAT_DSRC_CURSOR_INSERT_BULK 2107 +#define WT_STAT_DSRC_CURSOR_INSERT_BULK 2104 /*! cursor: close calls that result in cache */ -#define WT_STAT_DSRC_CURSOR_CACHE 2108 +#define WT_STAT_DSRC_CURSOR_CACHE 2105 /*! 
cursor: create calls */ -#define WT_STAT_DSRC_CURSOR_CREATE 2109 +#define WT_STAT_DSRC_CURSOR_CREATE 2106 /*! cursor: cursor operation restarted */ -#define WT_STAT_DSRC_CURSOR_RESTART 2110 +#define WT_STAT_DSRC_CURSOR_RESTART 2107 /*! cursor: cursor-insert key and value bytes inserted */ -#define WT_STAT_DSRC_CURSOR_INSERT_BYTES 2111 +#define WT_STAT_DSRC_CURSOR_INSERT_BYTES 2108 /*! cursor: cursor-remove key bytes removed */ -#define WT_STAT_DSRC_CURSOR_REMOVE_BYTES 2112 +#define WT_STAT_DSRC_CURSOR_REMOVE_BYTES 2109 /*! cursor: cursor-update value bytes updated */ -#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2113 +#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2110 /*! cursor: cursors reused from cache */ -#define WT_STAT_DSRC_CURSOR_REOPEN 2114 +#define WT_STAT_DSRC_CURSOR_REOPEN 2111 /*! cursor: insert calls */ -#define WT_STAT_DSRC_CURSOR_INSERT 2115 +#define WT_STAT_DSRC_CURSOR_INSERT 2112 /*! cursor: modify calls */ -#define WT_STAT_DSRC_CURSOR_MODIFY 2116 +#define WT_STAT_DSRC_CURSOR_MODIFY 2113 /*! cursor: next calls */ -#define WT_STAT_DSRC_CURSOR_NEXT 2117 +#define WT_STAT_DSRC_CURSOR_NEXT 2114 /*! cursor: prev calls */ -#define WT_STAT_DSRC_CURSOR_PREV 2118 +#define WT_STAT_DSRC_CURSOR_PREV 2115 /*! cursor: remove calls */ -#define WT_STAT_DSRC_CURSOR_REMOVE 2119 +#define WT_STAT_DSRC_CURSOR_REMOVE 2116 /*! cursor: reserve calls */ -#define WT_STAT_DSRC_CURSOR_RESERVE 2120 +#define WT_STAT_DSRC_CURSOR_RESERVE 2117 /*! cursor: reset calls */ -#define WT_STAT_DSRC_CURSOR_RESET 2121 +#define WT_STAT_DSRC_CURSOR_RESET 2118 /*! cursor: search calls */ -#define WT_STAT_DSRC_CURSOR_SEARCH 2122 +#define WT_STAT_DSRC_CURSOR_SEARCH 2119 /*! cursor: search near calls */ -#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2123 +#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2120 /*! cursor: truncate calls */ -#define WT_STAT_DSRC_CURSOR_TRUNCATE 2124 +#define WT_STAT_DSRC_CURSOR_TRUNCATE 2121 /*! 
cursor: update calls */ -#define WT_STAT_DSRC_CURSOR_UPDATE 2125 +#define WT_STAT_DSRC_CURSOR_UPDATE 2122 /*! reconciliation: dictionary matches */ -#define WT_STAT_DSRC_REC_DICTIONARY 2126 +#define WT_STAT_DSRC_REC_DICTIONARY 2123 /*! reconciliation: fast-path pages deleted */ -#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2127 +#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2124 /*! * reconciliation: internal page key bytes discarded using suffix * compression */ -#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2128 +#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2125 /*! reconciliation: internal page multi-block writes */ -#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2129 +#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2126 /*! reconciliation: internal-page overflow keys */ -#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2130 +#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2127 /*! reconciliation: leaf page key bytes discarded using prefix compression */ -#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2131 +#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2128 /*! reconciliation: leaf page multi-block writes */ -#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2132 +#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2129 /*! reconciliation: leaf-page overflow keys */ -#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2133 +#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2130 /*! reconciliation: maximum blocks required for a page */ -#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2134 +#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2131 /*! reconciliation: overflow values written */ -#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2135 +#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2132 /*! reconciliation: page checksum matches */ -#define WT_STAT_DSRC_REC_PAGE_MATCH 2136 +#define WT_STAT_DSRC_REC_PAGE_MATCH 2133 /*! reconciliation: page reconciliation calls */ -#define WT_STAT_DSRC_REC_PAGES 2137 +#define WT_STAT_DSRC_REC_PAGES 2134 /*! 
reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_DSRC_REC_PAGES_EVICTION 2138 +#define WT_STAT_DSRC_REC_PAGES_EVICTION 2135 /*! reconciliation: pages deleted */ -#define WT_STAT_DSRC_REC_PAGE_DELETE 2139 +#define WT_STAT_DSRC_REC_PAGE_DELETE 2136 /*! session: cached cursor count */ -#define WT_STAT_DSRC_SESSION_CURSORS_CACHED 2140 +#define WT_STAT_DSRC_SESSION_CURSORS_CACHED 2137 /*! session: object compaction */ -#define WT_STAT_DSRC_SESSION_COMPACT 2141 +#define WT_STAT_DSRC_SESSION_COMPACT 2138 /*! session: open cursor count */ -#define WT_STAT_DSRC_SESSION_CURSOR_OPEN 2142 +#define WT_STAT_DSRC_SESSION_CURSOR_OPEN 2139 /*! transaction: update conflicts */ -#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2143 +#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2140 /*! * @} diff --git a/src/third_party/wiredtiger/src/log/log.c b/src/third_party/wiredtiger/src/log/log.c index 690c5841ac8..9e520084e3c 100644 --- a/src/third_party/wiredtiger/src/log/log.c +++ b/src/third_party/wiredtiger/src/log/log.c @@ -1348,6 +1348,7 @@ __log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created) log->write_lsn = end_lsn; log->write_start_lsn = end_lsn; } + log->dirty_lsn = log->alloc_lsn; if (created != NULL) *created = create_log; return (0); @@ -2053,7 +2054,7 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep) * responsible for freeing the slot in that case. Otherwise the * worker thread will free it. 
*/ - if (!F_ISSET(slot, WT_SLOT_FLUSH | WT_SLOT_SYNC | WT_SLOT_SYNC_DIR)) { + if (!F_ISSET(slot, WT_SLOT_FLUSH | WT_SLOT_SYNC_FLAGS)) { if (freep != NULL) *freep = 0; slot->slot_state = WT_LOG_SLOT_WRITTEN; @@ -2090,6 +2091,16 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep) if (F_ISSET(slot, WT_SLOT_CLOSEFH)) __wt_cond_signal(session, conn->log_file_cond); + if (F_ISSET(slot, WT_SLOT_SYNC_DIRTY) && !F_ISSET(slot, WT_SLOT_SYNC) && + (ret = __wt_fsync(session, log->log_fh, false)) != 0) { + /* + * Ignore ENOTSUP, but don't try again. + */ + if (ret != ENOTSUP) + WT_ERR(ret); + conn->log_dirty_max = 0; + } + /* * Try to consolidate calls to fsync to wait less. Acquire a spin lock * so that threads finishing writing to the log will wait while the diff --git a/src/third_party/wiredtiger/src/log/log_slot.c b/src/third_party/wiredtiger/src/log/log_slot.c index 2d9f1a04017..d7280f17c47 100644 --- a/src/third_party/wiredtiger/src/log/log_slot.c +++ b/src/third_party/wiredtiger/src/log/log_slot.c @@ -204,6 +204,38 @@ retry: } /* + * __log_slot_dirty_max_check -- + * If we've passed the maximum of dirty system pages, schedule an + * asynchronous sync that will be performed when this slot is written. 
+ */ +static void +__log_slot_dirty_max_check(WT_SESSION_IMPL *session, WT_LOGSLOT *slot) +{ + WT_CONNECTION_IMPL *conn; + WT_LOG *log; + WT_LSN *current, *last_sync; + + if (S2C(session)->log_dirty_max == 0) + return; + + conn = S2C(session); + log = conn->log; + current = &slot->slot_release_lsn; + + if (__wt_log_cmp(&log->dirty_lsn, &log->sync_lsn) < 0) + last_sync = &log->sync_lsn; + else + last_sync = &log->dirty_lsn; + if (current->l.file == last_sync->l.file && + current->l.offset > last_sync->l.offset && + current->l.offset - last_sync->l.offset > conn->log_dirty_max) { + /* Schedule the asynchronous sync */ + F_SET(slot, WT_SLOT_SYNC_DIRTY); + log->dirty_lsn = slot->slot_release_lsn; + } +} + +/* * __log_slot_new -- * Find a free slot and switch it as the new active slot. * Must be called holding the slot lock. @@ -263,6 +295,7 @@ __log_slot_new(WT_SESSION_IMPL *session) */ log->active_slot = slot; log->pool_index = pool_i; + __log_slot_dirty_max_check(session, slot); return (0); } } diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c index d3caa23ed02..2b2026f87cc 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_write.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c @@ -66,22 +66,6 @@ typedef struct { bool leave_dirty; /* - * Raw compression (don't get me started, as if normal reconciliation - * wasn't bad enough). If an application wants absolute control over - * what gets written to disk, we give it a list of byte strings and it - * gives us back an image that becomes a file block. Because we don't - * know the number of items we're storing in a block until we've done - * a lot of work, we turn off most compression: dictionary, copy-cell, - * prefix and row-store internal page suffix compression are all off. 
- */ - bool raw_compression; - uint32_t raw_max_slots; /* Raw compression array sizes */ - uint32_t *raw_entries; /* Raw compression slot entries */ - uint32_t *raw_offsets; /* Raw compression slot offsets */ - uint64_t *raw_recnos; /* Raw compression recno count */ - WT_ITEM raw_destination; /* Raw compression destination buffer */ - - /* * Track if reconciliation has seen any overflow items. If a leaf page * with no overflow items is written, the parent page's address cell is * set to the leaf-no-overflow type. This means we can delete the leaf @@ -94,10 +78,9 @@ typedef struct { * they contain overflow items. In other words, leaf-no-overflow is not * guaranteed to be set on every page that doesn't contain an overflow * item, only that if it is set, the page contains no overflow items. - * - * The reason is because of raw compression: there's no easy/fast way to - * figure out if the rows selected by raw compression included overflow - * items, and the optimization isn't worth another pass over the data. + * XXX + * This was originally done because raw compression couldn't do better, + * now that raw compression has been removed, we should do better. */ bool ovfl_items; @@ -121,17 +104,9 @@ typedef struct { * when the disk image we create exceeds the page type's maximum disk * image size. * - * First, the sizes of the page we're building. If WiredTiger is doing - * page layout, page_size is the same as page_size_orig. We accumulate - * a "page size" of raw data and when we reach that size, we split the - * page into multiple chunks, eventually compressing those chunks. When - * the application is doing page layout (raw compression is configured), - * page_size can continue to grow past page_size_orig, and we keep - * accumulating raw data until the raw compression callback accepts it. + * First, the target size of the page we're building. 
*/ - uint32_t page_size; /* Set page size */ - uint32_t page_size_orig; /* Saved set page size */ - uint32_t max_raw_page_size; /* Max page size with raw compression */ + uint32_t page_size; /* Page size */ /* * Second, the split size: if we're doing the page layout, split to a @@ -814,61 +789,6 @@ err: __wt_page_out(session, &next); } /* - * __rec_raw_compression_config -- - * Configure raw compression. - */ -static inline bool -__rec_raw_compression_config(WT_SESSION_IMPL *session, - uint32_t flags, WT_PAGE *page, WT_SALVAGE_COOKIE *salvage) -{ - WT_BTREE *btree; - - btree = S2BT(session); - - /* Check if raw compression configured. */ - if (btree->compressor == NULL || - btree->compressor->compress_raw == NULL) - return (false); - - /* Only for row-store and variable-length column-store objects. */ - if (page->type == WT_PAGE_COL_FIX) - return (false); - - /* - * XXX - * Turn off if lookaside or update/restore are configured: those modes - * potentially write blocks without entries and raw compression isn't - * ready for that. - */ - if (LF_ISSET(WT_REC_LOOKASIDE | WT_REC_UPDATE_RESTORE)) - return (false); - - /* - * Raw compression cannot support dictionary compression. (Technically, - * we could still use the raw callback on column-store variable length - * internal pages with dictionary compression configured, because - * dictionary compression only applies to column-store leaf pages, but - * that seems an unlikely use case.) - */ - if (btree->dictionary != 0) - return (false); - - /* Raw compression cannot support prefix compression. */ - if (btree->prefix_compression) - return (false); - - /* - * Raw compression is also turned off during salvage: we can't allow - * pages to split during salvage, raw compression has no point if it - * can't manipulate the page size. - */ - if (salvage != NULL) - return (false); - - return (true); -} - -/* * __rec_init -- * Initialize the reconciliation structure. 
*/ @@ -1003,11 +923,6 @@ __rec_init(WT_SESSION_IMPL *session, /* Track if the page can be marked clean. */ r->leave_dirty = false; - /* Raw compression. */ - r->raw_compression = - __rec_raw_compression_config(session, flags, page, salvage); - r->raw_destination.flags = WT_ITEM_ALIGNED; - /* Track overflow items. */ r->ovfl_items = false; @@ -1061,13 +976,9 @@ __rec_init(WT_SESSION_IMPL *session, * implement suffix compression for custom collators, we can add a * setting to the collator, configured when the collator is added, that * turns on suffix compression. - * - * The raw compression routines don't even consider suffix compression, - * but it doesn't hurt to confirm that. */ r->key_sfx_compress_conf = false; - if (btree->collator == NULL && - btree->internal_key_truncate && !r->raw_compression) + if (btree->collator == NULL && btree->internal_key_truncate) r->key_sfx_compress_conf = true; r->is_bulk_load = false; @@ -1128,11 +1039,6 @@ __rec_destroy(WT_SESSION_IMPL *session, void *reconcilep) return; *(WT_RECONCILE **)reconcilep = NULL; - __wt_free(session, r->raw_entries); - __wt_free(session, r->raw_offsets); - __wt_free(session, r->raw_recnos); - __wt_buf_free(session, &r->raw_destination); - __wt_buf_free(session, &r->chunkA.key); __wt_buf_free(session, &r->chunkA.min_key); __wt_buf_free(session, &r->chunkA.image); @@ -2167,8 +2073,7 @@ __rec_need_split(WT_RECONCILE *r, size_t len) len += r->supd_memsize; /* Check for the disk image crossing a boundary. */ - return (r->raw_compression ? - len > r->space_avail : WT_CHECK_CROSSING_BND(r, len)); + return (WT_CHECK_CROSSING_BND(r, len)); } /* @@ -2270,26 +2175,9 @@ __rec_split_init(WT_SESSION_IMPL *session, if (r->salvage != NULL) max = __rec_leaf_page_max(session, r); - /* - * Set the page sizes. If we're doing the page layout, the maximum page - * size is the same as the page size. 
If the application is doing page - * layout (raw compression is configured), we accumulate some amount of - * additional data because we don't know how well it will compress, and - * we don't want to increment our way up to the amount of data needed by - * the application to successfully compress to the target page size. - * Ideally accumulate data several times the page size without - * approaching the memory page maximum, but at least have data worth - * one page. - * - * There are cases when we grow the page size to accommodate large - * records, in those cases we split the pages once they have crossed - * the maximum size for a page with raw compression. - */ - r->page_size = r->page_size_orig = (uint32_t)max; - if (r->raw_compression) - r->max_raw_page_size = r->page_size = - (uint32_t)WT_MIN((uint64_t)r->page_size * 10, - WT_MAX((uint64_t)r->page_size, btree->maxmempage / 2)); + /* Set the page sizes. */ + r->page_size = (uint32_t)max; + /* * If we have to split, we want to choose a smaller page size for the * split pages, because otherwise we could end up splitting one large @@ -2320,14 +2208,12 @@ __rec_split_init(WT_SESSION_IMPL *session, * increasing the size of the last page written without decreasing the * penultimate page size beyond the minimum split size. * - * Finally, all this doesn't matter for fixed-size column-store pages, - * raw compression, and salvage. Fixed-size column store pages can - * split under (very) rare circumstances, but they're allocated at a - * fixed page size, never anything smaller. In raw compression, the - * underlying compression routine decides when we split, so it's not our - * problem. In salvage, as noted above, we can't split at all. - */ - if (r->raw_compression || r->salvage != NULL) { + * Finally, all this doesn't matter for fixed-size column-store pages + * and salvage. 
Fixed-size column store pages can split under (very) + * rare circumstances, but they're allocated at a fixed page size, never + * anything smaller. In salvage, as noted above, we can't split at all. + */ + if (r->salvage != NULL) { r->split_size = 0; r->space_avail = r->page_size - WT_PAGE_HEADER_BYTE_SIZE(btree); } else if (page->type == WT_PAGE_COL_FIX) { @@ -2420,34 +2306,6 @@ __rec_is_checkpoint(WT_SESSION_IMPL *session, WT_RECONCILE *r) } /* - * __rec_split_row_promote_cell -- - * Get a key from a cell for the purposes of promotion. - */ -static int -__rec_split_row_promote_cell(WT_SESSION_IMPL *session, - WT_RECONCILE *r, WT_PAGE_HEADER *dsk, WT_ITEM *key) -{ - WT_BTREE *btree; - WT_CELL *cell; - WT_CELL_UNPACK *kpack, _kpack; - - btree = S2BT(session); - kpack = &_kpack; - - /* - * The cell had better have a zero-length prefix and not be a copy cell; - * the first cell on a page cannot refer an earlier cell on the page. - */ - cell = WT_PAGE_HEADER_BYTE(btree, dsk); - __wt_cell_unpack(cell, kpack); - WT_ASSERT(session, - kpack->prefix == 0 && kpack->raw != WT_CELL_VALUE_COPY); - - WT_RET(__wt_cell_data_copy(session, r->page->type, kpack, key)); - return (0); -} - -/* * __rec_split_row_promote -- * Key promotion for a row-store. */ @@ -2752,390 +2610,6 @@ __rec_split_crossing_bnd( } /* - * __rec_split_raw -- - * Raw compression. 
- */ -static int -__rec_split_raw(WT_SESSION_IMPL *session, - WT_RECONCILE *r, size_t next_len, bool no_more_rows) -{ - WT_BM *bm; - WT_BTREE *btree; - WT_CELL *cell; - WT_CELL_UNPACK *unpack, _unpack; - WT_CHUNK *chunk, *next, *tmp; - WT_COMPRESSOR *compressor; - WT_DECL_RET; - WT_ITEM *dst; - WT_PAGE *page; - WT_PAGE_HEADER *dsk; - WT_SESSION *wt_session; - size_t corrected_page_size, extra_skip, len, result_len; - uint64_t recno; - uint32_t entry, i, max_image_slot, result_slots, slots; - uint8_t *next_start; - bool compressed, last_block; - - wt_session = (WT_SESSION *)session; - btree = S2BT(session); - bm = btree->bm; - - unpack = &_unpack; - compressor = btree->compressor; - dst = &r->raw_destination; - page = r->page; - compressed = false; - - chunk = r->cur_ptr; - if (r->prev_ptr == NULL) - r->prev_ptr = &r->chunkB; - next = r->prev_ptr; - - /* - * We can get here if the first key/value pair won't fit. - */ - if (r->entries == 0 && !__rec_need_split(r, 0)) - goto split_grow; - - /* - * Build arrays of offsets and cumulative counts of cells and rows in - * the page: the offset is the byte offset to the possible split-point - * (adjusted for an initial chunk that cannot be compressed), entries - * is the cumulative page entries covered by the byte offset, recnos is - * the cumulative rows covered by the byte offset. Allocate to handle - * both column- and row-store regardless of this page type, structures - * are potentially reused for subsequent reconciliations of different - * page types. 
- */ - if (r->entries >= r->raw_max_slots) { - __wt_free(session, r->raw_entries); - __wt_free(session, r->raw_offsets); - __wt_free(session, r->raw_recnos); - r->raw_max_slots = 0; - - i = r->entries + 100; - WT_RET(__wt_calloc_def(session, i, &r->raw_entries)); - WT_RET(__wt_calloc_def(session, i, &r->raw_offsets)); - WT_RET(__wt_calloc_def(session, i, &r->raw_recnos)); - r->raw_max_slots = i; - } - - /* - * Walk the disk image looking for places where we can split it, which - * requires setting the number of entries. - */ - dsk = chunk->image.mem; - dsk->u.entries = r->entries; - - /* - * We track the record number at each column-store split point, set an - * initial value. - */ - recno = WT_RECNO_OOB; - if (page->type == WT_PAGE_COL_VAR) - recno = chunk->recno; - - entry = max_image_slot = slots = 0; - WT_CELL_FOREACH(btree, dsk, cell, unpack, i) { - ++entry; - - /* - * Row-store pages can split at keys, but not at values, - * column-store pages can split at values. - */ - __wt_cell_unpack(cell, unpack); - switch (unpack->type) { - case WT_CELL_KEY: - case WT_CELL_KEY_OVFL: - case WT_CELL_KEY_SHORT: - break; - case WT_CELL_ADDR_DEL: - case WT_CELL_ADDR_INT: - case WT_CELL_ADDR_LEAF: - case WT_CELL_ADDR_LEAF_NO: - case WT_CELL_DEL: - case WT_CELL_VALUE: - case WT_CELL_VALUE_OVFL: - case WT_CELL_VALUE_SHORT: - if (page->type == WT_PAGE_COL_INT) { - recno = unpack->v; - break; - } - if (page->type == WT_PAGE_COL_VAR) { - recno += __wt_cell_rle(unpack); - break; - } - r->raw_entries[slots] = entry; - continue; - WT_ILLEGAL_VALUE(session, unpack->type); - } - - /* - * We can't compress the first 64B of the block (it must be - * written without compression), and a possible split point - * may appear in that 64B; keep it simple, ignore the first - * allocation size of data, anybody splitting smaller than - * that (as calculated before compression), is doing it wrong. 
- */ - if ((len = WT_PTRDIFF(cell, dsk)) > btree->allocsize) - r->raw_offsets[++slots] = - WT_STORE_SIZE(len - WT_BLOCK_COMPRESS_SKIP); - - if (page->type == WT_PAGE_COL_INT || - page->type == WT_PAGE_COL_VAR) - r->raw_recnos[slots] = recno; - r->raw_entries[slots] = entry; - - /* - * Don't create an image so large that any future update will - * cause a split in memory. - */ - if (max_image_slot == 0 && len > (size_t)r->max_raw_page_size) - max_image_slot = slots; - } - - /* - * If we haven't managed to find at least one split point, we're done, - * don't bother calling the underlying compression function. - */ - if (slots == 0) { - result_slots = 0; - goto no_slots; - } - - /* The slot at array's end is the total length of the data. */ - r->raw_offsets[++slots] = - WT_STORE_SIZE(WT_PTRDIFF(cell, dsk) - WT_BLOCK_COMPRESS_SKIP); - - /* - * Allocate a destination buffer. If there's a pre-size function, call - * it to determine the destination buffer's size, else the destination - * buffer is documented to be at least the source size. (We can't use - * the target page size, any single key/value could be larger than the - * page size. Don't bother figuring out a minimum, just use the source - * size.) - * - * The destination buffer needs to be large enough for the final block - * size, corrected for the requirements of the underlying block manager. - * If the final block size is 8KB, that's a multiple of 512B and so the - * underlying block manager is fine with it. But... we don't control - * what the pre_size method returns us as a required size, and we don't - * want to document the compress_raw method has to skip bytes in the - * buffer because that's confusing, so do something more complicated. - * First, find out how much space the compress_raw function might need, - * either the value returned from pre_size, or the initial source size. - * Add the compress-skip bytes, and then correct that value for the - * underlying block manager. 
As a result, we have a destination buffer - * that's large enough when calling the compress_raw method, and there - * are bytes in the header just for us. - */ - if (compressor->pre_size == NULL) - result_len = (size_t)r->raw_offsets[slots]; - else - WT_RET(compressor->pre_size(compressor, wt_session, - (uint8_t *)dsk + WT_BLOCK_COMPRESS_SKIP, - (size_t)r->raw_offsets[slots], &result_len)); - extra_skip = btree->kencryptor == NULL ? 0 : - btree->kencryptor->size_const + WT_ENCRYPT_LEN_SIZE; - - corrected_page_size = result_len + WT_BLOCK_COMPRESS_SKIP; - WT_RET(bm->write_size(bm, session, &corrected_page_size)); - WT_RET(__wt_buf_init(session, dst, corrected_page_size)); - - /* - * Copy the header bytes into the destination buffer, then call the - * compression function. - */ - memcpy(dst->mem, dsk, WT_BLOCK_COMPRESS_SKIP); - ret = compressor->compress_raw(compressor, wt_session, - r->page_size_orig, btree->split_pct, - WT_BLOCK_COMPRESS_SKIP + extra_skip, - (uint8_t *)dsk + WT_BLOCK_COMPRESS_SKIP, r->raw_offsets, - max_image_slot == 0 ? slots : max_image_slot, - (uint8_t *)dst->mem + WT_BLOCK_COMPRESS_SKIP, - result_len, - no_more_rows || max_image_slot != 0, - &result_len, &result_slots); - switch (ret) { - case EAGAIN: - /* - * The compression function wants more rows, accumulate and - * retry if possible. - * - * First, reset the resulting slots count, just in case the - * compression function modified it before giving up. - */ - result_slots = 0; - - /* - * If the image is too large and there are more rows to gather, - * act as if the compression engine gave up on this chunk of - * data. That doesn't make sense (we flagged the engine that we - * wouldn't give it any more rows, but it's a possible return). - */ - if (no_more_rows || max_image_slot == 0) - break; - /* FALLTHROUGH */ - case 0: - /* - * If the compression function returned zero result slots, it's - * giving up and we write the original data. 
(This is a pretty - * bad result: we've not done compression on a block much larger - * than the maximum page size, but once compression gives up, - * there's not much else we can do.) - * - * If the compression function returned non-zero result slots, - * we were successful and have a block to write. - */ - if (result_slots == 0) { - WT_STAT_DATA_INCR(session, compress_raw_fail); - - /* - * If there are no more rows, we can write the original - * data from the original buffer, else take all but the - * last row of the original data (the last row has to be - * set as the key for the next block). - */ - if (!no_more_rows) - result_slots = slots - 1; - } else { - WT_STAT_DATA_INCR(session, compress_raw_ok); - - /* - * If there are more rows and the compression function - * consumed all of the current data, there are problems: - * First, with row-store objects, we're potentially - * skipping updates, we must have a key for the next - * block so we know with what block a skipped update is - * associated. Second, if the compression function - * compressed all of the data, we're not pushing it - * hard enough (unless we got lucky and gave it exactly - * the right amount to work with, which is unlikely). - * Handle both problems by accumulating more data any - * time we're not writing the last block and compression - * ate all of the rows. - */ - if (result_slots == slots && !no_more_rows) - result_slots = 0; - else { - /* - * Finalize the compressed disk image's - * information. - */ - dst->size = result_len + WT_BLOCK_COMPRESS_SKIP; - - compressed = true; - } - } - break; - default: - return (ret); - } - -no_slots: - /* - * Check for the last block we're going to write: if no more rows and - * we failed to compress anything, or we compressed everything, it's - * the last block. - */ - last_block = no_more_rows && - (result_slots == 0 || result_slots == slots); - - if (!last_block && result_slots != 0) { - /* - * Writing the current (possibly compressed), chunk. 
- * Finalize the current chunk's information. - */ - chunk->image.size = (size_t) - r->raw_offsets[result_slots] + WT_BLOCK_COMPRESS_SKIP; - chunk->entries = r->raw_entries[result_slots - 1]; - - /* Move any remnant to the next chunk. */ - len = WT_PTRDIFF(r->first_free, - (uint8_t *)dsk + chunk->image.size); - WT_ASSERT(session, len > 0); - WT_RET(__rec_split_chunk_init( - session, r, next, chunk->image.memsize)); - next_start = WT_PAGE_HEADER_BYTE(btree, next->image.mem); - (void)memcpy(next_start, r->first_free - len, len); - - /* Set the key for the next chunk. */ - switch (page->type) { - case WT_PAGE_COL_INT: - next->recno = r->raw_recnos[result_slots]; - break; - case WT_PAGE_COL_VAR: - next->recno = r->raw_recnos[result_slots - 1]; - break; - case WT_PAGE_ROW_INT: - case WT_PAGE_ROW_LEAF: - next->recno = WT_RECNO_OOB; - /* - * Confirm there was uncompressed data remaining - * in the buffer, we're about to read it for the - * next chunk's initial key. - */ - WT_RET(__rec_split_row_promote_cell( - session, r, next->image.mem, &next->key)); - break; - } - - /* Update the tracking information. */ - r->entries -= r->raw_entries[result_slots - 1]; - r->first_free = next_start + len; - r->space_avail += r->raw_offsets[result_slots]; - WT_ASSERT(session, r->first_free + r->space_avail <= - (uint8_t *)next->image.mem + next->image.memsize); - } else if (no_more_rows) { - /* - * No more rows to accumulate, writing the entire chunk. - * Finalize the current chunk's information. - */ - chunk->image.size = WT_PTRDIFF32(r->first_free, dsk); - chunk->entries = r->entries; - - /* Clear the tracking information. */ - r->entries = 0; - r->first_free = NULL; - r->space_avail = 0; - } else { - /* - * Compression failed, there are more rows to accumulate and the - * compression function wants to try again; increase the size of - * the "page" and try again after we accumulate some more rows. 
- */ - WT_STAT_DATA_INCR(session, compress_raw_fail_temporary); - goto split_grow; - } - - /* Write the chunk. */ - WT_RET(__rec_split_write(session, r, - r->cur_ptr, compressed ? dst : NULL, last_block)); - - /* Switch chunks. */ - tmp = r->prev_ptr; - r->prev_ptr = r->cur_ptr; - r->cur_ptr = tmp; - - /* - * We got called because there wasn't enough room in the buffer for the - * next key and we might or might not have written a block. In any case, - * make sure the next key fits into the buffer. - */ - if (r->space_avail < next_len) { -split_grow: /* - * Double the page size and make sure we accommodate at least - * one more record. The reason for the latter is that we may - * be here because there's a large key/value pair that won't - * fit in our initial page buffer, even at its expanded size. - */ - r->page_size *= 2; - return (__rec_split_grow(session, r, r->page_size + next_len)); - } - return (0); -} - -/* * __rec_split_finish_process_prev -- * If the two split chunks together fit in a single page, merge them into * one. If they do not fit in a single page but the last is smaller than @@ -3237,30 +2711,10 @@ __rec_split_finish_process_prev(WT_SESSION_IMPL *session, WT_RECONCILE *r) static int __rec_split_finish(WT_SESSION_IMPL *session, WT_RECONCILE *r) { - WT_BTREE *btree; - size_t data_size; - - btree = S2BT(session); - /* - * We're done reconciling, write the final page. Call raw compression - * until/unless there's not enough data to compress. - */ - if (r->entries != 0 && r->raw_compression) { - while (r->entries != 0) { - data_size = - WT_PTRDIFF(r->first_free, r->cur_ptr->image.mem); - if (data_size <= btree->allocsize) - break; - WT_RET(__rec_split_raw(session, r, 0, true)); - } - if (r->entries == 0) - return (0); - } - - /* - * We may arrive here with no entries to write if the page was entirely - * empty or if nothing on the page was visible to us. + * We're done reconciling, write the final page. 
We may arrive here with + * no entries to write if the page was entirely empty or if nothing on + * the page was visible to us. * * Pages with skipped or not-yet-globally visible updates aren't really * empty; otherwise, the page is truly empty and we will merge it into @@ -3274,8 +2728,8 @@ __rec_split_finish(WT_SESSION_IMPL *session, WT_RECONCILE *r) WT_PTRDIFF32(r->first_free, r->cur_ptr->image.mem); r->cur_ptr->entries = r->entries; - /* If not raw compression, potentially reconsider a previous chunk. */ - if (!r->raw_compression && r->prev_ptr != NULL) + /* Potentially reconsider a previous chunk. */ + if (r->prev_ptr != NULL) WT_RET(__rec_split_finish_process_prev(session, r)); /* Write the remaining data/last page. */ @@ -3915,26 +3369,21 @@ __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) cursor->value.data, cursor->value.size, (uint64_t)0)); /* Boundary: split or write the page. */ - if (r->raw_compression) { - if (key->len + val->len > r->space_avail) - WT_RET(__rec_split_raw( - session, r, key->len + val->len, false)); - } else - if (WT_CROSSING_SPLIT_BND(r, key->len + val->len)) { - /* - * Turn off prefix compression until a full key written - * to the new page, and (unless already working with an - * overflow key), rebuild the key without compression. - */ - if (r->key_pfx_compress_conf) { - r->key_pfx_compress = false; - if (!ovfl_key) - WT_RET(__rec_cell_build_leaf_key( - session, r, NULL, 0, &ovfl_key)); - } - WT_RET(__rec_split_crossing_bnd( - session, r, key->len + val->len)); + if (WT_CROSSING_SPLIT_BND(r, key->len + val->len)) { + /* + * Turn off prefix compression until a full key written to the + * new page, and (unless already working with an overflow key), + * rebuild the key without compression. 
+ */ + if (r->key_pfx_compress_conf) { + r->key_pfx_compress = false; + if (!ovfl_key) + WT_RET(__rec_cell_build_leaf_key( + session, r, NULL, 0, &ovfl_key)); } + WT_RET(__rec_split_crossing_bnd( + session, r, key->len + val->len)); + } /* Copy the key/value pair onto the page. */ __rec_copy_incr(session, r, key); @@ -4083,12 +3532,8 @@ __wt_bulk_insert_var( r, cbulk->last.data, cbulk->last.size, cbulk->rle)); /* Boundary: split or write the page. */ - if (r->raw_compression) { - if (val->len > r->space_avail) - WT_RET(__rec_split_raw(session, r, val->len, false)); - } else - if (WT_CROSSING_SPLIT_BND(r, val->len)) - WT_RET(__rec_split_crossing_bnd(session, r, val->len)); + if (WT_CROSSING_SPLIT_BND(r, val->len)) + WT_RET(__rec_split_crossing_bnd(session, r, val->len)); /* Copy the value onto the page. */ if (btree->dictionary) @@ -4224,14 +3669,8 @@ __rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref) WT_CHILD_RELEASE_ERR(session, hazard, ref); /* Boundary: split or write the page. */ - if (__rec_need_split(r, val->len)) { - if (r->raw_compression) - WT_ERR(__rec_split_raw( - session, r, val->len, false)); - else - WT_ERR(__rec_split_crossing_bnd( - session, r, val->len)); - } + if (__rec_need_split(r, val->len)) + WT_ERR(__rec_split_crossing_bnd(session, r, val->len)); /* Copy the value onto the page. */ __rec_copy_incr(session, r, val); @@ -4273,14 +3712,8 @@ __rec_col_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) addr->addr, addr->size, __rec_vtype(addr), r->recno); /* Boundary: split or write the page. */ - if (__rec_need_split(r, val->len)) { - if (r->raw_compression) - WT_RET(__rec_split_raw( - session, r, val->len, false)); - else - WT_RET(__rec_split_crossing_bnd( - session, r, val->len)); - } + if (__rec_need_split(r, val->len)) + WT_RET(__rec_split_crossing_bnd(session, r, val->len)); /* Copy the value onto the page. 
*/ __rec_copy_incr(session, r, val); @@ -4547,12 +3980,8 @@ __rec_col_var_helper(WT_SESSION_IMPL *session, WT_RECONCILE *r, session, r, value->data, value->size, rle)); /* Boundary: split or write the page. */ - if (__rec_need_split(r, val->len)) { - if (r->raw_compression) - WT_RET(__rec_split_raw(session, r, val->len, false)); - else - WT_RET(__rec_split_crossing_bnd(session, r, val->len)); - } + if (__rec_need_split(r, val->len)) + WT_RET(__rec_split_crossing_bnd(session, r, val->len)); /* Copy the value onto the page. */ if (!deleted && !overflow_type && btree->dictionary) @@ -5253,25 +4682,20 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) /* Boundary: split or write the page. */ if (__rec_need_split(r, key->len + val->len)) { - if (r->raw_compression) - WT_ERR(__rec_split_raw( - session, r, key->len + val->len, false)); - else { - /* - * In one path above, we copied address blocks - * from the page rather than building the actual - * key. In that case, we have to build the key - * now because we are about to promote it. - */ - if (key_onpage_ovfl) { - WT_ERR(__wt_buf_set(session, r->cur, - WT_IKEY_DATA(ikey), ikey->size)); - key_onpage_ovfl = false; - } - - WT_ERR(__rec_split_crossing_bnd( - session, r, key->len + val->len)); + /* + * In one path above, we copied address blocks from the + * page rather than building the actual key. In that + * case, we have to build the key now because we are + * about to promote it. + */ + if (key_onpage_ovfl) { + WT_ERR(__wt_buf_set(session, r->cur, + WT_IKEY_DATA(ikey), ikey->size)); + key_onpage_ovfl = false; } + + WT_ERR(__rec_split_crossing_bnd( + session, r, key->len + val->len)); } /* Copy the key and value onto the page. */ @@ -5322,14 +4746,9 @@ __rec_row_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) addr->addr, addr->size, __rec_vtype(addr), WT_RECNO_OOB); /* Boundary: split or write the page. 
*/ - if (__rec_need_split(r, key->len + val->len)) { - if (r->raw_compression) - WT_RET(__rec_split_raw( - session, r, key->len + val->len, false)); - else - WT_RET(__rec_split_crossing_bnd( - session, r, key->len + val->len)); - } + if (__rec_need_split(r, key->len + val->len)) + WT_RET(__rec_split_crossing_bnd( + session, r, key->len + val->len)); /* Copy the key and value onto the page. */ __rec_copy_incr(session, r, key); @@ -5664,40 +5083,31 @@ build: /* Boundary: split or write the page. */ if (__rec_need_split(r, key->len + val->len)) { - if (r->raw_compression) - WT_ERR(__rec_split_raw( - session, r, key->len + val->len, false)); - else { - /* - * If we copied address blocks from the page - * rather than building the actual key, we have - * to build the key now because we are about to - * promote it. - */ - if (key_onpage_ovfl) { - WT_ERR(__wt_dsk_cell_data_ref(session, - WT_PAGE_ROW_LEAF, kpack, r->cur)); - WT_NOT_READ(key_onpage_ovfl, false); - } - - /* - * Turn off prefix compression until a full key - * written to the new page, and (unless already - * working with an overflow key), rebuild the - * key without compression. - */ - if (r->key_pfx_compress_conf) { - r->key_pfx_compress = false; - if (!ovfl_key) - WT_ERR( - __rec_cell_build_leaf_key( - session, r, NULL, 0, - &ovfl_key)); - } + /* + * If we copied address blocks from the page rather than + * building the actual key, we have to build the key now + * because we are about to promote it. + */ + if (key_onpage_ovfl) { + WT_ERR(__wt_dsk_cell_data_ref(session, + WT_PAGE_ROW_LEAF, kpack, r->cur)); + WT_NOT_READ(key_onpage_ovfl, false); + } - WT_ERR(__rec_split_crossing_bnd( - session, r, key->len + val->len)); + /* + * Turn off prefix compression until a full key written + * to the new page, and (unless already working with an + * overflow key), rebuild the key without compression. 
+ */ + if (r->key_pfx_compress_conf) { + r->key_pfx_compress = false; + if (!ovfl_key) + WT_ERR(__rec_cell_build_leaf_key( + session, r, NULL, 0, &ovfl_key)); } + + WT_ERR(__rec_split_crossing_bnd( + session, r, key->len + val->len)); } /* Copy the key/value pair onto the page. */ @@ -5806,28 +5216,20 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins) /* Boundary: split or write the page. */ if (__rec_need_split(r, key->len + val->len)) { - if (r->raw_compression) - WT_RET(__rec_split_raw( - session, r, key->len + val->len, false)); - else { - /* - * Turn off prefix compression until a full key - * written to the new page, and (unless already - * working with an overflow key), rebuild the - * key without compression. - */ - if (r->key_pfx_compress_conf) { - r->key_pfx_compress = false; - if (!ovfl_key) - WT_RET( - __rec_cell_build_leaf_key( - session, r, NULL, 0, - &ovfl_key)); - } - - WT_RET(__rec_split_crossing_bnd( - session, r, key->len + val->len)); + /* + * Turn off prefix compression until a full key written + * to the new page, and (unless already working with an + * overflow key), rebuild the key without compression. + */ + if (r->key_pfx_compress_conf) { + r->key_pfx_compress = false; + if (!ovfl_key) + WT_RET(__rec_cell_build_leaf_key( + session, r, NULL, 0, &ovfl_key)); } + + WT_RET(__rec_split_crossing_bnd( + session, r, key->len + val->len)); } /* Copy the key/value pair onto the page. 
*/ diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c index c413fb21f32..0c0cb8f5b43 100644 --- a/src/third_party/wiredtiger/src/session/session_api.c +++ b/src/third_party/wiredtiger/src/session/session_api.c @@ -493,7 +493,7 @@ __session_open_cursor_int(WT_SESSION_IMPL *session, const char *uri, case 'b': if (WT_PREFIX_MATCH(uri, "backup:")) WT_RET(__wt_curbackup_open( - session, uri, cfg, cursorp)); + session, uri, other, cfg, cursorp)); break; case 's': if (WT_PREFIX_MATCH(uri, "statistics:")) @@ -570,10 +570,11 @@ __session_open_cursor(WT_SESSION *wt_session, WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION_IMPL *session; - bool statjoin; + bool dup_backup, statjoin; cursor = *cursorp = NULL; + dup_backup = false; session = (WT_SESSION_IMPL *)wt_session; SESSION_API_CALL(session, open_cursor, config, cfg); @@ -589,11 +590,19 @@ __session_open_cursor(WT_SESSION *wt_session, if ((ret = __wt_cursor_cache_get( session, uri, to_dup, cfg, &cursor)) == 0) goto done; + + /* + * Detect if we're duplicating a backup cursor specifically. + * That needs special handling. + */ + if (to_dup != NULL && strcmp(to_dup->uri, "backup:") == 0) + dup_backup = true; WT_ERR_NOTFOUND_OK(ret); if (to_dup != NULL) { uri = to_dup->uri; - if (!WT_PREFIX_MATCH(uri, "colgroup:") && + if (!WT_PREFIX_MATCH(uri, "backup:") && + !WT_PREFIX_MATCH(uri, "colgroup:") && !WT_PREFIX_MATCH(uri, "index:") && !WT_PREFIX_MATCH(uri, "file:") && !WT_PREFIX_MATCH(uri, "lsm:") && @@ -605,10 +614,10 @@ __session_open_cursor(WT_SESSION *wt_session, } WT_ERR(__session_open_cursor_int(session, uri, NULL, - statjoin ? to_dup : NULL, cfg, &cursor)); + statjoin || dup_backup ? 
to_dup : NULL, cfg, &cursor)); done: - if (to_dup != NULL && !statjoin) + if (to_dup != NULL && !statjoin && !dup_backup) WT_ERR(__wt_cursor_dup_position(to_dup, cursor)); *cursorp = cursor; diff --git a/src/third_party/wiredtiger/src/support/scratch.c b/src/third_party/wiredtiger/src/support/scratch.c index a0f7de3179f..fda96c8efe2 100644 --- a/src/third_party/wiredtiger/src/support/scratch.c +++ b/src/third_party/wiredtiger/src/support/scratch.c @@ -128,7 +128,7 @@ __wt_buf_set_printable_format(WT_SESSION_IMPL *session, WT_DECL_RET; WT_PACK pack; const uint8_t *p, *end; - const char *retp, *sep; + const char *sep; p = (const uint8_t *)buffer; end = p + size; @@ -188,9 +188,13 @@ err: __wt_scr_free(session, &tmp); if (ret == 0) return ((const char *)buf->data); - retp = "failed to create printable output"; - __wt_err(session, ret, "%s", retp); - return (retp); + /* + * The byte string may not match the format (it happens if a formatted, + * internal row-store key is truncated, and then passed here by a page + * debugging routine). Our current callers aren't interested in error + * handling in such cases, return a byte string instead. 
+ */ + return (__wt_buf_set_printable(session, buffer, size, buf)); } /* diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c index a2019cd3aac..e46c4838063 100644 --- a/src/third_party/wiredtiger/src/support/stat.c +++ b/src/third_party/wiredtiger/src/support/stat.c @@ -107,9 +107,6 @@ static const char * const __stats_dsrc_desc[] = { "compression: compressed pages written", "compression: page written failed to compress", "compression: page written was too small to compress", - "compression: raw compression call failed, additional data available", - "compression: raw compression call failed, no additional data available", - "compression: raw compression call succeeded", "cursor: bulk-loaded cursor-insert calls", "cursor: close calls that result in cache", "cursor: create calls", @@ -293,9 +290,6 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats) stats->compress_write = 0; stats->compress_write_fail = 0; stats->compress_write_too_small = 0; - stats->compress_raw_fail_temporary = 0; - stats->compress_raw_fail = 0; - stats->compress_raw_ok = 0; stats->cursor_insert_bulk = 0; stats->cursor_cache = 0; stats->cursor_create = 0; @@ -480,9 +474,6 @@ __wt_stat_dsrc_aggregate_single( to->compress_write += from->compress_write; to->compress_write_fail += from->compress_write_fail; to->compress_write_too_small += from->compress_write_too_small; - to->compress_raw_fail_temporary += from->compress_raw_fail_temporary; - to->compress_raw_fail += from->compress_raw_fail; - to->compress_raw_ok += from->compress_raw_ok; to->cursor_insert_bulk += from->cursor_insert_bulk; to->cursor_cache += from->cursor_cache; to->cursor_create += from->cursor_create; @@ -700,10 +691,6 @@ __wt_stat_dsrc_aggregate( to->compress_write_fail += WT_STAT_READ(from, compress_write_fail); to->compress_write_too_small += WT_STAT_READ(from, compress_write_too_small); - to->compress_raw_fail_temporary += - WT_STAT_READ(from, compress_raw_fail_temporary); 
- to->compress_raw_fail += WT_STAT_READ(from, compress_raw_fail); - to->compress_raw_ok += WT_STAT_READ(from, compress_raw_ok); to->cursor_insert_bulk += WT_STAT_READ(from, cursor_insert_bulk); to->cursor_cache += WT_STAT_READ(from, cursor_cache); to->cursor_create += WT_STAT_READ(from, cursor_create); diff --git a/src/third_party/wiredtiger/test/csuite/random_abort/main.c b/src/third_party/wiredtiger/test/csuite/random_abort/main.c index e2f2820a63a..e4c50cedddd 100644 --- a/src/third_party/wiredtiger/test/csuite/random_abort/main.c +++ b/src/third_party/wiredtiger/test/csuite/random_abort/main.c @@ -348,7 +348,7 @@ main(int argc, char *argv[]) testutil_check(__wt_snprintf( buf, sizeof(buf),"%s/%s", home, fname)); while (stat(buf, &sb) != 0) - sleep(1); + testutil_sleep_wait(1, pid); ++i; } sleep(timeout); diff --git a/src/third_party/wiredtiger/test/csuite/schema_abort/main.c b/src/third_party/wiredtiger/test/csuite/schema_abort/main.c index 63e457fe705..81cf528dded 100644 --- a/src/third_party/wiredtiger/test/csuite/schema_abort/main.c +++ b/src/third_party/wiredtiger/test/csuite/schema_abort/main.c @@ -1096,7 +1096,7 @@ main(int argc, char *argv[]) testutil_check(__wt_snprintf( statname, sizeof(statname), "%s/%s", home, ckpt_file)); while (stat(statname, &sb) != 0) - sleep(1); + testutil_sleep_wait(1, pid); sleep(timeout); sa.sa_handler = SIG_DFL; testutil_checksys(sigaction(SIGCHLD, &sa, NULL)); diff --git a/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c b/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c index 837b3f400b2..765c49ad2ce 100644 --- a/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c +++ b/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c @@ -676,7 +676,7 @@ main(int argc, char *argv[]) testutil_check(__wt_snprintf( statname, sizeof(statname), "%s/%s", home, ckpt_file)); while (stat(statname, &sb) != 0) - sleep(1); + testutil_sleep_wait(1, pid); sleep(timeout); sa.sa_handler = SIG_DFL; 
testutil_checksys(sigaction(SIGCHLD, &sa, NULL)); diff --git a/src/third_party/wiredtiger/test/csuite/wt3338_partial_update/main.c b/src/third_party/wiredtiger/test/csuite/wt3338_partial_update/main.c index fb246f87da1..e73f63cdd8a 100644 --- a/src/third_party/wiredtiger/test/csuite/wt3338_partial_update/main.c +++ b/src/third_party/wiredtiger/test/csuite/wt3338_partial_update/main.c @@ -32,13 +32,13 @@ * Test case description: Smoke-test the partial update construction. */ -#define DEBUG 0 +#define DEBUG 0 -#define DATASIZE 1024 +#define DATASIZE 1024 +#define MAX_MODIFY_ENTRIES 37 /* Maximum modify vectors */ -#define MAX_MODIFY_ENTRIES 37 -static WT_MODIFY entries[MAX_MODIFY_ENTRIES]; /* Entries vector */ -static int nentries; /* Entries count */ +static WT_MODIFY entries[1000]; /* Entries vector */ +static int nentries; /* Entries count */ /* * The replacement bytes array is 2x the maximum replacement string so we can @@ -213,11 +213,11 @@ slow_apply_api(WT_ITEM *orig) } /* - * diff -- - * Diff the two results. + * compare -- + * Compare two results. */ static void -diff(WT_ITEM *local, WT_ITEM *library) +compare(WT_ITEM *local, WT_ITEM *library) { #if DEBUG if (local->size != library->size || @@ -232,42 +232,92 @@ diff(WT_ITEM *local, WT_ITEM *library) local->data, library->data, local->size) == 0); } +static int nruns = 10000; + /* - * modify_init -- - * Initialize the buffers to a known state. + * modify_run + * Run some tests: + * 1. Create an initial value, a copy and a fake cursor to use with the + * WiredTiger routines. Generate a set of modify vectors and apply them to + * the item stored in the cursor using the modify apply API. Also apply the + * same modify vector to one of the copies using a helper routine written + * to test the modify API. The final value generated with the modify API + * and the helper routine should match. + * + * 2. 
Use the initial value and the modified value generated above as + * inputs into the calculate-modify API to generate a set of modify + * vectors. Apply this generated vector to the initial value using the + * modify apply API to obtain a final value. The final value generated + * should match the modified value that was used as input to the + * calculate-modify API. */ static void -modify_init(WT_ITEM *local, WT_ITEM *library) +modify_run(bool verbose) { + WT_CURSOR *cursor, _cursor; + WT_DECL_RET; + WT_ITEM *localA, _localA, *localB, _localB; size_t len; + int i, j; - len = (size_t)(__wt_random(&rnd) % MAX_REPL_BYTES); - testutil_check(__wt_buf_set(NULL, local, modify_repl, len)); - testutil_check(__wt_buf_set(NULL, library, modify_repl, len)); -} + /* Initialize the RNG. */ + __wt_random_init_seed(NULL, &rnd); -static int nruns = 1000; + /* Set up replacement information. */ + modify_repl_init(); -/* - * modify_run - * Run some tests. - */ -static void -modify_run(WT_CURSOR *cursor, WT_ITEM *local, bool verbose) -{ - int i, j; + /* We need three WT_ITEMs, one of them part of a fake cursor. */ + localA = &_localA; + memset(&_localA, 0, sizeof(_localA)); + localB = &_localB; + memset(&_localB, 0, sizeof(_localB)); + cursor = &_cursor; + memset(&_cursor, 0, sizeof(_cursor)); + cursor->value_format = "u"; for (i = 0; i < nruns; ++i) { - modify_init(local, &cursor->value); + /* Create an initial value. */ + len = (size_t)(__wt_random(&rnd) % MAX_REPL_BYTES); + testutil_check(__wt_buf_set(NULL, localA, modify_repl, len)); for (j = 0; j < 1000; ++j) { + /* Copy the current value into the second item. */ + testutil_check(__wt_buf_set( + NULL, localB, localA->data, localA->size)); + + /* + * Create a random set of modify vectors, run the + * underlying library modification function, then + * compare the result against our implementation + * of modify. 
+ */ modify_build(); - - slow_apply_api(local); + testutil_check(__wt_buf_set( + NULL, &cursor->value, localA->data, localA->size)); testutil_check(__wt_modify_apply_api( NULL, cursor, entries, nentries)); - - diff(local, &cursor->value); + slow_apply_api(localA); + compare(localA, &cursor->value); + + /* + * Call the WiredTiger function to build a modification + * vector for the change, and repeat the test using the + * WiredTiger modification vector, then compare results + * against our implementation of modify. + */ + nentries = WT_ELEMENTS(entries); + ret = wiredtiger_calc_modify(NULL, + localB, localA, + WT_MAX(localB->size, localA->size) + 100, + entries, &nentries); + if (ret == WT_NOTFOUND) + continue; + testutil_check(ret); + testutil_check(__wt_buf_set( + NULL, &cursor->value, localB->data, localB->size)); + testutil_check(__wt_modify_apply_api( + NULL, cursor, entries, nentries)); + compare(localA, &cursor->value); } if (verbose) { printf("%d (%d%%)\r", i, (i * 100) / nruns); @@ -275,18 +325,17 @@ modify_run(WT_CURSOR *cursor, WT_ITEM *local, bool verbose) } } if (verbose) - printf("\n"); + printf("%d (100%%)\n", i); + + __wt_buf_free(NULL, localA); + __wt_buf_free(NULL, localB); + __wt_buf_free(NULL, &cursor->value); } int main(int argc, char *argv[]) { TEST_OPTS *opts, _opts; - WT_CURSOR *cursor, _cursor; - WT_ITEM *local, _local; - - if (testutil_is_flag_set("TESTUTIL_ENABLE_LONG_TESTS")) - nruns = 10000; opts = &_opts; memset(opts, 0, sizeof(*opts)); @@ -295,24 +344,8 @@ main(int argc, char *argv[]) testutil_check( wiredtiger_open(opts->home, NULL, "create", &opts->conn)); - /* Initialize the RNG. */ - __wt_random_init_seed(NULL, &rnd); - - /* Set up replacement information. */ - modify_repl_init(); - - /* We need two items, one of them hooked into fake cursor. */ - local = &_local; - memset(&_local, 0, sizeof(_local)); - cursor = &_cursor; - memset(&_cursor, 0, sizeof(_cursor)); - cursor->value_format = "u"; - /* Run the test. 
*/ - modify_run(cursor, local, opts->verbose); - - __wt_buf_free(NULL, local); - __wt_buf_free(NULL, &cursor->value); + modify_run(opts->verbose); testutil_cleanup(opts); return (EXIT_SUCCESS); diff --git a/src/third_party/wiredtiger/test/csuite/wt4156_metadata_salvage/main.c b/src/third_party/wiredtiger/test/csuite/wt4156_metadata_salvage/main.c index b813a50c458..53fdfe16bd6 100644 --- a/src/third_party/wiredtiger/test/csuite/wt4156_metadata_salvage/main.c +++ b/src/third_party/wiredtiger/test/csuite/wt4156_metadata_salvage/main.c @@ -424,9 +424,6 @@ static void wt_open_corrupt(const char *) static void wt_open_corrupt(const char *sfx) { -#ifdef HAVE_ATTACH - WT_UNUSED(sfx); -#else WT_CONNECTION *conn; WT_DECL_RET; char buf[1024]; @@ -446,7 +443,6 @@ wt_open_corrupt(const char *sfx) fprintf(stderr, "OPEN_CORRUPT: wiredtiger_open returned %d\n", ret); testutil_assert(ret == WT_TRY_SALVAGE || ret == 0); -#endif exit (EXIT_SUCCESS); } diff --git a/src/third_party/wiredtiger/test/csuite/wt4333_handle_locks/main.c b/src/third_party/wiredtiger/test/csuite/wt4333_handle_locks/main.c index 6ddad5c9063..25dd9e515db 100644 --- a/src/third_party/wiredtiger/test/csuite/wt4333_handle_locks/main.c +++ b/src/third_party/wiredtiger/test/csuite/wt4333_handle_locks/main.c @@ -315,62 +315,51 @@ main(int argc, char *argv[]) static const struct { u_int workers; u_int uris; + bool cache_cursors; } runs[] = { - { 1, 1}, - { 8, 1}, - { 16, 1}, - { 16, WT_ELEMENTS(uri_list)}, - {200, 100}, - {300, 100}, - {200, WT_ELEMENTS(uri_list)}, - {600, WT_ELEMENTS(uri_list)}, + { 1, 1, false}, + { 1, 1, true}, + { 8, 1, false}, + { 8, 1, true}, + { 16, 1, false}, + { 16, 1, true}, + { 16, WT_ELEMENTS(uri_list), false}, + { 16, WT_ELEMENTS(uri_list), true}, + {200, 100, false}, + {200, 100, true}, + {200, WT_ELEMENTS(uri_list), false}, + {200, WT_ELEMENTS(uri_list), true}, + {300, 100, false}, + {300, 100, true}, + {600, WT_ELEMENTS(uri_list), false}, + {600, WT_ELEMENTS(uri_list), true}, }; + 
WT_RAND_STATE rnd; u_int i, n; int ch; - bool run_long; (void)testutil_set_progname(argv); + __wt_random_init_seed(NULL, &rnd); - run_long = false; - while ((ch = __wt_getopt(argv[0], argc, argv, "av")) != EOF) { + while ((ch = __wt_getopt(argv[0], argc, argv, "v")) != EOF) { switch (ch) { - case 'a': - run_long = true; - break; case 'v': verbose = true; break; default: - fprintf(stderr, "usage: %s -a", argv[0]); + fprintf(stderr, "usage: %s [-v]\n", argv[0]); return (EXIT_FAILURE); } } - /* Ignore unless requested */ - if (!run_long && - !testutil_is_flag_set("TESTUTIL_ENABLE_LONG_TESTS")) - return (EXIT_SUCCESS); - (void)signal(SIGALRM, on_alarm); - /* - * This test takes 2 minutes per slot in the runs table, only do the - * first 2 and last 2 slots, unless specifically requested. - */ - n = WT_ELEMENTS(runs); - for (i = 0; i < 2; ++i) { - workers = runs[i].workers; - uris = runs[i].uris; - run(true); - run(false); - } - if (!run_long) - i = n - 2; - for (; i < n; ++i) { - workers = runs[i].workers; - uris = runs[i].uris; - run(true); - run(false); + /* Each test in the table runs for a minute, run 5 tests at random. 
*/ + for (i = 0; i < 5; ++i) { + n = __wt_random(&rnd) % WT_ELEMENTS(runs); + workers = runs[n].workers; + uris = runs[n].uris; + run(runs[n].cache_cursors); } uri_teardown(); diff --git a/src/third_party/wiredtiger/test/evergreen.yml b/src/third_party/wiredtiger/test/evergreen.yml index 481fccddba9..8c957e487ad 100644 --- a/src/third_party/wiredtiger/test/evergreen.yml +++ b/src/third_party/wiredtiger/test/evergreen.yml @@ -737,6 +737,8 @@ tasks: # End of csuite test tasks + # Start of Python unit test tasks + - name: unit-test depends_on: - name: compile @@ -749,21 +751,141 @@ tasks: set -o errexit set -o verbose - if [ "Windows_NT" = "$OS" ]; then - ${test_env_vars|} python ./test/suite/run.py -v 2 ${smp_command|} 2>&1 - elif [ "$(uname -s)" == "Darwin" ]; then - # Avoid /usr/bin/python, at least on macOS: with System Integrity - # Protection enabled, it ignores DYLD_LIBRARY_PATH and hence - # doesn't find the WiredTiger library in the local tree. - ${test_env_vars|} python ./test/suite/run.py -v 2 ${smp_command|} 2>&1 - else # Ubuntu - # Change directory to where the local installed 'wt' binary is located, - # to avoid libtool generated 'wt' script from being selected by run.py, - # which invokes relink_command that tries to changing to a non-existed - # /data/mci/<uniq> directory, as 'make' is done by a separate 'compile' task. - cd .libs - ${python_test_env_vars|} python ../test/suite/run.py -v 2 ${smp_command|} 2>&1 - fi + # Only Windows and OS X variants are expected to run this task + # + # Avoid /usr/bin/python, at least on macOS: with System Integrity + # Protection enabled, it ignores DYLD_LIBRARY_PATH and hence + # doesn't find the WiredTiger library in the local tree. 
+ ${test_env_vars|} python ./test/suite/run.py -v 2 ${smp_command|} 2>&1 + + # Break out Python unit tests into multiple buckets/tasks based on test name and runtime + # The test/suite/run.py script can work out test names by casting each command argument + # with "test_" prefix and "*.py" postfix. + # + # One example: + # "test/suite/run.py [ab]" will be translated to testing "test_a*.py" and "test_b*.py" + + - name: unit-test-bucket00 + depends_on: + - name: compile + commands: + - func: "fetch artifacts" + - command: shell.exec + params: + working_dir: "wiredtiger" + script: | + set -o errexit + set -o verbose + + cd .libs + ${python_test_env_vars|} python ../test/suite/run.py [ab] -v 2 ${smp_command|} 2>&1 + + - name: unit-test-bucket01 + depends_on: + - name: compile + commands: + - func: "fetch artifacts" + - command: shell.exec + params: + working_dir: "wiredtiger" + script: | + set -o errexit + set -o verbose + + cd .libs + ${python_test_env_vars|} python ../test/suite/run.py [c] -v 2 ${smp_command|} 2>&1 + + - name: unit-test-bucket02 + depends_on: + - name: compile + commands: + - func: "fetch artifacts" + - command: shell.exec + params: + working_dir: "wiredtiger" + script: | + set -o errexit + set -o verbose + + cd .libs + ${python_test_env_vars|} python ../test/suite/run.py [defg] -v 2 ${smp_command|} 2>&1 + + - name: unit-test-bucket03 + depends_on: + - name: compile + commands: + - func: "fetch artifacts" + - command: shell.exec + params: + working_dir: "wiredtiger" + script: | + set -o errexit + set -o verbose + + cd .libs + ${python_test_env_vars|} python ../test/suite/run.py [hijk] -v 2 ${smp_command|} 2>&1 + + - name: unit-test-bucket04 + depends_on: + - name: compile + commands: + - func: "fetch artifacts" + - command: shell.exec + params: + working_dir: "wiredtiger" + script: | + set -o errexit + set -o verbose + + cd .libs + ${python_test_env_vars|} python ../test/suite/run.py [lmnopq] -v 2 ${smp_command|} 2>&1 + + - name: unit-test-bucket05 + 
depends_on: + - name: compile + commands: + - func: "fetch artifacts" + - command: shell.exec + params: + working_dir: "wiredtiger" + script: | + set -o errexit + set -o verbose + + cd .libs + ${python_test_env_vars|} python ../test/suite/run.py [rs] -v 2 ${smp_command|} 2>&1 + + - name: unit-test-bucket06 + depends_on: + - name: compile + commands: + - func: "fetch artifacts" + - command: shell.exec + params: + working_dir: "wiredtiger" + script: | + set -o errexit + set -o verbose + + cd .libs + ${python_test_env_vars|} python ../test/suite/run.py [t] -v 2 ${smp_command|} 2>&1 + + - name: unit-test-bucket07 + depends_on: + - name: compile + commands: + - func: "fetch artifacts" + - command: shell.exec + params: + working_dir: "wiredtiger" + script: | + set -o errexit + set -o verbose + + cd .libs + ${python_test_env_vars|} python ../test/suite/run.py [uvwxyz] -v 2 ${smp_command|} 2>&1 + + # End of Python unit test tasks - name: compile-windows-alt depends_on: @@ -884,7 +1006,14 @@ buildvariants: - name: csuite-wt2909-checkpoint-integrity-test - name: csuite-wt3338-partial-update-test - name: csuite-wt4333-handle-locks-test - - name: unit-test + - name: unit-test-bucket00 + - name: unit-test-bucket01 + - name: unit-test-bucket02 + - name: unit-test-bucket03 + - name: unit-test-bucket04 + - name: unit-test-bucket05 + - name: unit-test-bucket06 + - name: unit-test-bucket07 - name: fops - name: large-scale-test diff --git a/src/third_party/wiredtiger/test/format/config.c b/src/third_party/wiredtiger/test/format/config.c index 9a32a96423d..41dae7fe364 100644 --- a/src/third_party/wiredtiger/test/format/config.c +++ b/src/third_party/wiredtiger/test/format/config.c @@ -392,12 +392,9 @@ config_compression(const char *conf_name) */ switch (mmrand(NULL, 1, 20)) { #ifdef HAVE_BUILTIN_EXTENSION_LZ4 - case 1: case 2: /* 10% lz4 */ + case 1: case 2: case 3: /* 15% lz4 */ cstr = "lz4"; break; - case 3: /* 5% lz4-no-raw */ - cstr = "lz4-noraw"; - break; #endif #ifdef 
HAVE_BUILTIN_EXTENSION_SNAPPY case 4: case 5: case 6: case 7: /* 30% snappy */ @@ -409,12 +406,9 @@ config_compression(const char *conf_name) case 10: case 11: case 12: case 13: /* 20% zlib */ cstr = "zlib"; break; - case 14: /* 5% zlib-no-raw */ - cstr = "zlib-noraw"; - break; #endif #ifdef HAVE_BUILTIN_EXTENSION_ZSTD - case 15: case 16: case 17: /* 15% zstd */ + case 14: case 15: case 16: case 17: /* 20% zstd */ cstr = "zstd"; break; #endif @@ -1138,16 +1132,14 @@ config_map_compression(const char *s, u_int *vp) *vp = COMPRESS_NONE; else if (strcmp(s, "lz4") == 0) *vp = COMPRESS_LZ4; - else if (strcmp(s, "lz4-noraw") == 0) - *vp = COMPRESS_LZ4_NO_RAW; - else if (strcmp(s, "lzo") == 0) - *vp = COMPRESS_LZO; + else if (strcmp(s, "lz4-noraw") == 0) /* CONFIG compatibility */ + *vp = COMPRESS_LZ4; else if (strcmp(s, "snappy") == 0) *vp = COMPRESS_SNAPPY; else if (strcmp(s, "zlib") == 0) *vp = COMPRESS_ZLIB; - else if (strcmp(s, "zlib-noraw") == 0) - *vp = COMPRESS_ZLIB_NO_RAW; + else if (strcmp(s, "zlib-noraw") == 0) /* CONFIG compatibility */ + *vp = COMPRESS_ZLIB; else if (strcmp(s, "zstd") == 0) *vp = COMPRESS_ZSTD; else diff --git a/src/third_party/wiredtiger/test/format/config.h b/src/third_party/wiredtiger/test/format/config.h index a64a808d968..44961edfbda 100644 --- a/src/third_party/wiredtiger/test/format/config.h +++ b/src/third_party/wiredtiger/test/format/config.h @@ -58,7 +58,7 @@ typedef struct { } CONFIG; #define COMPRESSION_LIST \ - "(none | lz4 | lz4-noraw | snappy | zlib | zlib-noraw | zstd)" + "(none | lz4 | snappy | zlib | zstd)" static CONFIG c[] = { { "abort", diff --git a/src/third_party/wiredtiger/test/format/format.h b/src/third_party/wiredtiger/test/format/format.h index 8c4132d137e..e9063674476 100644 --- a/src/third_party/wiredtiger/test/format/format.h +++ b/src/third_party/wiredtiger/test/format/format.h @@ -62,8 +62,6 @@ #define HELIUM_PATH \ EXTPATH "datasources/helium/.libs/libwiredtiger_helium.so" -#define LZO_PATH 
".libs/lzo_compress.so" - #undef M #define M(v) ((v) * WT_MILLION) /* Million */ #undef KILOBYTE @@ -257,12 +255,9 @@ typedef struct { #define COMPRESS_NONE 1 #define COMPRESS_LZ4 2 -#define COMPRESS_LZ4_NO_RAW 3 -#define COMPRESS_LZO 4 -#define COMPRESS_SNAPPY 5 -#define COMPRESS_ZLIB 6 -#define COMPRESS_ZLIB_NO_RAW 7 -#define COMPRESS_ZSTD 8 +#define COMPRESS_SNAPPY 3 +#define COMPRESS_ZLIB 4 +#define COMPRESS_ZSTD 5 u_int c_compression_flag; /* Compression flag value */ u_int c_logging_compression_flag; /* Log compression flag value */ diff --git a/src/third_party/wiredtiger/test/format/wts.c b/src/third_party/wiredtiger/test/format/wts.c index b62885f0369..0c22e98bd3c 100644 --- a/src/third_party/wiredtiger/test/format/wts.c +++ b/src/third_party/wiredtiger/test/format/wts.c @@ -45,21 +45,12 @@ compressor(uint32_t compress_flag) case COMPRESS_LZ4: p ="lz4"; break; - case COMPRESS_LZ4_NO_RAW: - p ="lz4-noraw"; - break; - case COMPRESS_LZO: - p ="LZO1B-6"; - break; case COMPRESS_SNAPPY: p ="snappy"; break; case COMPRESS_ZLIB: p ="zlib"; break; - case COMPRESS_ZLIB_NO_RAW: - p ="zlib-noraw"; - break; case COMPRESS_ZSTD: p ="zstd"; break; @@ -269,10 +260,9 @@ wts_open(const char *home, bool set_api, WT_CONNECTION **connp) /* Extensions. */ CONFIG_APPEND(p, ",extensions=[" - "\"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\"],", + "\"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\"],", g.c_reverse ? REVERSE_PATH : "", access(LZ4_PATH, R_OK) == 0 ? LZ4_PATH : "", - access(LZO_PATH, R_OK) == 0 ? LZO_PATH : "", access(ROTN_PATH, R_OK) == 0 ? ROTN_PATH : "", access(SNAPPY_PATH, R_OK) == 0 ? SNAPPY_PATH : "", access(ZLIB_PATH, R_OK) == 0 ? 
ZLIB_PATH : "", diff --git a/src/third_party/wiredtiger/test/suite/README b/src/third_party/wiredtiger/test/suite/README new file mode 100644 index 00000000000..e63fd6dfc64 --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/README @@ -0,0 +1,13 @@ +The test/suite directory includes a collection of Python unit tests +that are expected to be executed when code change is introduced in this repo. + +These Python tests are broken down and grouped into multiple buckets/tasks +in Evergreen (CI system) configuration. See test/evergreen.yml for details. + +There is a plan to implement a mechanism to auto-group tests into buckets/tasks +based on history runtime of each test, and generate the Evergreen configuration +dynamically before each Evergreen build variant run, so that no mental overhead +is required when new tests is introduced into test/suite. (WT-4441) + +Before the above mentioned mechansim is put into place, please double check +test/evergreen.yml and test run logs to make sure new test are covered. diff --git a/src/third_party/wiredtiger/test/suite/test_alter04.py b/src/third_party/wiredtiger/test/suite/test_alter04.py new file mode 100644 index 00000000000..97978922848 --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_alter04.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2018 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. 
We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +import wiredtiger, wttest +from wtscenario import make_scenarios + +# test_alter04.py +# Smoke-test the session alter operations. +# This test confirms os_cache_dirty_max and os_cache_max. +class test_alter04(wttest.WiredTigerTestCase): + name = "alter04" + entries = 100 + cache_alter=('1M', '100K') + # Settings for os_cache[_dirty]_max. + types = [ + ('file', dict(uri='file:', use_cg=False, use_index=False)), + ('lsm', dict(uri='lsm:', use_cg=False, use_index=False)), + ('table-cg', dict(uri='table:', use_cg=True, use_index=False)), + ('table-index', dict(uri='table:', use_cg=False, use_index=True)), + ('table-simple', dict(uri='table:', use_cg=False, use_index=False)), + ] + sizes = [ + ('default', dict(ocreate='')), + ('1M', dict(ocreate='1M')), + ('200K', dict(ocreate='200K')), + ] + reopen = [ + ('no-reopen', dict(reopen=False)), + ('reopen', dict(reopen=True)), + ] + settings = [ + ('cache', dict(setting='os_cache_max')), + ('cache_dirty', dict(setting='os_cache_dirty_max')), + ] + scenarios = make_scenarios(types, sizes, reopen, settings) + + def verify_metadata(self, metastr): + if metastr == '': + return + cursor = self.session.open_cursor('metadata:', None, None) + # + # Walk through all the metadata looking for the entries that are + # the file URIs for components of the table. 
+ # + found = False + while True: + ret = cursor.next() + if ret != 0: + break + key = cursor.get_key() + check_meta = ((key.find("lsm:") != -1 or key.find("file:") != -1) \ + and key.find(self.name) != -1) + if check_meta: + value = cursor[key] + found = True + self.assertTrue(value.find(metastr) != -1) + cursor.close() + self.assertTrue(found == True) + + # Alter: Change the setting after creation + def test_alter04_cache(self): + uri = self.uri + self.name + create_params = 'key_format=i,value_format=i,' + complex_params = '' + # + # If we're not explicitly setting the parameter, then don't + # modify create_params to test using the default. + # + if self.ocreate != '': + new_param = '%s=%s' % (self.setting, self.ocreate) + create_params += '%s,' % new_param + complex_params += '%s,' % new_param + else: + # NOTE: This is hard-coding the default value. If the default + # changes then this will fail and need to be fixed. + new_param = '%s=0' % self.setting + + cgparam = '' + if self.use_cg or self.use_index: + cgparam = 'columns=(k,v),' + if self.use_cg: + cgparam += 'colgroups=(g0),' + + self.session.create(uri, create_params + cgparam) + # Add in column group or index settings. + if self.use_cg: + cgparam = 'columns=(v),' + suburi = 'colgroup:' + self.name + ':g0' + self.session.create(suburi, complex_params + cgparam) + if self.use_index: + suburi = 'index:' + self.name + ':i0' + self.session.create(suburi, complex_params + cgparam) + + # Put some data in table. + c = self.session.open_cursor(uri, None) + for k in range(self.entries): + c[k+1] = 1 + c.close() + + # Verify the string in the metadata + self.verify_metadata(new_param) + + # Run through all combinations of the alter commands + # for all allowed settings. 
+ for a in self.cache_alter: + alter_param = '%s=%s' % (self.setting, a) + self.session.alter(uri, alter_param) + if self.reopen: + self.reopen_conn() + special = self.use_cg or self.use_index + if not special: + self.verify_metadata(alter_param) + else: + self.session.alter(suburi, alter_param) + self.verify_metadata(alter_param) + +if __name__ == '__main__': + wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_backup04.py b/src/third_party/wiredtiger/test/suite/test_backup04.py index 13b2436d7ad..ba39d4aebfb 100644 --- a/src/third_party/wiredtiger/test/suite/test_backup04.py +++ b/src/third_party/wiredtiger/test/suite/test_backup04.py @@ -37,7 +37,7 @@ from wtthread import op_thread # test_backup04.py # Utilities: wt backup -# Test cursor backup with target URIs +# Test incremental cursor backup. class test_backup_target(wttest.WiredTigerTestCase, suite_subprocess): dir='backup.dir' # Backup directory name logmax="100K" diff --git a/src/third_party/wiredtiger/test/suite/test_backup07.py b/src/third_party/wiredtiger/test/suite/test_backup07.py index b15ab274a8b..b2b6fcc80de 100644 --- a/src/third_party/wiredtiger/test/suite/test_backup07.py +++ b/src/third_party/wiredtiger/test/suite/test_backup07.py @@ -51,16 +51,14 @@ class test_backup07(wttest.WiredTigerTestCase, suite_subprocess): return 'cache_size=1G,log=(archive=false,enabled,file_max=%s)' % \ self.logmax - # Run background inserts while running checkpoints and incremental backups - # repeatedly. + # Run background inserts while running checkpoints repeatedly. def test_backup07(self): log2 = "WiredTigerLog.0000000002" self.session.create(self.uri, "key_format=S,value_format=S") # Insert small amounts of data at a time stopping just after we - # cross into log file 2. That way we can add more operations into - # log file 2 during the full backup. + # cross into log file 2. 
loop = 0 c = self.session.open_cursor(self.uri) while not os.path.exists(log2): @@ -74,9 +72,7 @@ class test_backup07(wttest.WiredTigerTestCase, suite_subprocess): # Test a potential bug in full backups and creates. # We allow creates during backup because the file doesn't exist # when the backup metadata is created on cursor open and the newly - # created file is not in the cursor list. However, if using logging - # and the create and inserts/updates appear in a log file copied, - # then currently there will be an error opening the backup directory. + # created file is not in the cursor list. # Open up the backup cursor, create and add data to a new table # and then copy the files. @@ -94,13 +90,14 @@ class test_backup07(wttest.WiredTigerTestCase, suite_subprocess): c.close() self.session.log_flush('sync=on') - # Now copy the files returned by the backup cursor. This will - # include the log file that has updates for the newly created table. + # Now copy the files returned by the backup cursor. This should not + # include the newly created table. while True: ret = bkup_c.next() if ret != 0: break newfile = bkup_c.get_key() + self.assertNotEqual(newfile, self.newuri) sz = os.path.getsize(newfile) self.pr('Copy from: ' + newfile + ' (' + str(sz) + ') to ' + self.dir) shutil.copy(newfile, self.dir) diff --git a/src/third_party/wiredtiger/test/suite/test_backup10.py b/src/third_party/wiredtiger/test/suite/test_backup10.py new file mode 100644 index 00000000000..afac740999c --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_backup10.py @@ -0,0 +1,172 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2018 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. 
+# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +import wiredtiger, wttest +import os, shutil +from helper import compare_files +from suite_subprocess import suite_subprocess +from wtdataset import simple_key +from wtscenario import make_scenarios + +# test_backup10.py +# Test cursor backup with a duplicate backup cursor. +class test_backup10(wttest.WiredTigerTestCase, suite_subprocess): + dir='backup.dir' # Backup directory name + logmax="100K" + newuri="table:newtable" + uri="table:test" + nops=100 + + pfx = 'test_backup' + + # Create a large cache, otherwise this test runs quite slowly. + def conn_config(self): + return 'cache_size=1G,log=(archive=false,enabled,file_max=%s)' % \ + self.logmax + + # Run background inserts while running checkpoints repeatedly. + def test_backup10(self): + log2 = "WiredTigerLog.0000000002" + log3 = "WiredTigerLog.0000000003" + + self.session.create(self.uri, "key_format=S,value_format=S") + + # Insert small amounts of data at a time stopping after we + # cross into log file 2. 
+ loop = 0 + c = self.session.open_cursor(self.uri) + while not os.path.exists(log2): + for i in range(0, self.nops): + num = i + (loop * self.nops) + key = 'key' + str(num) + val = 'value' + str(num) + c[key] = val + loop += 1 + + # Open up the backup cursor. This causes a new log file to be created. + # That log file is not part of the list returned. + os.mkdir(self.dir) + bkup_c = self.session.open_cursor('backup:', None, None) + + # Add some data that will appear in log file 3. + for i in range(0, self.nops): + num = i + (loop * self.nops) + key = 'key' + str(num) + val = 'value' + str(num) + c[key] = val + loop += 1 + c.close() + self.session.log_flush('sync=on') + + # Now copy the files returned by the backup cursor. + orig_logs = [] + while True: + ret = bkup_c.next() + if ret != 0: + break + newfile = bkup_c.get_key() + self.assertNotEqual(newfile, self.newuri) + sz = os.path.getsize(newfile) + self.pr('Copy from: ' + newfile + ' (' + str(sz) + ') to ' + self.dir) + shutil.copy(newfile, self.dir) + if "WiredTigerLog" in newfile: + orig_logs.append(newfile) + self.assertEqual(ret, wiredtiger.WT_NOTFOUND) + + # Now open a duplicate backup cursor. + config = 'target=("log:")' + dupc = self.session.open_cursor(None, bkup_c, config) + dup_logs = [] + while True: + ret = dupc.next() + if ret != 0: + break + newfile = dupc.get_key() + self.assertTrue("WiredTigerLog" in newfile) + sz = os.path.getsize(newfile) + if (newfile not in orig_logs): + self.pr('DUP: Copy from: ' + newfile + ' (' + str(sz) + ') to ' + self.dir) + shutil.copy(newfile, self.dir) + # Record all log files returned for later verification. + dup_logs.append(newfile) + self.assertEqual(ret, wiredtiger.WT_NOTFOUND) + + # We expect that the duplicate logs are a superset of the + # original logs. And we expect the difference to be the + # addition of log file 3 only. 
+ orig_set = set(orig_logs) + dup_set = set(dup_logs) + self.assertTrue(dup_set.issuperset(orig_set)) + diff = dup_set.difference(orig_set) + self.assertEqual(len(diff), 1) + self.assertTrue(log3 in dup_set) + + # Test a few error cases now. + # - We cannot make multiple duplicate backup cursors. + # - We cannot duplicate the duplicate backup cursor. + # - We must use the log target. + msg = "/already a duplicate backup cursor open/" + # Test multiple duplicate backup cursors. + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda:self.assertEquals(self.session.open_cursor(None, + bkup_c, config), 0), msg) + # Test duplicate of duplicate backup cursor. + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda:self.assertEquals(self.session.open_cursor(None, + dupc, config), 0), msg) + + dupc.close() + + # Test we must use the log target. + msg = "/must be for logs/" + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda:self.assertEquals(self.session.open_cursor(None, + bkup_c, None), 0), msg) + + # Open duplicate backup cursor again now that the first + # one is closed. Test every log file returned is the same + # as the first time. + dupc = self.session.open_cursor(None, bkup_c, config) + while True: + ret = dupc.next() + if ret != 0: + break + newfile = dupc.get_key() + self.assertTrue("WiredTigerLog" in newfile) + self.assertTrue(newfile in dup_logs) + self.assertEqual(ret, wiredtiger.WT_NOTFOUND) + + dupc.close() + bkup_c.close() + + # After the full backup, open and recover the backup database. 
+ backup_conn = self.wiredtiger_open(self.dir) + backup_conn.close() + +if __name__ == '__main__': + wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_bug019.py b/src/third_party/wiredtiger/test/suite/test_bug019.py index c25afa692cb..fd68578ce42 100644 --- a/src/third_party/wiredtiger/test/suite/test_bug019.py +++ b/src/third_party/wiredtiger/test/suite/test_bug019.py @@ -94,14 +94,15 @@ class test_bug019(wttest.WiredTigerTestCase): older = newer self.session.checkpoint() - # Wait for up to 30 seconds for pre-allocate to drop in an idle system + # Wait for a long time for pre-allocate to drop in an idle system # it should usually be fast, but on slow systems can take time. - for sleepcount in range(1,30): + max_wait_time = 90 + for sleepcount in range(1,max_wait_time): new_prealloc = self.get_prealloc_stat() if new_prealloc < self.max_prealloc: break time.sleep(1.0) - self.assertTrue(sleepcount < 30) + self.assertTrue(sleepcount < max_wait_time) if __name__ == '__main__': wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_calc_modify.py b/src/third_party/wiredtiger/test/suite/test_calc_modify.py new file mode 100644 index 00000000000..a52096a3125 --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_calc_modify.py @@ -0,0 +1,118 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2018 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. 
We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +import random, string +import wiredtiger, wttest + +r = random.Random(42) # Make things repeatable + +# test_calc_modify.py +# Test the wiredtiger_calc_modify API +# +# Try many combinations of: +# - data size +# - data randomness ('a' * N, repeated patterns, uniform random) +# - number and type of modifications (add, remove, replace) +# - space between the modifications +# +# Check that wiredtiger_calc_modify finds a set of modifies when the edit +# difference is under the specified limits, and that applying those +# modifications produces the expected result. If the edit difference is +# larger than the limits, it is okay for the call to fail. 
+class test_calc_modify(wttest.WiredTigerTestCase): + uri = 'table:test_calc_modify' + + # operation types + ADD = 1 + REMOVE = 2 + REPLACE = 3 + + def mkstring(self, size, repeat_size=1): + pattern = ''.join(r.choice(string.ascii_letters + string.digits) for _ in xrange(repeat_size)) + return (pattern * ((size + repeat_size - 1) / repeat_size))[:size] + + def one_test(self, c, k, oldsz, repeatsz, nmod, maxdiff): + oldv = self.mkstring(oldsz, repeatsz) + + offsets = sorted(r.sample(xrange(oldsz), nmod)) + modsizes = sorted(r.sample(xrange(maxdiff), nmod + 1)) + lengths = [modsizes[i+1] - modsizes[i] for i in xrange(nmod)] + modtypes = [r.choice((self.ADD, self.REMOVE, self.REPLACE)) for _ in xrange(nmod)] + + self.pr("offsets: %s" % offsets) + self.pr("modsizes: %s" % modsizes) + self.pr("lengths: %s" % lengths) + self.pr("modtypes: %s" % modtypes) + + orig = oldv + newv = '' + for i in xrange(nmod): + if i > 0 and offsets[i] - offsets[i - 1] < maxdiff: + continue + newv += orig[:offsets[i]] + orig = orig[offsets[i]:] + if modtypes[i] == self.ADD: + newv += self.mkstring(lengths[i], r.randint(1, lengths[i])) + elif modtypes[i] == self.REMOVE: + orig = orig[lengths[i]:] + elif modtypes[i] == self.REPLACE: + newv += self.mkstring(lengths[i], r.randint(1, lengths[i])) + orig = orig[lengths[i]:] + newv += orig + + self.pr("oldv: %s" % oldv) + self.pr("newv: %s" % newv) + try: + mods = wiredtiger.wiredtiger_calc_modify(None, oldv, newv, max(maxdiff, nmod * 64), nmod) + self.pr("calculated mods: %s" % mods) + except wiredtiger.WiredTigerError: + # When the data repeats, the algorithm can register the "wrong" repeated sequence. Retry... 
+ mods = wiredtiger.wiredtiger_calc_modify(None, oldv, newv, nmod * (64 + repeatsz), nmod) + self.pr("calculated mods (round 2): %s" % mods) + self.assertIsNotNone(mods) + + c[k] = oldv + self.session.begin_transaction('isolation=snapshot') + c.set_key(k) + c.modify(mods) + self.session.commit_transaction() + self.assertEqual(c[k], newv) + + def test_calc_modify(self): + self.session.create(self.uri, 'key_format=i,value_format=u') + c = self.session.open_cursor(self.uri) + for k in xrange(1000): + size = r.randint(1000, 10000) + repeats = r.randint(1, size) + nmods = r.randint(1, 10) + maxdiff = r.randint(64, size / 10) + self.pr("size %s, repeats %s, nmods %s, maxdiff %s" % (size, repeats, nmods, maxdiff)) + self.one_test(c, k, size, repeats, nmods, maxdiff) + +if __name__ == '__main__': + wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_compress01.py b/src/third_party/wiredtiger/test/suite/test_compress01.py index aa6f5104216..d675ea25b8b 100644 --- a/src/third_party/wiredtiger/test/suite/test_compress01.py +++ b/src/third_party/wiredtiger/test/suite/test_compress01.py @@ -27,14 +27,13 @@ # OTHER DEALINGS IN THE SOFTWARE. 
# # test_compress01.py -# Basic block compression operations +# Smoke-test compression # -import os, run import wiredtiger, wttest from wtscenario import make_scenarios -# Test basic compression +# Smoke-test compression class test_compress01(wttest.WiredTigerTestCase): types = [ @@ -43,8 +42,12 @@ class test_compress01(wttest.WiredTigerTestCase): ] compress = [ ('nop', dict(compress='nop')), + ('lz4', dict(compress='lz4')), + ('lz4-noraw', dict(compress='lz4')), # API compatibility test ('snappy', dict(compress='snappy')), - ('none', dict(compress=None)), + ('zlib', dict(compress='zlib')), + ('zlib-noraw', dict(compress='zlib')), # API compatibility test + ('zstd', dict(compress='zstd')), ] scenarios = make_scenarios(types, compress) @@ -58,13 +61,9 @@ class test_compress01(wttest.WiredTigerTestCase): # Create a table, add keys with both big and small values, then verify them. def test_compress(self): - # Use relatively small leaf pages to force big values to be overflow # items, but still large enough that we get some compression action. params = 'key_format=S,value_format=S,leaf_page_max=4096' - if self.compress != None: - params += ',block_compressor=' + self.compress - self.session.create(self.uri, params) cursor = self.session.open_cursor(self.uri, None) for idx in xrange(1,self.nrecords): diff --git a/src/third_party/wiredtiger/test/suite/test_encrypt05.py b/src/third_party/wiredtiger/test/suite/test_encrypt05.py deleted file mode 100644 index fa5ed483462..00000000000 --- a/src/third_party/wiredtiger/test/suite/test_encrypt05.py +++ /dev/null @@ -1,120 +0,0 @@ -#!/usr/bin/env python -# -# Public Domain 2014-2018 MongoDB, Inc. -# Public Domain 2008-2014 WiredTiger, Inc. -# -# This is free and unencumbered software released into the public domain. 
-# -# Anyone is free to copy, modify, publish, use, compile, sell, or -# distribute this software, either in source code form or as a compiled -# binary, for any purpose, commercial or non-commercial, and by any -# means. -# -# In jurisdictions that recognize copyright laws, the author or authors -# of this software dedicate any and all copyright interest in the -# software to the public domain. We make this dedication for the benefit -# of the public at large and to the detriment of our heirs and -# successors. We intend this dedication to be an overt act of -# relinquishment in perpetuity of all present and future rights to this -# software under copyright law. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. 
-# -# test_encrypt05.py -# Test raw compression with encryption -# - -import os, run, random -import wiredtiger, wttest -from wtscenario import make_scenarios - -# Test raw compression with encryption -class test_encrypt05(wttest.WiredTigerTestCase): - - encrypt = [ - ('rotn', dict( sys_encrypt='rotn', sys_encrypt_args=',keyid=11', - file_encrypt='rotn', file_encrypt_args=',keyid=13')), - ] - compress = [ - ('zlib', dict(log_compress='zlib', block_compress='zlib')), - ] - scenarios = make_scenarios(encrypt, compress) - - nrecords = 500 - bigvalue = 'a' * 500 # we use values that will definitely give compression - - def conn_extensions(self, extlist): - extlist.skip_if_missing = True - extlist.extension('encryptors', self.sys_encrypt) - extlist.extension('encryptors', self.file_encrypt) - extlist.extension('compressors', self.block_compress) - extlist.extension('compressors', self.log_compress) - - def conn_config(self): - encarg = 'encryption=(name={0}{1}),'.format( - self.sys_encrypt, self.sys_encrypt_args) - comparg = '' - if self.log_compress != None: - comparg='log=(compressor={0}),'.format(self.log_compress) - return encarg + comparg - - def getvalue(self, r, n): - if n < len(self.bigvalue): - return self.bigvalue[0: n] - else: - diff = n - len(self.bigvalue) - rchr = ''.join(chr(r.randint(1, 255)) for i in range(diff)) - return self.bigvalue + rchr - - # Create a table, add key/values with specific lengths, then verify them. - def test_encrypt(self): - params = 'key_format=S,value_format=S' - if self.file_encrypt != None: - params += ',encryption=(name=' + self.file_encrypt + \ - self.file_encrypt_args + ')' - if self.block_compress != None: - params += ',block_compressor=' + self.block_compress - # Explicitly set max size for leaf page - params += ',leaf_page_max=8KB' - - # n is the length of the value. This range is experimentally chosen - # to be near an edge case for an 8K leaf size for raw compression. 
- # We can fit about 10-11 records of this size on the page. We let - # the size creep up to the edge case. The compressor is trying to - # maximize the number of records that can fit on the fixed size - # page, and the calculation is modulated by the encryptor's need for - # a constant buffer growth. - for n in xrange(1045, 1060, 1): - uri='table:test_encrypt05-' + str(n) - self.session.create(uri, params) - r = random.Random() - r.seed(0) - cursor = self.session.open_cursor(uri, None) - for idx in xrange(1,self.nrecords): - key = str(idx) - cursor.set_key(key) - cursor.set_value(self.getvalue(r, n)) - cursor.insert() - cursor.close() - - # Force the cache to disk, so we read - # compressed/encrypted pages from disk. - self.reopen_conn() - - cursor = self.session.open_cursor(uri, None) - r.seed(0) - for idx in xrange(1,self.nrecords): - key = str(idx) - cursor.set_key(key) - self.assertEqual(cursor.search(), 0) - self.assertEquals(cursor.get_value(), self.getvalue(r, n)) - cursor.close() - -if __name__ == '__main__': - wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_log03.py b/src/third_party/wiredtiger/test/suite/test_log03.py new file mode 100755 index 00000000000..e93232a14aa --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_log03.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2018 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. 
We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +import os, shutil +import helper, wiredtiger, wttest +from wiredtiger import stat + +# test_log03.py +# test configuration for log.dirty_max +class test_log03(wttest.WiredTigerTestCase): + """ + Test log.dirty_max + """ + + homedir = 'HOME' + uri = 'table:test_log03' + nentries = 20000 + + # Tests need to setup the connection in their own way. + def setUpConnectionOpen(self, dir): + return None + + def setUpSessionOpen(self, conn): + return None + + def populate(self): + big_str = 'A' * 10000 + self.session.create(self.uri, "key_format=S,value_format=S") + cursor = self.session.open_cursor(self.uri) + for i in range(self.nentries): + cursor[str(i)] = big_str + cursor.close() + + def fsync_stat(self): + cursor = self.session.open_cursor('statistics:', None, None) + result = cursor[stat.conn.fsync_io][2] + cursor.close() + return result + + def with_log_sync(self, log_size, dirty_pct): + config = "cache_size=1G,create,statistics=(fast),log=(enabled" + config += ",file_max=" + str(log_size) + "M" + config += ",os_cache_dirty_pct=" + str(dirty_pct) + config += "),transaction_sync=(enabled=false,method=none)" + #self.tty('CONFIG: ' + config) + + # Recreate a home directory each time so we have the log + # starting at zero. 
That makes our calculations easier. + shutil.rmtree(self.homedir, ignore_errors=True) + os.mkdir(self.homedir) + self.conn = self.wiredtiger_open(self.homedir, config) + self.session = self.conn.open_session(None) + self.populate() + result = self.fsync_stat() + self.session.close() + self.conn.close() + return result + + def test_dirty_max(self): + # With this test, we have a baseline of syncs performed for 12M + # log files. Then we set dirty_max to values that are half, + # a third, a quarter and a fifth of the log file, and we would + # expect an increase of syncs each time. The number of syncs + # produced turns out to be a little variable, so we've picked + # conservative increases. + baseline = self.with_log_sync(12, 0) + #self.tty('baseline: ' + str(baseline)) + + for dirty_pct,increase in [50, 8], [33, 16], [25, 24], [20, 32]: + result = self.with_log_sync(12, dirty_pct) + #self.tty('tried: ' + str(dirty_pct) + ', got: ' + str(result)) + self.assertGreater(result, baseline + increase) + +if __name__ == '__main__': + wttest.run() diff --git a/src/third_party/wiredtiger/test/utility/misc.c b/src/third_party/wiredtiger/test/utility/misc.c index 2cc7ad8a94b..8d5605208cf 100644 --- a/src/third_party/wiredtiger/test/utility/misc.c +++ b/src/third_party/wiredtiger/test/utility/misc.c @@ -215,6 +215,21 @@ testutil_is_flag_set(const char *flag) return (enable_long_tests); } +/* + * testutil_print_command_line -- + * Print command line arguments for csuite tests. 
+ */ +void +testutil_print_command_line(int argc, char * const *argv) +{ + int i; + + printf("Running test command: "); + for (i = 0; i < argc; i++) + printf("%s ", argv[i]); + printf("\n"); +} + #ifndef _WIN32 /* * testutil_sleep_wait -- diff --git a/src/third_party/wiredtiger/test/utility/parse_opts.c b/src/third_party/wiredtiger/test/utility/parse_opts.c index 0bd724528c1..5bc92633f79 100644 --- a/src/third_party/wiredtiger/test/utility/parse_opts.c +++ b/src/third_party/wiredtiger/test/utility/parse_opts.c @@ -46,6 +46,8 @@ testutil_parse_opts(int argc, char * const *argv, TEST_OPTS *opts) opts->progname = testutil_set_progname(argv); + testutil_print_command_line(argc, argv); + while ((ch = __wt_getopt(opts->progname, argc, argv, "A:dh:n:o:pR:T:t:vW:")) != EOF) switch (ch) { diff --git a/src/third_party/wiredtiger/test/utility/test_util.h b/src/third_party/wiredtiger/test/utility/test_util.h index d321452c494..e756f5b4225 100644 --- a/src/third_party/wiredtiger/test/utility/test_util.h +++ b/src/third_party/wiredtiger/test/utility/test_util.h @@ -249,6 +249,7 @@ void testutil_cleanup(TEST_OPTS *); bool testutil_is_flag_set(const char *); void testutil_make_work_dir(const char *); int testutil_parse_opts(int, char * const *, TEST_OPTS *); +void testutil_print_command_line(int argc, char * const *argv); void testutil_progress(TEST_OPTS *, const char *); #ifndef _WIN32 void testutil_sleep_wait(uint32_t, pid_t); |