diff options
author | Michael Cahill <michael.cahill@mongodb.com> | 2015-05-12 14:47:28 +1000 |
---|---|---|
committer | Michael Cahill <michael.cahill@mongodb.com> | 2015-05-12 14:47:28 +1000 |
commit | 9880d7563f74ec1e7033bae0e2f5aa312abc7b20 (patch) | |
tree | 9d1893587cc34c117619340b8f2f673666a7e206 | |
parent | 8f6b8fd4647186cc6ce68cb31ecd085c4323157e (diff) | |
parent | df5a23d353d141afcbdb6cdced0538c87e99d389 (diff) | |
download | mongo-9880d7563f74ec1e7033bae0e2f5aa312abc7b20.tar.gz |
Merge branch 'develop' into encryption-api
Conflicts:
ext/compressors/lz4/lz4_compress.c
57 files changed, 715 insertions, 524 deletions
diff --git a/build_posix/aclocal/options.m4 b/build_posix/aclocal/options.m4 index d2cdbf65dce..01d08ce3d16 100644 --- a/build_posix/aclocal/options.m4 +++ b/build_posix/aclocal/options.m4 @@ -197,8 +197,8 @@ if test "$wt_cv_enable_lz4" = "yes"; then AC_CHECK_HEADER(lz4.h,, [AC_MSG_ERROR([--enable-lz4 requires lz4.h])]) AC_LANG_POP([C++]) - AC_CHECK_LIB(lz4, LZ4_compress,, - [AC_MSG_ERROR([--enable-lz4 requires lz4 library])]) + AC_CHECK_LIB(lz4, LZ4_compress_destSize,, + [AC_MSG_ERROR([--enable-lz4 requires lz4 library with LZ4_compress_destSize support])]) fi AM_CONDITIONAL([LZ4], [test "$wt_cv_enable_lz4" = "yes"]) diff --git a/dist/s_define.list b/dist/s_define.list index eae95d527a1..c9f2db406bf 100644 --- a/dist/s_define.list +++ b/dist/s_define.list @@ -12,7 +12,6 @@ LF_SET LLONG_MAX LLONG_MIN SIZE_CHECK -TXNID_LE TXN_API_CALL TXN_API_CALL_NOCONF TXN_API_END diff --git a/dist/s_string.ok b/dist/s_string.ok index 95ebf2555db..79ec77aae22 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -459,6 +459,7 @@ der dereference desc dest +destSize dev dhandle dhandles @@ -662,6 +663,7 @@ mfence minorp mkdir mmap +mmrand mnt msecs msg diff --git a/examples/c/ex_backup.c b/examples/c/ex_backup.c index 67872e1b190..743cd7b9cf1 100644 --- a/examples/c/ex_backup.c +++ b/examples/c/ex_backup.c @@ -97,16 +97,11 @@ compare_backups(int i) (void)strncpy(msg, "MAIN", sizeof(msg)); else snprintf(msg, sizeof(msg), "%d", i); - if (ret == 0) - fprintf(stdout, - "Iteration %s: Tables %s.%d and %s.%d identical\n", - msg, full_out, i, incr_out, i); - else { - fprintf(stdout, - "Iteration %s: Tables %s.%d and %s.%d differ\n", - msg, full_out, i, incr_out, i); - exit(1); - } + printf( + "Iteration %s: Tables %s.%d and %s.%d %s\n", + msg, full_out, i, incr_out, i, ret == 0 ? "identical" : "differ"); + if (ret != 0) + exit (1); /* * If they compare successfully, clean up. @@ -289,13 +284,16 @@ main(void) ret = setup_directories(); ret = wt_conn->open_session(wt_conn, NULL, NULL, &session); ret = session->create(session, uri, "key_format=S,value_format=S"); + printf("Adding initial data\n"); ret = add_work(session, 0); + printf("Taking initial backup\n"); ret = take_full_backup(session, 0); ret = session->checkpoint(session, NULL); for (i = 1; i < MAX_ITERATIONS; i++) { + printf("Iteration %d: adding data\n", i); ret = add_work(session, i); ret = session->checkpoint(session, NULL); /* @@ -303,14 +301,17 @@ main(void) * comparison purposes. A normal incremental backup * procedure would not include this. */ + printf("Iteration %d: taking full backup\n", i); ret = take_full_backup(session, i); /* * Taking the incremental backup also calls truncate * to archive the log files, if the copies were successful. * See that function for details on that call. */ + printf("Iteration %d: taking incremental backup\n", i); ret = take_incr_backup(session, i); + printf("Iteration %d: dumping and comparing data\n", i); ret = compare_backups(i); } @@ -319,6 +320,7 @@ main(void) * comparison between the incremental and original. */ ret = wt_conn->close(wt_conn, NULL); + printf("Final comparison: dumping and comparing data\n"); ret = compare_backups(0); return (ret); } diff --git a/ext/compressors/lz4/lz4_compress.c b/ext/compressors/lz4/lz4_compress.c index 6b602b3d45c..0906e1d131d 100644 --- a/ext/compressors/lz4/lz4_compress.c +++ b/ext/compressors/lz4/lz4_compress.c @@ -48,19 +48,41 @@ typedef struct { } LZ4_COMPRESSOR; /* + * LZ4 decompression requires the exact compressed byte count returned by the + * LZ4_compress and LZ4_compress_destSize functions. WiredTiger doesn't track + * that value, store it in the destination buffer. + * + * Additionally, LZ4_compress_destSize may compress into the middle of a record, + * and after decompression we return the length to the last record successfully + * decompressed, not the number of bytes decompressed; store that value in the + * destination buffer as well. + * + * Use fixed-size, 4B values (WiredTiger never writes buffers larger than 4GB). + * + * The unused field is available for a mode flag if one is needed in the future, + * we guarantee it's 0. + */ +typedef struct { + uint32_t compressed_len; /* True compressed length */ + uint32_t uncompressed_len; /* True uncompressed source length */ + uint32_t useful_len; /* Decompression return value */ + uint32_t unused; /* Guaranteed to be 0 */ +} LZ4_PREFIX; + +/* * lz4_error -- * Output an error message, and return a standard error code. */ static int lz4_error( - WT_COMPRESSOR *compressor, WT_SESSION *session, const char *call, int zret) + WT_COMPRESSOR *compressor, WT_SESSION *session, const char *call, int error) { WT_EXTENSION_API *wt_api; wt_api = ((LZ4_COMPRESSOR *)compressor)->wt_api; (void)wt_api->err_printf(wt_api, - session, "lz4 error: %s: %d", call, zret); + session, "lz4 error: %s: %d", call, error); return (WT_ERROR); } @@ -74,39 +96,34 @@ lz4_compress(WT_COMPRESSOR *compressor, WT_SESSION *session, uint8_t *dst, size_t dst_len, size_t *result_lenp, int *compression_failed) { - char *lz4buf; - size_t lz4_len; + LZ4_PREFIX prefix; + int lz4_len; - /* - * The buffer should always be large enough due to the lz4_pre_size - * call, but be paranoid and error if it isn't. - */ - if (dst_len < src_len + sizeof(size_t)) - return (lz4_error(compressor, session, - "LZ4 compress buffer too small", 0)); + (void)compressor; /* Unused parameters */ + (void)session; + (void)dst_len; - /* Store the length of the compressed block in the first 8 bytes. */ - lz4buf = (char *)dst + sizeof(size_t); - lz4_len = (size_t)LZ4_compress((const char *)src, lz4buf, (int)src_len); + /* Compress, starting after the prefix bytes. */ + lz4_len = LZ4_compress( + (const char *)src, (char *)dst + sizeof(LZ4_PREFIX), (int)src_len); /* - * Flag no-compression if the result was larger than the original - * size or compression failed. + * If compression succeeded and the compressed length is smaller than + * the original size, return success. */ - if (lz4_len == 0 || lz4_len + sizeof(size_t) >= src_len) - *compression_failed = 1; - else { - /* - * On decompression, lz4 requires the exact compressed byte - * count (the current value of lz4_len). WiredTiger does not - * preserve that value, so save lz4_len at the beginning of the - * destination buffer. - */ - *(size_t *)dst = lz4_len; - *result_lenp = lz4_len + sizeof(size_t); + if (lz4_len != 0 && (size_t)lz4_len + sizeof(LZ4_PREFIX) < src_len) { + prefix.compressed_len = (uint32_t)lz4_len; + prefix.uncompressed_len = (uint32_t)src_len; + prefix.useful_len = (uint32_t)src_len; + prefix.unused = 0; + memcpy(dst, &prefix, sizeof(LZ4_PREFIX)); + + *result_lenp = (size_t)lz4_len + sizeof(LZ4_PREFIX); *compression_failed = 0; + return (0); } + *compression_failed = 1; return (0); } @@ -121,40 +138,143 @@ lz4_decompress(WT_COMPRESSOR *compressor, WT_SESSION *session, size_t *result_lenp) { WT_EXTENSION_API *wt_api; - char *compressed_data; + LZ4_PREFIX prefix; int decoded; - size_t src_data_len; + uint8_t *dst_tmp; + + (void)src_len; /* Unused parameters */ wt_api = ((LZ4_COMPRESSOR *)compressor)->wt_api; - /* Retrieve compressed length from start of the data buffer. */ - src_data_len = *(size_t *)src; - if (src_data_len + sizeof(size_t) > src_len) { - (void)wt_api->err_printf(wt_api, - session, - "lz4_decompress: stored size exceeds buffer size"); - return (WT_ERROR); + /* + * Retrieve the true length of the compressed block and source and the + * decompressed bytes to return from the start of the source buffer. + */ + memcpy(&prefix, src, sizeof(LZ4_PREFIX)); + + /* + * Decompress, starting after the prefix bytes. Use safe decompression: + * we rely on decompression to detect corruption. + * + * Two code paths, one with and one without a bounce buffer. When doing + * raw compression, we compress to a target size irrespective of row + * boundaries, and return to our caller a "useful" compression length + * based on the last complete row that was compressed. Our caller stores + * that length, not the length of bytes actually compressed by LZ4. In + * other words, our caller doesn't know how many bytes will result from + * decompression, likely hasn't provided us a large enough buffer, and + * we have to allocate a scratch buffer. + */ + if (dst_len < prefix.uncompressed_len) { + if ((dst_tmp = wt_api->scr_alloc( + wt_api, session, (size_t)prefix.uncompressed_len)) == NULL) + return (ENOMEM); + + decoded = LZ4_decompress_safe( + (const char *)src + sizeof(LZ4_PREFIX), (char *)dst_tmp, + (int)prefix.compressed_len, (int)prefix.uncompressed_len); + + if (decoded >= 0) + memcpy(dst, dst_tmp, dst_len); + wt_api->scr_free(wt_api, session, dst_tmp); + } else + decoded = LZ4_decompress_safe( + (const char *)src + sizeof(LZ4_PREFIX), + (char *)dst, (int)prefix.compressed_len, (int)dst_len); + + if (decoded >= 0) { + *result_lenp = prefix.useful_len; + return (0); } - /* Skip over the data size to the start of compressed data. */ - compressed_data = (char *)src + sizeof(size_t); + return ( + lz4_error(compressor, session, "LZ4 decompress error", decoded)); +} + +/* + * lz4_find_slot -- + * Find the slot containing the target offset (binary search). + */ +static inline uint32_t +lz4_find_slot(int target_arg, uint32_t *offsets, uint32_t slots) +{ + uint32_t base, indx, limit, target; + + indx = 1; /* -Wuninitialized */ + + target = (uint32_t)target_arg; /* Type conversion */ + + /* Fast check if we consumed it all, it's a likely result. */ + if (target >= offsets[slots]) + return (slots); /* - * The destination buffer length should always be sufficient because - * wiredtiger keeps track of the byte count before compression. Use - * safe decompression: we may be relying on decompression to detect - * corruption. + * Figure out which slot we got to: binary search. Note the test of + * offset (slot + 1), that's (end-byte + 1) for slot. */ - decoded = LZ4_decompress_safe( - compressed_data, (char *)dst, (int)src_data_len, (int)dst_len); + for (base = 0, limit = slots; limit != 0; limit >>= 1) { + indx = base + (limit >> 1); + if (target > offsets[indx + 1]) { + base = indx + 1; + --limit; + } + } + + return (indx); +} + +/* + * lz4_compress_raw -- + * Pack records into a specified on-disk page size. + */ +static int +lz4_compress_raw(WT_COMPRESSOR *compressor, WT_SESSION *session, + size_t page_max, int split_pct, size_t extra, + uint8_t *src, uint32_t *offsets, uint32_t slots, + uint8_t *dst, size_t dst_len, int final, + size_t *result_lenp, uint32_t *result_slotsp) +{ + LZ4_PREFIX prefix; + int lz4_len; + uint32_t slot; + int sourceSize, targetDestSize; + + (void)compressor; /* Unused parameters */ + (void)session; + (void)split_pct; + (void)final; + + sourceSize = (int)offsets[slots]; /* Type conversion */ + targetDestSize = + (int)((dst_len < page_max ? dst_len : page_max) - extra); - if (decoded < 0) - return (lz4_error(compressor, session, - "LZ4 decompress error", decoded)); + /* Compress, starting after the prefix bytes. */ + lz4_len = LZ4_compress_destSize((const char *)src, + (char *)dst + sizeof(LZ4_PREFIX), &sourceSize, targetDestSize); - /* return the uncompressed data length */ - *result_lenp = dst_len; + /* + * If compression succeeded and the compressed length is smaller than + * the original size, return success. + */ + if (lz4_len != 0) { + /* Find the first slot we didn't compress. */ + slot = lz4_find_slot(sourceSize, offsets, slots); + + if ((size_t)lz4_len + sizeof(LZ4_PREFIX) < offsets[slot]) { + prefix.compressed_len = (uint32_t)lz4_len; + prefix.uncompressed_len = (uint32_t)sourceSize; + prefix.useful_len = offsets[slot]; + prefix.unused = 0; + memcpy(dst, &prefix, sizeof(LZ4_PREFIX)); + + *result_slotsp = slot; + *result_lenp = (size_t)lz4_len + sizeof(LZ4_PREFIX); + return (0); + } + } + *result_slotsp = 0; + *result_lenp = 1; return (0); } @@ -164,18 +284,18 @@ lz4_decompress(WT_COMPRESSOR *compressor, WT_SESSION *session, */ static int lz4_pre_size(WT_COMPRESSOR *compressor, WT_SESSION *session, - uint8_t *src, size_t src_len, - size_t *result_lenp) + uint8_t *src, size_t src_len, size_t *result_lenp) { - (void)compressor; + (void)compressor; /* Unused parameters */ (void)session; (void)src; /* - * LZ4 can use more space than the input data size, use the library - * calculation of that overhead (plus our overhead) to be safe. + * In block mode, LZ4 can use more space than the input data size, use + * the library calculation of that overhead (plus our overhead) to be + * safe. */ - *result_lenp = LZ4_COMPRESSBOUND(src_len) + sizeof(size_t); + *result_lenp = LZ4_COMPRESSBOUND(src_len) + sizeof(LZ4_PREFIX); return (0); } @@ -186,39 +306,30 @@ lz4_pre_size(WT_COMPRESSOR *compressor, WT_SESSION *session, static int lz4_terminate(WT_COMPRESSOR *compressor, WT_SESSION *session) { - (void)session; + (void)session; /* Unused parameters */ - /* Free the allocated memory. */ free(compressor); - return (0); } -int lz4_extension_init(WT_CONNECTION *, WT_CONFIG_ARG *); - /* - * lz4_extension_init -- - * A simple shared library compression example. + * lz4_add_compressor -- + * Add a LZ4 compressor. */ -int -lz4_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config) +static int +lz_add_compressor(WT_CONNECTION *connection, int raw, const char *name) { LZ4_COMPRESSOR *lz4_compressor; - (void)config; /* Unused parameters */ - + /* + * There are two almost identical LZ4 compressors: one using raw + * compression to target a specific block size, and one without. + */ if ((lz4_compressor = calloc(1, sizeof(LZ4_COMPRESSOR))) == NULL) return (errno); - /* - * Allocate a local compressor structure, with a WT_COMPRESSOR structure - * as the first field, allowing us to treat references to either type of - * structure as a reference to the other type. - * - * Heap memory (not static), because it can support multiple databases. - */ lz4_compressor->compressor.compress = lz4_compress; - lz4_compressor->compressor.compress_raw = NULL; + lz4_compressor->compressor.compress_raw = raw ? lz4_compress_raw : NULL; lz4_compressor->compressor.decompress = lz4_decompress; lz4_compressor->compressor.pre_size = lz4_pre_size; lz4_compressor->compressor.terminate = lz4_terminate; @@ -227,7 +338,29 @@ lz4_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config) /* Load the compressor */ return (connection->add_compressor( - connection, "lz4", (WT_COMPRESSOR *)lz4_compressor, NULL)); + connection, name, (WT_COMPRESSOR *)lz4_compressor, NULL)); +} + +int lz4_extension_init(WT_CONNECTION *, WT_CONFIG_ARG *); + +/* + * lz4_extension_init -- + * WiredTiger LZ4 compression extension - called directly when LZ4 support + * is built in, or via wiredtiger_extension_init when LZ4 support is included + * via extension loading. + */ +int +lz4_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config) +{ + int ret; + + (void)config; /* Unused parameters */ + + if ((ret = lz_add_compressor(connection, 1, "lz4")) != 0) + return (ret); + if ((ret = lz_add_compressor(connection, 0, "lz4-noraw")) != 0) + return (ret); + return (0); } /* diff --git a/src/btree/bt_io.c b/src/btree/bt_io.c index a8a608cd5bd..ec7d3109c0c 100644 --- a/src/btree/bt_io.c +++ b/src/btree/bt_io.c @@ -82,7 +82,7 @@ __wt_bt_read(WT_SESSION_IMPL *session, "configured"); /* - * We're allocating the exact number of bytes we're expecting + * Size the buffer based on the in-memory bytes we're expecting * from decompression. */ WT_ERR(__wt_buf_initsize(session, buf, dsk->mem_size)); diff --git a/src/btree/bt_sync.c b/src/btree/bt_sync.c index cc52f63f1f5..6c5b1fb98e8 100644 --- a/src/btree/bt_sync.c +++ b/src/btree/bt_sync.c @@ -70,7 +70,7 @@ __sync_file(WT_SESSION_IMPL *session, int syncop) if (__wt_page_is_modified(page) && __wt_txn_visible_all( session, page->modify->update_txn)) { - if (txn->isolation == TXN_ISO_READ_COMMITTED) + if (txn->isolation == WT_ISO_READ_COMMITTED) __wt_txn_get_snapshot(session); leaf_bytes += page->memory_footprint; ++leaf_pages; @@ -117,7 +117,7 @@ __sync_file(WT_SESSION_IMPL *session, int syncop) */ if (walk != NULL && walk->page != NULL && (mod = walk->page->modify) != NULL && - TXNID_LT(btree->rec_max_txn, mod->rec_max_txn)) + WT_TXNID_LT(btree->rec_max_txn, mod->rec_max_txn)) btree->rec_max_txn = mod->rec_max_txn; WT_ERR(__wt_tree_walk(session, &walk, NULL, flags)); @@ -149,8 +149,8 @@ __sync_file(WT_SESSION_IMPL *session, int syncop) * is written. */ if (!WT_PAGE_IS_INTERNAL(page) && - F_ISSET(txn, TXN_HAS_SNAPSHOT) && - TXNID_LT(txn->snap_max, mod->first_dirty_txn) && + F_ISSET(txn, WT_TXN_HAS_SNAPSHOT) && + WT_TXNID_LT(txn->snap_max, mod->first_dirty_txn) && !F_ISSET(mod, WT_PM_REC_REWRITE)) { __wt_page_modify_set(session, page); continue; @@ -185,7 +185,7 @@ err: /* On error, clear any left-over tree walk. */ if (walk != NULL) WT_TRET(__wt_page_release(session, walk, flags)); - if (txn->isolation == TXN_ISO_READ_COMMITTED && session->ncursors == 0) + if (txn->isolation == WT_ISO_READ_COMMITTED && session->ncursors == 0) __wt_txn_release_snapshot(session); if (btree->checkpointing) { diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index bfb7dec16a8..bd6071a345d 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -993,7 +993,7 @@ err: /* */ for (s = conn->sessions, i = 0; i < conn->session_cnt; ++s, ++i) if (s->active && !F_ISSET(s, WT_SESSION_INTERNAL) && - F_ISSET(&s->txn, TXN_RUNNING)) { + F_ISSET(&s->txn, WT_TXN_RUNNING)) { wt_session = &s->iface; WT_TRET(wt_session->rollback_transaction( wt_session, NULL)); diff --git a/src/docs/upgrading.dox b/src/docs/upgrading.dox index d0a5fc4ce29..4d7feea77c4 100644 --- a/src/docs/upgrading.dox +++ b/src/docs/upgrading.dox @@ -13,6 +13,7 @@ removed. </dl><hr> @section version_254 Upgrading to Version 2.5.4 <dl> + <dt>File handle closing</dt> <dd> In WiredTiger, a separate thread of control periodically reviews open @@ -26,6 +27,13 @@ triggered, can be configured using the \c file_manager configuration values to the ::wiredtiger_open call. </dd> +<dt>LZ4 compression</dt> +<dd> +The LZ4 compression support has been updated in this release in non-backward +compatible ways; tables and files compressed using LZ4 compression should be +dumped and re-loaded into a new database. +</dd> + </dl><hr> @section version_253 Upgrading to Version 2.5.3 <dl> diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 3ad7e8a2723..3e7ebd12e52 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -684,14 +684,14 @@ __wt_evict_page(WT_SESSION_IMPL *session, WT_REF *ref) __wt_txn_update_oldest(session, 1); txn = &session->txn; saved_iso = txn->isolation; - txn->isolation = TXN_ISO_EVICTION; + txn->isolation = WT_ISO_EVICTION; /* * Sanity check: if a transaction has updates, its updates should not * be visible to eviction. */ - WT_ASSERT(session, - !F_ISSET(txn, TXN_HAS_ID) || !__wt_txn_visible(session, txn->id)); + WT_ASSERT(session, !F_ISSET(txn, WT_TXN_HAS_ID) || + !__wt_txn_visible(session, txn->id)); ret = __wt_evict(session, ref, 0); txn->isolation = saved_iso; diff --git a/src/include/api.h b/src/include/api.h index 8f8fd8e98b1..33f685cb279 100644 --- a/src/include/api.h +++ b/src/include/api.h @@ -33,11 +33,11 @@ if ((s) != NULL) { \ (s)->dhandle = __olddh; \ (s)->name = __oldname; \ - if (F_ISSET(&(s)->txn, TXN_RUNNING) && \ + if (F_ISSET(&(s)->txn, WT_TXN_RUNNING) && \ (ret) != 0 && \ (ret) != WT_NOTFOUND && \ (ret) != WT_DUPLICATE_KEY) \ - F_SET(&(s)->txn, TXN_ERROR); \ + F_SET(&(s)->txn, WT_TXN_ERROR); \ } \ } while (0) @@ -45,25 +45,25 @@ #define TXN_API_CALL(s, h, n, cur, bt, config, cfg) do { \ int __autotxn = 0; \ API_CALL(s, h, n, bt, cur, config, cfg); \ - __autotxn = !F_ISSET(&(s)->txn, TXN_AUTOCOMMIT | TXN_RUNNING); \ + __autotxn = !F_ISSET(&(s)->txn, WT_TXN_AUTOCOMMIT | WT_TXN_RUNNING);\ if (__autotxn) \ - F_SET(&(s)->txn, TXN_AUTOCOMMIT) + F_SET(&(s)->txn, WT_TXN_AUTOCOMMIT) /* An API call wrapped in a transaction if necessary. */ #define TXN_API_CALL_NOCONF(s, h, n, cur, bt) do { \ int __autotxn = 0; \ API_CALL_NOCONF(s, h, n, cur, bt); \ - __autotxn = !F_ISSET(&(s)->txn, TXN_AUTOCOMMIT | TXN_RUNNING); \ + __autotxn = !F_ISSET(&(s)->txn, WT_TXN_AUTOCOMMIT | WT_TXN_RUNNING);\ if (__autotxn) \ - F_SET(&(s)->txn, TXN_AUTOCOMMIT) + F_SET(&(s)->txn, WT_TXN_AUTOCOMMIT) /* End a transactional API call, optional retry on deadlock. */ #define TXN_API_END_RETRY(s, ret, retry) \ API_END(s, ret); \ if (__autotxn) { \ - if (F_ISSET(&(s)->txn, TXN_AUTOCOMMIT)) \ - F_CLR(&(s)->txn, TXN_AUTOCOMMIT); \ - else if (ret == 0 && !F_ISSET(&(s)->txn, TXN_ERROR)) \ + if (F_ISSET(&(s)->txn, WT_TXN_AUTOCOMMIT)) \ + F_CLR(&(s)->txn, WT_TXN_AUTOCOMMIT); \ + else if (ret == 0 && !F_ISSET(&(s)->txn, WT_TXN_ERROR)) \ ret = __wt_txn_commit((s), NULL); \ else { \ WT_TRET(__wt_txn_rollback((s), NULL)); \ diff --git a/src/include/btree.i b/src/include/btree.i index 5a2253f6078..2ff89c1bdd5 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -367,7 +367,7 @@ __wt_page_only_modify_set(WT_SESSION_IMPL *session, WT_PAGE *page) * The page can never end up with changes older than the oldest * running transaction. */ - if (F_ISSET(&session->txn, TXN_HAS_SNAPSHOT)) + if (F_ISSET(&session->txn, WT_TXN_HAS_SNAPSHOT)) page->modify->disk_snap_min = session->txn.snap_min; /* @@ -384,7 +384,7 @@ __wt_page_only_modify_set(WT_SESSION_IMPL *session, WT_PAGE *page) } /* Check if this is the largest transaction ID to update the page. */ - if (TXNID_LT(page->modify->update_txn, session->txn.id)) + if (WT_TXNID_LT(page->modify->update_txn, session->txn.id)) page->modify->update_txn = session->txn.id; } @@ -1109,7 +1109,7 @@ __wt_page_can_evict( * transaction. */ if (LF_ISSET(WT_EVICT_CHECK_SPLITS) && - TXNID_LE(txn_global->oldest_id, mod->inmem_split_txn)) + WT_TXNID_LE(txn_global->oldest_id, mod->inmem_split_txn)) return (0); return (1); diff --git a/src/include/txn.h b/src/include/txn.h index 62f565c0535..9f600ac95c1 100644 --- a/src/include/txn.h +++ b/src/include/txn.h @@ -17,11 +17,11 @@ * transaction), WT_TXN_NONE is smaller than any possible ID (visible to all * running transactions). */ -#define TXNID_LE(t1, t2) \ +#define WT_TXNID_LE(t1, t2) \ ((t1) <= (t2)) -#define TXNID_LT(t1, t2) \ - ((t1) != (t2) && TXNID_LE(t1, t2)) +#define WT_TXNID_LT(t1, t2) \ + ((t1) != (t2) && WT_TXNID_LE(t1, t2)) #define WT_SESSION_TXN_STATE(s) (&S2C(s)->txn_global.states[(s)->id]) @@ -60,10 +60,10 @@ struct __wt_txn_global { }; typedef enum __wt_txn_isolation { - TXN_ISO_EVICTION, /* Internal: eviction context */ - TXN_ISO_READ_UNCOMMITTED, - TXN_ISO_READ_COMMITTED, - TXN_ISO_SNAPSHOT + WT_ISO_EVICTION, /* Internal: eviction context */ + WT_ISO_READ_UNCOMMITTED, + WT_ISO_READ_COMMITTED, + WT_ISO_SNAPSHOT } WT_TXN_ISOLATION; /* @@ -75,29 +75,29 @@ typedef enum __wt_txn_isolation { struct __wt_txn_op { uint32_t fileid; enum { - TXN_OP_BASIC, - TXN_OP_INMEM, - TXN_OP_REF, - TXN_OP_TRUNCATE_COL, - TXN_OP_TRUNCATE_ROW + WT_TXN_OP_BASIC, + WT_TXN_OP_INMEM, + WT_TXN_OP_REF, + WT_TXN_OP_TRUNCATE_COL, + WT_TXN_OP_TRUNCATE_ROW } type; union { - /* TXN_OP_BASIC, TXN_OP_INMEM */ + /* WT_TXN_OP_BASIC, WT_TXN_OP_INMEM */ WT_UPDATE *upd; - /* TXN_OP_REF */ + /* WT_TXN_OP_REF */ WT_REF *ref; - /* TXN_OP_TRUNCATE_COL */ + /* WT_TXN_OP_TRUNCATE_COL */ struct { uint64_t start, stop; } truncate_col; - /* TXN_OP_TRUNCATE_ROW */ + /* WT_TXN_OP_TRUNCATE_ROW */ struct { WT_ITEM start, stop; enum { - TXN_TRUNC_ALL, - TXN_TRUNC_BOTH, - TXN_TRUNC_START, - TXN_TRUNC_STOP + WT_TXN_TRUNC_ALL, + WT_TXN_TRUNC_BOTH, + WT_TXN_TRUNC_START, + WT_TXN_TRUNC_STOP } mode; } truncate_row; } u; @@ -140,10 +140,10 @@ struct __wt_txn { uint32_t ckpt_nsnapshot; WT_ITEM *ckpt_snapshot; -#define TXN_AUTOCOMMIT 0x01 -#define TXN_ERROR 0x02 -#define TXN_HAS_ID 0x04 -#define TXN_HAS_SNAPSHOT 0x08 -#define TXN_RUNNING 0x10 +#define WT_TXN_AUTOCOMMIT 0x01 +#define WT_TXN_ERROR 0x02 +#define WT_TXN_HAS_ID 0x04 +#define WT_TXN_HAS_SNAPSHOT 0x08 +#define WT_TXN_RUNNING 0x10 uint32_t flags; }; diff --git a/src/include/txn.i b/src/include/txn.i index b1cfba4257d..b06062fc483 100644 --- a/src/include/txn.i +++ b/src/include/txn.i @@ -26,7 +26,7 @@ __txn_next_op(WT_SESSION_IMPL *session, WT_TXN_OP **opp) * Make sure we have allocated a transaction ID. */ WT_RET(__wt_txn_id_check(session)); - WT_ASSERT(session, F_ISSET(txn, TXN_HAS_ID)); + WT_ASSERT(session, F_ISSET(txn, WT_TXN_HAS_ID)); WT_RET(__wt_realloc_def(session, &txn->mod_alloc, txn->mod_count + 1, &txn->mod)); @@ -49,7 +49,7 @@ __wt_txn_unmodify(WT_SESSION_IMPL *session) WT_TXN *txn; txn = &session->txn; - if (F_ISSET(txn, TXN_HAS_ID)) { + if (F_ISSET(txn, WT_TXN_HAS_ID)) { WT_ASSERT(session, txn->mod_count > 0); txn->mod_count--; } @@ -67,7 +67,7 @@ __wt_txn_modify(WT_SESSION_IMPL *session, WT_UPDATE *upd) WT_RET(__txn_next_op(session, &op)); op->type = F_ISSET(session, WT_SESSION_LOGGING_INMEM) ? - TXN_OP_INMEM : TXN_OP_BASIC; + WT_TXN_OP_INMEM : WT_TXN_OP_BASIC; op->u.upd = upd; upd->txnid = session->txn.id; return (ret); @@ -83,7 +83,7 @@ __wt_txn_modify_ref(WT_SESSION_IMPL *session, WT_REF *ref) WT_TXN_OP *op; WT_RET(__txn_next_op(session, &op)); - op->type = TXN_OP_REF; + op->type = WT_TXN_OP_REF; op->u.ref = ref; return (__wt_txn_log_op(session, NULL)); } @@ -118,14 +118,14 @@ __wt_txn_visible_all(WT_SESSION_IMPL *session, uint64_t id) */ if (checkpoint_snap_min != WT_TXN_NONE && (btree == NULL || btree->checkpoint_gen != txn_global->checkpoint_gen) && - TXNID_LT(checkpoint_snap_min, oldest_id)) + WT_TXNID_LT(checkpoint_snap_min, oldest_id)) /* * Use the checkpoint ID for the visibility check if it is the * oldest ID in the system. */ oldest_id = checkpoint_snap_min; - return (TXNID_LT(id, oldest_id)); + return (WT_TXNID_LT(id, oldest_id)); } /* @@ -151,7 +151,7 @@ __wt_txn_visible(WT_SESSION_IMPL *session, uint64_t id) * Eviction only sees globally visible updates, or if there is a * checkpoint transaction running, use its transaction. */ - if (txn->isolation == TXN_ISO_EVICTION) + if (txn->isolation == WT_ISO_EVICTION) return (__wt_txn_visible_all(session, id)); /* @@ -164,7 +164,7 @@ __wt_txn_visible(WT_SESSION_IMPL *session, uint64_t id) * Metadata updates use non-transactional techniques (such as the * schema and metadata locks) to protect access to in-flight updates. */ - if (txn->isolation == TXN_ISO_READ_UNCOMMITTED || + if (txn->isolation == WT_ISO_READ_UNCOMMITTED || session->dhandle == session->meta_dhandle) return (1); @@ -173,7 +173,7 @@ __wt_txn_visible(WT_SESSION_IMPL *session, uint64_t id) return (1); /* - * TXN_ISO_SNAPSHOT, TXN_ISO_READ_COMMITTED: the ID is visible if it is + * WT_ISO_SNAPSHOT, WT_ISO_READ_COMMITTED: the ID is visible if it is * not the result of a concurrent transaction, that is, if was * committed before the snapshot was taken. * @@ -181,9 +181,9 @@ __wt_txn_visible(WT_SESSION_IMPL *session, uint64_t id) * saw when taking the snapshot should be invisible, even if the * snapshot is empty. */ - if (TXNID_LE(txn->snap_max, id)) + if (WT_TXNID_LE(txn->snap_max, id)) return (0); - if (txn->snapshot_count == 0 || TXNID_LT(id, txn->snap_min)) + if (txn->snapshot_count == 0 || WT_TXNID_LT(id, txn->snap_min)) return (1); return (bsearch(&id, txn->snapshot, txn->snapshot_count, @@ -219,7 +219,7 @@ __wt_txn_begin(WT_SESSION_IMPL *session, const char *cfg[]) if (cfg != NULL) WT_RET(__wt_txn_config(session, cfg)); - if (txn->isolation == TXN_ISO_SNAPSHOT) { + if (txn->isolation == WT_ISO_SNAPSHOT) { if (session->ncursors > 0) WT_RET(__wt_session_copy_values(session)); @@ -232,7 +232,7 @@ __wt_txn_begin(WT_SESSION_IMPL *session, const char *cfg[]) __wt_txn_get_snapshot(session); } - F_SET(txn, TXN_RUNNING); + F_SET(txn, WT_TXN_RUNNING); return (0); } @@ -246,8 +246,8 @@ __wt_txn_autocommit_check(WT_SESSION_IMPL *session) WT_TXN *txn; txn = &session->txn; - if (F_ISSET(txn, TXN_AUTOCOMMIT)) { - F_CLR(txn, TXN_AUTOCOMMIT); + if (F_ISSET(txn, WT_TXN_AUTOCOMMIT)) { + F_CLR(txn, WT_TXN_AUTOCOMMIT); return (__wt_txn_begin(session, NULL)); } return (0); @@ -287,10 +287,10 @@ __wt_txn_idle_cache_check(WT_SESSION_IMPL *session) /* * Check the published snap_min because read-uncommitted never sets - * TXN_HAS_SNAPSHOT. + * WT_TXN_HAS_SNAPSHOT. */ - if (F_ISSET(txn, TXN_RUNNING) && - !F_ISSET(txn, TXN_HAS_ID) && txn_state->snap_min == WT_TXN_NONE) + if (F_ISSET(txn, WT_TXN_RUNNING) && + !F_ISSET(txn, WT_TXN_HAS_ID) && txn_state->snap_min == WT_TXN_NONE) WT_RET(__wt_cache_full_check(session)); return (0); @@ -311,12 +311,12 @@ __wt_txn_id_check(WT_SESSION_IMPL *session) txn = &session->txn; - WT_ASSERT(session, F_ISSET(txn, TXN_RUNNING)); + WT_ASSERT(session, F_ISSET(txn, WT_TXN_RUNNING)); /* If the transaction is idle, check that the cache isn't full. */ WT_RET(__wt_txn_idle_cache_check(session)); - if (!F_ISSET(txn, TXN_HAS_ID)) { + if (!F_ISSET(txn, WT_TXN_HAS_ID)) { conn = S2C(session); txn_global = &conn->txn_global; txn_state = &txn_global->states[session->id]; @@ -352,7 +352,7 @@ __wt_txn_id_check(WT_SESSION_IMPL *session) */ if (txn->id == WT_TXN_ABORTED) WT_RET_MSG(session, ENOMEM, "Out of transaction IDs"); - F_SET(txn, TXN_HAS_ID); + F_SET(txn, WT_TXN_HAS_ID); } return (0); @@ -368,7 +368,7 @@ __wt_txn_update_check(WT_SESSION_IMPL *session, WT_UPDATE *upd) WT_TXN *txn; txn = &session->txn; - if (txn->isolation == TXN_ISO_SNAPSHOT) + if (txn->isolation == WT_ISO_SNAPSHOT) while (upd != NULL && !__wt_txn_visible(session, upd->txnid)) { if (upd->txnid != WT_TXN_ABORTED) { WT_STAT_FAST_DATA_INCR( @@ -393,8 +393,8 @@ __wt_txn_read_last(WT_SESSION_IMPL *session) txn = &session->txn; /* Release the snap_min ID we put in the global table. */ - if (!F_ISSET(txn, TXN_RUNNING) || - txn->isolation != TXN_ISO_SNAPSHOT) + if (!F_ISSET(txn, WT_TXN_RUNNING) || + txn->isolation != WT_ISO_SNAPSHOT) __wt_txn_release_snapshot(session); } @@ -429,13 +429,13 @@ __wt_txn_cursor_op(WT_SESSION_IMPL *session) * further forward, so that once a read-uncommitted cursor is * positioned on a value, it can't be freed. */ - if (txn->isolation == TXN_ISO_READ_UNCOMMITTED && - !F_ISSET(txn, TXN_HAS_ID) && - TXNID_LT(txn_state->snap_min, txn_global->last_running)) + if (txn->isolation == WT_ISO_READ_UNCOMMITTED && + !F_ISSET(txn, WT_TXN_HAS_ID) && + WT_TXNID_LT(txn_state->snap_min, txn_global->last_running)) txn_state->snap_min = txn_global->last_running; - if (txn->isolation != TXN_ISO_READ_UNCOMMITTED && - !F_ISSET(txn, TXN_HAS_SNAPSHOT)) + if (txn->isolation != WT_ISO_READ_UNCOMMITTED && + !F_ISSET(txn, WT_TXN_HAS_SNAPSHOT)) __wt_txn_get_snapshot(session); } @@ -462,7 +462,7 @@ __wt_txn_am_oldest(WT_SESSION_IMPL *session) WT_ORDERED_READ(session_cnt, conn->session_cnt); for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) - if ((id = s->id) != WT_TXN_NONE && TXNID_LT(id, txn->id)) + if ((id = s->id) != WT_TXN_NONE && WT_TXNID_LT(id, txn->id)) return (0); return (1); diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c index e97f4522f48..84b8d5c9532 100644 --- a/src/lsm/lsm_cursor.c +++ b/src/lsm/lsm_cursor.c @@ -112,10 +112,10 @@ __clsm_enter_update(WT_CURSOR_LSM *clsm) } else { primary = clsm->cursors[clsm->nchunks - 1]; primary_chunk = clsm->primary_chunk; - WT_ASSERT(session, F_ISSET(&session->txn, TXN_HAS_ID)); + WT_ASSERT(session, F_ISSET(&session->txn, WT_TXN_HAS_ID)); have_primary = (primary != NULL && primary_chunk != NULL && (primary_chunk->switch_txn == WT_TXN_NONE || - TXNID_LT(session->txn.id, primary_chunk->switch_txn))); + WT_TXNID_LT(session->txn.id, primary_chunk->switch_txn))); } /* @@ -165,11 +165,13 @@ __clsm_enter(WT_CURSOR_LSM *clsm, int reset, int update) WT_DECL_RET; WT_LSM_TREE *lsm_tree; WT_SESSION_IMPL *session; + WT_TXN *txn; uint64_t *switch_txnp; uint64_t snap_min; lsm_tree = clsm->lsm_tree; session = (WT_SESSION_IMPL *)clsm->iface.session; + txn = &session->txn; /* Merge cursors never update. */ if (F_ISSET(clsm, WT_CLSM_MERGE)) @@ -207,7 +209,7 @@ __clsm_enter(WT_CURSOR_LSM *clsm, int reset, int update) if (clsm->dsk_gen != clsm->lsm_tree->dsk_gen) goto open; - if (session->txn.isolation == TXN_ISO_SNAPSHOT) + if (txn->isolation == WT_ISO_SNAPSHOT) __wt_txn_cursor_op(session); /* @@ -220,16 +222,16 @@ __clsm_enter(WT_CURSOR_LSM *clsm, int reset, int update) * conflict. */ clsm->nupdates = 1; - if (session->txn.isolation == TXN_ISO_SNAPSHOT && + if (txn->isolation == WT_ISO_SNAPSHOT && F_ISSET(clsm, WT_CLSM_OPEN_SNAPSHOT)) { WT_ASSERT(session, - F_ISSET(&session->txn, TXN_HAS_SNAPSHOT)); - snap_min = session->txn.snap_min; + F_ISSET(txn, WT_TXN_HAS_SNAPSHOT)); + snap_min = txn->snap_min; for (switch_txnp = &clsm->switch_txn[clsm->nchunks - 2]; clsm->nupdates < clsm->nchunks; clsm->nupdates++, switch_txnp--) { - if (TXNID_LT(*switch_txnp, snap_min)) + if (WT_TXNID_LT(*switch_txnp, snap_min)) break; WT_ASSERT(session, !__wt_txn_visible_all( @@ -246,7 +248,7 @@ __clsm_enter(WT_CURSOR_LSM *clsm, int reset, int update) * - a read operation and the cursor is open for reading. */ if ((!update || - session->txn.isolation != TXN_ISO_SNAPSHOT || + txn->isolation != WT_ISO_SNAPSHOT || F_ISSET(clsm, WT_CLSM_OPEN_SNAPSHOT)) && ((update && clsm->primary_chunk != NULL) || (!update && F_ISSET(clsm, WT_CLSM_OPEN_READ)))) @@ -418,7 +420,7 @@ __clsm_open_cursors( * Ensure that any snapshot update has cursors on the right set of * chunks to guarantee visibility is correct. */ - if (update && txn->isolation == TXN_ISO_SNAPSHOT) + if (update && txn->isolation == WT_ISO_SNAPSHOT) F_SET(clsm, WT_CLSM_OPEN_SNAPSHOT); /* @@ -1286,10 +1288,10 @@ __clsm_put(WT_SESSION_IMPL *session, lsm_tree = clsm->lsm_tree; WT_ASSERT(session, - F_ISSET(&session->txn, TXN_HAS_ID) && + F_ISSET(&session->txn, WT_TXN_HAS_ID) && clsm->primary_chunk != NULL && (clsm->primary_chunk->switch_txn == WT_TXN_NONE || - TXNID_LE(session->txn.id, clsm->primary_chunk->switch_txn))); + WT_TXNID_LE(session->txn.id, clsm->primary_chunk->switch_txn))); /* * Clear the existing cursor position. Don't clear the primary cursor: @@ -1537,6 +1539,9 @@ __wt_clsm_open(WT_SESSION_IMPL *session, /* Flag any errors from the tree get. */ WT_ERR(ret); + /* Make sure we have exclusive access if and only if we want it */ + WT_ASSERT(session, !bulk || lsm_tree->exclusive); + WT_ERR(__wt_calloc_one(session, &clsm)); cursor = &clsm->iface; diff --git a/src/lsm/lsm_cursor_bulk.c b/src/lsm/lsm_cursor_bulk.c index 1de732c56ef..d67eb33c9e8 100644 --- a/src/lsm/lsm_cursor_bulk.c +++ b/src/lsm/lsm_cursor_bulk.c @@ -16,13 +16,28 @@ static int __clsm_close_bulk(WT_CURSOR *cursor) { WT_CURSOR_LSM *clsm; + WT_CURSOR *bulk_cursor; WT_LSM_TREE *lsm_tree; + WT_SESSION_IMPL *session; clsm = (WT_CURSOR_LSM *)cursor; lsm_tree = clsm->lsm_tree; + session = (WT_SESSION_IMPL *)clsm->iface.session; + + /* Close the bulk cursor to ensure the chunk is written to disk. */ + bulk_cursor = clsm->cursors[0]; + WT_RET(bulk_cursor->close(bulk_cursor)); + clsm->cursors[0] = NULL; + clsm->nchunks = 0; + + /* Set ondisk, and flush the metadata */ F_SET(lsm_tree->chunk[0], WT_LSM_CHUNK_ONDISK); + WT_RET(__wt_lsm_meta_write(session, lsm_tree)); + ++lsm_tree->dsk_gen; + /* Close the LSM cursor */ WT_RET(__wt_clsm_close(cursor)); + return (0); } /* diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c index 0533e628601..4be0cd2a09c 100644 --- a/src/lsm/lsm_manager.c +++ b/src/lsm/lsm_manager.c @@ -227,7 +227,7 @@ __wt_lsm_manager_start(WT_SESSION_IMPL *session) for (i = 0; i < WT_LSM_MAX_WORKERS; i++) { WT_ERR(__wt_open_internal_session( S2C(session), "lsm-worker", 1, 0, &worker_session)); - worker_session->isolation = TXN_ISO_READ_UNCOMMITTED; + worker_session->isolation = WT_ISO_READ_UNCOMMITTED; manager->lsm_worker_cookies[i].session = worker_session; } diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index 439837e96be..97360089d48 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -10,7 +10,8 @@ static int __lsm_tree_cleanup_old(WT_SESSION_IMPL *, const char *); static int __lsm_tree_open_check(WT_SESSION_IMPL *, WT_LSM_TREE *); -static int __lsm_tree_open(WT_SESSION_IMPL *, const char *, WT_LSM_TREE **); +static int __lsm_tree_open( + WT_SESSION_IMPL *, const char *, int, WT_LSM_TREE **); static int __lsm_tree_set_name(WT_SESSION_IMPL *, WT_LSM_TREE *, const char *); /* @@ -435,7 +436,7 @@ __wt_lsm_tree_create(WT_SESSION_IMPL *session, */ if (ret == 0) WT_WITH_HANDLE_LIST_LOCK(session, - ret = __lsm_tree_open(session, uri, &lsm_tree)); + ret = __lsm_tree_open(session, uri, 1, &lsm_tree)); if (ret == 0) __wt_lsm_tree_release(session, lsm_tree); @@ -544,7 +545,8 @@ __lsm_tree_open_check(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) * Open an LSM tree structure. */ static int -__lsm_tree_open(WT_SESSION_IMPL *session, const char *uri, WT_LSM_TREE **treep) +__lsm_tree_open(WT_SESSION_IMPL *session, + const char *uri, int exclusive, WT_LSM_TREE **treep) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; @@ -560,7 +562,8 @@ __lsm_tree_open(WT_SESSION_IMPL *session, const char *uri, WT_LSM_TREE **treep) WT_RET(__wt_lsm_manager_start(session)); /* Make sure no one beat us to it. */ - if ((ret = __lsm_tree_find(session, uri, 0, treep)) != WT_NOTFOUND) + if ((ret = __lsm_tree_find( + session, uri, exclusive, treep)) != WT_NOTFOUND) return (ret); /* Try to open the tree. */ @@ -586,6 +589,7 @@ __lsm_tree_open(WT_SESSION_IMPL *session, const char *uri, WT_LSM_TREE **treep) * with getting handles exclusive. */ lsm_tree->refcnt = 1; + lsm_tree->exclusive = (int8_t)exclusive; lsm_tree->queue_ref = 0; /* Set a flush timestamp as a baseline. */ @@ -617,8 +621,9 @@ __wt_lsm_tree_get(WT_SESSION_IMPL *session, ret = __lsm_tree_find(session, uri, exclusive, treep); if (ret == WT_NOTFOUND) - ret = __lsm_tree_open(session, uri, treep); + ret = __lsm_tree_open(session, uri, exclusive, treep); + WT_ASSERT(session, ret != 0 || exclusive == (*treep)->exclusive); return (ret); } diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c index 1145c329639..33174c2d40d 100644 --- a/src/lsm/lsm_work_unit.c +++ b/src/lsm/lsm_work_unit.c @@ -307,7 +307,7 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, if ((ret = __wt_session_get_btree( session, chunk->uri, NULL, NULL, 0)) == 0) { saved_isolation = session->txn.isolation; - session->txn.isolation = TXN_ISO_EVICTION; + session->txn.isolation = WT_ISO_EVICTION; ret = __wt_cache_op(session, NULL, WT_SYNC_WRITE_LEAVES); session->txn.isolation = saved_isolation; WT_TRET(__wt_session_release_btree(session)); diff --git a/src/reconcile/rec_track.c b/src/reconcile/rec_track.c index 2533ad9e201..e417dbfaa83 100644 --- a/src/reconcile/rec_track.c +++ b/src/reconcile/rec_track.c @@ -722,7 +722,7 @@ __ovfl_txnc_wrapup(WT_SESSION_IMPL *session, WT_PAGE *page) */ for (i = WT_SKIP_MAXDEPTH - 1; i > 0; --i) for (e = &head[i]; (txnc = *e) != NULL;) { - if (TXNID_LE(oldest_txn, txnc->current)) { + if (WT_TXNID_LE(oldest_txn, txnc->current)) { e = &txnc->next[i]; continue; } @@ -732,7 +732,7 @@ __ovfl_txnc_wrapup(WT_SESSION_IMPL *session, WT_PAGE *page) /* Second, discard any no longer needed transaction-cache records. */ decr = 0; for (e = &head[0]; (txnc = *e) != NULL;) { - if (TXNID_LE(oldest_txn, txnc->current)) { + if (WT_TXNID_LE(oldest_txn, txnc->current)) { e = &txnc->next[0]; continue; } diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index ef8accccc6b..8be461f82f0 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -862,11 +862,11 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, continue; /* Track the largest/smallest transaction IDs on the list. */ - if (TXNID_LT(max_txn, txnid)) + if (WT_TXNID_LT(max_txn, txnid)) max_txn = txnid; - if (TXNID_LT(txnid, min_txn)) + if (WT_TXNID_LT(txnid, min_txn)) min_txn = txnid; - if (TXNID_LT(txnid, r->skipped_txn) && + if (WT_TXNID_LT(txnid, r->skipped_txn) && !__wt_txn_visible_all(session, txnid)) r->skipped_txn = txnid; @@ -894,7 +894,7 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, * used to avoid evicting clean pages from memory with changes required * to satisfy a snapshot read. */ - if (TXNID_LT(r->max_txn, max_txn)) + if (WT_TXNID_LT(r->max_txn, max_txn)) r->max_txn = max_txn; /* diff --git a/src/schema/schema_drop.c b/src/schema/schema_drop.c index 56c6f7b0551..d5d8acd5321 100644 --- a/src/schema/schema_drop.c +++ b/src/schema/schema_drop.c @@ -120,8 +120,13 @@ __drop_table( for (i = 0; i < WT_COLGROUPS(table); i++) { if ((colgroup = table->cgroups[i]) == NULL) continue; - WT_ERR(__wt_metadata_remove(session, colgroup->name)); + /* + * Drop the column group before updating the metadata to avoid + * the metadata for the table becoming inconsistent if we can't + * get exclusive access. + */ WT_ERR(__wt_schema_drop(session, colgroup->source, cfg)); + WT_ERR(__wt_metadata_remove(session, colgroup->name)); } /* Drop the indices. */ @@ -129,8 +134,13 @@ __drop_table( for (i = 0; i < table->nindices; i++) { if ((idx = table->indices[i]) == NULL) continue; - WT_ERR(__wt_metadata_remove(session, idx->name)); + /* + * Drop the column group before updating the metadata to avoid + * the metadata for the table becoming inconsistent if we can't + * get exclusive access. + */ WT_ERR(__wt_schema_drop(session, idx->source, cfg)); + WT_ERR(__wt_metadata_remove(session, idx->name)); } WT_ERR(__wt_schema_remove_table(session, table)); diff --git a/src/schema/schema_rename.c b/src/schema/schema_rename.c index 3e619fe9cff..be9cb2a87c6 100644 --- a/src/schema/schema_rename.c +++ b/src/schema/schema_rename.c @@ -155,15 +155,18 @@ __rename_tree(WT_SESSION_IMPL *session, cval.str + cval.len)); /* + * Do the rename before updating the metadata to avoid leaving the + * metadata inconsistent if the rename fails. + */ + WT_ERR(__wt_schema_rename(session, os->data, ns->data, cfg)); + + /* * Remove the old metadata entry. * Insert the new metadata entry. */ WT_ERR(__wt_metadata_remove(session, name)); WT_ERR(__wt_metadata_insert(session, nn->data, nv->data)); - /* Rename the file. */ - WT_ERR(__wt_schema_rename(session, os->data, ns->data, cfg)); - err: __wt_scr_free(session, &nn); __wt_scr_free(session, &ns); __wt_scr_free(session, &nv); diff --git a/src/session/session_api.c b/src/session/session_api.c index 2aa8e924302..7951b3ff50d 100644 --- a/src/session/session_api.c +++ b/src/session/session_api.c @@ -95,7 +95,7 @@ __session_close(WT_SESSION *wt_session, const char *config) WT_UNUSED(cfg); /* Rollback any active transaction. */ - if (F_ISSET(&session->txn, TXN_RUNNING)) + if (F_ISSET(&session->txn, WT_TXN_RUNNING)) WT_TRET(__session_rollback_transaction(wt_session, NULL)); /* @@ -193,7 +193,7 @@ __session_reconfigure(WT_SESSION *wt_session, const char *config) session = (WT_SESSION_IMPL *)wt_session; SESSION_API_CALL(session, reconfigure, config, cfg); - if (F_ISSET(&session->txn, TXN_RUNNING)) + if (F_ISSET(&session->txn, WT_TXN_RUNNING)) WT_ERR_MSG(session, EINVAL, "transaction in progress"); WT_TRET(__wt_session_reset_cursors(session)); @@ -202,9 +202,9 @@ __session_reconfigure(WT_SESSION *wt_session, const char *config) if (cval.len != 0) session->isolation = session->txn.isolation = WT_STRING_MATCH("snapshot", cval.str, cval.len) ? - TXN_ISO_SNAPSHOT : + WT_ISO_SNAPSHOT : WT_STRING_MATCH("read-uncommitted", cval.str, cval.len) ? - TXN_ISO_READ_UNCOMMITTED : TXN_ISO_READ_COMMITTED; + WT_ISO_READ_UNCOMMITTED : WT_ISO_READ_COMMITTED; err: API_END_RET_NOTFOUND_MAP(session, ret); } @@ -757,7 +757,7 @@ __session_begin_transaction(WT_SESSION *wt_session, const char *config) SESSION_API_CALL(session, begin_transaction, config, cfg); WT_STAT_FAST_CONN_INCR(session, txn_begin); - if (F_ISSET(&session->txn, TXN_RUNNING)) + if (F_ISSET(&session->txn, WT_TXN_RUNNING)) WT_ERR_MSG(session, EINVAL, "Transaction already running"); ret = __wt_txn_begin(session, cfg); @@ -781,7 +781,7 @@ __session_commit_transaction(WT_SESSION *wt_session, const char *config) WT_STAT_FAST_CONN_INCR(session, txn_commit); txn = &session->txn; - if (F_ISSET(txn, TXN_ERROR)) { + if (F_ISSET(txn, WT_TXN_ERROR)) { __wt_errx(session, "failed transaction requires rollback"); ret = EINVAL; } @@ -836,7 +836,7 @@ __session_transaction_pinned_range(WT_SESSION *wt_session, uint64_t *prange) /* Assign pinned to the lesser of id or snap_min */ if (txn_state->id != WT_TXN_NONE && - TXNID_LT(txn_state->id, txn_state->snap_min)) + WT_TXNID_LT(txn_state->id, txn_state->snap_min)) pinned = txn_state->id; else pinned = txn_state->snap_min; @@ -880,7 +880,7 @@ __session_checkpoint(WT_SESSION *wt_session, const char *config) * from evicting anything newer than this because we track the oldest * transaction ID in the system that is not visible to all readers. */ - if (F_ISSET(txn, TXN_RUNNING)) + if (F_ISSET(txn, WT_TXN_RUNNING)) WT_ERR_MSG(session, EINVAL, "Checkpoint not permitted in a transaction"); @@ -1066,7 +1066,7 @@ __wt_open_session(WT_CONNECTION_IMPL *conn, } /* Initialize transaction support: default to read-committed. */ - session_ret->isolation = TXN_ISO_READ_COMMITTED; + session_ret->isolation = WT_ISO_READ_COMMITTED; WT_ERR(__wt_txn_init(session_ret)); /* diff --git a/src/session/session_compact.c b/src/session/session_compact.c index 2a18eb3be93..d4fd52ec8bd 100644 --- a/src/session/session_compact.c +++ b/src/session/session_compact.c @@ -161,7 +161,7 @@ __compact_file(WT_SESSION_IMPL *session, const char *uri, const char *cfg[]) * transactional context. Check now so the error message isn't * confusing. */ - if (session->compact->file_count != 0 && F_ISSET(txn, TXN_RUNNING)) + if (session->compact->file_count != 0 && F_ISSET(txn, WT_TXN_RUNNING)) WT_ERR_MSG(session, EINVAL, " File compaction not permitted in a transaction"); diff --git a/src/txn/txn.c b/src/txn/txn.c index 05b27cd9a56..0492e39342f 100644 --- a/src/txn/txn.c +++ b/src/txn/txn.c @@ -20,7 +20,7 @@ __wt_txnid_cmp(const void *v1, const void *v2) id1 = *(uint64_t *)v1; id2 = *(uint64_t *)v2; - return ((id1 == id2) ? 0 : TXNID_LT(id1, id2) ? -1 : 1); + return ((id1 == id2) ? 0 : WT_TXNID_LT(id1, id2) ? -1 : 1); } /* @@ -35,15 +35,15 @@ __txn_sort_snapshot(WT_SESSION_IMPL *session, uint32_t n, uint64_t snap_max) txn = &session->txn; if (n <= 10) - WT_INSERTION_SORT(txn->snapshot, n, uint64_t, TXNID_LT); + WT_INSERTION_SORT(txn->snapshot, n, uint64_t, WT_TXNID_LT); else qsort(txn->snapshot, n, sizeof(uint64_t), __wt_txnid_cmp); txn->snapshot_count = n; txn->snap_max = snap_max; - txn->snap_min = (n > 0 && TXNID_LE(txn->snapshot[0], snap_max)) ? + txn->snap_min = (n > 0 && WT_TXNID_LE(txn->snapshot[0], snap_max)) ? txn->snapshot[0] : snap_max; - F_SET(txn, TXN_HAS_SNAPSHOT); + F_SET(txn, WT_TXN_HAS_SNAPSHOT); WT_ASSERT(session, n == 0 || txn->snap_min != WT_TXN_NONE); } @@ -62,11 +62,11 @@ __wt_txn_release_snapshot(WT_SESSION_IMPL *session) WT_ASSERT(session, txn_state->snap_min == WT_TXN_NONE || - session->txn.isolation == TXN_ISO_READ_UNCOMMITTED || + session->txn.isolation == WT_ISO_READ_UNCOMMITTED || !__wt_txn_visible_all(session, txn_state->snap_min)); txn_state->snap_min = WT_TXN_NONE; - F_CLR(txn, TXN_HAS_SNAPSHOT); + F_CLR(txn, WT_TXN_HAS_SNAPSHOT); } /* @@ -139,9 +139,9 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session) */ if (s != txn_state && (id = s->id) != WT_TXN_NONE && - TXNID_LE(prev_oldest_id, id)) { + WT_TXNID_LE(prev_oldest_id, id)) { txn->snapshot[n++] = id; - if (TXNID_LT(id, snap_min)) + if (WT_TXNID_LT(id, snap_min)) snap_min = id; } } @@ -150,7 +150,7 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session) * If we got a new snapshot, update the published snap_min for this * session. */ - WT_ASSERT(session, TXNID_LE(prev_oldest_id, snap_min)); + WT_ASSERT(session, WT_TXNID_LE(prev_oldest_id, snap_min)); WT_ASSERT(session, prev_oldest_id == txn_global->oldest_id); txn_state->snap_min = snap_min; @@ -201,7 +201,7 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, int force) * oldest ID isn't too far behind, avoid scanning. */ if (prev_oldest_id == current_id || - (!force && TXNID_LT(current_id, prev_oldest_id + 100))) + (!force && WT_TXNID_LT(current_id, prev_oldest_id + 100))) return; /* @@ -236,7 +236,8 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, int force) * it gets a valid one. */ if ((id = s->id) != WT_TXN_NONE && - TXNID_LE(prev_oldest_id, id) && TXNID_LT(id, snap_min)) + WT_TXNID_LE(prev_oldest_id, id) && + WT_TXNID_LT(id, snap_min)) snap_min = id; /* @@ -248,24 +249,24 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, int force) * more details. */ if ((id = s->snap_min) != WT_TXN_NONE && - TXNID_LT(id, oldest_id)) { + WT_TXNID_LT(id, oldest_id)) { oldest_id = id; oldest_session = &conn->sessions[i]; } } - if (TXNID_LT(snap_min, oldest_id)) + if (WT_TXNID_LT(snap_min, oldest_id)) oldest_id = snap_min; /* Update the last running ID. */ - if (TXNID_LT(txn_global->last_running, snap_min)) { + if (WT_TXNID_LT(txn_global->last_running, snap_min)) { txn_global->last_running = snap_min; last_running_moved = 1; } else last_running_moved = 0; /* Update the oldest ID. */ - if (TXNID_LT(prev_oldest_id, oldest_id) && + if (WT_TXNID_LT(prev_oldest_id, oldest_id) && WT_ATOMIC_CAS4(txn_global->scan_count, 1, -1)) { WT_ORDERED_READ(session_cnt, conn->session_cnt); ckpt_id = txn_global->checkpoint_id; @@ -278,13 +279,13 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, int force) continue; if ((id = s->id) != WT_TXN_NONE && - TXNID_LT(id, oldest_id)) + WT_TXNID_LT(id, oldest_id)) oldest_id = id; if ((id = s->snap_min) != WT_TXN_NONE && - TXNID_LT(id, oldest_id)) + WT_TXNID_LT(id, oldest_id)) oldest_id = id; } - if (TXNID_LT(txn_global->oldest_id, oldest_id)) + if (WT_TXNID_LT(txn_global->oldest_id, oldest_id)) txn_global->oldest_id = oldest_id; txn_global->scan_count = 0; } else { @@ -320,9 +321,9 @@ __wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[]) if (cval.len != 0) txn->isolation = WT_STRING_MATCH("snapshot", cval.str, cval.len) ? - TXN_ISO_SNAPSHOT : + WT_ISO_SNAPSHOT : WT_STRING_MATCH("read-committed", cval.str, cval.len) ? - TXN_ISO_READ_COMMITTED : TXN_ISO_READ_UNCOMMITTED; + WT_ISO_READ_COMMITTED : WT_ISO_READ_UNCOMMITTED; /* * The default sync setting is inherited from the connection, but can @@ -360,7 +361,7 @@ __wt_txn_release(WT_SESSION_IMPL *session) txn_state = &txn_global->states[session->id]; /* Clear the transaction's ID from the global table. */ - if (F_ISSET(txn, TXN_HAS_ID)) { + if (F_ISSET(txn, WT_TXN_HAS_ID)) { WT_ASSERT(session, txn_state->id != WT_TXN_NONE && txn->id != WT_TXN_NONE); WT_PUBLISH(txn_state->id, WT_TXN_NONE); @@ -380,7 +381,7 @@ __wt_txn_release(WT_SESSION_IMPL *session) */ __wt_txn_release_snapshot(session); txn->isolation = session->isolation; - F_CLR(txn, TXN_ERROR | TXN_HAS_ID | TXN_RUNNING); + F_CLR(txn, WT_TXN_ERROR | WT_TXN_HAS_ID | WT_TXN_RUNNING); } /* @@ -396,9 +397,9 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) u_int i; txn = &session->txn; - WT_ASSERT(session, !F_ISSET(txn, TXN_ERROR)); + WT_ASSERT(session, !F_ISSET(txn, WT_TXN_ERROR)); - if (!F_ISSET(txn, TXN_RUNNING)) + if (!F_ISSET(txn, WT_TXN_RUNNING)) WT_RET_MSG(session, EINVAL, "No transaction is active"); /* Commit notification. */ @@ -462,7 +463,7 @@ __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[]) WT_UNUSED(cfg); txn = &session->txn; - if (!F_ISSET(txn, TXN_RUNNING)) + if (!F_ISSET(txn, WT_TXN_RUNNING)) WT_RET_MSG(session, EINVAL, "No transaction is active"); /* Rollback notification. */ @@ -477,20 +478,20 @@ __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[]) continue; switch (op->type) { - case TXN_OP_BASIC: - case TXN_OP_INMEM: + case WT_TXN_OP_BASIC: + case WT_TXN_OP_INMEM: op->u.upd->txnid = WT_TXN_ABORTED; break; - case TXN_OP_REF: + case WT_TXN_OP_REF: __wt_delete_page_rollback(session, op->u.ref); break; - case TXN_OP_TRUNCATE_COL: - case TXN_OP_TRUNCATE_ROW: + case WT_TXN_OP_TRUNCATE_COL: + case WT_TXN_OP_TRUNCATE_ROW: /* * Nothing to do: these operations are only logged for * recovery. The in-memory changes will be rolled back - * with a combination of TXN_OP_REF and TXN_OP_INMEM - * operations. + * with a combination of WT_TXN_OP_REF and + * WT_TXN_OP_INMEM operations. */ break; } diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c index 1ae593fd6be..47c0e59f053 100644 --- a/src/txn/txn_ckpt.c +++ b/src/txn/txn_ckpt.c @@ -397,7 +397,7 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) "starting write leaves", &verb_timer)); /* Flush dirty leaf pages before we start the checkpoint. */ - session->isolation = txn->isolation = TXN_ISO_READ_COMMITTED; + session->isolation = txn->isolation = WT_ISO_READ_COMMITTED; WT_ERR(__checkpoint_apply(session, cfg, __checkpoint_write_leaves)); /* @@ -497,18 +497,13 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) * to open one. We are holding other handle locks, it is not safe to * lock conn->spinlock. */ - session->isolation = txn->isolation = TXN_ISO_READ_UNCOMMITTED; + session->isolation = txn->isolation = WT_ISO_READ_UNCOMMITTED; saved_meta_next = session->meta_track_next; session->meta_track_next = NULL; WT_WITH_DHANDLE(session, session->meta_dhandle, ret = __wt_checkpoint(session, cfg)); session->meta_track_next = saved_meta_next; WT_ERR(ret); - if (F_ISSET(conn, WT_CONN_CKPT_SYNC)) { - WT_WITH_DHANDLE(session, session->meta_dhandle, - ret = __wt_checkpoint_sync(session, NULL)); - WT_ERR(ret); - } WT_ERR(__checkpoint_verbose_track(session, "metadata sync completed", &verb_timer)); @@ -531,11 +526,11 @@ err: /* * overwritten the checkpoint, so what ends up on disk is not * consistent. */ - session->isolation = txn->isolation = TXN_ISO_READ_UNCOMMITTED; + session->isolation = txn->isolation = WT_ISO_READ_UNCOMMITTED; if (tracking) WT_TRET(__wt_meta_track_off(session, 0, ret != 0)); - if (F_ISSET(txn, TXN_RUNNING)) { + if (F_ISSET(txn, WT_TXN_RUNNING)) { /* * Clear the dhandle so the visibility check doesn't get * confused about the snap min. Don't bother restoring the @@ -998,7 +993,17 @@ fake: /* if (fake_ckpt && FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) WT_INIT_LSN(&ckptlsn); - /* Update the object's metadata. */ + /* + * Update the object's metadata. + * + * If the object is the metadata, the call to __wt_meta_ckptlist_set + * will update the turtle file and swap the new one into place. We + * need to make sure the metadata is on disk before the turtle file is + * updated. + */ + if (F_ISSET(conn, WT_CONN_CKPT_SYNC) && WT_IS_METADATA(dhandle)) + WT_ERR(__wt_checkpoint_sync(session, NULL)); + WT_ERR(__wt_meta_ckptlist_set( session, dhandle->name, ckptbase, &ckptlsn)); diff --git a/src/txn/txn_ext.c b/src/txn/txn_ext.c index e35b6f16ea1..36d42a8996f 100644 --- a/src/txn/txn_ext.c +++ b/src/txn/txn_ext.c @@ -40,9 +40,9 @@ __wt_ext_transaction_isolation_level( session = (WT_SESSION_IMPL *)wt_session; txn = &session->txn; - if (txn->isolation == TXN_ISO_READ_COMMITTED) + if (txn->isolation == WT_ISO_READ_COMMITTED) return (WT_TXN_ISO_READ_COMMITTED); - if (txn->isolation == TXN_ISO_READ_UNCOMMITTED) + if (txn->isolation == WT_ISO_READ_UNCOMMITTED) return (WT_TXN_ISO_READ_UNCOMMITTED); return (WT_TXN_ISO_SNAPSHOT); } diff --git a/src/txn/txn_log.c b/src/txn/txn_log.c index 901785628e1..9b6a3d46014 100644 --- a/src/txn/txn_log.c +++ b/src/txn/txn_log.c @@ -97,13 +97,13 @@ void __wt_txn_op_free(WT_SESSION_IMPL *session, WT_TXN_OP *op) { switch (op->type) { - case TXN_OP_BASIC: - case TXN_OP_INMEM: - case TXN_OP_REF: - case TXN_OP_TRUNCATE_COL: + case WT_TXN_OP_BASIC: + case WT_TXN_OP_INMEM: + case WT_TXN_OP_REF: + case WT_TXN_OP_TRUNCATE_COL: break; - case TXN_OP_TRUNCATE_ROW: + case WT_TXN_OP_TRUNCATE_ROW: __wt_buf_free(session, &op->u.truncate_row.start); __wt_buf_free(session, &op->u.truncate_row.stop); break; @@ -163,7 +163,7 @@ __wt_txn_log_op(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) /* We'd better have a transaction. */ WT_ASSERT(session, - F_ISSET(txn, TXN_RUNNING) && F_ISSET(txn, TXN_HAS_ID)); + F_ISSET(txn, WT_TXN_RUNNING) && F_ISSET(txn, WT_TXN_HAS_ID)); WT_ASSERT(session, txn->mod_count > 0); op = txn->mod + txn->mod_count - 1; @@ -172,17 +172,17 @@ __wt_txn_log_op(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) logrec = txn->logrec; switch (op->type) { - case TXN_OP_BASIC: + case WT_TXN_OP_BASIC: return (__txn_op_log(session, logrec, op, cbt)); - case TXN_OP_INMEM: - case TXN_OP_REF: + case WT_TXN_OP_INMEM: + case WT_TXN_OP_REF: /* Nothing to log, we're done. */ return (0); - case TXN_OP_TRUNCATE_COL: + case WT_TXN_OP_TRUNCATE_COL: return (__wt_logop_col_truncate_pack(session, logrec, op->fileid, op->u.truncate_col.start, op->u.truncate_col.stop)); - case TXN_OP_TRUNCATE_ROW: + case WT_TXN_OP_TRUNCATE_ROW: return (__wt_logop_row_truncate_pack(session, txn->logrec, op->fileid, &op->u.truncate_row.start, &op->u.truncate_row.stop, @@ -387,12 +387,12 @@ __wt_txn_truncate_log( WT_RET(__txn_next_op(session, &op)); if (btree->type == BTREE_ROW) { - op->type = TXN_OP_TRUNCATE_ROW; - op->u.truncate_row.mode = TXN_TRUNC_ALL; + op->type = WT_TXN_OP_TRUNCATE_ROW; + op->u.truncate_row.mode = WT_TXN_TRUNC_ALL; WT_CLEAR(op->u.truncate_row.start); WT_CLEAR(op->u.truncate_row.stop); if (start != NULL) { - op->u.truncate_row.mode = TXN_TRUNC_START; + op->u.truncate_row.mode = WT_TXN_TRUNC_START; item = &op->u.truncate_row.start; WT_RET(__wt_cursor_get_raw_key(&start->iface, item)); WT_RET(__wt_buf_set( @@ -400,15 +400,15 @@ __wt_txn_truncate_log( } if (stop != NULL) { op->u.truncate_row.mode = - (op->u.truncate_row.mode == TXN_TRUNC_ALL) ? - TXN_TRUNC_STOP : TXN_TRUNC_BOTH; + (op->u.truncate_row.mode == WT_TXN_TRUNC_ALL) ? + WT_TXN_TRUNC_STOP : WT_TXN_TRUNC_BOTH; item = &op->u.truncate_row.stop; WT_RET(__wt_cursor_get_raw_key(&stop->iface, item)); WT_RET(__wt_buf_set( session, item, item->data, item->size)); } } else { - op->type = TXN_OP_TRUNCATE_COL; + op->type = WT_TXN_OP_TRUNCATE_COL; op->u.truncate_col.start = (start == NULL) ? 0 : start->recno; op->u.truncate_col.stop = diff --git a/src/txn/txn_recover.c b/src/txn/txn_recover.c index 540b0528995..f11b585da1c 100644 --- a/src/txn/txn_recover.c +++ b/src/txn/txn_recover.c @@ -194,18 +194,18 @@ __txn_op_apply( /* Set up the cursors. */ start = stop = NULL; switch (mode) { - case TXN_TRUNC_ALL: + case WT_TXN_TRUNC_ALL: /* Both cursors stay NULL. */ break; - case TXN_TRUNC_BOTH: + case WT_TXN_TRUNC_BOTH: start = cursor; WT_ERR(__recovery_cursor( session, r, lsnp, fileid, 1, &stop)); break; - case TXN_TRUNC_START: + case WT_TXN_TRUNC_START: start = cursor; break; - case TXN_TRUNC_STOP: + case WT_TXN_TRUNC_STOP: stop = cursor; break; diff --git a/test/bloom/test_bloom.c b/test/bloom/test_bloom.c index 94403195888..65389aefc90 100644 --- a/test/bloom/test_bloom.c +++ b/test/bloom/test_bloom.c @@ -26,7 +26,7 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#include "wt_internal.h" +#include "wt_internal.h" /* For __wt_XXX */ #include "test_util.i" static struct { diff --git a/test/checkpoint/test_checkpoint.c b/test/checkpoint/test_checkpoint.c index 72700365dfe..3131a30bba4 100644 --- a/test/checkpoint/test_checkpoint.c +++ b/test/checkpoint/test_checkpoint.c @@ -130,8 +130,7 @@ main(int argc, char *argv[]) /* Clean up on signal. */ (void)signal(SIGINT, onint); - if ((ret = testutil_work_dir_from_path(g.home, 512, working_dir)) != 0) - testutil_die(ret, "provided directory name is too long"); + testutil_work_dir_from_path(g.home, 512, working_dir); printf("%s: process %" PRIu64 "\n", g.progname, (uint64_t)getpid()); for (cnt = 1; (runs == 0 || cnt <= runs) && g.status == 0; ++cnt) { @@ -245,7 +244,8 @@ cleanup(void) g.running = 0; g.ntables_created = 0; - return (testutil_clean_work_dir(g.home)); + testutil_clean_work_dir(g.home); + return (0); } static int diff --git a/test/checkpoint/test_checkpoint.h b/test/checkpoint/test_checkpoint.h index 411ebc50de9..914b68e522d 100644 --- a/test/checkpoint/test_checkpoint.h +++ b/test/checkpoint/test_checkpoint.h @@ -38,8 +38,8 @@ #include <string.h> #include <unistd.h> +#include "wt_internal.h" /* For __wt_XXX */ #include "test_util.i" -#include "wt_internal.h" /* For __wt_random */ #define URI_BASE "table:__wt" /* File name */ diff --git a/test/fops/fops.c b/test/fops/fops.c index ec7c3d1098f..1b72e6a561c 100644 --- a/test/fops/fops.c +++ b/test/fops/fops.c @@ -45,31 +45,6 @@ typedef struct { static STATS *run_stats; -/* - * r -- - * Return a 32-bit pseudo-random number. - * - * This is an implementation of George Marsaglia's multiply-with-carry pseudo- - * random number generator. Computationally fast, with reasonable randomness - * properties. - */ -static inline uint32_t -r(void) -{ - static uint32_t m_w = 0, m_z = 0; - - if (m_w == 0) { - struct timeval t; - (void)gettimeofday(&t, NULL); - m_w = (uint32_t)t.tv_sec; - m_z = (uint32_t)t.tv_usec; - } - - m_z = 36969 * (m_z & 65535) + (m_z >> 16); - m_w = 18000 * (m_w & 65535) + (m_w >> 16); - return (m_z << 16) + (m_w & 65535); -} - int fop_start(u_int nthreads) { @@ -123,15 +98,17 @@ fop(void *arg) { STATS *s; uintptr_t id; + uint32_t rnd[2]; u_int i; id = (uintptr_t)arg; sched_yield(); /* Get all the threads created. */ s = &run_stats[id]; + __wt_random_init(rnd); for (i = 0; i < nops; ++i, sched_yield()) - switch (r() % 9) { + switch (__wt_random(rnd) % 9) { case 0: ++s->bulk; obj_bulk(); diff --git a/test/fops/t.c b/test/fops/t.c index 6017ccfaf88..12535f152fd 100644 --- a/test/fops/t.c +++ b/test/fops/t.c @@ -48,7 +48,6 @@ static void wt_startup(char *); static void wt_shutdown(void); extern int __wt_optind; -extern int __wt_getopt(const char *, int, char * const *, const char *); extern char *__wt_optarg; int @@ -120,8 +119,7 @@ main(int argc, char *argv[]) if (argc != 0) return (usage()); - if ((ret = testutil_work_dir_from_path(home, 512, working_dir)) != 0) - testutil_die(ret, "provided directory name is too long"); + testutil_work_dir_from_path(home, 512, working_dir); /* Clean up on signal. */ (void)signal(SIGINT, onint); @@ -196,7 +194,7 @@ wt_shutdown(void) static void shutdown(void) { - (void)testutil_clean_work_dir(home); + testutil_clean_work_dir(home); } static int diff --git a/test/fops/thread.h b/test/fops/thread.h index 5d25993f9f8..e0c7e9ef3e2 100644 --- a/test/fops/thread.h +++ b/test/fops/thread.h @@ -44,6 +44,7 @@ #include <unistd.h> #endif +#include "wt_internal.h" /* For __wt_XXX */ #include "test_util.i" extern WT_CONNECTION *conn; /* WiredTiger connection */ diff --git a/test/format/backup.c b/test/format/backup.c index d70353e282c..3b95ea92b5e 100644 --- a/test/format/backup.c +++ b/test/format/backup.c @@ -104,7 +104,7 @@ backup(void *arg) * Perform a backup at somewhere under 10 seconds (so we get at * least one done), and then at 45 second intervals. */ - for (period = MMRAND(1, 10);; period = 45) { + for (period = mmrand(NULL, 1, 10);; period = 45) { /* Sleep for short periods so we don't make the run wait. */ while (period > 0 && !g.workers_finished) { --period; diff --git a/test/format/bdb.c b/test/format/bdb.c index 254dd95e1d3..fec23112549 100644 --- a/test/format/bdb.c +++ b/test/format/bdb.c @@ -147,7 +147,7 @@ bdb_read(uint64_t keyno, void *valuep, size_t *valuesizep, int *notfoundp) size_t size; int ret; - key_gen(keybuf, &size, keyno, 0); + key_gen(keybuf, &size, keyno); key.data = keybuf; key.size = (uint32_t)size; @@ -193,7 +193,7 @@ bdb_remove(uint64_t keyno, int *notfoundp) size_t size; int ret; - key_gen(keybuf, &size, keyno, 0); + key_gen(keybuf, &size, keyno); key.data = keybuf; key.size = (uint32_t)size; diff --git a/test/format/bulk.c b/test/format/bulk.c index 37a737e3c2c..7cf4ba559dc 100644 --- a/test/format/bulk.c +++ b/test/format/bulk.c @@ -58,9 +58,9 @@ wts_load(void) is_bulk ? "bulk" : NULL, &cursor)) != 0) die(ret, "session.open_cursor"); - /* Set up the default key buffer. */ + /* Set up the key/value buffers. */ key_gen_setup(&keybuf); - val_gen_setup(&valbuf); + val_gen_setup(NULL, &valbuf); for (;;) { if (++g.key_cnt > g.c_rows) { @@ -72,9 +72,9 @@ wts_load(void) if (g.key_cnt % 100 == 0) track("bulk load", g.key_cnt, NULL); - key_gen(keybuf, &key.size, (uint64_t)g.key_cnt, 0); + key_gen(keybuf, &key.size, (uint64_t)g.key_cnt); key.data = keybuf; - value_gen(valbuf, &value.size, (uint64_t)g.key_cnt); + val_gen(NULL, valbuf, &value.size, (uint64_t)g.key_cnt); value.data = valbuf; switch (g.type) { diff --git a/test/format/compact.c b/test/format/compact.c index 60c99db13c3..ad603504023 100644 --- a/test/format/compact.c +++ b/test/format/compact.c @@ -55,7 +55,7 @@ compact(void *arg) * Perform compaction at somewhere under 15 seconds (so we get at * least one done), and then at 23 second intervals. */ - for (period = MMRAND(1, 15);; period = 23) { + for (period = mmrand(NULL, 1, 15);; period = 23) { /* Sleep for short periods so we don't make the run wait. */ while (period > 0 && !g.workers_finished) { --period; diff --git a/test/format/config.c b/test/format/config.c index efb01f0d666..0f25af4d801 100644 --- a/test/format/config.c +++ b/test/format/config.c @@ -58,7 +58,7 @@ config_setup(void) * them. */ if (!config_is_perm("data_source")) - switch (MMRAND(1, 3)) { + switch (mmrand(NULL, 1, 3)) { case 1: config_single("data_source=file", 0); break; @@ -71,7 +71,7 @@ config_setup(void) } if (!config_is_perm("file_type")) - switch (DATASOURCE("lsm") ? 5 : MMRAND(1, 10)) { + switch (DATASOURCE("lsm") ? 5 : mmrand(NULL, 1, 10)) { case 1: config_single("file_type=fix", 0); break; @@ -118,9 +118,9 @@ config_setup(void) * the min, 0 otherwise. */ if (F_ISSET(cp, C_BOOL)) - *cp->v = MMRAND(1, 100) <= cp->min ? 1 : 0; + *cp->v = mmrand(NULL, 1, 100) <= cp->min ? 1 : 0; else - *cp->v = CONF_RAND(cp); + *cp->v = mmrand(NULL, cp->min, cp->maxrand); } /* Required shared libraries. */ @@ -162,7 +162,7 @@ config_setup(void) g.c_cache = 30 * g.c_chunk_size; if (!config_is_perm("insert_pct")) - g.c_insert_pct = MMRAND(50, 85); + g.c_insert_pct = mmrand(NULL, 50, 85); } /* Make the default maximum-run length 20 minutes. */ @@ -200,7 +200,7 @@ config_checksum(void) { /* Choose a checksum mode if nothing was specified. */ if (!config_is_perm("checksum")) - switch (MMRAND(1, 10)) { + switch (mmrand(NULL, 1, 10)) { case 1: /* 10% */ config_single("checksum=on", 0); break; @@ -231,18 +231,21 @@ config_compression(void) */ if (!config_is_perm("compression")) { cstr = "compression=none"; - switch (MMRAND(1, 20)) { + switch (mmrand(NULL, 1, 20)) { case 1: case 2: case 3: case 4: /* 20% no compression */ break; - case 5: case 6: /* 10% bzip */ + case 5: /* 5% bzip */ cstr = "compression=bzip"; break; - case 7: /* 5% bzip-raw */ + case 6: /* 5% bzip-raw */ cstr = "compression=bzip-raw"; break; - case 8: case 9: case 10: case 11: /* 20% lz4 */ + case 7: case 8: case 9: case 10: /* 20% lz4 */ cstr = "compression=lz4"; break; + case 11: /* 5% lz4-no-raw */ + cstr = "compression=lz4-noraw"; + break; case 12: case 13: case 14: case 15: /* 20% snappy */ cstr = "compression=snappy"; break; @@ -272,7 +275,7 @@ config_isolation(void) */ if (!config_is_perm("isolation")) { /* Avoid "maybe uninitialized" warnings. */ - switch (MMRAND(1, 4)) { + switch (mmrand(NULL, 1, 4)) { case 1: cstr = "isolation=random"; break; @@ -345,7 +348,7 @@ config_print(int error_display) (void)fflush(fp); if (fp != stdout) - (void)fclose(fp); + fclose_and_clear(&fp); } /* @@ -368,7 +371,7 @@ config_file(const char *name) continue; config_single(buf, 1); } - (void)fclose(fp); + fclose_and_clear(&fp); } /* @@ -523,6 +526,8 @@ config_map_compression(const char *s, u_int *vp) *vp = COMPRESS_BZIP_RAW; else if (strcmp(s, "lz4") == 0) *vp = COMPRESS_LZ4; + else if (strcmp(s, "lz4-noraw") == 0) + *vp = COMPRESS_LZ4_NO_RAW; else if (strcmp(s, "lzo") == 0) *vp = COMPRESS_LZO; else if (strcmp(s, "snappy") == 0) diff --git a/test/format/config.h b/test/format/config.h index 3a849a36c26..6ac45755d27 100644 --- a/test/format/config.h +++ b/test/format/config.h @@ -57,12 +57,6 @@ typedef struct { char **vstr; /* Value for string options */ } CONFIG; -/* - * Get a random value between a config min/max pair (inclusive for both min - * and max). - */ -#define CONF_RAND(cp) MMRAND((cp)->min, (cp)->maxrand) - static CONFIG c[] = { { "abort", "if timed run should drop core", /* 0% */ @@ -117,8 +111,8 @@ static CONFIG c[] = { C_BOOL, 10, 0, 0, &g.c_compact, NULL }, { "compression", - "type of compression " - "(none | bzip | bzip-raw | lz4 | lzo | snappy | zlib | zlib-noraw)", + "type of compression (none | bzip | " + "bzip-raw | lz4 | lz4-noraw | lzo | snappy | zlib | zlib-noraw)", C_IGNORE|C_STRING, 0, 0, 0, NULL, &g.c_compression }, { "data_extend", diff --git a/test/format/format.h b/test/format/format.h index b3e2c708181..56b1aa883cc 100644 --- a/test/format/format.h +++ b/test/format/format.h @@ -50,8 +50,8 @@ #endif #include <time.h> +#include "wt_internal.h" /* __wt_XXX */ #include "test_util.i" -#include <wiredtiger_ext.h> #ifdef BDB #include <db.h> @@ -95,13 +95,6 @@ extern WT_EXTENSION_API *wt_api; #undef GIGABYTE #define GIGABYTE(v) ((v) * 1073741824ULL) -#define F_CLR(p, mask) ((p)->flags &= ~((uint32_t)(mask))) -#define F_ISSET(p, mask) ((p)->flags & ((uint32_t)(mask))) -#define F_SET(p, mask) ((p)->flags |= ((uint32_t)(mask))) - -/* Get a random value between a min/max pair. */ -#define MMRAND(min, max) (rng() % (((max) + 1) - (min)) + (min)) - #define WT_NAME "wt" /* Object name */ #define DATASOURCE(v) (strcmp(v, g.c_data_source) == 0 ? 1 : 0) @@ -140,7 +133,7 @@ typedef struct { WT_EXTENSION_API *wt_api; int rand_log_stop; /* Logging turned off */ - FILE *rand_log; /* Random number log */ + FILE *randfp; /* Random number log */ uint32_t run_cnt; /* Run counter */ @@ -156,6 +149,8 @@ typedef struct { pthread_rwlock_t backup_lock; /* Hot backup running */ + uint32_t rnd[2]; /* Global RNG state */ + /* * We have a list of records that are appended, but not yet "resolved", * that is, we haven't yet incremented the g.rows value to reflect the @@ -166,6 +161,8 @@ typedef struct { size_t append_cnt; /* Current unresolved records */ pthread_rwlock_t append_lock; /* Single-thread resolution */ + pthread_rwlock_t death_lock; /* Single-thread failure */ + char *uri; /* Object name */ char *config_open; /* Command-line configuration */ @@ -239,10 +236,11 @@ typedef struct { #define COMPRESS_BZIP 2 #define COMPRESS_BZIP_RAW 3 #define COMPRESS_LZ4 4 -#define COMPRESS_LZO 5 -#define COMPRESS_SNAPPY 6 -#define COMPRESS_ZLIB 7 -#define COMPRESS_ZLIB_NO_RAW 8 +#define COMPRESS_LZ4_NO_RAW 5 +#define COMPRESS_LZO 6 +#define COMPRESS_SNAPPY 7 +#define COMPRESS_ZLIB 8 +#define COMPRESS_ZLIB_NO_RAW 9 u_int c_compression_flag; /* Compression flag value */ #define ISOLATION_RANDOM 1 @@ -259,6 +257,8 @@ typedef struct { extern GLOBAL g; typedef struct { + uint32_t rnd[2]; /* thread RNG state */ + uint64_t search; /* operations */ uint64_t insert; uint64_t update; @@ -278,7 +278,7 @@ typedef struct { #define TINFO_COMPLETE 2 /* Finished */ #define TINFO_JOINED 3 /* Resolved */ volatile int state; /* state */ -} TINFO WT_GCC_ATTRIBUTE((aligned(64))); +} TINFO WT_GCC_ATTRIBUTE((aligned(WT_CACHE_LINE_ALIGNMENT))); #ifdef HAVE_BERKELEY_DB void bdb_close(void); @@ -298,15 +298,16 @@ void config_file(const char *); void config_print(int); void config_setup(void); void config_single(const char *, int); -void key_len_setup(void); +void fclose_and_clear(FILE **); +void key_gen(uint8_t *, size_t *, uint64_t); +void key_gen_insert(uint32_t *, uint8_t *, size_t *, uint64_t); void key_gen_setup(uint8_t **); -void key_gen(uint8_t *, size_t *, uint64_t, int); +void key_len_setup(void); void path_setup(const char *); -uint32_t rng(void); -void rng_init(void); +uint32_t rng(uint32_t *); void track(const char *, uint64_t, TINFO *); -void val_gen_setup(uint8_t **); -void value_gen(uint8_t *, size_t *, uint64_t); +void val_gen(uint32_t *, uint8_t *, size_t *, uint64_t); +void val_gen_setup(uint32_t *, uint8_t **); void wts_close(void); void wts_create(void); void wts_dump(const char *, int); @@ -323,3 +324,13 @@ void die(int, const char *, ...) __attribute__((__noreturn__)) #endif ; + +/* + * mmrand -- + * Return a random value between a min/max pair. + */ +static inline uint32_t +mmrand(uint32_t *rnd, u_int min, u_int max) +{ + return (rng(rnd) % (((max) + 1) - (min)) + (min)); +} diff --git a/test/format/ops.c b/test/format/ops.c index 3b8a8cbcbb5..bbb57b4d8c3 100644 --- a/test/format/ops.c +++ b/test/format/ops.c @@ -28,15 +28,15 @@ #include "format.h" -static int col_insert(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t *); +static int col_insert(TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t *); static int col_remove(WT_CURSOR *, WT_ITEM *, uint64_t, int *); -static int col_update(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t); +static int col_update(TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t); static int nextprev(WT_CURSOR *, int, int *); static void *ops(void *); static int read_row(WT_CURSOR *, WT_ITEM *, uint64_t); -static int row_insert(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t); +static int row_insert(TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t); static int row_remove(WT_CURSOR *, WT_ITEM *, uint64_t, int *); -static int row_update(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t); +static int row_update(TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t); static void table_append_init(void); #ifdef HAVE_BERKELEY_DB @@ -66,13 +66,6 @@ wts_ops(int lastrun) memset(&compact_tid, 0, sizeof(compact_tid)); /* - * We support replay of threaded runs, but don't log random numbers - * after threaded operations start, there's no point. - */ - if (!SINGLETHREADED) - g.rand_log_stop = 1; - - /* * There are two mechanisms to specify the length of the run, a number * of operations and a timer, when either expire the run terminates. * Each thread does an equal share of the total operations (and make @@ -95,6 +88,13 @@ wts_ops(int lastrun) /* Initialize the table extension code. */ table_append_init(); + /* + * We support replay of threaded runs, but don't log random numbers + * after threaded operations start, there's no point. + */ + if (!SINGLETHREADED) + g.rand_log_stop = 1; + /* Open a session. */ if (g.logging != 0) { if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) @@ -194,7 +194,7 @@ wts_ops(int lastrun) * Return the current session configuration. */ static const char * -ops_session_config(void) +ops_session_config(uint32_t *rnd) { u_int v; @@ -202,7 +202,7 @@ ops_session_config(void) * The only current session configuration is the isolation level. */ if ((v = g.c_isolation_flag) == ISOLATION_RANDOM) - v = MMRAND(2, 4); + v = mmrand(rnd, 2, 4); switch (v) { case ISOLATION_READ_UNCOMMITTED: return ("isolation=read-uncommitted"); @@ -231,13 +231,16 @@ ops(void *arg) tinfo = arg; + /* Initialize the per-thread random number generator. */ + __wt_random_init(tinfo->rnd); + conn = g.wts_conn; keybuf = valbuf = NULL; readonly = 0; /* -Wconditional-uninitialized */ /* Set up the default key and value buffers. */ key_gen_setup(&keybuf); - val_gen_setup(&valbuf); + val_gen_setup(tinfo->rnd, &valbuf); /* Set the first operation where we'll create sessions and cursors. */ session_op = 0; @@ -245,7 +248,7 @@ ops(void *arg) cursor = cursor_insert = NULL; /* Set the first operation where we'll perform checkpoint operations. */ - ckpt_op = g.c_checkpoints ? MMRAND(100, 10000) : 0; + ckpt_op = g.c_checkpoints ? mmrand(tinfo->rnd, 100, 10000) : 0; ckpt_available = 0; for (intxn = 0; !tinfo->quit; ++tinfo->ops) { @@ -269,8 +272,8 @@ ops(void *arg) (ret = session->close(session, NULL)) != 0) die(ret, "session.close"); - if ((ret = conn->open_session( - conn, NULL, ops_session_config(), &session)) != 0) + if ((ret = conn->open_session(conn, NULL, + ops_session_config(tinfo->rnd), &session)) != 0) die(ret, "connection.open_session"); /* @@ -285,7 +288,7 @@ ops(void *arg) * checkpoints. */ if (!SINGLETHREADED && !DATASOURCE("lsm") && - ckpt_available && MMRAND(1, 10) == 1) { + ckpt_available && mmrand(tinfo->rnd, 1, 10) == 1) { if ((ret = session->open_cursor(session, g.uri, NULL, ckpt_name, &cursor)) != 0) die(ret, "session.open_cursor"); @@ -318,7 +321,7 @@ ops(void *arg) die(ret, "session.open_cursor"); /* Pick the next session/cursor close/open. */ - session_op += 100 * MMRAND(1, 50); + session_op += mmrand(tinfo->rnd, 100, 5000); /* Updates supported. */ readonly = 0; @@ -334,7 +337,8 @@ ops(void *arg) * checkpoint. */ if (DATASOURCE("helium") || DATASOURCE("kvsbdb") || - DATASOURCE("lsm") || readonly || MMRAND(1, 5) == 1) + DATASOURCE("lsm") || + readonly || mmrand(tinfo->rnd, 1, 5) == 1) ckpt_config = NULL; else { (void)snprintf(ckpt_name, sizeof(ckpt_name), @@ -369,14 +373,15 @@ ops(void *arg) ckpt_available = 1; /* Pick the next checkpoint operation. */ - ckpt_op += 1000 * MMRAND(5, 20); + ckpt_op += mmrand(tinfo->rnd, 5000, 20000); } /* * If we're not single-threaded and we're not in a transaction, * start a transaction 20% of the time. */ - if (!SINGLETHREADED && !intxn && MMRAND(1, 10) >= 8) { + if (!SINGLETHREADED && + !intxn && mmrand(tinfo->rnd, 1, 10) >= 8) { if ((ret = session->begin_transaction(session, NULL)) != 0) die(ret, "session.begin_transaction"); @@ -385,7 +390,7 @@ ops(void *arg) insert = notfound = 0; - keyno = MMRAND(1, g.rows); + keyno = mmrand(tinfo->rnd, 1, (u_int)g.rows); key.data = keybuf; value.data = valbuf; @@ -396,7 +401,7 @@ ops(void *arg) * of deletes will mean fewer inserts and writes. Modifications * are always followed by a read to confirm it worked. */ - op = readonly ? UINT32_MAX : (uint32_t)(rng() % 100); + op = readonly ? UINT32_MAX : mmrand(tinfo->rnd, 1, 100); if (op < g.c_delete_pct) { ++tinfo->remove; switch (g.type) { @@ -418,7 +423,8 @@ ops(void *arg) ++tinfo->insert; switch (g.type) { case ROW: - if (row_insert(cursor, &key, &value, keyno)) + if (row_insert( + tinfo, cursor, &key, &value, keyno)) goto deadlock; insert = 1; break; @@ -433,7 +439,7 @@ ops(void *arg) goto skip_insert; /* Insert, then reset the insert cursor. */ - if (col_insert( + if (col_insert(tinfo, cursor_insert, &key, &value, &keyno)) goto deadlock; if ((ret = @@ -448,12 +454,14 @@ ops(void *arg) ++tinfo->update; switch (g.type) { case ROW: - if (row_update(cursor, &key, &value, keyno)) + if (row_update( + tinfo, cursor, &key, &value, keyno)) goto deadlock; break; case FIX: case VAR: -skip_insert: if (col_update(cursor, &key, &value, keyno)) +skip_insert: if (col_update(tinfo, + cursor, &key, &value, keyno)) goto deadlock; break; } @@ -471,8 +479,8 @@ skip_insert: if (col_update(cursor, &key, &value, keyno)) * a random direction. */ if (!insert) { - dir = (int)MMRAND(0, 1); - for (np = 0; np < MMRAND(1, 8); ++np) { + dir = (int)mmrand(tinfo->rnd, 0, 1); + for (np = 0; np < mmrand(tinfo->rnd, 1, 8); ++np) { if (notfound) break; if (nextprev(cursor, dir, ¬found)) @@ -494,7 +502,7 @@ skip_insert: if (col_update(cursor, &key, &value, keyno)) * rollback 10% of the time. */ if (intxn) - switch (MMRAND(1, 10)) { + switch (mmrand(tinfo->rnd, 1, 10)) { case 1: case 2: case 3: case 4: /* 40% */ if ((ret = session->commit_transaction( session, NULL)) != 0) @@ -550,7 +558,7 @@ wts_read_scan(void) /* Open a session and cursor pair. */ if ((ret = conn->open_session( - conn, NULL, ops_session_config(), &session)) != 0) + conn, NULL, ops_session_config(NULL), &session)) != 0) die(ret, "connection.open_session"); if ((ret = session->open_cursor( session, g.uri, NULL, NULL, &cursor)) != 0) @@ -558,7 +566,7 @@ wts_read_scan(void) /* Check a random subset of the records using the key. */ for (last_cnt = cnt = 0; cnt < g.key_cnt;) { - cnt += rng() % 17 + 1; + cnt += mmrand(NULL, 1, 17); if (cnt > g.rows) cnt = g.rows; if (cnt - last_cnt > 1000) { @@ -604,7 +612,7 @@ read_row(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno) cursor->set_key(cursor, keyno); break; case ROW: - key_gen((uint8_t *)key->data, &key->size, keyno, 0); + key_gen((uint8_t *)key->data, &key->size, keyno); cursor->set_key(cursor, key); break; } @@ -790,7 +798,7 @@ nextprev(WT_CURSOR *cursor, int next, int *notfoundp) * Update a row in a row-store file. */ static int -row_update( +row_update(TINFO *tinfo, WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) { WT_SESSION *session; @@ -798,8 +806,8 @@ row_update( session = cursor->session; - key_gen((uint8_t *)key->data, &key->size, keyno, 0); - value_gen((uint8_t *)value->data, &value->size, keyno); + key_gen((uint8_t *)key->data, &key->size, keyno); + val_gen(tinfo->rnd, (uint8_t *)value->data, &value->size, keyno); /* Log the operation */ if (g.logging == LOG_OPS) @@ -835,14 +843,15 @@ row_update( * Update a row in a column-store file. */ static int -col_update(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) +col_update(TINFO *tinfo, + WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) { WT_SESSION *session; int ret; session = cursor->session; - value_gen((uint8_t *)value->data, &value->size, keyno); + val_gen(tinfo->rnd, (uint8_t *)value->data, &value->size, keyno); /* Log the operation */ if (g.logging == LOG_OPS) { @@ -876,7 +885,7 @@ col_update(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) { int notfound; - key_gen((uint8_t *)key->data, &key->size, keyno, 0); + key_gen((uint8_t *)key->data, &key->size, keyno); bdb_update(key->data, key->size, value->data, value->size, ¬found); (void)notfound_chk("col_update", ret, notfound, keyno); } @@ -993,7 +1002,7 @@ table_append(uint64_t keyno) * Insert a row in a row-store file. */ static int -row_insert( +row_insert(TINFO *tinfo, WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) { WT_SESSION *session; @@ -1001,8 +1010,8 @@ row_insert( session = cursor->session; - key_gen((uint8_t *)key->data, &key->size, keyno, 1); - value_gen((uint8_t *)value->data, &value->size, keyno); + key_gen_insert(tinfo->rnd, (uint8_t *)key->data, &key->size, keyno); + val_gen(tinfo->rnd, (uint8_t *)value->data, &value->size, keyno); /* Log the operation */ if (g.logging == LOG_OPS) @@ -1038,7 +1047,8 @@ row_insert( * Insert an element in a column-store file. */ static int -col_insert(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t *keynop) +col_insert(TINFO *tinfo, + WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t *keynop) { WT_SESSION *session; uint64_t keyno; @@ -1046,7 +1056,7 @@ col_insert(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t *keynop) session = cursor->session; - value_gen((uint8_t *)value->data, &value->size, g.rows + 1); + val_gen(tinfo->rnd, (uint8_t *)value->data, &value->size, g.rows + 1); if (g.type == FIX) cursor->set_value(cursor, *(uint8_t *)value->data); @@ -1083,7 +1093,7 @@ col_insert(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t *keynop) { int notfound; - key_gen((uint8_t *)key->data, &key->size, keyno, 0); + key_gen((uint8_t *)key->data, &key->size, keyno); bdb_update(key->data, key->size, value->data, value->size, ¬found); } #else @@ -1104,7 +1114,7 @@ row_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int *notfoundp) session = cursor->session; - key_gen((uint8_t *)key->data, &key->size, keyno, 0); + key_gen((uint8_t *)key->data, &key->size, keyno); /* Log the operation */ if (g.logging == LOG_OPS) @@ -1176,7 +1186,7 @@ col_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int *notfoundp) * do the same thing for the BDB store. */ if (g.type == FIX) { - key_gen((uint8_t *)key->data, &key->size, keyno, 0); + key_gen((uint8_t *)key->data, &key->size, keyno); bdb_update(key->data, key->size, "\0", 1, ¬found); } else bdb_remove(keyno, ¬found); diff --git a/test/format/salvage.c b/test/format/salvage.c index a715ca96d07..52fc7619627 100644 --- a/test/format/salvage.c +++ b/test/format/salvage.c @@ -103,7 +103,7 @@ corrupt(void) found: if (fstat(fd, &sb) == -1) die(errno, "salvage-corrupt: fstat"); - offset = MMRAND(0, sb.st_size); + offset = mmrand(NULL, 0, (u_int)sb.st_size); len = (size_t)(20 + (sb.st_size / 100) * 2); (void)snprintf(buf, sizeof(buf), "%s/slvg.corrupt", g.home); if ((fp = fopen(buf, "w")) == NULL) @@ -111,7 +111,7 @@ found: if (fstat(fd, &sb) == -1) (void)fprintf(fp, "salvage-corrupt: offset %" PRIuMAX ", length " SIZET_FMT "\n", (uintmax_t)offset, len); - (void)fclose(fp); + fclose_and_clear(&fp); if (lseek(fd, offset, SEEK_SET) == -1) die(errno, "salvage-corrupt: lseek"); diff --git a/test/format/t.c b/test/format/t.c index 4b0d0d7a22d..c31d86f073d 100644 --- a/test/format/t.c +++ b/test/format/t.c @@ -34,7 +34,6 @@ static void startup(void); static void usage(void); extern int __wt_optind; -extern int __wt_getopt(const char *, int, char * const *, const char *); extern char *__wt_optarg; int @@ -106,6 +105,9 @@ main(int argc, char *argv[]) argc -= __wt_optind; argv += __wt_optind; + /* Initialize the global random number generator. */ + __wt_random_init(g.rnd); + /* Set up paths. */ path_setup(home); @@ -153,16 +155,15 @@ main(int argc, char *argv[]) g.c_runs = 1; /* - * Initialize locks to single-thread named checkpoints and backups, and - * to single-thread last-record updates. + * Initialize locks to single-thread named checkpoints and backups, last + * last-record updates, and failures. */ if ((ret = pthread_rwlock_init(&g.append_lock, NULL)) != 0) die(ret, "pthread_rwlock_init: append lock"); if ((ret = pthread_rwlock_init(&g.backup_lock, NULL)) != 0) die(ret, "pthread_rwlock_init: backup lock"); - - /* Seed the random number generator. */ - srand((u_int)(0xdeadbeef ^ (u_int)time(NULL))); + if ((ret = pthread_rwlock_init(&g.death_lock, NULL)) != 0) + die(ret, "pthread_rwlock_init: death lock"); printf("%s: process %" PRIdMAX "\n", g.progname, (intmax_t)getpid()); while (++g.run_cnt <= g.c_runs || g.c_runs == 0 ) { @@ -244,10 +245,8 @@ main(int argc, char *argv[]) } /* Flush/close any logging information. */ - if (g.logfp != NULL) - (void)fclose(g.logfp); - if (g.rand_log != NULL) - (void)fclose(g.rand_log); + fclose_and_clear(&g.logfp); + fclose_and_clear(&g.randfp); config_print(0); @@ -270,17 +269,9 @@ startup(void) { int ret; - /* Close the logging file. */ - if (g.logfp != NULL) { - (void)fclose(g.logfp); - g.logfp = NULL; - } - - /* Close the random number logging file. */ - if (g.rand_log != NULL) { - (void)fclose(g.rand_log); - g.rand_log = NULL; - } + /* Flush/close any logging information. */ + fclose_and_clear(&g.logfp); + fclose_and_clear(&g.randfp); /* Create or initialize the home and data-source directories. */ if ((ret = system(g.home_init)) != 0) @@ -291,7 +282,7 @@ startup(void) die(errno, "fopen: %s", g.home_log); /* Open/truncate the random number logging file. */ - if ((g.rand_log = fopen(g.home_rand, g.replay ? "r" : "w")) == NULL) + if ((g.randfp = fopen(g.home_rand, g.replay ? "r" : "w")) == NULL) die(errno, "%s", g.home_rand); } @@ -304,6 +295,9 @@ die(int e, const char *fmt, ...) { va_list ap; + /* Single-thread error handling. */ + (void)pthread_rwlock_wrlock(&g.death_lock); + if (fmt != NULL) { /* Death message. */ fprintf(stderr, "%s: ", g.progname); va_start(ap, fmt); @@ -315,10 +309,8 @@ die(int e, const char *fmt, ...) } /* Flush/close any logging information. */ - if (g.logfp != NULL) - (void)fclose(g.logfp); - if (g.rand_log != NULL) - (void)fclose(g.rand_log); + fclose_and_clear(&g.logfp); + fclose_and_clear(&g.randfp); /* Display the configuration that failed. */ if (g.run_cnt) diff --git a/test/format/util.c b/test/format/util.c index 8744fff05a0..20511a6fe49 100644 --- a/test/format/util.c +++ b/test/format/util.c @@ -33,7 +33,7 @@ #endif static inline uint32_t -kv_len(uint64_t keyno, uint32_t min, uint32_t max) +kv_len(uint32_t *rnd, uint64_t keyno, uint32_t min, uint32_t max) { /* * Focus on relatively small key/value items, admitting the possibility @@ -47,7 +47,7 @@ kv_len(uint64_t keyno, uint32_t min, uint32_t max) max = KILOBYTE(100); } else if (keyno % 20 != 0 && max > min + 20) max = min + 20; - return (MMRAND(min, max)); + return (mmrand(rnd, min, max)); } void @@ -66,7 +66,7 @@ key_len_setup(void) */ for (i = 0; i < sizeof(g.key_rand_len) / sizeof(g.key_rand_len[0]); ++i) g.key_rand_len[i] = - kv_len((uint64_t)i, g.c_key_min, g.c_key_max); + kv_len(NULL, (uint64_t)i, g.c_key_min, g.c_key_max); } void @@ -85,17 +85,16 @@ key_gen_setup(uint8_t **keyp) *keyp = key; } -void -key_gen(uint8_t *key, size_t *sizep, uint64_t keyno, int insert) +static void +key_gen_common(uint8_t *key, size_t *sizep, uint64_t keyno, int suffix) { - int len, suffix; + int len; /* * The key always starts with a 10-digit string (the specified cnt) * followed by two digits, a random number between 1 and 15 if it's * an insert, otherwise 00. */ - suffix = insert ? (int)MMRAND(1, 15) : 0; len = sprintf((char *)key, "%010" PRIu64 ".%02d", keyno, suffix); /* @@ -110,10 +109,22 @@ key_gen(uint8_t *key, size_t *sizep, uint64_t keyno, int insert) *sizep = (size_t)len; } +void +key_gen(uint8_t *key, size_t *sizep, uint64_t keyno) +{ + key_gen_common(key, sizep, keyno, 0); +} + +void +key_gen_insert(uint32_t *rnd, uint8_t *key, size_t *sizep, uint64_t keyno) +{ + key_gen_common(key, sizep, keyno, (int)mmrand(rnd, 1, 15)); +} + static uint32_t val_dup_data_len; /* Length of duplicate data items */ void -val_gen_setup(uint8_t **valp) +val_gen_setup(uint32_t *rnd, uint8_t **valp) { uint8_t *val; size_t i, len; @@ -135,12 +146,12 @@ val_gen_setup(uint8_t **valp) *valp = val; - val_dup_data_len = - kv_len((uint64_t)MMRAND(1, 20), g.c_value_min, g.c_value_max); + val_dup_data_len = kv_len(rnd, + (uint64_t)mmrand(rnd, 1, 20), g.c_value_min, g.c_value_max); } void -value_gen(uint8_t *val, size_t *sizep, uint64_t keyno) +val_gen(uint32_t *rnd, uint8_t *val, size_t *sizep, uint64_t keyno) { /* * Fixed-length records: take the low N bits from the last digit of @@ -148,13 +159,13 @@ value_gen(uint8_t *val, size_t *sizep, uint64_t keyno) */ if (g.type == FIX) { switch (g.c_bitcnt) { - case 8: val[0] = MMRAND(1, 0xff); break; - case 7: val[0] = MMRAND(1, 0x7f); break; - case 6: val[0] = MMRAND(1, 0x3f); break; - case 5: val[0] = MMRAND(1, 0x1f); break; - case 4: val[0] = MMRAND(1, 0x0f); break; - case 3: val[0] = MMRAND(1, 0x07); break; - case 2: val[0] = MMRAND(1, 0x03); break; + case 8: val[0] = (uint8_t)mmrand(rnd, 1, 0xff); break; + case 7: val[0] = (uint8_t)mmrand(rnd, 1, 0x7f); break; + case 6: val[0] = (uint8_t)mmrand(rnd, 1, 0x3f); break; + case 5: val[0] = (uint8_t)mmrand(rnd, 1, 0x1f); break; + case 4: val[0] = (uint8_t)mmrand(rnd, 1, 0x0f); break; + case 3: val[0] = (uint8_t)mmrand(rnd, 1, 0x07); break; + case 2: val[0] = (uint8_t)mmrand(rnd, 1, 0x03); break; case 1: val[0] = 1; break; } *sizep = 1; @@ -180,14 +191,15 @@ value_gen(uint8_t *val, size_t *sizep, uint64_t keyno) * use the same data value all the time. */ if ((g.type == ROW || g.type == VAR) && - g.c_repeat_data_pct != 0 && MMRAND(1, 100) < g.c_repeat_data_pct) { + g.c_repeat_data_pct != 0 && + mmrand(rnd, 1, 100) < g.c_repeat_data_pct) { (void)strcpy((char *)val, "DUPLICATEV"); val[10] = '/'; *sizep = val_dup_data_len; } else { (void)sprintf((char *)val, "%010" PRIu64, keyno); val[10] = '/'; - *sizep = kv_len(keyno, g.c_value_min, g.c_value_max); + *sizep = kv_len(rnd, keyno, g.c_value_min, g.c_value_max); } } @@ -349,32 +361,34 @@ path_setup(const char *home) * Return a random number. */ uint32_t -rng(void) +rng(uint32_t *rnd) { char buf[64]; uint32_t r; /* - * We can entirely reproduce a run based on the random numbers used - * in the initial run, plus the configuration files. It would be - * nice to just log the initial RNG seed, rather than logging every - * random number generated, but we'd have to include our own RNG, - * Berkeley DB calls rand() internally, and that messes up the pattern - * of random numbers. + * Threaded operations have their own RNG information, otherwise we + * use the default. + */ + if (rnd == NULL) + rnd = g.rnd; + + /* + * We can reproduce a single-threaded run based on the random numbers + * used in the initial run, plus the configuration files. * * Check g.replay and g.rand_log_stop: multithreaded runs log/replay * until they get to the operations phase, then turn off log/replay, * threaded operation order can't be replayed. */ if (g.rand_log_stop) - return ((uint32_t)rand()); + return (__wt_random(rnd)); if (g.replay) { - if (fgets(buf, sizeof(buf), g.rand_log) == NULL) { - if (feof(g.rand_log)) { + if (fgets(buf, sizeof(buf), g.randfp) == NULL) { + if (feof(g.randfp)) { fprintf(stderr, - "end of random number log reached, " - "exiting\n"); + "\n" "end of random number log reached\n"); exit(EXIT_SUCCESS); } die(errno, "random number log"); @@ -383,11 +397,28 @@ rng(void) return ((uint32_t)strtoul(buf, NULL, 10)); } - r = (uint32_t)rand(); + r = __wt_random(rnd); /* Save and flush the random number so we're up-to-date on error. */ - (void)fprintf(g.rand_log, "%" PRIu32 "\n", r); - (void)fflush(g.rand_log); + (void)fprintf(g.randfp, "%" PRIu32 "\n", r); + (void)fflush(g.randfp); return (r); } + +/* + * fclose_and_clear -- + * Close a file and clear the handle so we don't close twice. + */ +void +fclose_and_clear(FILE **fpp) +{ + FILE *fp; + + if ((fp = *fpp) == NULL) + return; + *fpp = NULL; + if (fclose(fp) != 0) + die(errno, "fclose"); + return; +} diff --git a/test/format/wts.c b/test/format/wts.c index 0b0d94a0123..c8abe987b8d 100644 --- a/test/format/wts.c +++ b/test/format/wts.c @@ -132,7 +132,7 @@ wts_open(const char *home, int set_api, WT_CONNECTION **connp) * Sometimes specify a set of sources just to exercise that code. */ if (g.c_statistics_server) { - if (MMRAND(0, 5) == 1 && + if (mmrand(NULL, 0, 5) == 1 && memcmp(g.uri, "file:", strlen("file:")) == 0) p += snprintf(p, REMAIN(p, end), ",statistics=(fast)" @@ -256,15 +256,15 @@ wts_create(void) * Configure the maximum key/value sizes, but leave it as the default * if we come up with something crazy. */ - maxintlkey = MMRAND(maxintlpage / 50, maxintlpage / 40); + maxintlkey = mmrand(NULL, maxintlpage / 50, maxintlpage / 40); if (maxintlkey > 20) p += snprintf(p, REMAIN(p, end), ",internal_key_max=%d", maxintlkey); - maxleafkey = MMRAND(maxleafpage / 50, maxleafpage / 40); + maxleafkey = mmrand(NULL, maxleafpage / 50, maxleafpage / 40); if (maxleafkey > 20) p += snprintf(p, REMAIN(p, end), ",leaf_key_max=%d", maxleafkey); - maxleafvalue = MMRAND(maxleafpage * 10, maxleafpage / 40); + maxleafvalue = mmrand(NULL, maxleafpage * 10, maxleafpage / 40); if (maxleafvalue > 40 && maxleafvalue < 100 * 1024) p += snprintf(p, REMAIN(p, end), ",leaf_value_max=%d", maxleafvalue); @@ -295,7 +295,7 @@ wts_create(void) ",huffman_value=english"); if (g.c_dictionary) p += snprintf(p, REMAIN(p, end), - ",dictionary=%d", MMRAND(123, 517)); + ",dictionary=%d", mmrand(NULL, 123, 517)); break; } @@ -328,6 +328,10 @@ wts_create(void) p += snprintf(p, REMAIN(p, end), ",block_compressor=\"lz4\""); break; + case COMPRESS_LZ4_NO_RAW: + p += snprintf(p, REMAIN(p, end), + ",block_compressor=\"lz4-noraw\""); + break; case COMPRESS_LZO: p += snprintf(p, REMAIN(p, end), ",block_compressor=\"LZO1B-6\""); @@ -552,8 +556,7 @@ wts_stats(void) if ((ret = cursor->close(cursor)) != 0) die(ret, "cursor.close"); - if ((ret = fclose(fp)) != 0) - die(ret, "fclose"); + fclose_and_clear(&fp); if ((ret = session->close(session, NULL)) != 0) die(ret, "session.close"); diff --git a/test/huge/huge.c b/test/huge/huge.c index d47c37b1e7c..b716ecda084 100644 --- a/test/huge/huge.c +++ b/test/huge/huge.c @@ -173,7 +173,7 @@ main(int argc, char *argv[]) { CONFIG *cp; size_t len, *lp; - int ch, ret, small; + int ch, small; char *working_dir; if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) @@ -200,8 +200,7 @@ main(int argc, char *argv[]) if (argc != 0) usage(); - if ((ret = testutil_work_dir_from_path(home, 512, working_dir)) != 0) - testutil_die(ret, "provided directory name is too long"); + testutil_work_dir_from_path(home, 512, working_dir); /* Allocate a buffer to use. */ len = small ? ((size_t)SMALL_MAX) : ((size_t)4 * GIGABYTE); @@ -221,7 +220,7 @@ main(int argc, char *argv[]) } free(big); - (void)testutil_clean_work_dir(home); + testutil_clean_work_dir(home); return (EXIT_SUCCESS); } diff --git a/test/packing/intpack-test.c b/test/packing/intpack-test.c index c07cab255bc..361ee696b20 100644 --- a/test/packing/intpack-test.c +++ b/test/packing/intpack-test.c @@ -28,7 +28,7 @@ #include <assert.h> -#include "wt_internal.h" +#include "wt_internal.h" /* For __wt_XXX */ int main(void) diff --git a/test/packing/intpack-test2.c b/test/packing/intpack-test2.c index 6956141ab58..02df8447908 100644 --- a/test/packing/intpack-test2.c +++ b/test/packing/intpack-test2.c @@ -28,7 +28,7 @@ #include <assert.h> -#include "wt_internal.h" +#include "wt_internal.h" /* For __wt_XXX */ int main(void) diff --git a/test/packing/packing-test.c b/test/packing/packing-test.c index 1fd9d74ffd6..0c3eb78da02 100644 --- a/test/packing/packing-test.c +++ b/test/packing/packing-test.c @@ -28,7 +28,7 @@ #include <assert.h> -#include "wt_internal.h" +#include "wt_internal.h" /* For __wt_XXX */ static void check(const char *fmt, ...) diff --git a/test/salvage/salvage.c b/test/salvage/salvage.c index b6132aefa34..a31d61dfdcc 100644 --- a/test/salvage/salvage.c +++ b/test/salvage/salvage.c @@ -26,7 +26,7 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#include "wt_internal.h" +#include "wt_internal.h" /* For __wt_XXX */ #include "test_util.i" #include <assert.h> diff --git a/test/thread/rw.c b/test/thread/rw.c index a5a3e324104..35733e7d647 100644 --- a/test/thread/rw.c +++ b/test/thread/rw.c @@ -36,6 +36,8 @@ typedef struct { char *name; /* object name */ u_int nops; /* Thread op count */ + uint32_t rnd[2]; /* RNG */ + int remove; /* cursor.remove */ int update; /* cursor.update */ int reads; /* cursor.search */ @@ -43,31 +45,6 @@ typedef struct { static INFO *run_info; -/* - * r -- - * Return a 32-bit pseudo-random number. - * - * This is an implementation of George Marsaglia's multiply-with-carry pseudo- - * random number generator. Computationally fast, with reasonable randomness - * properties. - */ -static inline uint32_t -r(void) -{ - static uint32_t m_w = 0, m_z = 0; - - if (m_w == 0) { - struct timeval t; - (void)gettimeofday(&t, NULL); - m_w = (uint32_t)t.tv_sec; - m_z = (uint32_t)t.tv_usec; - } - - m_z = 36969 * (m_z & 65535) + (m_z >> 16); - m_w = 18000 * (m_w & 65535) + (m_w >> 16); - return (m_z << 16) + (m_w & 65535); -} - int rw_start(u_int readers, u_int writers) { @@ -182,7 +159,7 @@ rw_start(u_int readers, u_int writers) * Read operation. */ static inline void -reader_op(WT_SESSION *session, WT_CURSOR *cursor) +reader_op(WT_SESSION *session, WT_CURSOR *cursor, INFO *s) { WT_ITEM *key, _key; u_int keyno; @@ -191,7 +168,7 @@ reader_op(WT_SESSION *session, WT_CURSOR *cursor) key = &_key; - keyno = r() % nkeys + 1; + keyno = __wt_random(s->rnd) % nkeys + 1; if (ftype == ROW) { key->data = keybuf; key->size = (uint32_t) @@ -223,6 +200,7 @@ reader(void *arg) id = (int)(uintptr_t)arg; s = &run_info[id]; __wt_thread_id(tid, sizeof(tid)); + __wt_random_init(s->rnd); printf(" read thread %2d starting: tid: %s, file: %s\n", id, tid, s->name); @@ -237,7 +215,7 @@ reader(void *arg) if ((ret = session->open_cursor( session, s->name, NULL, NULL, &cursor)) != 0) testutil_die(ret, "session.open_cursor"); - reader_op(session, cursor); + reader_op(session, cursor, s); if ((ret = session->close(session, NULL)) != 0) testutil_die(ret, "session.close"); } @@ -249,7 +227,7 @@ reader(void *arg) session, s->name, NULL, NULL, &cursor)) != 0) testutil_die(ret, "session.open_cursor"); for (i = 0; i < s->nops; ++i, ++s->reads, sched_yield()) - reader_op(session, cursor); + reader_op(session, cursor, s); if ((ret = session->close(session, NULL)) != 0) testutil_die(ret, "session.close"); } @@ -275,7 +253,7 @@ writer_op(WT_SESSION *session, WT_CURSOR *cursor, INFO *s) key = &_key; value = &_value; - keyno = r() % nkeys + 1; + keyno = __wt_random(s->rnd) % nkeys + 1; if (ftype == ROW) { key->data = keybuf; key->size = (uint32_t) @@ -323,6 +301,7 @@ writer(void *arg) id = (int)(uintptr_t)arg; s = &run_info[id]; __wt_thread_id(tid, sizeof(tid)); + __wt_random_init(s->rnd); printf("write thread %2d starting: tid: %s, file: %s\n", id, tid, s->name); diff --git a/test/thread/t.c b/test/thread/t.c index baeaa26c69f..71df65d8419 100644 --- a/test/thread/t.c +++ b/test/thread/t.c @@ -55,7 +55,7 @@ int main(int argc, char *argv[]) { u_int readers, writers; - int ch, cnt, ret, runs; + int ch, cnt, runs; char *config_open, *working_dir; if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) @@ -143,8 +143,7 @@ main(int argc, char *argv[]) if (argc != 0) return (usage()); - if ((ret = testutil_work_dir_from_path(home, 512, working_dir)) != 0) - testutil_die(ret, "provided directory name is too long"); + testutil_work_dir_from_path(home, 512, working_dir); if (vary_nops && !multiple_files) { fprintf(stderr, @@ -191,7 +190,7 @@ wt_connect(char *config_open) char config[512]; size_t print_count; - (void)testutil_clean_work_dir(home); + testutil_clean_work_dir(home); testutil_make_work_dir(home); print_count = (size_t)snprintf(config, sizeof(config), @@ -234,7 +233,7 @@ wt_shutdown(void) static void shutdown(void) { - (void)testutil_clean_work_dir(home); + testutil_clean_work_dir(home); } static int diff --git a/test/thread/thread.h b/test/thread/thread.h index 45f8354313b..b27eea2ef7c 100644 --- a/test/thread/thread.h +++ b/test/thread/thread.h @@ -38,8 +38,8 @@ #include <string.h> #include <unistd.h> +#include "wt_internal.h" /* For __wt_XXX */ #include "test_util.i" -#include "wt_internal.h" #define FNAME "file:wt.%03d" /* File name */ #define FNAME_STAT "__stats" /* File name for statistics */ diff --git a/test/utility/test_util.i b/test/utility/test_util.i index 90917fc9f49..c6970bec790 100644 --- a/test/utility/test_util.i +++ b/test/utility/test_util.i @@ -68,30 +68,34 @@ testutil_die(int e, const char *fmt, ...) * Takes a buffer, its size and the intended work directory. * Creates the full intended work directory in buffer. */ -static inline int +static inline void testutil_work_dir_from_path(char *buffer, size_t inputSize, char *dir) { /* If no directory is provided, use the default. */ if (dir == NULL) { if (inputSize < sizeof(DEFAULT_DIR)) - return (1); + testutil_die(ENOMEM, + "Not enough memory in buffer for directory %s%c%s", + dir, DIR_DELIM, DEFAULT_DIR); + snprintf(buffer, inputSize, DEFAULT_DIR); - return (0); + return; } /* Additional bytes for the directory and WT_TEST. */ - if (inputSize < strlen(dir) + sizeof(DEFAULT_DIR)) - return (1); + if (inputSize < strlen(dir) + sizeof(DEFAULT_DIR) + sizeof(DIR_DELIM)) + testutil_die(ENOMEM, + "Not enough memory in buffer for directory %s%c%s", + dir, DIR_DELIM, DEFAULT_DIR); snprintf(buffer, inputSize, "%s%c%s", dir, DIR_DELIM, DEFAULT_DIR); - return (0); } /* * testutil_clean_work_dir -- * Remove any existing work directories, can optionally fail on error */ -static inline int +static inline void testutil_clean_work_dir(char *dir) { size_t inputSize; @@ -104,9 +108,9 @@ testutil_clean_work_dir(char *dir) testutil_die(ENOMEM, "Failed to allocate memory"); snprintf(buffer, inputSize, "%s%s", RM_COMMAND, dir); - ret = system(buffer); + if ((ret = system(buffer)) != 0) + testutil_die(ret, "System call to remove directory failed"); free(buffer); - return (ret); } /* |