diff options
49 files changed, 1066 insertions, 524 deletions
diff --git a/bench/wtperf/runners/log-append-zero.wtperf b/bench/wtperf/runners/log-append-zero.wtperf new file mode 100644 index 00000000000..973d2cddd0d --- /dev/null +++ b/bench/wtperf/runners/log-append-zero.wtperf @@ -0,0 +1,8 @@ +# wtperf options file: Test a log file with a multi-threaded +# append workload. +conn_config="cache_size=1G,log=(enabled=true,file_max=20MB,zero_fill=true),checkpoint=(log_size=1G)" +table_config="type=file" +icount=50000000 +report_interval=5 +run_time=0 +populate_threads=8 diff --git a/bench/wtperf/runners/multi-btree-stress.wtperf b/bench/wtperf/runners/multi-btree-stress.wtperf new file mode 100644 index 00000000000..b10b08f6035 --- /dev/null +++ b/bench/wtperf/runners/multi-btree-stress.wtperf @@ -0,0 +1,17 @@ +# wtperf options file: multi-database configuration attempting to +# trigger slow operations by overloading CPU and disk. +# References Jira WT-2131 +conn_config="cache_size=2GB,eviction=(threads_min=2,threads_max=2),log=(enabled=false),direct_io=(data,checkpoint),buffer_alignment=4096,checkpoint_sync=true,checkpoint=(wait=60)" +table_config="allocation_size=4k,prefix_compression=false,split_pct=75,leaf_page_max=4k,internal_page_max=16k,leaf_item_max=1433,internal_item_max=3100,type=file" +# Divide original icount by database_count. 
+database_count=5 +icount=50000 +populate_threads=1 +random_range=50000000 +report_interval=5 +run_time=3600 +threads=((count=1,inserts=1),(count=10,reads=1)) +value_sz=100 +max_latency=1000 +sample_interval=5 +sample_rate=1 diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c index 8dceeab2832..20c30e10482 100644 --- a/bench/wtperf/wtperf.c +++ b/bench/wtperf/wtperf.c @@ -600,7 +600,34 @@ worker(void *arg) if (ret == WT_NOTFOUND) break; -op_err: lprintf(cfg, ret, 0, +op_err: if (ret == WT_ROLLBACK && ops_per_txn != 0) { + /* + * If we are running with explicit transactions + * configured and we hit a WT_ROLLBACK, then we + * should rollback the current transaction and + * attempt to continue. + * This does break the guarantee of insertion + * order in cases of ordered inserts, as we + * aren't retrying here. + */ + lprintf(cfg, ret, 1, + "%s for: %s, range: %"PRIu64, op_name(op), + key_buf, wtperf_value_range(cfg)); + if ((ret = session->rollback_transaction( + session, NULL)) != 0) { + lprintf(cfg, ret, 0, + "Failed rollback_transaction"); + goto err; + } + if ((ret = session->begin_transaction( + session, NULL)) != 0) { + lprintf(cfg, ret, 0, + "Worker begin transaction failed"); + goto err; + } + break; + } + lprintf(cfg, ret, 0, "%s failed for: %s, range: %"PRIu64, op_name(op), key_buf, wtperf_value_range(cfg)); goto err; @@ -644,7 +671,7 @@ op_err: lprintf(cfg, ret, 0, if ((ret = session->begin_transaction( session, NULL)) != 0) { lprintf(cfg, ret, 0, - "Worker transaction commit failed"); + "Worker begin transaction failed"); goto err; } } @@ -1171,8 +1198,12 @@ monitor(void *arg) if (latency_max != 0 && (read_max > latency_max || insert_max > latency_max || update_max > latency_max)) + /* + * Make this a non-fatal error and print WARNING in + * the output so Jenkins can flag it as unstable. 
+ */ lprintf(cfg, 0, 0, - "max latency exceeded: threshold %" PRIu32 + "WARNING: max latency exceeded: threshold %" PRIu32 " read max %" PRIu32 " insert max %" PRIu32 " update max %" PRIu32, latency_max, read_max, insert_max, update_max); diff --git a/dist/api_data.py b/dist/api_data.py index 5652edc4ebe..6fd7dcd0093 100644 --- a/dist/api_data.py +++ b/dist/api_data.py @@ -411,6 +411,41 @@ connection_runtime_config = [ interval in seconds at which to check for files that are inactive and close them''', min=1, max=100000), ]), + Config('log', '', r''' + enable logging. Enabling logging uses three sessions from the + configured session_max''', + type='category', subconfig=[ + Config('archive', 'true', r''' + automatically archive unneeded log files''', + type='boolean'), + Config('compressor', 'none', r''' + configure a compressor for log records. Permitted values are + \c "none" or custom compression engine name created with + WT_CONNECTION::add_compressor. If WiredTiger has builtin support + for \c "bzip2", \c "snappy", \c "lz4" or \c "zlib" compression, + these names are also available. See @ref compression for more + information'''), + Config('enabled', 'false', r''' + enable logging subsystem''', + type='boolean'), + Config('file_max', '100MB', r''' + the maximum size of log files''', + min='100KB', max='2GB'), + Config('path', '', r''' + the path to a directory into which the log files are written. + If the value is not an absolute path name, the files are created + relative to the database home'''), + Config('prealloc', 'true', r''' + pre-allocate log files.''', + type='boolean'), + Config('recover', 'on', r''' + run recovery or error if recovery needs to run after an + unclean shutdown.''', + choices=['error','on']), + Config('zero_fill', 'false', r''' + manually write zeroes into log files''', + type='boolean'), + ]), Config('lsm_manager', '', r''' configure database wide options for LSM tree management. 
The LSM manager is started automatically the first time an LSM tree is opened. @@ -611,38 +646,6 @@ common_wiredtiger_open = [ maximum number of simultaneous hazard pointers per session handle''', min='15'), - Config('log', '', r''' - enable logging. Enabling logging uses three sessions from the - configured session_max''', - type='category', subconfig=[ - Config('archive', 'true', r''' - automatically archive unneeded log files''', - type='boolean'), - Config('compressor', 'none', r''' - configure a compressor for log records. Permitted values are - \c "none" or custom compression engine name created with - WT_CONNECTION::add_compressor. If WiredTiger has builtin support - for \c "bzip2", \c "snappy", \c "lz4" or \c "zlib" compression, - these names are also available. See @ref compression for more - information'''), - Config('enabled', 'false', r''' - enable logging subsystem''', - type='boolean'), - Config('file_max', '100MB', r''' - the maximum size of log files''', - min='100KB', max='2GB'), - Config('path', '', r''' - the path to a directory into which the log files are written. 
- If the value is not an absolute path name, the files are created - relative to the database home'''), - Config('prealloc', 'true', r''' - pre-allocate log files.''', - type='boolean'), - Config('recover', 'on', r''' - run recovery or error if recovery needs to run after an - unclean shutdown.''', - choices=['error','on']), - ]), Config('mmap', 'true', r''' Use memory mapping to access files when possible''', type='boolean'), diff --git a/dist/flags.py b/dist/flags.py index d98f249335e..65b68cf4277 100644 --- a/dist/flags.py +++ b/dist/flags.py @@ -92,6 +92,7 @@ flags = { 'CONN_CKPT_SYNC', 'CONN_CLOSING', 'CONN_EVICTION_RUN', + 'CONN_LAS_OPEN', 'CONN_LEAK_MEMORY', 'CONN_LOG_SERVER_RUN', 'CONN_LSM_MERGE', diff --git a/dist/s_string.ok b/dist/s_string.ok index 021e222919e..d234a3c101f 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -646,6 +646,7 @@ intrin inuse io ip +islocked ispo iteratively jnr diff --git a/dist/stat_data.py b/dist/stat_data.py index 5bf7000f402..76fdf185137 100644 --- a/dist/stat_data.py +++ b/dist/stat_data.py @@ -272,6 +272,7 @@ connection_stats = [ LogStat('log_sync_dir', 'log sync_dir operations'), LogStat('log_write_lsn', 'log server thread advances write LSN'), LogStat('log_writes', 'log write operations'), + LogStat('log_zero_fills', 'log files manually zero-filled'), ########################################## # Reconciliation statistics diff --git a/src/async/async_api.c b/src/async/async_api.c index 1d819474728..dc26f2d11c3 100644 --- a/src/async/async_api.c +++ b/src/async/async_api.c @@ -53,7 +53,7 @@ __async_get_format(WT_CONNECTION_IMPL *conn, const char *uri, * for the cursor. 
*/ WT_RET(__wt_open_internal_session( - conn, "async-cursor", true, true, &session)); + conn, "async-cursor", true, 0, &session)); __wt_spin_lock(session, &async->ops_lock); WT_ERR(__wt_calloc_one(session, &af)); WT_ERR(__wt_strdup(session, uri, &af->uri)); @@ -229,7 +229,7 @@ __async_start(WT_SESSION_IMPL *session) { WT_ASYNC *async; WT_CONNECTION_IMPL *conn; - uint32_t i; + uint32_t i, session_flags; conn = S2C(session); conn->async_cfg = 1; @@ -256,9 +256,9 @@ __async_start(WT_SESSION_IMPL *session) * workers and we may want to selectively stop some workers * while leaving the rest running. */ - WT_RET(__wt_open_internal_session(conn, - "async-worker", true, true, &async->worker_sessions[i])); - F_SET(async->worker_sessions[i], WT_SESSION_SERVER_ASYNC); + session_flags = WT_SESSION_SERVER_ASYNC; + WT_RET(__wt_open_internal_session(conn, "async-worker", + true, session_flags, &async->worker_sessions[i])); } for (i = 0; i < conn->async_workers; i++) { /* @@ -305,7 +305,7 @@ __wt_async_reconfig(WT_SESSION_IMPL *session, const char *cfg[]) WT_DECL_RET; WT_SESSION *wt_session; bool run; - uint32_t i; + uint32_t i, session_flags; conn = S2C(session); async = conn->async; @@ -371,10 +371,9 @@ __wt_async_reconfig(WT_SESSION_IMPL *session, const char *cfg[]) /* * Each worker has its own session. 
*/ + session_flags = WT_SESSION_SERVER_ASYNC; WT_RET(__wt_open_internal_session(conn, "async-worker", - true, true, &async->worker_sessions[i])); - F_SET(async->worker_sessions[i], - WT_SESSION_SERVER_ASYNC); + true, session_flags, &async->worker_sessions[i])); } for (i = conn->async_workers; i < tmp_conn.async_workers; i++) { /* diff --git a/src/btree/bt_compact.c b/src/btree/bt_compact.c index 18b6860c758..b2c9e4b67f8 100644 --- a/src/btree/bt_compact.c +++ b/src/btree/bt_compact.c @@ -55,10 +55,12 @@ __compact_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp) * The page's modification information can change underfoot if * the page is being reconciled, serialize with reconciliation. */ - F_CAS_ATOMIC_WAIT(page, WT_PAGE_RECONCILIATION); + WT_RET(__wt_fair_lock(session, &page->page_lock)); + ret = bm->compact_page_skip(bm, session, mod->mod_replace.addr, mod->mod_replace.size, skipp); - F_CLR_ATOMIC(page, WT_PAGE_RECONCILIATION); + + WT_TRET(__wt_fair_unlock(session, &page->page_lock)); WT_RET(ret); } return (0); diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c index ee2898f60be..15ae93522a7 100644 --- a/src/btree/bt_debug.c +++ b/src/btree/bt_debug.c @@ -636,7 +636,10 @@ __debug_page_metadata(WT_DBG *ds, WT_PAGE *page) __dmsg(ds, ": %s\n", __wt_page_type_string(page->type)); __dmsg(ds, "\t" "disk %p, entries %" PRIu32, page->dsk, entries); - __dmsg(ds, "%s", __wt_page_is_modified(page) ? ", dirty" : ", clean"); + __dmsg(ds, ", %s", __wt_page_is_modified(page) ? "dirty" : "clean"); + __dmsg(ds, ", %s", __wt_fair_islocked( + session, &page->page_lock) ? 
"locked" : "unlocked"); + if (F_ISSET_ATOMIC(page, WT_PAGE_BUILD_KEYS)) __dmsg(ds, ", keys-built"); if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_ALLOC)) @@ -647,8 +650,6 @@ __debug_page_metadata(WT_DBG *ds, WT_PAGE *page) __dmsg(ds, ", evict-lru"); if (F_ISSET_ATOMIC(page, WT_PAGE_OVERFLOW_KEYS)) __dmsg(ds, ", overflow-keys"); - if (F_ISSET_ATOMIC(page, WT_PAGE_RECONCILIATION)) - __dmsg(ds, ", reconciliation"); if (F_ISSET_ATOMIC(page, WT_PAGE_SPLIT_INSERT)) __dmsg(ds, ", split-insert"); diff --git a/src/btree/bt_discard.c b/src/btree/bt_discard.c index c27d42d38f4..998667e3e1f 100644 --- a/src/btree/bt_discard.c +++ b/src/btree/bt_discard.c @@ -55,7 +55,7 @@ __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep) */ WT_ASSERT(session, !__wt_page_is_modified(page)); WT_ASSERT(session, !F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU)); - WT_ASSERT(session, !F_ISSET_ATOMIC(page, WT_PAGE_RECONCILIATION)); + WT_ASSERT(session, !__wt_fair_islocked(session, &page->page_lock)); #ifdef HAVE_DIAGNOSTIC { diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c index 29153ced178..adda9145ee4 100644 --- a/src/btree/bt_split.c +++ b/src/btree/bt_split.c @@ -866,6 +866,18 @@ __split_parent_lock( *parentp = NULL; /* + * A checkpoint reconciling this parent page can deadlock with + * our split. We have an exclusive page lock on the child before + * we acquire the page's reconciliation lock, and reconciliation + * acquires the page's reconciliation lock before it encounters + * the child's exclusive lock (which causes reconciliation to + * loop until the exclusive lock is resolved). If we want to split + * the parent, give up to avoid that deadlock. + */ + if (S2BT(session)->checkpointing != WT_CKPT_OFF) + return (EBUSY); + + /* * Get a page-level lock on the parent to single-thread splits into the * page because we need to single-thread sizing/growing the page index. 
* It's OK to queue up multiple splits as the child pages split, but the @@ -882,32 +894,11 @@ __split_parent_lock( */ for (;;) { parent = ref->home; - F_CAS_ATOMIC(parent, WT_PAGE_RECONCILIATION, ret); - if (ret == 0) { - /* - * We can race with another thread deepening our parent. - * To deal with that, read the parent pointer each time - * we try to lock it, and check it's still correct after - * it's locked. - */ - if (parent == ref->home) - break; - F_CLR_ATOMIC(parent, WT_PAGE_RECONCILIATION); - continue; - } - - /* - * A checkpoint reconciling this parent page can deadlock with - * our split. We have an exclusive page lock on the child before - * we acquire the page's reconciliation lock, and reconciliation - * acquires the page's reconciliation lock before it encounters - * the child's exclusive lock (which causes reconciliation to - * loop until the exclusive lock is resolved). If we can't lock - * the parent, give up to avoid that deadlock. - */ - if (S2BT(session)->checkpointing != WT_CKPT_OFF) - return (EBUSY); - __wt_yield(); + WT_RET(__wt_fair_lock(session, &parent->page_lock)); + if (parent == ref->home) + break; + /* Try again if the page deepened while we were waiting */ + WT_RET(__wt_fair_unlock(session, &parent->page_lock)); } /* @@ -930,7 +921,7 @@ __split_parent_lock( *parentp = parent; return (0); -err: F_CLR_ATOMIC(parent, WT_PAGE_RECONCILIATION); +err: WT_TRET(__wt_fair_unlock(session, &parent->page_lock)); return (ret); } @@ -946,7 +937,7 @@ __split_parent_unlock(WT_SESSION_IMPL *session, WT_PAGE *parent, bool hazard) if (hazard) ret = __wt_hazard_clear(session, parent); - F_CLR_ATOMIC(parent, WT_PAGE_RECONCILIATION); + WT_TRET(__wt_fair_unlock(session, &parent->page_lock)); return (ret); } diff --git a/src/btree/bt_sync.c b/src/btree/bt_sync.c index 247bdef65c8..237d900c3d1 100644 --- a/src/btree/bt_sync.c +++ b/src/btree/bt_sync.c @@ -140,8 +140,7 @@ __sync_file(WT_SESSION_IMPL *session, int syncop) */ if (!WT_PAGE_IS_INTERNAL(page) && 
F_ISSET(txn, WT_TXN_HAS_SNAPSHOT) && - WT_TXNID_LT(txn->snap_max, mod->first_dirty_txn) && - mod->rec_result != WT_PM_REC_REWRITE) { + WT_TXNID_LT(txn->snap_max, mod->first_dirty_txn)) { __wt_page_modify_set(session, page); continue; } diff --git a/src/cache/cache_las.c b/src/cache/cache_las.c index a964ac39874..2eb406c2af8 100644 --- a/src/cache/cache_las.c +++ b/src/cache/cache_las.c @@ -24,10 +24,9 @@ __wt_las_stats_update(WT_SESSION_IMPL *session) /* * Lookaside table statistics are copied from the underlying lookaside * table data-source statistics. If there's no lookaside table, values - * remain 0. In the current system, there's always a lookaside table, - * but there's no reason not to be cautious. + * remain 0. */ - if (conn->las_cursor == NULL) + if (!F_ISSET(conn, WT_CONN_LAS_OPEN)) return; /* @@ -35,7 +34,8 @@ __wt_las_stats_update(WT_SESSION_IMPL *session) * to it by way of the underlying btree handle, but it's a little ugly. */ cstats = conn->stats; - dstats = ((WT_CURSOR_BTREE *)conn->las_cursor)->btree->dhandle->stats; + dstats = ((WT_CURSOR_BTREE *) + conn->las_session->las_cursor)->btree->dhandle->stats; WT_STAT_SET(session, cstats, cache_lookaside_insert, WT_STAT_READ(dstats, cursor_insert)); @@ -44,40 +44,6 @@ __wt_las_stats_update(WT_SESSION_IMPL *session) } /* - * __las_cursor_create -- - * Open a new lookaside table cursor. - */ -static int -__las_cursor_create(WT_SESSION_IMPL *session, WT_CURSOR **cursorp) -{ - WT_BTREE *btree; - const char *open_cursor_cfg[] = { - WT_CONFIG_BASE(session, WT_SESSION_open_cursor), NULL }; - - WT_RET(__wt_open_cursor( - session, WT_LAS_URI, NULL, open_cursor_cfg, cursorp)); - - /* - * Set special flags for the lookaside table: the lookaside flag (used, - * for example, to avoid writing records during reconciliation), also - * turn off checkpoints and logging. 
- * - * Test flags before setting them so updates can't race in subsequent - * opens (the first update is safe because it's single-threaded from - * wiredtiger_open). - */ - btree = S2BT(session); - if (!F_ISSET(btree, WT_BTREE_LOOKASIDE)) - F_SET(btree, WT_BTREE_LOOKASIDE); - if (!F_ISSET(btree, WT_BTREE_NO_CHECKPOINT)) - F_SET(btree, WT_BTREE_NO_CHECKPOINT); - if (!F_ISSET(btree, WT_BTREE_NO_LOGGING)) - F_SET(btree, WT_BTREE_NO_LOGGING); - - return (0); -} - -/* * __wt_las_create -- * Initialize the database's lookaside store. */ @@ -85,7 +51,7 @@ int __wt_las_create(WT_SESSION_IMPL *session) { WT_CONNECTION_IMPL *conn; - WT_DECL_RET; + uint32_t session_flags; const char *drop_cfg[] = { WT_CONFIG_BASE(session, WT_SESSION_drop), "force=true", NULL }; @@ -93,30 +59,28 @@ __wt_las_create(WT_SESSION_IMPL *session) /* * Done at startup: we cannot do it on demand because we require the - * schema lock to create and drop the file, and it may not always be + * schema lock to create and drop the table, and it may not always be * available. * - * Open an internal session, used for the shared lookaside cursor. - * - * Sessions associated with a lookaside cursor should never be tapped - * for eviction. + * Discard any previous incarnation of the table. */ - WT_RET(__wt_open_internal_session( - conn, "lookaside table", true, true, &conn->las_session)); - session = conn->las_session; - F_SET(session, WT_SESSION_LOOKASIDE_CURSOR | WT_SESSION_NO_EVICTION); - - /* Discard any previous incarnation of the file. */ WT_RET(__wt_session_drop(session, WT_LAS_URI, drop_cfg)); - /* Re-create the file. */ + /* Re-create the table. */ WT_RET(__wt_session_create(session, WT_LAS_URI, WT_LAS_FORMAT)); - /* Open the shared cursor. */ - WT_WITHOUT_DHANDLE(session, - ret = __las_cursor_create(session, &conn->las_cursor)); + /* + * Open a shared internal session used to access the lookaside table. + * This session should never be tapped for eviction. 
+ */ + session_flags = WT_SESSION_LOOKASIDE_CURSOR | WT_SESSION_NO_EVICTION; + WT_RET(__wt_open_internal_session( + conn, "lookaside table", true, session_flags, &conn->las_session)); - return (ret); + /* Flag that the lookaside table has been created. */ + F_SET(conn, WT_CONN_LAS_OPEN); + + return (0); } /* @@ -138,7 +102,6 @@ __wt_las_destroy(WT_SESSION_IMPL *session) wt_session = &conn->las_session->iface; ret = wt_session->close(wt_session, NULL); - conn->las_cursor = NULL; conn->las_session = NULL; return (ret); @@ -176,6 +139,40 @@ __wt_las_is_written(WT_SESSION_IMPL *session) } /* + * __wt_las_cursor_create -- + * Open a new lookaside table cursor. + */ +int +__wt_las_cursor_create(WT_SESSION_IMPL *session, WT_CURSOR **cursorp) +{ + WT_BTREE *btree; + const char *open_cursor_cfg[] = { + WT_CONFIG_BASE(session, WT_SESSION_open_cursor), NULL }; + + WT_RET(__wt_open_cursor( + session, WT_LAS_URI, NULL, open_cursor_cfg, cursorp)); + + /* + * Set special flags for the lookaside table: the lookaside flag (used, + * for example, to avoid writing records during reconciliation), also + * turn off checkpoints and logging. + * + * Test flags before setting them so updates can't race in subsequent + * opens (the first update is safe because it's single-threaded from + * wiredtiger_open). + */ + btree = S2BT(session); + if (!F_ISSET(btree, WT_BTREE_LOOKASIDE)) + F_SET(btree, WT_BTREE_LOOKASIDE); + if (!F_ISSET(btree, WT_BTREE_NO_CHECKPOINT)) + F_SET(btree, WT_BTREE_NO_CHECKPOINT); + if (!F_ISSET(btree, WT_BTREE_NO_LOGGING)) + F_SET(btree, WT_BTREE_NO_LOGGING); + + return (0); +} + +/* * __wt_las_cursor -- * Return a lookaside cursor. */ @@ -184,7 +181,6 @@ __wt_las_cursor( WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t *session_flags) { WT_CONNECTION_IMPL *conn; - WT_DECL_RET; *cursorp = NULL; @@ -202,20 +198,15 @@ __wt_las_cursor( conn = S2C(session); - /* Eviction and sweep threads have their own lookaside table cursors. 
*/ - if (F_ISSET(session, WT_SESSION_LOOKASIDE_CURSOR)) { - if (session->las_cursor == NULL) { - WT_WITHOUT_DHANDLE(session, ret = - __las_cursor_create(session, &session->las_cursor)); - WT_RET(ret); - } - + /* + * Some threads have their own lookaside table cursors, else lock the + * shared lookaside cursor. + */ + if (F_ISSET(session, WT_SESSION_LOOKASIDE_CURSOR)) *cursorp = session->las_cursor; - } else { - /* Lock the shared lookaside cursor. */ + else { __wt_spin_lock(session, &conn->las_lock); - - *cursorp = conn->las_cursor; + *cursorp = conn->las_session->las_cursor; } /* Turn caching and eviction off. */ @@ -253,8 +244,8 @@ __wt_las_cursor_close( F_SET(session, session_flags); /* - * Eviction and sweep threads have their own lookaside table cursors; - * else, unlock the shared lookaside cursor. + * Some threads have their own lookaside table cursors, else unlock the + * shared lookaside cursor. */ if (!F_ISSET(session, WT_SESSION_LOOKASIDE_CURSOR)) __wt_spin_unlock(session, &conn->las_lock); diff --git a/src/config/config.c b/src/config/config.c index 27de6264a28..505b843aa86 100644 --- a/src/config/config.c +++ b/src/config/config.c @@ -745,11 +745,16 @@ __wt_config_gets_def(WT_SESSION_IMPL *session, *value = false_value; value->val = def; + if (cfg == NULL || cfg[0] == NULL || cfg[1] == NULL) return (0); - else if (cfg[2] == NULL) + + if (cfg[2] == NULL) { WT_RET_NOTFOUND_OK( __wt_config_getones(session, cfg[1], key, value)); + return (0); + } + return (__wt_config_gets(session, cfg, key, value)); } diff --git a/src/config/config_def.c b/src/config/config_def.c index a3dc24fafc4..419f4124133 100644 --- a/src/config/config_def.c +++ b/src/config/config_def.c @@ -66,6 +66,21 @@ static const WT_CONFIG_CHECK }; static const WT_CONFIG_CHECK + confchk_wiredtiger_open_log_subconfigs[] = { + { "archive", "boolean", NULL, NULL, NULL, 0 }, + { "compressor", "string", NULL, NULL, NULL, 0 }, + { "enabled", "boolean", NULL, NULL, NULL, 0 }, + { "file_max", "int", 
NULL, "min=100KB,max=2GB", NULL, 0 }, + { "path", "string", NULL, NULL, NULL, 0 }, + { "prealloc", "boolean", NULL, NULL, NULL, 0 }, + { "recover", "string", + NULL, "choices=[\"error\",\"on\"]", + NULL, 0 }, + { "zero_fill", "boolean", NULL, NULL, NULL, 0 }, + { NULL, NULL, NULL, NULL, NULL, 0 } +}; + +static const WT_CONFIG_CHECK confchk_wiredtiger_open_lsm_manager_subconfigs[] = { { "merge", "boolean", NULL, NULL, NULL, 0 }, { "worker_thread_max", "int", NULL, "min=3,max=20", NULL, 0 }, @@ -116,6 +131,9 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = { { "file_manager", "category", NULL, NULL, confchk_wiredtiger_open_file_manager_subconfigs, 3 }, + { "log", "category", + NULL, NULL, + confchk_wiredtiger_open_log_subconfigs, 8 }, { "lsm_manager", "category", NULL, NULL, confchk_wiredtiger_open_lsm_manager_subconfigs, 2 }, @@ -453,20 +471,6 @@ static const WT_CONFIG_CHECK }; static const WT_CONFIG_CHECK - confchk_wiredtiger_open_log_subconfigs[] = { - { "archive", "boolean", NULL, NULL, NULL, 0 }, - { "compressor", "string", NULL, NULL, NULL, 0 }, - { "enabled", "boolean", NULL, NULL, NULL, 0 }, - { "file_max", "int", NULL, "min=100KB,max=2GB", NULL, 0 }, - { "path", "string", NULL, NULL, NULL, 0 }, - { "prealloc", "boolean", NULL, NULL, NULL, 0 }, - { "recover", "string", - NULL, "choices=[\"error\",\"on\"]", - NULL, 0 }, - { NULL, NULL, NULL, NULL, NULL, 0 } -}; - -static const WT_CONFIG_CHECK confchk_wiredtiger_open_transaction_sync_subconfigs[] = { { "enabled", "boolean", NULL, NULL, NULL, 0 }, { "method", "string", @@ -517,7 +521,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = { { "hazard_max", "int", NULL, "min=15", NULL, 0 }, { "log", "category", NULL, NULL, - confchk_wiredtiger_open_log_subconfigs, 7 }, + confchk_wiredtiger_open_log_subconfigs, 8 }, { "lsm_manager", "category", NULL, NULL, confchk_wiredtiger_open_lsm_manager_subconfigs, 2 }, @@ -592,7 +596,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = 
{ { "hazard_max", "int", NULL, "min=15", NULL, 0 }, { "log", "category", NULL, NULL, - confchk_wiredtiger_open_log_subconfigs, 7 }, + confchk_wiredtiger_open_log_subconfigs, 8 }, { "lsm_manager", "category", NULL, NULL, confchk_wiredtiger_open_lsm_manager_subconfigs, 2 }, @@ -665,7 +669,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = { { "hazard_max", "int", NULL, "min=15", NULL, 0 }, { "log", "category", NULL, NULL, - confchk_wiredtiger_open_log_subconfigs, 7 }, + confchk_wiredtiger_open_log_subconfigs, 8 }, { "lsm_manager", "category", NULL, NULL, confchk_wiredtiger_open_lsm_manager_subconfigs, 2 }, @@ -737,7 +741,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = { { "hazard_max", "int", NULL, "min=15", NULL, 0 }, { "log", "category", NULL, NULL, - confchk_wiredtiger_open_log_subconfigs, 7 }, + confchk_wiredtiger_open_log_subconfigs, 8 }, { "lsm_manager", "category", NULL, NULL, confchk_wiredtiger_open_lsm_manager_subconfigs, 2 }, @@ -814,12 +818,14 @@ static const WT_CONFIG_ENTRY config_entries[] = { "eviction=(threads_max=1,threads_min=1),eviction_dirty_target=80," "eviction_dirty_trigger=95,eviction_target=80,eviction_trigger=95" ",file_manager=(close_handle_minimum=250,close_idle_time=30," - "close_scan_interval=10),lsm_manager=(merge=,worker_thread_max=4)" - ",lsm_merge=,shared_cache=(chunk=10MB,name=,quota=0,reserve=0," - "size=500MB),statistics=none,statistics_log=(on_close=0," + "close_scan_interval=10),log=(archive=,compressor=,enabled=0," + "file_max=100MB,path=,prealloc=,recover=on,zero_fill=0)," + "lsm_manager=(merge=,worker_thread_max=4),lsm_merge=," + "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB)," + "statistics=none,statistics_log=(on_close=0," "path=\"WiredTigerStat.%d.%H\",sources=," "timestamp=\"%b %d %H:%M:%S\",wait=0),verbose=", - confchk_WT_CONNECTION_reconfigure, 17 + confchk_WT_CONNECTION_reconfigure, 18 }, { "WT_CURSOR.close", "", @@ -969,13 +975,14 @@ static const WT_CONFIG_ENTRY 
config_entries[] = { "file_extend=,file_manager=(close_handle_minimum=250," "close_idle_time=30,close_scan_interval=10),hazard_max=1000," "log=(archive=,compressor=,enabled=0,file_max=100MB,path=," - "prealloc=,recover=on),lsm_manager=(merge=,worker_thread_max=4)," - "lsm_merge=,mmap=,multiprocess=0,session_max=100," - "session_scratch_max=2MB,shared_cache=(chunk=10MB,name=,quota=0," - "reserve=0,size=500MB),statistics=none,statistics_log=(on_close=0" - ",path=\"WiredTigerStat.%d.%H\",sources=," - "timestamp=\"%b %d %H:%M:%S\",wait=0),transaction_sync=(enabled=0" - ",method=fsync),use_environment_priv=0,verbose=", + "prealloc=,recover=on,zero_fill=0),lsm_manager=(merge=," + "worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0," + "session_max=100,session_scratch_max=2MB,shared_cache=(chunk=10MB" + ",name=,quota=0,reserve=0,size=500MB),statistics=none," + "statistics_log=(on_close=0,path=\"WiredTigerStat.%d.%H\"," + "sources=,timestamp=\"%b %d %H:%M:%S\",wait=0)," + "transaction_sync=(enabled=0,method=fsync),use_environment_priv=0" + ",verbose=", confchk_wiredtiger_open, 34 }, { "wiredtiger_open_all", @@ -989,14 +996,14 @@ static const WT_CONFIG_ENTRY config_entries[] = { "file_extend=,file_manager=(close_handle_minimum=250," "close_idle_time=30,close_scan_interval=10),hazard_max=1000," "log=(archive=,compressor=,enabled=0,file_max=100MB,path=," - "prealloc=,recover=on),lsm_manager=(merge=,worker_thread_max=4)," - "lsm_merge=,mmap=,multiprocess=0,session_max=100," - "session_scratch_max=2MB,shared_cache=(chunk=10MB,name=,quota=0," - "reserve=0,size=500MB),statistics=none,statistics_log=(on_close=0" - ",path=\"WiredTigerStat.%d.%H\",sources=," - "timestamp=\"%b %d %H:%M:%S\",wait=0),transaction_sync=(enabled=0" - ",method=fsync),use_environment_priv=0,verbose=,version=(major=0," - "minor=0)", + "prealloc=,recover=on,zero_fill=0),lsm_manager=(merge=," + "worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0," + 
"session_max=100,session_scratch_max=2MB,shared_cache=(chunk=10MB" + ",name=,quota=0,reserve=0,size=500MB),statistics=none," + "statistics_log=(on_close=0,path=\"WiredTigerStat.%d.%H\"," + "sources=,timestamp=\"%b %d %H:%M:%S\",wait=0)," + "transaction_sync=(enabled=0,method=fsync),use_environment_priv=0" + ",verbose=,version=(major=0,minor=0)", confchk_wiredtiger_open_all, 35 }, { "wiredtiger_open_basecfg", @@ -1009,13 +1016,14 @@ static const WT_CONFIG_ENTRY config_entries[] = { ",extensions=,file_extend=,file_manager=(close_handle_minimum=250" ",close_idle_time=30,close_scan_interval=10),hazard_max=1000," "log=(archive=,compressor=,enabled=0,file_max=100MB,path=," - "prealloc=,recover=on),lsm_manager=(merge=,worker_thread_max=4)," - "lsm_merge=,mmap=,multiprocess=0,session_max=100," - "session_scratch_max=2MB,shared_cache=(chunk=10MB,name=,quota=0," - "reserve=0,size=500MB),statistics=none,statistics_log=(on_close=0" - ",path=\"WiredTigerStat.%d.%H\",sources=," - "timestamp=\"%b %d %H:%M:%S\",wait=0),transaction_sync=(enabled=0" - ",method=fsync),verbose=,version=(major=0,minor=0)", + "prealloc=,recover=on,zero_fill=0),lsm_manager=(merge=," + "worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0," + "session_max=100,session_scratch_max=2MB,shared_cache=(chunk=10MB" + ",name=,quota=0,reserve=0,size=500MB),statistics=none," + "statistics_log=(on_close=0,path=\"WiredTigerStat.%d.%H\"," + "sources=,timestamp=\"%b %d %H:%M:%S\",wait=0)," + "transaction_sync=(enabled=0,method=fsync),verbose=," + "version=(major=0,minor=0)", confchk_wiredtiger_open_basecfg, 31 }, { "wiredtiger_open_usercfg", @@ -1028,13 +1036,13 @@ static const WT_CONFIG_ENTRY config_entries[] = { ",extensions=,file_extend=,file_manager=(close_handle_minimum=250" ",close_idle_time=30,close_scan_interval=10),hazard_max=1000," "log=(archive=,compressor=,enabled=0,file_max=100MB,path=," - "prealloc=,recover=on),lsm_manager=(merge=,worker_thread_max=4)," - "lsm_merge=,mmap=,multiprocess=0,session_max=100," 
- "session_scratch_max=2MB,shared_cache=(chunk=10MB,name=,quota=0," - "reserve=0,size=500MB),statistics=none,statistics_log=(on_close=0" - ",path=\"WiredTigerStat.%d.%H\",sources=," - "timestamp=\"%b %d %H:%M:%S\",wait=0),transaction_sync=(enabled=0" - ",method=fsync),verbose=", + "prealloc=,recover=on,zero_fill=0),lsm_manager=(merge=," + "worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0," + "session_max=100,session_scratch_max=2MB,shared_cache=(chunk=10MB" + ",name=,quota=0,reserve=0,size=500MB),statistics=none," + "statistics_log=(on_close=0,path=\"WiredTigerStat.%d.%H\"," + "sources=,timestamp=\"%b %d %H:%M:%S\",wait=0)," + "transaction_sync=(enabled=0,method=fsync),verbose=", confchk_wiredtiger_open_usercfg, 30 }, { NULL, NULL, NULL, 0 } diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index b5d0e8f2883..b50ad750158 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -1051,6 +1051,7 @@ __conn_reconfigure(WT_CONNECTION *wt_conn, const char *config) WT_ERR(__wt_async_reconfig(session, cfg)); WT_ERR(__wt_cache_config(session, true, cfg)); WT_ERR(__wt_checkpoint_server_create(session, cfg)); + WT_ERR(__wt_logmgr_reconfig(session, cfg)); WT_ERR(__wt_lsm_manager_reconfig(session, cfg)); WT_ERR(__wt_statlog_create(session, cfg)); WT_ERR(__wt_sweep_config(session, cfg)); @@ -2037,9 +2038,6 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, /* Start the worker threads and run recovery. */ WT_ERR(__wt_connection_workers(session, cfg)); - /* Create the lookaside table. 
*/ - WT_ERR(__wt_las_create(session)); - WT_STATIC_ASSERT(offsetof(WT_CONNECTION_IMPL, iface) == 0); *wt_connp = &conn->iface; diff --git a/src/conn/conn_cache_pool.c b/src/conn/conn_cache_pool.c index 6294e3b01a7..aa14e9aadde 100644 --- a/src/conn/conn_cache_pool.c +++ b/src/conn/conn_cache_pool.c @@ -243,6 +243,7 @@ __wt_conn_cache_pool_open(WT_SESSION_IMPL *session) WT_CACHE_POOL *cp; WT_CONNECTION_IMPL *conn; WT_DECL_RET; + uint32_t session_flags; conn = S2C(session); cache = conn->cache; @@ -252,8 +253,9 @@ __wt_conn_cache_pool_open(WT_SESSION_IMPL *session) * Create a session that can be used by the cache pool thread, do * it in the main thread to avoid shutdown races */ + session_flags = WT_SESSION_NO_DATA_HANDLES; if ((ret = __wt_open_internal_session( - conn, "cache-pool", false, false, &cache->cp_session)) != 0) + conn, "cache-pool", false, session_flags, &cache->cp_session)) != 0) WT_RET_MSG(NULL, ret, "Failed to create session for cache pool"); @@ -275,7 +277,7 @@ __wt_conn_cache_pool_open(WT_SESSION_IMPL *session) * in each connection saves having a complex election process when * the active connection shuts down. 
*/ - F_SET_ATOMIC(cp, WT_CACHE_POOL_ACTIVE); + F_SET(cp, WT_CACHE_POOL_ACTIVE); F_SET(cache, WT_CACHE_POOL_RUN); WT_RET(__wt_thread_create(session, &cache->cp_tid, __wt_cache_pool_server, cache->cp_session)); @@ -366,10 +368,10 @@ __wt_conn_cache_pool_destroy(WT_SESSION_IMPL *session) if (--cp->refs == 0) { WT_ASSERT(session, TAILQ_EMPTY(&cp->cache_pool_qh)); - F_CLR_ATOMIC(cp, WT_CACHE_POOL_ACTIVE); + F_CLR(cp, WT_CACHE_POOL_ACTIVE); } - if (!F_ISSET_ATOMIC(cp, WT_CACHE_POOL_ACTIVE)) { + if (!F_ISSET(cp, WT_CACHE_POOL_ACTIVE)) { WT_TRET(__wt_verbose( session, WT_VERB_SHARED_CACHE, "Destroying cache pool")); __wt_spin_lock(session, &__wt_process.spinlock); @@ -398,7 +400,7 @@ __wt_conn_cache_pool_destroy(WT_SESSION_IMPL *session) /* Notify other participants if we were managing */ if (F_ISSET(cache, WT_CACHE_POOL_MANAGER)) { - F_CLR_ATOMIC(cp, WT_CACHE_POOL_MANAGED); + cp->pool_managed = 0; WT_TRET(__wt_verbose(session, WT_VERB_SHARED_CACHE, "Shutting down shared cache manager connection")); } @@ -438,7 +440,7 @@ __cache_pool_balance(WT_SESSION_IMPL *session, bool forward) * - Reduce the amount allocated, if we are over the budget * - Increase the amount used if there is capacity and any pressure. */ - while (F_ISSET_ATOMIC(cp, WT_CACHE_POOL_ACTIVE) && + while (F_ISSET(cp, WT_CACHE_POOL_ACTIVE) && F_ISSET(S2C(session)->cache, WT_CACHE_POOL_RUN)) { WT_ERR(__cache_pool_adjust( session, highest, bump_threshold, forward, &adjusted)); @@ -728,7 +730,7 @@ __wt_cache_pool_server(void *arg) cache = S2C(session)->cache; forward = true; - while (F_ISSET_ATOMIC(cp, WT_CACHE_POOL_ACTIVE) && + while (F_ISSET(cp, WT_CACHE_POOL_ACTIVE) && F_ISSET(cache, WT_CACHE_POOL_RUN)) { if (cp->currently_used <= cp->size) WT_ERR(__wt_cond_wait(session, @@ -738,13 +740,12 @@ __wt_cache_pool_server(void *arg) * Re-check pool run flag - since we want to avoid getting the * lock on shutdown. 
*/ - if (!F_ISSET_ATOMIC(cp, WT_CACHE_POOL_ACTIVE) && + if (!F_ISSET(cp, WT_CACHE_POOL_ACTIVE) && F_ISSET(cache, WT_CACHE_POOL_RUN)) break; /* Try to become the managing thread */ - F_CAS_ATOMIC(cp, WT_CACHE_POOL_MANAGED, ret); - if (ret == 0) { + if (__wt_atomic_cas8(&cp->pool_managed, 0, 1)) { F_SET(cache, WT_CACHE_POOL_MANAGER); WT_ERR(__wt_verbose(session, WT_VERB_SHARED_CACHE, "Cache pool switched manager thread")); diff --git a/src/conn/conn_ckpt.c b/src/conn/conn_ckpt.c index 7fc790d5efa..caf0c3b68f0 100644 --- a/src/conn/conn_ckpt.c +++ b/src/conn/conn_ckpt.c @@ -123,22 +123,24 @@ static int __ckpt_server_start(WT_CONNECTION_IMPL *conn) { WT_SESSION_IMPL *session; + uint32_t session_flags; /* Nothing to do if the server is already running. */ if (conn->ckpt_session != NULL) return (0); F_SET(conn, WT_CONN_SERVER_CHECKPOINT); - /* The checkpoint server gets its own session. */ - WT_RET(__wt_open_internal_session( - conn, "checkpoint-server", true, true, &conn->ckpt_session)); - session = conn->ckpt_session; /* + * The checkpoint server gets its own session. + * * Checkpoint does enough I/O it may be called upon to perform slow * operations for the block manager. */ - F_SET(session, WT_SESSION_CAN_WAIT); + session_flags = WT_SESSION_CAN_WAIT; + WT_RET(__wt_open_internal_session(conn, + "checkpoint-server", true, session_flags, &conn->ckpt_session)); + session = conn->ckpt_session; WT_RET(__wt_cond_alloc( session, "checkpoint server", false, &conn->ckpt_cond)); diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c index 77e7693042b..0b364b5fd4b 100644 --- a/src/conn/conn_dhandle.c +++ b/src/conn/conn_dhandle.c @@ -678,11 +678,15 @@ __wt_conn_dhandle_discard(WT_SESSION_IMPL *session) conn = S2C(session); /* - * Close open data handles: first, everything but the metadata file - * (as closing a normal file may open and write the metadata file), - * then the metadata file. 
This function isn't called often, and I - * don't want to "know" anything about the metadata file's position on - * the list, so we do it the hard way. + * Empty the session cache: any data handles created in a connection + * method may be cached here, and we're about to close them. + */ + __wt_session_close_cache(session); + + /* + * Close open data handles: first, everything but the metadata file (as + * closing a normal file may open and write the metadata file), then + * the metadata file. */ restart: TAILQ_FOREACH(dhandle, &conn->dhqh, q) { diff --git a/src/conn/conn_log.c b/src/conn/conn_log.c index eba0a2769d6..9068e7e85a2 100644 --- a/src/conn/conn_log.c +++ b/src/conn/conn_log.c @@ -42,7 +42,8 @@ __logmgr_sync_cfg(WT_SESSION_IMPL *session, const char **cfg) * Parse and setup the logging server options. */ static int -__logmgr_config(WT_SESSION_IMPL *session, const char **cfg, bool *runp) +__logmgr_config( + WT_SESSION_IMPL *session, const char **cfg, bool *runp, bool reconfig) { WT_CONFIG_ITEM cval; WT_CONNECTION_IMPL *conn; @@ -50,22 +51,37 @@ __logmgr_config(WT_SESSION_IMPL *session, const char **cfg, bool *runp) conn = S2C(session); /* - * The logging configuration is off by default. + * If we're reconfiguring, enabled must match the already + * existing setting. + * + * If it is off and the user is turning it on, or it is on + * and the user is turning it off, return an error. */ WT_RET(__wt_config_gets(session, cfg, "log.enabled", &cval)); + if (reconfig && + ((cval.val != 0 && + !FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) || + (cval.val == 0 && + FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)))) + return (EINVAL); *runp = cval.val != 0; /* - * Setup a log path, compression and encryption even if logging is - * disabled in case we are going to print a log. + * Setup a log path and compression even if logging is disabled in case + * we are going to print a log. Only do this on creation.
Once a + * compressor or log path are set they cannot be changed. */ - conn->log_compressor = NULL; - WT_RET(__wt_config_gets_none(session, cfg, "log.compressor", &cval)); - WT_RET(__wt_compressor_config(session, &cval, &conn->log_compressor)); - - WT_RET(__wt_config_gets(session, cfg, "log.path", &cval)); - WT_RET(__wt_strndup(session, cval.str, cval.len, &conn->log_path)); - + if (!reconfig) { + conn->log_compressor = NULL; + WT_RET(__wt_config_gets_none( + session, cfg, "log.compressor", &cval)); + WT_RET(__wt_compressor_config( + session, &cval, &conn->log_compressor)); + + WT_RET(__wt_config_gets(session, cfg, "log.path", &cval)); + WT_RET(__wt_strndup( + session, cval.str, cval.len, &conn->log_path)); + } /* We are done if logging isn't enabled. */ if (!*runp) return (0); @@ -74,28 +90,56 @@ __logmgr_config(WT_SESSION_IMPL *session, const char **cfg, bool *runp) if (cval.val != 0) FLD_SET(conn->log_flags, WT_CONN_LOG_ARCHIVE); - WT_RET(__wt_config_gets(session, cfg, "log.file_max", &cval)); - conn->log_file_max = (wt_off_t)cval.val; - WT_STAT_FAST_CONN_SET(session, log_max_filesize, conn->log_file_max); + if (!reconfig) { + /* + * Ignore if the user tries to change the file size. The + * amount of memory allocated to the log slots may be based + * on the log file size at creation and we don't want to + * re-allocate that memory while running. + */ + WT_RET(__wt_config_gets(session, cfg, "log.file_max", &cval)); + conn->log_file_max = (wt_off_t)cval.val; + WT_STAT_FAST_CONN_SET(session, + log_max_filesize, conn->log_file_max); + } - WT_RET(__wt_config_gets(session, cfg, "log.prealloc", &cval)); /* - * If pre-allocation is configured, set the initial number to one. + * If pre-allocation is configured, set the initial number to a few. * We'll adapt as load dictates. 
*/ - if (cval.val != 0) { - FLD_SET(conn->log_flags, WT_CONN_LOG_PREALLOC); + WT_RET(__wt_config_gets(session, cfg, "log.prealloc", &cval)); + if (cval.val != 0) conn->log_prealloc = 1; - } + + /* + * Note that it is meaningless to reconfigure this value during + * runtime. It only matters on create before recovery runs. + */ WT_RET(__wt_config_gets_def(session, cfg, "log.recover", 0, &cval)); if (cval.len != 0 && WT_STRING_MATCH("error", cval.str, cval.len)) FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR); + WT_RET(__wt_config_gets(session, cfg, "log.zero_fill", &cval)); + if (cval.val != 0) + FLD_SET(conn->log_flags, WT_CONN_LOG_ZERO_FILL); + WT_RET(__logmgr_sync_cfg(session, cfg)); return (0); } /* + * __wt_logmgr_reconfig -- + * Reconfigure logging. + */ +int +__wt_logmgr_reconfig(WT_SESSION_IMPL *session, const char **cfg) +{ + bool dummy; + + return (__logmgr_config(session, cfg, &dummy, true)); +} + +/* * __log_archive_once -- * Perform one iteration of log archiving. Must be called with the * log archive lock held. @@ -216,7 +260,7 @@ __log_prealloc_once(WT_SESSION_IMPL *session) */ for (i = reccount; i < (u_int)conn->log_prealloc; i++) { WT_ERR(__wt_log_allocfile( - session, ++log->prep_fileid, WT_LOG_PREPNAME, true)); + session, ++log->prep_fileid, WT_LOG_PREPNAME)); WT_STAT_FAST_CONN_INCR(session, log_prealloc_files); } /* @@ -722,7 +766,7 @@ __wt_logmgr_create(WT_SESSION_IMPL *session, const char *cfg[]) conn = S2C(session); /* Handle configuration. */ - WT_RET(__logmgr_config(session, cfg, &run)); + WT_RET(__logmgr_config(session, cfg, &run, false)); /* If logging is not configured, we're done. */ if (!run) @@ -777,6 +821,7 @@ int __wt_logmgr_open(WT_SESSION_IMPL *session) { WT_CONNECTION_IMPL *conn; + uint32_t session_flags; conn = S2C(session); @@ -788,8 +833,9 @@ __wt_logmgr_open(WT_SESSION_IMPL *session) * Start the log close thread. It is not configurable. * If logging is enabled, this thread runs. 
*/ - WT_RET(__wt_open_internal_session( - conn, "log-close-server", false, false, &conn->log_file_session)); + session_flags = WT_SESSION_NO_DATA_HANDLES; + WT_RET(__wt_open_internal_session(conn, + "log-close-server", false, session_flags, &conn->log_file_session)); WT_RET(__wt_cond_alloc(conn->log_file_session, "log close server", false, &conn->log_file_cond)); @@ -804,19 +850,14 @@ __wt_logmgr_open(WT_SESSION_IMPL *session) * Start the log write LSN thread. It is not configurable. * If logging is enabled, this thread runs. */ - WT_RET(__wt_open_internal_session( - conn, "log-wrlsn-server", false, false, &conn->log_wrlsn_session)); + WT_RET(__wt_open_internal_session(conn, "log-wrlsn-server", + false, session_flags, &conn->log_wrlsn_session)); WT_RET(__wt_cond_alloc(conn->log_wrlsn_session, "log write lsn server", false, &conn->log_wrlsn_cond)); WT_RET(__wt_thread_create(conn->log_wrlsn_session, &conn->log_wrlsn_tid, __log_wrlsn_server, conn->log_wrlsn_session)); conn->log_wrlsn_tid_set = true; - /* If no log thread services are configured, we're done. */ - if (!FLD_ISSET(conn->log_flags, - (WT_CONN_LOG_ARCHIVE | WT_CONN_LOG_PREALLOC))) - return (0); - /* * If a log server thread exists, the user may have reconfigured * archiving or pre-allocation. Signal the thread. Otherwise the @@ -829,8 +870,8 @@ __wt_logmgr_open(WT_SESSION_IMPL *session) WT_RET(__wt_cond_signal(session, conn->log_cond)); } else { /* The log server gets its own session. 
*/ - WT_RET(__wt_open_internal_session( - conn, "log-server", false, false, &conn->log_session)); + WT_RET(__wt_open_internal_session(conn, + "log-server", false, session_flags, &conn->log_session)); WT_RET(__wt_cond_alloc(conn->log_session, "log server", false, &conn->log_cond)); diff --git a/src/conn/conn_open.c b/src/conn/conn_open.c index 199cf213e0a..04815c8e152 100644 --- a/src/conn/conn_open.c +++ b/src/conn/conn_open.c @@ -38,7 +38,7 @@ __wt_connection_open(WT_CONNECTION_IMPL *conn, const char *cfg[]) * need to get cleaned up on close. */ WT_RET(__wt_open_internal_session( - conn, "connection", true, false, &session)); + conn, "connection", false, 0, &session)); /* * The connection's default session is originally a static structure, @@ -228,33 +228,45 @@ int __wt_connection_workers(WT_SESSION_IMPL *session, const char *cfg[]) { /* - * Start the eviction thread. - */ - WT_RET(__wt_evict_create(session)); - - /* * Start the optional statistics thread. Start statistics first so that * other optional threads can know if statistics are enabled or not. */ WT_RET(__wt_statlog_create(session, cfg)); WT_RET(__wt_logmgr_create(session, cfg)); - /* Run recovery. */ + /* + * Run recovery. + * NOTE: This call will start (and stop) eviction if recovery is + * required. Recovery must run before the lookaside table is created + * (because recovery will update the metadata), and before eviction is + * started for real. + */ WT_RET(__wt_txn_recover(session)); + /* + * Start the optional logging/archive threads. + * NOTE: The log manager must be started before checkpoints so that the + * checkpoint server knows if logging is enabled. It must also be + * started before any operation that can commit, or the commit can + * block. + */ + WT_RET(__wt_logmgr_open(session)); + + /* Create the lookaside table. */ + WT_RET(__wt_las_create(session)); + + /* + * Start eviction threads. + * NOTE: Eviction must be started after the lookaside table is created. 
+ */ + WT_RET(__wt_evict_create(session)); + /* Start the handle sweep thread. */ WT_RET(__wt_sweep_create(session)); /* Start the optional async threads. */ WT_RET(__wt_async_create(session, cfg)); - /* - * Start the optional logging/archive thread. - * NOTE: The log manager must be started before checkpoints so that the - * checkpoint server knows if logging is enabled. - */ - WT_RET(__wt_logmgr_open(session)); - /* Start the optional checkpoint thread. */ WT_RET(__wt_checkpoint_server_create(session, cfg)); diff --git a/src/conn/conn_stat.c b/src/conn/conn_stat.c index d8c7227ae61..ec3a630581a 100644 --- a/src/conn/conn_stat.c +++ b/src/conn/conn_stat.c @@ -447,9 +447,10 @@ __statlog_start(WT_CONNECTION_IMPL *conn) return (0); F_SET(conn, WT_CONN_SERVER_STATISTICS); + /* The statistics log server gets its own session. */ WT_RET(__wt_open_internal_session( - conn, "statlog-server", true, true, &conn->stat_session)); + conn, "statlog-server", true, 0, &conn->stat_session)); session = conn->stat_session; WT_RET(__wt_cond_alloc( diff --git a/src/conn/conn_sweep.c b/src/conn/conn_sweep.c index 2de0cc12069..23846f978fe 100644 --- a/src/conn/conn_sweep.c +++ b/src/conn/conn_sweep.c @@ -353,16 +353,13 @@ int __wt_sweep_create(WT_SESSION_IMPL *session) { WT_CONNECTION_IMPL *conn; + uint32_t session_flags; conn = S2C(session); /* Set first, the thread might run before we finish up. */ F_SET(conn, WT_CONN_SERVER_SWEEP); - WT_RET(__wt_open_internal_session( - conn, "sweep-server", true, true, &conn->sweep_session)); - session = conn->sweep_session; - /* * Handle sweep does enough I/O it may be called upon to perform slow * operations for the block manager. @@ -372,8 +369,11 @@ __wt_sweep_create(WT_SESSION_IMPL *session) * * Don't tap the sweep thread for eviction. 
*/ - F_SET(session, WT_SESSION_CAN_WAIT | - WT_SESSION_LOOKASIDE_CURSOR | WT_SESSION_NO_EVICTION); + session_flags = WT_SESSION_CAN_WAIT | + WT_SESSION_LOOKASIDE_CURSOR | WT_SESSION_NO_EVICTION; + WT_RET(__wt_open_internal_session( + conn, "sweep-server", true, session_flags, &conn->sweep_session)); + session = conn->sweep_session; WT_RET(__wt_cond_alloc( session, "handle sweep server", false, &conn->sweep_cond)); diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 3c00ee30896..f9171900ca4 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -236,27 +236,35 @@ __evict_workers_resize(WT_SESSION_IMPL *session) WT_DECL_RET; WT_EVICT_WORKER *workers; size_t alloc; - uint32_t i; + uint32_t i, session_flags; conn = S2C(session); - alloc = conn->evict_workers_alloc * sizeof(*workers); - WT_RET(__wt_realloc(session, &alloc, - conn->evict_workers_max * sizeof(*workers), &conn->evict_workctx)); - workers = conn->evict_workctx; + if (conn->evict_workers_alloc < conn->evict_workers_max) { + alloc = conn->evict_workers_alloc * sizeof(*workers); + WT_RET(__wt_realloc(session, &alloc, + conn->evict_workers_max * sizeof(*workers), + &conn->evict_workctx)); + workers = conn->evict_workctx; + } for (i = conn->evict_workers_alloc; i < conn->evict_workers_max; i++) { - WT_ERR(__wt_open_internal_session(conn, - "eviction-worker", true, false, &workers[i].session)); - workers[i].id = i; - /* - * Eviction worker threads get their own lookaside table cursor. + * Eviction worker threads get their own session. * Eviction worker threads may be called upon to perform slow * operations for the block manager. + * + * Eviction worker threads get their own lookaside table cursor + * if the lookaside table is open. Note that eviction is also + * started during recovery, before the lookaside table is + * created. 
*/ - F_SET(workers[i].session, - WT_SESSION_LOOKASIDE_CURSOR | WT_SESSION_CAN_WAIT); + session_flags = WT_SESSION_CAN_WAIT; + if (F_ISSET(conn, WT_CONN_LAS_OPEN)) + FLD_SET(session_flags, WT_SESSION_LOOKASIDE_CURSOR); + WT_ERR(__wt_open_internal_session(conn, "eviction-worker", + false, session_flags, &workers[i].session)); + workers[i].id = i; if (i < conn->evict_workers_min) { ++conn->evict_workers; @@ -278,33 +286,37 @@ int __wt_evict_create(WT_SESSION_IMPL *session) { WT_CONNECTION_IMPL *conn; + uint32_t session_flags; conn = S2C(session); /* Set first, the thread might run before we finish up. */ F_SET(conn, WT_CONN_EVICTION_RUN); - /* We need a session handle because we're reading/writing pages. */ - WT_RET(__wt_open_internal_session( - conn, "eviction-server", true, false, &conn->evict_session)); + /* + * We need a session handle because we're reading/writing pages. + * + * The eviction server gets its own lookaside table cursor. + * + * If there's only a single eviction thread, it may be called upon to + * perform slow operations for the block manager. (The flag is not + * reset if reconfigured later, but I doubt that's a problem.) + */ + session_flags = F_ISSET(conn, WT_CONN_LAS_OPEN) ? + WT_SESSION_LOOKASIDE_CURSOR : 0; + if (conn->evict_workers_max == 0) + FLD_SET(session_flags, WT_SESSION_CAN_WAIT); + WT_RET(__wt_open_internal_session(conn, + "eviction-server", false, session_flags, &conn->evict_session)); session = conn->evict_session; /* * If eviction workers were configured, allocate sessions for them now. * This is done to reduce the chance that we will open new eviction * sessions after WT_CONNECTION::close is called. - * - * If there's only a single eviction thread, it may be called upon to - * perform slow operations for the block manager. (The flag is not - * reset if reconfigured later, but I doubt that's a problem.) 
*/ if (conn->evict_workers_max > 0) WT_RET(__evict_workers_resize(session)); - else - F_SET(session, WT_SESSION_CAN_WAIT); - - /* The eviction server gets its own lookaside table cursor. */ - F_SET(session, WT_SESSION_LOOKASIDE_CURSOR); /* * Start the primary eviction server thread after the worker threads @@ -358,6 +370,8 @@ __wt_evict_destroy(WT_SESSION_IMPL *session) WT_TRET(__wt_cond_signal(session, cache->evict_waiter_cond)); WT_TRET(__wt_thread_join(session, workers[i].tid)); } + conn->evict_workers = 0; + /* Handle shutdown when cleaning up after a failed open. */ if (conn->evict_workctx != NULL) { for (i = 0; i < conn->evict_workers_alloc; i++) { @@ -367,6 +381,7 @@ __wt_evict_destroy(WT_SESSION_IMPL *session) } __wt_free(session, conn->evict_workctx); } + conn->evict_workers_alloc = 0; if (conn->evict_session != NULL) { wt_session = &conn->evict_session->iface; @@ -1457,15 +1472,12 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full) WT_DECL_RET; WT_TXN_GLOBAL *txn_global; WT_TXN_STATE *txn_state; - int count; - bool q_found, txn_busy; + uint64_t init_evict_count, max_pages_evicted; + bool txn_busy; conn = S2C(session); cache = conn->cache; - /* First, wake the eviction server. */ - WT_RET(__wt_evict_server_wake(session)); - /* * If the current transaction is keeping the oldest ID pinned, it is in * the middle of an operation. This may prevent the oldest ID from @@ -1479,11 +1491,15 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full) session->nhazard > 0 || (txn_state->snap_min != WT_TXN_NONE && txn_global->current != txn_global->oldest_id); - if (txn_busy) { - if (pct_full < 100) - return (0); - busy = true; - } + + if (txn_busy && pct_full < 100) + return (0); + + if (busy == 1) + txn_busy = 1; + + /* Wake the eviction server if we need to do work. 
*/ + WT_RET(__wt_evict_server_wake(session)); /* * If we're busy, either because of the transaction check we just did, @@ -1491,9 +1507,11 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full) * as a page read), limit the work to a single eviction and return. If * that's not the case, we can do more. */ - count = busy ? 1 : 10; + init_evict_count = cache->pages_evict; for (;;) { + max_pages_evicted = txn_busy ? 5 : 20; + /* * A pathological case: if we're the oldest transaction in the * system and the eviction server is stuck trying to find space, @@ -1507,43 +1525,34 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full) return (WT_ROLLBACK); } + /* See if eviction is still needed. */ + if (!__wt_eviction_needed(session, NULL) || + cache->pages_evict > init_evict_count + max_pages_evicted) + return (0); + /* Evict a page. */ - q_found = false; switch (ret = __evict_page(session, false)) { case 0: cache->app_evicts++; - if (--count == 0) + if (txn_busy) return (0); - - q_found = true; - break; + /* FALLTHROUGH */ case EBUSY: - continue; + break; case WT_NOTFOUND: + /* Allow the queue to re-populate before retrying. */ + WT_RET(__wt_cond_wait( + session, cache->evict_waiter_cond, 100000)); + cache->app_waits++; break; default: return (ret); } - /* See if eviction is still needed. */ - if (!__wt_eviction_needed(session, NULL)) - return (0); - - /* If we found pages in the eviction queue, continue there. */ - if (q_found) - continue; - - /* Wait for the queue to re-populate before trying again. */ - WT_RET( - __wt_cond_wait(session, cache->evict_waiter_cond, 100000)); - - cache->app_waits++; - /* Check if things have changed so that we are busy. */ - if (!busy && txn_state->snap_min != WT_TXN_NONE && - txn_global->current != txn_global->oldest_id) { - busy = true; - count = 1; - } + /* Check if we have become busy. 
*/ + if (!txn_busy && txn_state->snap_min != WT_TXN_NONE && + txn_global->current != txn_global->oldest_id) + txn_busy = true; } /* NOTREACHED */ } diff --git a/src/include/btmem.h b/src/include/btmem.h index 0302533bb04..41b2c98f9e8 100644 --- a/src/include/btmem.h +++ b/src/include/btmem.h @@ -578,8 +578,7 @@ struct __wt_page { #define WT_PAGE_DISK_MAPPED 0x04 /* Disk image in mapped memory */ #define WT_PAGE_EVICT_LRU 0x08 /* Page is on the LRU queue */ #define WT_PAGE_OVERFLOW_KEYS 0x10 /* Page has overflow keys */ -#define WT_PAGE_RECONCILIATION 0x20 /* Page reconciliation lock */ -#define WT_PAGE_SPLIT_INSERT 0x40 /* A leaf page was split for append */ +#define WT_PAGE_SPLIT_INSERT 0x20 /* A leaf page was split for append */ uint8_t flags_atomic; /* Atomic flags, use F_*_ATOMIC */ /* @@ -603,6 +602,12 @@ struct __wt_page { #define WT_READGEN_STEP 100 uint64_t read_gen; + /* + * Used to protect and co-ordinate splits for internal pages and + * reconciliation for all pages. + */ + WT_FAIR_LOCK page_lock; + size_t memory_footprint; /* Memory attached to the page */ /* Page's on-disk representation: NULL for pages created in memory. */ diff --git a/src/include/btree.i b/src/include/btree.i index c7466019e14..14b5303cca9 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -977,7 +977,8 @@ __wt_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page) WT_BTREE *btree; WT_INSERT_HEAD *ins_head; WT_INSERT *ins; - int i; + size_t size; + int count; btree = S2BT(session); @@ -1007,25 +1008,36 @@ __wt_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page) return (false); /* - * There is no point splitting if the list is small, no deep items is - * our heuristic for that. A 1/4 probability of adding a new skiplist - * level, with level-0 always created, means there will be a 5th level - * entry for roughly every 1024 entries in the list. If there are at - * least 4 5th level entries (4K items), the list is large enough. 
+ * There is no point doing an in-memory split unless there is a lot of + * data in the last skiplist on the page. Split if there are enough + * items and the skiplist does not fit within a single disk page. + * + * Rather than scanning the whole list, walk a higher level, which + * gives a sample of the items -- at level 0 we have all the items, at + * level 1 we have 1/4 and at level 2 we have 1/16th. If we see more + * than 30 items and more data than would fit in a disk page, split. */ -#define WT_MIN_SPLIT_SKIPLIST_DEPTH WT_MIN(5, WT_SKIP_MAXDEPTH - 1) +#define WT_MIN_SPLIT_DEPTH 2 +#define WT_MIN_SPLIT_COUNT 30 +#define WT_MIN_SPLIT_MULTIPLIER 16 /* At level 2, we see 1/16th entries */ + ins_head = page->pg_row_entries == 0 ? WT_ROW_INSERT_SMALLEST(page) : WT_ROW_INSERT_SLOT(page, page->pg_row_entries - 1); if (ins_head == NULL) return (false); - for (i = 0, ins = ins_head->head[WT_MIN_SPLIT_SKIPLIST_DEPTH]; - ins != NULL; ins = ins->next[WT_MIN_SPLIT_SKIPLIST_DEPTH]) - if (++i == 4) { + for (count = 0, size = 0, ins = ins_head->head[WT_MIN_SPLIT_DEPTH]; + ins != NULL; ins = ins->next[WT_MIN_SPLIT_DEPTH]) { + count += WT_MIN_SPLIT_MULTIPLIER; + size += WT_MIN_SPLIT_MULTIPLIER * + (WT_INSERT_KEY_SIZE(ins) + WT_UPDATE_MEMSIZE(ins->upd)); + if (count > WT_MIN_SPLIT_COUNT && + size > (size_t)btree->maxleafpage) { WT_STAT_FAST_CONN_INCR(session, cache_inmem_splittable); WT_STAT_FAST_DATA_INCR(session, cache_inmem_splittable); return (true); } + } return (false); } diff --git a/src/include/cache.h b/src/include/cache.h index f199372ea5e..caf8996e68b 100644 --- a/src/include/cache.h +++ b/src/include/cache.h @@ -151,7 +151,8 @@ struct __wt_cache_pool { /* Locked: List of connections participating in the cache pool. 
*/ TAILQ_HEAD(__wt_cache_pool_qh, __wt_connection_impl) cache_pool_qh; -#define WT_CACHE_POOL_MANAGED 0x01 /* Cache pool has a manager thread */ -#define WT_CACHE_POOL_ACTIVE 0x02 /* Cache pool is active */ - uint8_t flags_atomic; + uint8_t pool_managed; /* Cache pool has a manager thread */ + +#define WT_CACHE_POOL_ACTIVE 0x01 /* Cache pool is active */ + uint8_t flags; }; diff --git a/src/include/connection.h b/src/include/connection.h index 2c20c2f7936..2dfb24a83da 100644 --- a/src/include/connection.h +++ b/src/include/connection.h @@ -336,12 +336,12 @@ struct __wt_connection_impl { const char *stat_stamp; /* Statistics log entry timestamp */ uint64_t stat_usecs; /* Statistics log period */ -#define WT_CONN_LOG_ARCHIVE 0x01 /* Archive is enabled */ -#define WT_CONN_LOG_ENABLED 0x02 /* Logging is enabled */ -#define WT_CONN_LOG_EXISTED 0x04 /* Log files found */ -#define WT_CONN_LOG_PREALLOC 0x08 /* Pre-allocation is enabled */ -#define WT_CONN_LOG_RECOVER_DONE 0x10 /* Recovery completed */ -#define WT_CONN_LOG_RECOVER_ERR 0x20 /* Error if recovery required */ +#define WT_CONN_LOG_ARCHIVE 0x01 /* Archive is enabled */ +#define WT_CONN_LOG_ENABLED 0x02 /* Logging is enabled */ +#define WT_CONN_LOG_EXISTED 0x04 /* Log files found */ +#define WT_CONN_LOG_RECOVER_DONE 0x08 /* Recovery completed */ +#define WT_CONN_LOG_RECOVER_ERR 0x10 /* Error if recovery required */ +#define WT_CONN_LOG_ZERO_FILL 0x20 /* Manually zero files */ uint32_t log_flags; /* Global logging configuration */ WT_CONDVAR *log_cond; /* Log server wait mutex */ WT_SESSION_IMPL *log_session; /* Log server session */ @@ -377,7 +377,6 @@ struct __wt_connection_impl { */ WT_SPINLOCK las_lock; /* Lookaside table spinlock */ WT_SESSION_IMPL *las_session; /* Lookaside table session */ - WT_CURSOR *las_cursor; /* Lookaside table cursor */ bool las_written; /* Lookaside table has been written */ WT_ITEM las_sweep_key; /* Sweep server's saved key */ diff --git a/src/include/extern.h b/src/include/extern.h 
index cfc1dc8f26e..1f63f07646e 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -186,6 +186,7 @@ extern int __wt_las_create(WT_SESSION_IMPL *session); extern int __wt_las_destroy(WT_SESSION_IMPL *session); extern void __wt_las_set_written(WT_SESSION_IMPL *session); extern bool __wt_las_is_written(WT_SESSION_IMPL *session); +extern int __wt_las_cursor_create(WT_SESSION_IMPL *session, WT_CURSOR **cursorp); extern int __wt_las_cursor( WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t *session_flags); extern int __wt_las_cursor_close( WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t session_flags); extern int __wt_las_sweep(WT_SESSION_IMPL *session); @@ -246,6 +247,7 @@ extern int __wt_conn_dhandle_discard_single( WT_SESSION_IMPL *session, bool fina extern int __wt_conn_dhandle_discard(WT_SESSION_IMPL *session); extern int __wt_connection_init(WT_CONNECTION_IMPL *conn); extern int __wt_connection_destroy(WT_CONNECTION_IMPL *conn); +extern int __wt_logmgr_reconfig(WT_SESSION_IMPL *session, const char **cfg); extern int __wt_log_truncate_files( WT_SESSION_IMPL *session, WT_CURSOR *cursor, const char *cfg[]); extern int __wt_log_wrlsn(WT_SESSION_IMPL *session); extern int __wt_logmgr_create(WT_SESSION_IMPL *session, const char *cfg[]); @@ -335,7 +337,7 @@ extern int __wt_log_get_all_files(WT_SESSION_IMPL *session, char ***filesp, u_in extern void __wt_log_files_free(WT_SESSION_IMPL *session, char **files, u_int count); extern int __wt_log_extract_lognum( WT_SESSION_IMPL *session, const char *name, uint32_t *id); extern int __wt_log_acquire(WT_SESSION_IMPL *session, uint64_t recsize, WT_LOGSLOT *slot); -extern int __wt_log_allocfile( WT_SESSION_IMPL *session, uint32_t lognum, const char *dest, bool prealloc); +extern int __wt_log_allocfile( WT_SESSION_IMPL *session, uint32_t lognum, const char *dest); extern int __wt_log_remove(WT_SESSION_IMPL *session, const char *file_prefix, uint32_t lognum); extern int __wt_log_open(WT_SESSION_IMPL 
*session); extern int __wt_log_close(WT_SESSION_IMPL *session); @@ -592,8 +594,8 @@ extern int __wt_session_copy_values(WT_SESSION_IMPL *session); extern int __wt_open_cursor(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp); extern int __wt_session_create( WT_SESSION_IMPL *session, const char *uri, const char *config); extern int __wt_session_drop(WT_SESSION_IMPL *session, const char *uri, const char *cfg[]); -extern int __wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name, bool uses_dhandles, bool open_metadata, WT_SESSION_IMPL **sessionp); extern int __wt_open_session(WT_CONNECTION_IMPL *conn, WT_EVENT_HANDLER *event_handler, const char *config, bool open_metadata, WT_SESSION_IMPL **sessionp); +extern int __wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name, bool open_metadata, uint32_t session_flags, WT_SESSION_IMPL **sessionp); extern int __wt_compact_uri_analyze(WT_SESSION_IMPL *session, const char *uri, bool *skipp); extern int __wt_session_compact( WT_SESSION *wt_session, const char *uri, const char *config); extern int __wt_session_lock_dhandle( WT_SESSION_IMPL *session, uint32_t flags, bool *is_deadp); diff --git a/src/include/flags.h b/src/include/flags.h index ca3c3c38245..24dccd30913 100644 --- a/src/include/flags.h +++ b/src/include/flags.h @@ -6,17 +6,18 @@ #define WT_CONN_CKPT_SYNC 0x00000002 #define WT_CONN_CLOSING 0x00000004 #define WT_CONN_EVICTION_RUN 0x00000008 -#define WT_CONN_LEAK_MEMORY 0x00000010 -#define WT_CONN_LOG_SERVER_RUN 0x00000020 -#define WT_CONN_LSM_MERGE 0x00000040 -#define WT_CONN_PANIC 0x00000080 -#define WT_CONN_SERVER_ASYNC 0x00000100 -#define WT_CONN_SERVER_CHECKPOINT 0x00000200 -#define WT_CONN_SERVER_LSM 0x00000400 -#define WT_CONN_SERVER_RUN 0x00000800 -#define WT_CONN_SERVER_STATISTICS 0x00001000 -#define WT_CONN_SERVER_SWEEP 0x00002000 -#define WT_CONN_WAS_BACKUP 0x00004000 +#define WT_CONN_LAS_OPEN 0x00000010 +#define 
WT_CONN_LEAK_MEMORY 0x00000020 +#define WT_CONN_LOG_SERVER_RUN 0x00000040 +#define WT_CONN_LSM_MERGE 0x00000080 +#define WT_CONN_PANIC 0x00000100 +#define WT_CONN_SERVER_ASYNC 0x00000200 +#define WT_CONN_SERVER_CHECKPOINT 0x00000400 +#define WT_CONN_SERVER_LSM 0x00000800 +#define WT_CONN_SERVER_RUN 0x00001000 +#define WT_CONN_SERVER_STATISTICS 0x00002000 +#define WT_CONN_SERVER_SWEEP 0x00004000 +#define WT_CONN_WAS_BACKUP 0x00008000 #define WT_EVICTING 0x00000001 #define WT_EVICT_LOOKASIDE 0x00000002 #define WT_EVICT_UPDATE_RESTORE 0x00000004 diff --git a/src/include/hardware.h b/src/include/hardware.h index 32353072c5b..1ab2c3d39c4 100644 --- a/src/include/hardware.h +++ b/src/include/hardware.h @@ -37,29 +37,6 @@ &(p)->flags_atomic, __orig, __orig | (uint8_t)(mask))); \ } while (0) -#define F_CAS_ATOMIC(p, mask, ret) do { \ - uint8_t __orig; \ - ret = 0; \ - do { \ - __orig = (p)->flags_atomic; \ - if ((__orig & (uint8_t)(mask)) != 0) { \ - ret = EBUSY; \ - break; \ - } \ - } while (!__wt_atomic_cas8( \ - &(p)->flags_atomic, __orig, __orig | (uint8_t)(mask))); \ -} while (0) - -#define F_CAS_ATOMIC_WAIT(p, mask) do { \ - int __ret; \ - for (;;) { \ - F_CAS_ATOMIC(p, mask, __ret); \ - if (__ret == 0) \ - break; \ - __wt_yield(); \ - } \ -} while (0) - #define F_CLR_ATOMIC(p, mask) do { \ uint8_t __orig; \ do { \ diff --git a/src/include/mutex.h b/src/include/mutex.h index 1f1bb8f4b5c..b67e5e610e8 100644 --- a/src/include/mutex.h +++ b/src/include/mutex.h @@ -52,6 +52,24 @@ struct __wt_rwlock { }; /* + * A light weight lock that can be used to replace spinlocks if fairness is + * necessary. Implements a ticket-based back off spin lock. + * The fields are available as a union to allow for atomically setting + * the state of the entire lock. 
+ */ +struct __wt_fair_lock { + union { + uint32_t lock; + struct { + uint16_t owner; /* Ticket for current owner */ + uint16_t waiter; /* Last allocated ticket */ + } s; + } u; +#define fair_lock_owner u.s.owner +#define fair_lock_waiter u.s.waiter +}; + +/* * Spin locks: * * WiredTiger uses spinlocks for fast mutual exclusion (where operations done diff --git a/src/include/mutex.i b/src/include/mutex.i index 5ea4583a2ab..54a9cc6f9fd 100644 --- a/src/include/mutex.i +++ b/src/include/mutex.i @@ -251,3 +251,91 @@ __wt_spin_unlock(WT_SESSION_IMPL *session, WT_SPINLOCK *t) #error Unknown spinlock type #endif + +/* + * __wt_fair_trylock -- + * Try to get a lock - give up if it is not immediately available. + */ +static inline int +__wt_fair_trylock(WT_SESSION_IMPL *session, WT_FAIR_LOCK *lock) +{ + WT_FAIR_LOCK new, old; + + WT_UNUSED(session); + + old = new = *lock; + + /* Exit early if there is no chance we can get the lock. */ + if (old.fair_lock_waiter != old.fair_lock_owner) + return (EBUSY); + + /* The replacement lock value is a result of allocating a new ticket. */ + ++new.fair_lock_waiter; + return (__wt_atomic_cas32( + &lock->u.lock, old.u.lock, new.u.lock) ? 0 : EBUSY); +} + +/* + * __wt_fair_lock -- + * Get a lock. + */ +static inline int +__wt_fair_lock(WT_SESSION_IMPL *session, WT_FAIR_LOCK *lock) +{ + uint16_t ticket; + int pause_cnt; + + WT_UNUSED(session); + + /* + * Possibly wrap: if we have more than 64K lockers waiting, the ticket + * value will wrap and two lockers will simultaneously be granted the + * lock. + */ + ticket = __wt_atomic_fetch_add16(&lock->fair_lock_waiter, 1); + for (pause_cnt = 0; ticket != lock->fair_lock_owner;) { + /* + * We failed to get the lock; pause before retrying and if we've + * paused enough, sleep so we don't burn CPU to no purpose. This + * situation happens if there are more threads than cores in the + * system and we're thrashing on shared resources. 
+ */ + if (++pause_cnt < 1000) + WT_PAUSE(); + else + __wt_sleep(0, 10); + } + + return (0); +} + +/* + * __wt_fair_unlock -- + * Release a shared lock. + */ +static inline int +__wt_fair_unlock(WT_SESSION_IMPL *session, WT_FAIR_LOCK *lock) +{ + WT_UNUSED(session); + + /* + * We have exclusive access - the update does not need to be atomic. + */ + ++lock->fair_lock_owner; + + return (0); +} + +#ifdef HAVE_DIAGNOSTIC +/* + * __wt_fair_islocked -- + * Test whether the lock is currently held + */ +static inline bool +__wt_fair_islocked(WT_SESSION_IMPL *session, WT_FAIR_LOCK *lock) +{ + WT_UNUSED(session); + + return (lock->fair_lock_waiter != lock->fair_lock_owner); +} +#endif diff --git a/src/include/serial.i b/src/include/serial.i index 5358b874c06..ca22ce12d81 100644 --- a/src/include/serial.i +++ b/src/include/serial.i @@ -316,12 +316,11 @@ __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page, } /* If we can't lock it, don't scan, that's okay. */ - F_CAS_ATOMIC(page, WT_PAGE_RECONCILIATION, ret); - if (ret != 0) + if (__wt_fair_trylock(session, &page->page_lock) != 0) return (0); obsolete = __wt_update_obsolete_check(session, page, upd->next); - F_CLR_ATOMIC(page, WT_PAGE_RECONCILIATION); + WT_RET(__wt_fair_unlock(session, &page->page_lock)); if (obsolete != NULL) __wt_update_obsolete_free(session, page, obsolete); diff --git a/src/include/stat.h b/src/include/stat.h index 3f7d8985a84..1ebe253e5db 100644 --- a/src/include/stat.h +++ b/src/include/stat.h @@ -338,6 +338,7 @@ struct __wt_connection_stats { int64_t log_sync_dir; int64_t log_write_lsn; int64_t log_writes; + int64_t log_zero_fills; int64_t lsm_checkpoint_throttle; int64_t lsm_merge_throttle; int64_t lsm_rows_merged; diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index 9078a0e2e99..b7ebb8fbc14 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -1750,6 +1750,33 @@ struct __wt_connection { * seconds at which to check for files that are inactive and close * 
them., an integer between 1 and 100000; default \c 10.} * @config{ ),,} + * @config{log = (, enable logging. Enabling logging uses three + * sessions from the configured session_max., a set of related + * configuration options defined below.} + * @config{ archive, automatically archive + * unneeded log files., a boolean flag; default \c true.} + * @config{ compressor, configure a compressor + * for log records. Permitted values are \c "none" or custom + * compression engine name created with WT_CONNECTION::add_compressor. + * If WiredTiger has builtin support for \c "bzip2"\, \c "snappy"\, \c + * "lz4" or \c "zlib" compression\, these names are also available. See + * @ref compression for more information., a string; default \c none.} + * @config{ enabled, enable logging subsystem., a + * boolean flag; default \c false.} + * @config{ file_max, the maximum size of log + * files., an integer between 100KB and 2GB; default \c 100MB.} + * @config{ path, the path to a directory into + * which the log files are written. If the value is not an absolute + * path name\, the files are created relative to the database home., a + * string; default empty.} + * @config{ prealloc, + * pre-allocate log files., a boolean flag; default \c true.} + * @config{ recover, run recovery or error if + * recovery needs to run after an unclean shutdown., a string\, chosen + * from the following options: \c "error"\, \c "on"; default \c on.} + * @config{ zero_fill, manually write zeroes into + * log files., a boolean flag; default \c false.} + * @config{ ),,} * @config{lsm_manager = (, configure database wide options for LSM tree * management. The LSM manager is started automatically the first time * an LSM tree is opened. 
The LSM manager uses a session from the @@ -2212,6 +2239,8 @@ struct __wt_connection { * @config{ recover, run recovery * or error if recovery needs to run after an unclean shutdown., a string\, * chosen from the following options: \c "error"\, \c "on"; default \c on.} + * @config{ zero_fill, manually write zeroes into log + * files., a boolean flag; default \c false.} * @config{ ),,} * @config{lsm_manager = (, configure database wide options for LSM tree * management. The LSM manager is started automatically the first time an LSM @@ -3793,90 +3822,92 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_LOG_WRITE_LSN 1109 /*! log: log write operations */ #define WT_STAT_CONN_LOG_WRITES 1110 +/*! log: log files manually zero-filled */ +#define WT_STAT_CONN_LOG_ZERO_FILLS 1111 /*! LSM: sleep for LSM checkpoint throttle */ -#define WT_STAT_CONN_LSM_CHECKPOINT_THROTTLE 1111 +#define WT_STAT_CONN_LSM_CHECKPOINT_THROTTLE 1112 /*! LSM: sleep for LSM merge throttle */ -#define WT_STAT_CONN_LSM_MERGE_THROTTLE 1112 +#define WT_STAT_CONN_LSM_MERGE_THROTTLE 1113 /*! LSM: rows merged in an LSM tree */ -#define WT_STAT_CONN_LSM_ROWS_MERGED 1113 +#define WT_STAT_CONN_LSM_ROWS_MERGED 1114 /*! LSM: application work units currently queued */ -#define WT_STAT_CONN_LSM_WORK_QUEUE_APP 1114 +#define WT_STAT_CONN_LSM_WORK_QUEUE_APP 1115 /*! LSM: merge work units currently queued */ -#define WT_STAT_CONN_LSM_WORK_QUEUE_MANAGER 1115 +#define WT_STAT_CONN_LSM_WORK_QUEUE_MANAGER 1116 /*! LSM: tree queue hit maximum */ -#define WT_STAT_CONN_LSM_WORK_QUEUE_MAX 1116 +#define WT_STAT_CONN_LSM_WORK_QUEUE_MAX 1117 /*! LSM: switch work units currently queued */ -#define WT_STAT_CONN_LSM_WORK_QUEUE_SWITCH 1117 +#define WT_STAT_CONN_LSM_WORK_QUEUE_SWITCH 1118 /*! LSM: tree maintenance operations scheduled */ -#define WT_STAT_CONN_LSM_WORK_UNITS_CREATED 1118 +#define WT_STAT_CONN_LSM_WORK_UNITS_CREATED 1119 /*! 
LSM: tree maintenance operations discarded */ -#define WT_STAT_CONN_LSM_WORK_UNITS_DISCARDED 1119 +#define WT_STAT_CONN_LSM_WORK_UNITS_DISCARDED 1120 /*! LSM: tree maintenance operations executed */ -#define WT_STAT_CONN_LSM_WORK_UNITS_DONE 1120 +#define WT_STAT_CONN_LSM_WORK_UNITS_DONE 1121 /*! connection: memory allocations */ -#define WT_STAT_CONN_MEMORY_ALLOCATION 1121 +#define WT_STAT_CONN_MEMORY_ALLOCATION 1122 /*! connection: memory frees */ -#define WT_STAT_CONN_MEMORY_FREE 1122 +#define WT_STAT_CONN_MEMORY_FREE 1123 /*! connection: memory re-allocations */ -#define WT_STAT_CONN_MEMORY_GROW 1123 +#define WT_STAT_CONN_MEMORY_GROW 1124 /*! thread-yield: page acquire busy blocked */ -#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1124 +#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1125 /*! thread-yield: page acquire eviction blocked */ -#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1125 +#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1126 /*! thread-yield: page acquire locked blocked */ -#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1126 +#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1127 /*! thread-yield: page acquire read blocked */ -#define WT_STAT_CONN_PAGE_READ_BLOCKED 1127 +#define WT_STAT_CONN_PAGE_READ_BLOCKED 1128 /*! thread-yield: page acquire time sleeping (usecs) */ -#define WT_STAT_CONN_PAGE_SLEEP 1128 +#define WT_STAT_CONN_PAGE_SLEEP 1129 /*! connection: total read I/Os */ -#define WT_STAT_CONN_READ_IO 1129 +#define WT_STAT_CONN_READ_IO 1130 /*! reconciliation: page reconciliation calls */ -#define WT_STAT_CONN_REC_PAGES 1130 +#define WT_STAT_CONN_REC_PAGES 1131 /*! reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_CONN_REC_PAGES_EVICTION 1131 +#define WT_STAT_CONN_REC_PAGES_EVICTION 1132 /*! reconciliation: split bytes currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1132 +#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1133 /*! 
reconciliation: split objects currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1133 +#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1134 /*! connection: pthread mutex shared lock read-lock calls */ -#define WT_STAT_CONN_RWLOCK_READ 1134 +#define WT_STAT_CONN_RWLOCK_READ 1135 /*! connection: pthread mutex shared lock write-lock calls */ -#define WT_STAT_CONN_RWLOCK_WRITE 1135 +#define WT_STAT_CONN_RWLOCK_WRITE 1136 /*! session: open cursor count */ -#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1136 +#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1137 /*! session: open session count */ -#define WT_STAT_CONN_SESSION_OPEN 1137 +#define WT_STAT_CONN_SESSION_OPEN 1138 /*! transaction: transaction begins */ -#define WT_STAT_CONN_TXN_BEGIN 1138 +#define WT_STAT_CONN_TXN_BEGIN 1139 /*! transaction: transaction checkpoints */ -#define WT_STAT_CONN_TXN_CHECKPOINT 1139 +#define WT_STAT_CONN_TXN_CHECKPOINT 1140 /*! transaction: transaction checkpoint generation */ -#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1140 +#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1141 /*! transaction: transaction checkpoint currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1141 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1142 /*! transaction: transaction checkpoint max time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1142 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1143 /*! transaction: transaction checkpoint min time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1143 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1144 /*! transaction: transaction checkpoint most recent time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1144 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1145 /*! transaction: transaction checkpoint total time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1145 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1146 /*! 
transaction: transactions committed */ -#define WT_STAT_CONN_TXN_COMMIT 1146 +#define WT_STAT_CONN_TXN_COMMIT 1147 /*! transaction: transaction failures due to cache overflow */ -#define WT_STAT_CONN_TXN_FAIL_CACHE 1147 +#define WT_STAT_CONN_TXN_FAIL_CACHE 1148 /*! transaction: transaction range of IDs currently pinned by a checkpoint */ -#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1148 +#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1149 /*! transaction: transaction range of IDs currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_RANGE 1149 +#define WT_STAT_CONN_TXN_PINNED_RANGE 1150 /*! transaction: transactions rolled back */ -#define WT_STAT_CONN_TXN_ROLLBACK 1150 +#define WT_STAT_CONN_TXN_ROLLBACK 1151 /*! transaction: transaction sync calls */ -#define WT_STAT_CONN_TXN_SYNC 1151 +#define WT_STAT_CONN_TXN_SYNC 1152 /*! connection: total write I/Os */ -#define WT_STAT_CONN_WRITE_IO 1152 +#define WT_STAT_CONN_WRITE_IO 1153 /*! * @} diff --git a/src/include/wt_internal.h b/src/include/wt_internal.h index 4d46a25b63c..3f4e0ada7f1 100644 --- a/src/include/wt_internal.h +++ b/src/include/wt_internal.h @@ -164,6 +164,8 @@ struct __wt_ext; typedef struct __wt_ext WT_EXT; struct __wt_extlist; typedef struct __wt_extlist WT_EXTLIST; +struct __wt_fair_lock; + typedef struct __wt_fair_lock WT_FAIR_LOCK; struct __wt_fh; typedef struct __wt_fh WT_FH; struct __wt_hazard; diff --git a/src/log/log.c b/src/log/log.c index ca0b81c4cf6..efe4d22eeca 100644 --- a/src/log/log.c +++ b/src/log/log.c @@ -357,6 +357,67 @@ __wt_log_extract_lognum( } /* + * __log_zero -- + * Zero a log file. 
+ */ +static int +__log_zero(WT_SESSION_IMPL *session, + WT_FH *fh, wt_off_t start_off, wt_off_t len) +{ + WT_CONNECTION_IMPL *conn; + WT_DECL_ITEM(zerobuf); + WT_DECL_RET; + WT_LOG *log; + uint32_t allocsize, bufsz, off, partial, wrlen; + + conn = S2C(session); + log = conn->log; + allocsize = log->allocsize; + zerobuf = NULL; + if (allocsize < WT_MEGABYTE) + bufsz = WT_MEGABYTE; + else + bufsz = allocsize; + /* + * If they're using smaller log files, cap it at the file size. + */ + if (conn->log_file_max < bufsz) + bufsz = (uint32_t)conn->log_file_max; + WT_RET(__wt_scr_alloc(session, bufsz, &zerobuf)); + memset(zerobuf->mem, 0, zerobuf->memsize); + WT_STAT_FAST_CONN_INCR(session, log_zero_fills); + + /* + * Read in a chunk starting at the end of the file. Keep going until + * we reach the beginning or we find a chunk that contains any non-zero + * bytes. Compare against a known zero byte chunk. + */ + off = (uint32_t)start_off; + while (off < (uint32_t)len) { + /* + * Typically we start to zero the file after the log header + * and the bufsz is a sector-aligned size. So we want to + * align our writes when we can. + */ + partial = off % bufsz; + if (partial != 0) + wrlen = bufsz - partial; + else + wrlen = bufsz; + /* + * Check if we're writing a partial amount at the end too. + */ + if ((uint32_t)len - off < bufsz) + wrlen = (uint32_t)len - off; + WT_ERR(__wt_write(session, + fh, (wt_off_t)off, wrlen, zerobuf->mem)); + off += wrlen; + } +err: __wt_scr_free(session, &zerobuf); + return (ret); +} + +/* * __log_prealloc -- * Pre-allocate a log file. */ @@ -370,7 +431,15 @@ __log_prealloc(WT_SESSION_IMPL *session, WT_FH *fh) conn = S2C(session); log = conn->log; ret = 0; - if (fh->fallocate_available == WT_FALLOCATE_NOT_AVAILABLE || + /* + * If the user configured zero filling, pre-allocate the log file + * manually. Otherwise use either fallocate or ftruncate to create + * and zero the log file based on what is available. 
+ */ + if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ZERO_FILL)) + ret = __log_zero(session, fh, + WT_LOG_FIRST_RECORD, conn->log_file_max); + else if (fh->fallocate_available == WT_FALLOCATE_NOT_AVAILABLE || (ret = __wt_fallocate(session, fh, WT_LOG_FIRST_RECORD, conn->log_file_max)) == ENOTSUP) ret = __wt_ftruncate(session, fh, @@ -753,7 +822,7 @@ __log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created) if (create_log) { log->prep_missed++; WT_RET(__wt_log_allocfile( - session, log->fileid, WT_LOG_FILENAME, true)); + session, log->fileid, WT_LOG_FILENAME)); } WT_RET(__log_openfile(session, false, &log->log_fh, WT_LOG_FILENAME, log->fileid)); @@ -904,7 +973,7 @@ err: WT_TRET(__wt_close(session, &log_fh)); */ int __wt_log_allocfile( - WT_SESSION_IMPL *session, uint32_t lognum, const char *dest, bool prealloc) + WT_SESSION_IMPL *session, uint32_t lognum, const char *dest) { WT_CONNECTION_IMPL *conn; WT_DECL_ITEM(from_path); @@ -936,8 +1005,7 @@ __wt_log_allocfile( WT_ERR(__log_openfile(session, true, &log_fh, WT_LOG_TMPNAME, tmp_id)); WT_ERR(__log_file_header(session, log_fh, NULL, true)); WT_ERR(__wt_ftruncate(session, log_fh, WT_LOG_FIRST_RECORD)); - if (prealloc) - WT_ERR(__log_prealloc(session, log_fh)); + WT_ERR(__log_prealloc(session, log_fh)); WT_ERR(__wt_fsync(session, log_fh)); WT_ERR(__wt_close(session, &log_fh)); WT_ERR(__wt_verbose(session, WT_VERB_LOG, diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c index bd3adb3a528..1c5124c32af 100644 --- a/src/lsm/lsm_manager.c +++ b/src/lsm/lsm_manager.c @@ -203,12 +203,14 @@ __wt_lsm_manager_reconfig(WT_SESSION_IMPL *session, const char **cfg) int __wt_lsm_manager_start(WT_SESSION_IMPL *session) { + WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_LSM_MANAGER *manager; WT_SESSION_IMPL *worker_session; uint32_t i; - manager = &S2C(session)->lsm_manager; + conn = S2C(session); + manager = &conn->lsm_manager; /* * We need at least a manager, a switch thread and a generic @@ -225,7 +227,7 @@ 
__wt_lsm_manager_start(WT_SESSION_IMPL *session) */ for (i = 0; i < WT_LSM_MAX_WORKERS; i++) { WT_ERR(__wt_open_internal_session( - S2C(session), "lsm-worker", true, false, &worker_session)); + conn, "lsm-worker", false, 0, &worker_session)); worker_session->isolation = WT_ISO_READ_UNCOMMITTED; manager->lsm_worker_cookies[i].session = worker_session; } @@ -234,7 +236,7 @@ __wt_lsm_manager_start(WT_SESSION_IMPL *session) WT_ERR(__wt_thread_create(session, &manager->lsm_worker_cookies[0].tid, __lsm_worker_manager, &manager->lsm_worker_cookies[0])); - F_SET(S2C(session), WT_CONN_SERVER_LSM); + F_SET(conn, WT_CONN_SERVER_LSM); if (0) { err: for (i = 0; diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index 82264f7c58f..40917bebf56 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -44,7 +44,6 @@ typedef struct { * Track maximum transaction ID seen and first unwritten transaction ID. */ uint64_t max_txn; - uint64_t first_dirty_txn; /* * When we can't mark the page clean (for example, checkpoint found some @@ -292,7 +291,7 @@ typedef struct { } WT_RECONCILE; static void __rec_bnd_cleanup(WT_SESSION_IMPL *, WT_RECONCILE *, bool); -static void __rec_cell_build_addr( +static void __rec_cell_build_addr(WT_SESSION_IMPL *, WT_RECONCILE *, const void *, size_t, u_int, uint64_t); static int __rec_cell_build_int_key(WT_SESSION_IMPL *, WT_RECONCILE *, const void *, size_t, bool *); @@ -394,7 +393,7 @@ __wt_reconcile(WT_SESSION_IMPL *session, * In-memory splits: reconciliation of an internal page cannot handle * a child page splitting during the reconciliation. */ - F_CAS_ATOMIC_WAIT(page, WT_PAGE_RECONCILIATION); + WT_RET(__wt_fair_lock(session, &page->page_lock)); /* Reconcile the page. */ switch (page->type) { @@ -432,7 +431,7 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_TRET(__rec_write_wrapup_err(session, r, page)); /* Release the reconciliation lock. 
*/ - F_CLR_ATOMIC(page, WT_PAGE_RECONCILIATION); + WT_TRET(__wt_fair_unlock(session, &page->page_lock)); /* Update statistics. */ WT_STAT_FAST_CONN_INCR(session, rec_pages); @@ -538,11 +537,6 @@ __rec_write_status(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) */ if (r->leave_dirty) { /* - * Update the page's first unwritten transaction ID. - */ - mod->first_dirty_txn = r->first_dirty_txn; - - /* * The page remains dirty. * * Any checkpoint call cleared the tree's modified flag before @@ -880,12 +874,6 @@ __rec_write_init(WT_SESSION_IMPL *session, r->cache_write_lookaside = r->cache_write_restore = false; - /* - * Running transactions may update the page after we write it, so - * this is the highest ID we can be confident we will see. - */ - r->first_dirty_txn = conn->txn_global.last_running; - return (0); } @@ -1083,17 +1071,11 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, if ((txnid = upd->txnid) == WT_TXN_ABORTED) continue; - /* - * Track the largest/smallest transaction IDs on the list and - * the smallest not-globally-visible transaction on the page. - */ + /* Track the largest/smallest transaction IDs on the list. */ if (WT_TXNID_LT(max_txn, txnid)) max_txn = txnid; if (WT_TXNID_LT(txnid, min_txn)) min_txn = txnid; - if (WT_TXNID_LT(txnid, r->first_dirty_txn) && - !__wt_txn_visible_all(session, txnid)) - r->first_dirty_txn = txnid; /* * Find the first update we can use. @@ -3837,7 +3819,8 @@ __rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) val->cell_len = 0; val->len = val->buf.size; } else - __rec_cell_build_addr(r, addr->addr, addr->size, + __rec_cell_build_addr(session, r, + addr->addr, addr->size, __rec_vtype(addr), ref->key.recno); WT_CHILD_RELEASE_ERR(session, hazard, ref); @@ -3883,7 +3866,7 @@ __rec_col_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) /* Build the value cell. 
*/ addr = &multi->addr; - __rec_cell_build_addr(r, + __rec_cell_build_addr(session, r, addr->addr, addr->size, __rec_vtype(addr), r->recno); /* Boundary: split or write the page. */ @@ -4708,7 +4691,7 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) vtype = state == WT_CHILD_PROXY ? WT_CELL_ADDR_DEL : (u_int)vpack->raw; } - __rec_cell_build_addr(r, p, size, vtype, WT_RECNO_OOB); + __rec_cell_build_addr(session, r, p, size, vtype, WT_RECNO_OOB); WT_CHILD_RELEASE_ERR(session, hazard, ref); /* @@ -4794,8 +4777,8 @@ __rec_row_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) r->cell_zero = false; addr = &multi->addr; - __rec_cell_build_addr( - r, addr->addr, addr->size, __rec_vtype(addr), WT_RECNO_OOB); + __rec_cell_build_addr(session, r, + addr->addr, addr->size, __rec_vtype(addr), WT_RECNO_OOB); /* Boundary: split or write the page. */ if (key->len + val->len > r->space_avail) @@ -5863,13 +5846,15 @@ __rec_cell_build_leaf_key(WT_SESSION_IMPL *session, * on the page. */ static void -__rec_cell_build_addr(WT_RECONCILE *r, +__rec_cell_build_addr(WT_SESSION_IMPL *session, WT_RECONCILE *r, const void *addr, size_t size, u_int cell_type, uint64_t recno) { WT_KV *val; val = &r->v; + WT_ASSERT(session, size != 0 || cell_type == WT_CELL_ADDR_DEL); + /* * We don't check the address size because we can't store an address on * an overflow page: if the address won't fit, the overflow page's diff --git a/src/schema/schema_open.c b/src/schema/schema_open.c index 42b578946f4..a86cff4d723 100644 --- a/src/schema/schema_open.c +++ b/src/schema/schema_open.c @@ -571,7 +571,7 @@ __wt_schema_get_index(WT_SESSION_IMPL *session, /* Try to find the index in the table. 
*/ for (i = 0; i < table->nindices; i++) { idx = table->indices[i]; - if (strcmp(idx->name, uri) == 0) { + if (idx != NULL && strcmp(idx->name, uri) == 0) { if (tablep != NULL) *tablep = table; else diff --git a/src/session/session_api.c b/src/session/session_api.c index 1bb519e80e0..a766829afad 100644 --- a/src/session/session_api.c +++ b/src/session/session_api.c @@ -1144,47 +1144,12 @@ __session_strerror(WT_SESSION *wt_session, int error) } /* - * __wt_open_internal_session -- - * Allocate a session for WiredTiger's use. - */ -int -__wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name, - bool uses_dhandles, bool open_metadata, WT_SESSION_IMPL **sessionp) -{ - WT_SESSION_IMPL *session; - - *sessionp = NULL; - - WT_RET(__wt_open_session(conn, NULL, NULL, open_metadata, &session)); - session->name = name; - - /* - * Public sessions are automatically closed during WT_CONNECTION->close. - * If the session handles for internal threads were to go on the public - * list, there would be complex ordering issues during close. Set a - * flag to avoid this: internal sessions are not closed automatically. - */ - F_SET(session, WT_SESSION_INTERNAL); - - /* - * Some internal threads must keep running after we close all data - * handles. Make sure these threads don't open their own handles. - */ - if (!uses_dhandles) - F_SET(session, WT_SESSION_NO_DATA_HANDLES); - - *sessionp = session; - return (0); -} - -/* - * __wt_open_session -- - * Allocate a session handle. The internal parameter is used for sessions - * opened by WiredTiger for its own use. + * __open_session -- + * Allocate a session handle. 
*/ -int -__wt_open_session(WT_CONNECTION_IMPL *conn, - WT_EVENT_HANDLER *event_handler, const char *config, bool open_metadata, +static int +__open_session(WT_CONNECTION_IMPL *conn, + WT_EVENT_HANDLER *event_handler, const char *config, WT_SESSION_IMPL **sessionp) { static const WT_SESSION stds = { @@ -1324,7 +1289,26 @@ __wt_open_session(WT_CONNECTION_IMPL *conn, WT_STAT_FAST_CONN_INCR(session, session_open); err: __wt_spin_unlock(session, &conn->api_lock); - WT_RET(ret); + return (ret); +} + +/* + * __wt_open_session -- + * Allocate a session handle. + */ +int +__wt_open_session(WT_CONNECTION_IMPL *conn, + WT_EVENT_HANDLER *event_handler, const char *config, + bool open_metadata, WT_SESSION_IMPL **sessionp) +{ + WT_DECL_RET; + WT_SESSION_IMPL *session; + WT_SESSION *wt_session; + + *sessionp = NULL; + + /* Acquire a session. */ + WT_RET(__open_session(conn, event_handler, config, &session)); /* * Acquiring the metadata handle requires the schema lock; we've seen @@ -1336,8 +1320,59 @@ err: __wt_spin_unlock(session, &conn->api_lock); */ if (open_metadata) { WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_SCHEMA)); - WT_RET(__wt_metadata_open(session_ret)); + if ((ret = __wt_metadata_open(session)) != 0) { + wt_session = &session->iface; + WT_TRET(wt_session->close(wt_session, NULL)); + return (ret); + } } + *sessionp = session; + return (0); +} + +/* + * __wt_open_internal_session -- + * Allocate a session for WiredTiger's use. + */ +int +__wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name, + bool open_metadata, uint32_t session_flags, WT_SESSION_IMPL **sessionp) +{ + WT_DECL_RET; + WT_SESSION *wt_session; + WT_SESSION_IMPL *session; + + *sessionp = NULL; + + /* Acquire a session. */ + WT_RET(__wt_open_session(conn, NULL, NULL, open_metadata, &session)); + session->name = name; + + /* + * Public sessions are automatically closed during WT_CONNECTION->close. 
+ * If the session handles for internal threads were to go on the public + * list, there would be complex ordering issues during close. Set a + * flag to avoid this: internal sessions are not closed automatically. + */ + F_SET(session, session_flags | WT_SESSION_INTERNAL); + + /* + * Acquiring the lookaside table cursor requires various locks; we've + * seen problems in the past where deadlocks happened because sessions + * deadlocked getting the cursor late in the process. Be defensive, + * get it now. + */ + if (F_ISSET(session, WT_SESSION_LOOKASIDE_CURSOR)) { + WT_WITHOUT_DHANDLE(session, ret = + __wt_las_cursor_create(session, &session->las_cursor)); + if (ret != 0) { + wt_session = &session->iface; + WT_TRET(wt_session->close(wt_session, NULL)); + return (ret); + } + } + + *sessionp = session; return (0); } diff --git a/src/support/stat.c b/src/support/stat.c index 4e7f54937f4..9e817fad512 100644 --- a/src/support/stat.c +++ b/src/support/stat.c @@ -595,6 +595,7 @@ static const char * const __stats_connection_desc[] = { "log: log sync_dir operations", "log: log server thread advances write LSN", "log: log write operations", + "log: log files manually zero-filled", "LSM: sleep for LSM checkpoint throttle", "LSM: sleep for LSM merge throttle", "LSM: rows merged in an LSM tree", @@ -760,6 +761,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->log_slot_unbuffered = 0; stats->log_bytes_payload = 0; stats->log_bytes_written = 0; + stats->log_zero_fills = 0; stats->log_flush = 0; stats->log_compress_writes = 0; stats->log_compress_write_fails = 0; @@ -944,6 +946,7 @@ __wt_stat_connection_aggregate( to->log_slot_unbuffered += WT_STAT_READ(from, log_slot_unbuffered); to->log_bytes_payload += WT_STAT_READ(from, log_bytes_payload); to->log_bytes_written += WT_STAT_READ(from, log_bytes_written); + to->log_zero_fills += WT_STAT_READ(from, log_zero_fills); to->log_flush += WT_STAT_READ(from, log_flush); to->log_compress_writes += WT_STAT_READ(from, 
log_compress_writes); to->log_compress_write_fails += diff --git a/src/txn/txn_recover.c b/src/txn/txn_recover.c index f2b181711d1..63d86969311 100644 --- a/src/txn/txn_recover.c +++ b/src/txn/txn_recover.c @@ -412,11 +412,12 @@ __wt_txn_recover(WT_SESSION_IMPL *session) WT_RECOVERY r; struct WT_RECOVERY_FILE *metafile; char *config; - bool needs_rec, was_backup; + bool eviction_started, needs_rec, was_backup; conn = S2C(session); WT_CLEAR(r); WT_INIT_LSN(&r.ckpt_lsn); + eviction_started = false; was_backup = F_ISSET(conn, WT_CONN_WAS_BACKUP); /* We need a real session for recovery. */ @@ -494,6 +495,15 @@ __wt_txn_recover(WT_SESSION_IMPL *session) */ if (needs_rec && FLD_ISSET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR)) WT_ERR(WT_RUN_RECOVERY); + + /* + * Recovery can touch more data than fits in cache, so it relies on + * regular eviction to manage paging. Start eviction threads for + * recovery without LAS cursors. + */ + WT_ERR(__wt_evict_create(session)); + eviction_started = true; + /* * Always run recovery even if it was a clean shutdown. * We can consider skipping it in the future. @@ -522,6 +532,18 @@ __wt_txn_recover(WT_SESSION_IMPL *session) done: FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_DONE); err: WT_TRET(__recovery_free(&r)); __wt_free(session, config); + + if (ret != 0) + __wt_err(session, ret, "Recovery failed"); + + /* + * Destroy the eviction threads that were started in support of + * recovery. They will be restarted once the lookaside table is + * created. + */ + if (eviction_started) + WT_TRET(__wt_evict_destroy(session)); + WT_TRET(session->iface.close(&session->iface, NULL)); return (ret); diff --git a/test/suite/test_bug015.py b/test/suite/test_bug015.py new file mode 100644 index 00000000000..65b5b8e1755 --- /dev/null +++ b/test/suite/test_bug015.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2015 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. 
+# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
+ +import wiredtiger, wttest +from helper import copy_wiredtiger_home, key_populate, simple_populate + +# test_bug015.py +# JIRA WT-2162: index drop in a certain order triggers NULL pointer deref +class test_bug015(wttest.WiredTigerTestCase): + def test_bug015(self): + table = 'table:test_bug015' + idx1 = 'index:test_bug015:aab' + idx2 = 'index:test_bug015:aaa' + self.session.create(table, "columns=(k,v)") + self.session.create(idx1, "columns=(v)") + self.session.create(idx2, "columns=(v)") + self.session.drop(idx1, "force=true") + self.session.create(idx1, "columns=(v)") + self.session.drop(idx2, "force=true") + self.session.create(idx2, "columns=(v)") + +if __name__ == '__main__': + wttest.run() diff --git a/test/suite/test_reconfig.py b/test/suite/test_reconfig01.py index b464895f155..2528f856a08 100644 --- a/test/suite/test_reconfig.py +++ b/test/suite/test_reconfig01.py @@ -30,9 +30,9 @@ import time import wiredtiger, wttest from helper import simple_populate -# test_reconfig.py +# test_reconfig01.py # Smoke-test the connection reconfiguration operations. -class test_reconfig(wttest.WiredTigerTestCase): +class test_reconfig01(wttest.WiredTigerTestCase): def test_reconfig_shared_cache(self): self.conn.reconfigure("shared_cache=(name=pool,size=300M)") diff --git a/test/suite/test_reconfig02.py b/test/suite/test_reconfig02.py new file mode 100644 index 00000000000..e0981a887fb --- /dev/null +++ b/test/suite/test_reconfig02.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2015 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. 
+# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +import fnmatch, os, time +import wiredtiger, wttest +from helper import simple_populate + +# test_reconfig02.py +# Smoke-test the connection reconfiguration operations. +class test_reconfig02(wttest.WiredTigerTestCase): + init_config = 'log=(archive=false,enabled,file_max=100K,prealloc=false,zero_fill=false)' + uri = "table:reconfig02" + entries = 1000 + + def setUpConnectionOpen(self, dir): + self.conn_config = self.init_config + return wttest.WiredTigerTestCase.setUpConnectionOpen(self, dir) + + # Call reconfigure for zero filling a file. There is nothing + # we can actually look for to confirm it did anything. + # Also changing the log file size is a no-op, but should not fail. + def test_reconfig02_simple(self): + self.conn.reconfigure("log=(zero_fill=true)") + self.conn.reconfigure("log=(file_max=1MB)") + + # Test that we get an error if we try to turn logging off. 
+ def test_reconfig02_disable(self): + msg = 'Invalid argument' + gotException = False + try: + self.conn.reconfigure("log=(enabled=false)") + except wiredtiger.WiredTigerError as e: + gotException = True + self.pr('got exception: ' + str(e)) + self.assertTrue(str(e).find(msg) >= 0) + self.assertTrue(gotException) + + # Logging starts on, but prealloc is off. Verify it is off. + # Reconfigure it on and run again, making sure that log files + # get pre-allocated. + def test_reconfig02_prealloc(self): + # Create a table just to write something into the log. Sleep + # to give the worker thread a chance to run. + self.session.create(self.uri, 'key_format=i,value_format=i') + time.sleep(2) + prep_logs = fnmatch.filter(os.listdir('.'), "*Prep*") + # Make sure no pre-allocated log files exist. + self.assertEqual(0, len(prep_logs)) + + # Now turn on pre-allocation. Sleep to give the worker thread + # a chance to run and verify pre-allocated log files exist. + self.conn.reconfigure("log=(prealloc=true)") + time.sleep(2) + prep_logs = fnmatch.filter(os.listdir('.'), "*Prep*") + self.assertNotEqual(0, len(prep_logs)) + + # Logging starts on, but archive is off. Verify it is off. + # Reconfigure it on and run again, making sure that log files + # get archived. + def test_reconfig02_archive(self): + self.session.create(self.uri, 'key_format=i,value_format=i') + c = self.session.open_cursor(self.uri, None, None) + for i in range(self.entries): + c[i] = i + 1 + c.close() + # Close and reopen connection to write a checkpoint, move to the + # next log file and verify that archive did not run. + orig_logs = fnmatch.filter(os.listdir('.'), "*Log*") + self.reopen_conn() + cur_logs = fnmatch.filter(os.listdir('.'), "*Log*") + for o in orig_logs: + self.assertEqual(True, o in cur_logs) + + # Now turn on archive, sleep a bit to allow the archive thread + # to run and then confirm that all original logs are gone. 
+ self.conn.reconfigure("log=(archive=true)") + time.sleep(2) + cur_logs = fnmatch.filter(os.listdir('.'), "*Log*") + for o in orig_logs: + self.assertEqual(False, o in cur_logs) + +if __name__ == '__main__': + wttest.run() diff --git a/test/suite/test_txn02.py b/test/suite/test_txn02.py index 83c10f41244..17d0b97b50f 100644 --- a/test/suite/test_txn02.py +++ b/test/suite/test_txn02.py @@ -104,9 +104,18 @@ class test_txn02(wttest.WiredTigerTestCase, suite_subprocess): # deterministic manner. self.txn_sync = self.sync_list[ self.scenario_number % len(self.sync_list)] + # + # We don't want to run zero fill with only the same settings, such + # as archive or sync, which are an even number of options. + # + freq = 3 + zerofill = 'false' + if self.scenario_number % freq == 0: + zerofill = 'true' self.backup_dir = os.path.join(self.home, "WT_BACKUP") conn_params = \ 'log=(archive=false,enabled,file_max=%s),' % self.logmax + \ + 'log=(zero_fill=%s),' % zerofill + \ 'create,error_prefix="%s: ",' % self.shortid() + \ 'transaction_sync="%s",' % self.txn_sync # print "Creating conn at '%s' with config '%s'" % (dir, conn_params) |