diff options
Diffstat (limited to 'src/third_party/wiredtiger/src/session/session_api.c')
-rw-r--r-- | src/third_party/wiredtiger/src/session/session_api.c | 1054 |
1 files changed, 1054 insertions, 0 deletions
diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c new file mode 100644 index 00000000000..39b9dd0de61 --- /dev/null +++ b/src/third_party/wiredtiger/src/session/session_api.c @@ -0,0 +1,1054 @@ +/*- + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +static int __session_checkpoint(WT_SESSION *, const char *); +static int __session_rollback_transaction(WT_SESSION *, const char *); + +/* + * __wt_session_reset_cursors -- + * Reset all open cursors. + */ +int +__wt_session_reset_cursors(WT_SESSION_IMPL *session) +{ + WT_CURSOR *cursor; + WT_DECL_RET; + + TAILQ_FOREACH(cursor, &session->cursors, q) { + /* Stop when there are no positioned cursors. */ + if (session->ncursors == 0) + break; + WT_TRET(cursor->reset(cursor)); + } + return (ret); +} + +/* + * __wt_session_copy_values -- + * Copy values into all positioned cursors, so that they don't keep + * transaction IDs pinned. + */ +int +__wt_session_copy_values(WT_SESSION_IMPL *session) +{ + WT_CURSOR *cursor; + WT_DECL_RET; + + TAILQ_FOREACH(cursor, &session->cursors, q) + if (F_ISSET(cursor, WT_CURSTD_VALUE_INT)) { + F_CLR(cursor, WT_CURSTD_VALUE_INT); + WT_RET(__wt_buf_set(session, &cursor->value, + cursor->value.data, cursor->value.size)); + F_SET(cursor, WT_CURSTD_VALUE_EXT); + } + + return (ret); +} + +/* + * __session_clear -- + * Clear a session structure. + */ +static void +__session_clear(WT_SESSION_IMPL *session) +{ + /* + * There's no serialization support around the review of the hazard + * array, which means threads checking for hazard pointers first check + * the active field (which may be 0) and then use the hazard pointer + * (which cannot be NULL). + * + * Additionally, the session structure can include information that + * persists past the session's end-of-life, stored as part of page + * splits. + * + * For these reasons, be careful when clearing the session structure. + */ + memset(session, 0, WT_SESSION_CLEAR_SIZE(session)); + session->hazard_size = 0; + session->nhazard = 0; +} + +/* + * __session_close -- + * WT_SESSION->close method. + */ +static int +__session_close(WT_SESSION *wt_session, const char *config) +{ + WT_CONNECTION_IMPL *conn; + WT_CURSOR *cursor; + WT_DECL_RET; + WT_SESSION_IMPL *session; + + conn = (WT_CONNECTION_IMPL *)wt_session->connection; + session = (WT_SESSION_IMPL *)wt_session; + + SESSION_API_CALL(session, close, config, cfg); + WT_UNUSED(cfg); + + /* Rollback any active transaction. */ + if (F_ISSET(&session->txn, TXN_RUNNING)) + WT_TRET(__session_rollback_transaction(wt_session, NULL)); + + /* + * Also release any pinned transaction ID from a non-transactional + * operation. + */ + if (conn->txn_global.states != NULL) + __wt_txn_release_snapshot(session); + + /* Close all open cursors. */ + while ((cursor = TAILQ_FIRST(&session->cursors)) != NULL) { + /* + * Notify the user that we are closing the cursor handle + * via the registered close callback. + */ + if (session->event_handler->handle_close != NULL) + WT_TRET(session->event_handler->handle_close( + session->event_handler, wt_session, cursor)); + WT_TRET(cursor->close(cursor)); + } + + WT_ASSERT(session, session->ncursors == 0); + + /* Discard cached handles. */ + __wt_session_close_cache(session); + + /* Close all tables. */ + __wt_schema_close_tables(session); + + /* Discard metadata tracking. */ + __wt_meta_track_discard(session); + + /* Discard scratch buffers. */ + __wt_scr_discard(session); + + /* Free transaction information. */ + __wt_txn_destroy(session); + + /* Confirm we're not holding any hazard pointers. */ + __wt_hazard_close(session); + + /* Cleanup */ + if (session->block_manager_cleanup != NULL) + WT_TRET(session->block_manager_cleanup(session)); + if (session->reconcile_cleanup != NULL) + WT_TRET(session->reconcile_cleanup(session)); + + /* Free the eviction exclusive-lock information. */ + __wt_free(session, session->excl); + + /* Destroy the thread's mutex. */ + WT_TRET(__wt_cond_destroy(session, &session->cond)); + + /* The API lock protects opening and closing of sessions. */ + __wt_spin_lock(session, &conn->api_lock); + + /* Decrement the count of open sessions. */ + WT_STAT_FAST_CONN_DECR(session, session_open); + + /* + * Sessions are re-used, clear the structure: the clear sets the active + * field to 0, which will exclude the hazard array from review by the + * eviction thread. Because some session fields are accessed by other + * threads, the structure must be cleared carefully. + * + * We don't need to publish here, because regardless of the active field + * being non-zero, the hazard pointer is always valid. + */ + __session_clear(session); + session = conn->default_session; + + /* + * Decrement the count of active sessions if that's possible: a session + * being closed may or may not be at the end of the array, step toward + * the beginning of the array until we reach an active session. + */ + while (conn->sessions[conn->session_cnt - 1].active == 0) + if (--conn->session_cnt == 0) + break; + + __wt_spin_unlock(session, &conn->api_lock); + +err: API_END_RET_NOTFOUND_MAP(session, ret); +} + +/* + * __session_reconfigure -- + * WT_SESSION->reconfigure method. + */ +static int +__session_reconfigure(WT_SESSION *wt_session, const char *config) +{ + WT_CONFIG_ITEM cval; + WT_DECL_RET; + WT_SESSION_IMPL *session; + + session = (WT_SESSION_IMPL *)wt_session; + SESSION_API_CALL(session, reconfigure, config, cfg); + + if (F_ISSET(&session->txn, TXN_RUNNING)) + WT_ERR_MSG(session, EINVAL, "transaction in progress"); + + WT_TRET(__wt_session_reset_cursors(session)); + + WT_ERR(__wt_config_gets_def(session, cfg, "isolation", 0, &cval)); + if (cval.len != 0) + session->isolation = session->txn.isolation = + WT_STRING_MATCH("snapshot", cval.str, cval.len) ? + TXN_ISO_SNAPSHOT : + WT_STRING_MATCH("read-uncommitted", cval.str, cval.len) ? + TXN_ISO_READ_UNCOMMITTED : TXN_ISO_READ_COMMITTED; + +err: API_END_RET_NOTFOUND_MAP(session, ret); +} + +/* + * __wt_open_cursor -- + * Internal version of WT_SESSION::open_cursor. + */ +int +__wt_open_cursor(WT_SESSION_IMPL *session, + const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) +{ + WT_COLGROUP *colgroup; + WT_DATA_SOURCE *dsrc; + WT_DECL_RET; + + *cursorp = NULL; + + /* + * Open specific cursor types we know about, or call the generic data + * source open function. + * + * Unwind a set of string comparisons into a switch statement hoping + * the compiler can make it fast, but list the common choices first + * instead of sorting so if/else patterns are still fast. + */ + switch (uri[0]) { + /* + * Common cursor types. + */ + case 't': + if (WT_PREFIX_MATCH(uri, "table:")) + WT_RET(__wt_curtable_open(session, uri, cfg, cursorp)); + break; + case 'c': + if (WT_PREFIX_MATCH(uri, "colgroup:")) { + /* + * Column groups are a special case: open a cursor on + * the underlying data source. + */ + WT_RET(__wt_schema_get_colgroup( + session, uri, NULL, &colgroup)); + WT_RET(__wt_open_cursor( + session, colgroup->source, owner, cfg, cursorp)); + } else if (WT_PREFIX_MATCH(uri, "config:")) + WT_RET(__wt_curconfig_open( + session, uri, cfg, cursorp)); + break; + case 'i': + if (WT_PREFIX_MATCH(uri, "index:")) + WT_RET(__wt_curindex_open( + session, uri, owner, cfg, cursorp)); + break; + case 'l': + if (WT_PREFIX_MATCH(uri, "lsm:")) + WT_RET(__wt_clsm_open( + session, uri, owner, cfg, cursorp)); + else if (WT_PREFIX_MATCH(uri, "log:")) + WT_RET(__wt_curlog_open(session, uri, cfg, cursorp)); + break; + + /* + * Less common cursor types. + */ + case 'f': + if (WT_PREFIX_MATCH(uri, "file:")) + WT_RET(__wt_curfile_open( + session, uri, owner, cfg, cursorp)); + break; + case 'm': + if (WT_PREFIX_MATCH(uri, WT_METADATA_URI)) + WT_RET(__wt_curmetadata_open( + session, uri, owner, cfg, cursorp)); + break; + case 'b': + if (WT_PREFIX_MATCH(uri, "backup:")) + WT_RET(__wt_curbackup_open( + session, uri, cfg, cursorp)); + break; + case 's': + if (WT_PREFIX_MATCH(uri, "statistics:")) + WT_RET(__wt_curstat_open(session, uri, cfg, cursorp)); + break; + default: + break; + } + + if (*cursorp == NULL && + (dsrc = __wt_schema_get_source(session, uri)) != NULL) + WT_RET(dsrc->open_cursor == NULL ? + __wt_object_unsupported(session, uri) : + __wt_curds_open(session, uri, owner, cfg, dsrc, cursorp)); + + if (*cursorp == NULL) + return (__wt_bad_object_type(session, uri)); + + /* + * When opening simple tables, the table code calls this function on the + * underlying data source, in which case the application's URI has been + * copied. + */ + if ((*cursorp)->uri == NULL && + (ret = __wt_strdup(session, uri, &(*cursorp)->uri)) != 0) + WT_TRET((*cursorp)->close(*cursorp)); + + return (ret); +} + +/* + * __session_open_cursor -- + * WT_SESSION->open_cursor method. + */ +static int +__session_open_cursor(WT_SESSION *wt_session, + const char *uri, WT_CURSOR *to_dup, const char *config, WT_CURSOR **cursorp) +{ + WT_CURSOR *cursor; + WT_DECL_RET; + WT_SESSION_IMPL *session; + + cursor = *cursorp = NULL; + + session = (WT_SESSION_IMPL *)wt_session; + SESSION_API_CALL(session, open_cursor, config, cfg); + + if ((to_dup == NULL && uri == NULL) || (to_dup != NULL && uri != NULL)) + WT_ERR_MSG(session, EINVAL, + "should be passed either a URI or a cursor to duplicate, " + "but not both"); + + if (to_dup != NULL) { + uri = to_dup->uri; + if (!WT_PREFIX_MATCH(uri, "colgroup:") && + !WT_PREFIX_MATCH(uri, "index:") && + !WT_PREFIX_MATCH(uri, "file:") && + !WT_PREFIX_MATCH(uri, "lsm:") && + !WT_PREFIX_MATCH(uri, WT_METADATA_URI) && + !WT_PREFIX_MATCH(uri, "table:") && + __wt_schema_get_source(session, uri) == NULL) + WT_ERR(__wt_bad_object_type(session, uri)); + } + + WT_ERR(__wt_open_cursor(session, uri, NULL, cfg, &cursor)); + if (to_dup != NULL) + WT_ERR(__wt_cursor_dup_position(to_dup, cursor)); + + *cursorp = cursor; + + if (0) { +err: if (cursor != NULL) + WT_TRET(cursor->close(cursor)); + } + + /* + * Opening a cursor on a non-existent data source will set ret to + * either of ENOENT or WT_NOTFOUND at this point. However, + * applications may reasonably do this inside a transaction to check + * for the existence of a table or index. + * + * Prefer WT_NOTFOUND here: that does not force running transactions to + * roll back. It will be mapped back to ENOENT. + */ + if (ret == ENOENT) + ret = WT_NOTFOUND; + + API_END_RET_NOTFOUND_MAP(session, ret); +} + +/* + * __wt_session_create_strip -- + * Discard any configuration information from a schema entry that is not + * applicable to an session.create call, here for the wt dump command utility, + * which only wants to dump the schema information needed for load. + */ +int +__wt_session_create_strip(WT_SESSION *wt_session, + const char *v1, const char *v2, const char **value_ret) +{ + WT_SESSION_IMPL *session = (WT_SESSION_IMPL *)wt_session; + const char *cfg[] = + { WT_CONFIG_BASE(session, session_create), v1, v2, NULL }; + + return (__wt_config_collapse(session, cfg, value_ret)); +} + +/* + * __session_create -- + * WT_SESSION->create method. + */ +static int +__session_create(WT_SESSION *wt_session, const char *uri, const char *config) +{ + WT_CONFIG_ITEM cval; + WT_DECL_RET; + WT_SESSION_IMPL *session; + + session = (WT_SESSION_IMPL *)wt_session; + SESSION_API_CALL(session, create, config, cfg); + WT_UNUSED(cfg); + + /* Disallow objects in the WiredTiger name space. */ + WT_ERR(__wt_str_name_check(session, uri)); + + /* + * Type configuration only applies to tables, column groups and indexes. + * We don't want applications to attempt to layer LSM on top of their + * extended data-sources, and the fact we allow LSM as a valid URI is an + * invitation to that mistake: nip it in the bud. + */ + if (!WT_PREFIX_MATCH(uri, "colgroup:") && + !WT_PREFIX_MATCH(uri, "index:") && + !WT_PREFIX_MATCH(uri, "table:")) { + /* + * We can't disallow type entirely, a configuration string might + * innocently include it, for example, a dump/load pair. If the + * URI type prefix and the type are the same, let it go. + */ + if ((ret = + __wt_config_getones(session, config, "type", &cval)) == 0 && + (strncmp(uri, cval.str, cval.len) != 0 || + uri[cval.len] != ':')) + WT_ERR_MSG(session, EINVAL, + "%s: unsupported type configuration", uri); + WT_ERR_NOTFOUND_OK(ret); + } + + WT_WITH_SCHEMA_LOCK(session, + ret = __wt_schema_create(session, uri, config)); + +err: API_END_RET_NOTFOUND_MAP(session, ret); +} + +/* + * __session_log_printf -- + * WT_SESSION->log_printf method. + */ +static int +__session_log_printf(WT_SESSION *wt_session, const char *fmt, ...) + WT_GCC_FUNC_ATTRIBUTE((format (printf, 2, 3))) +{ + WT_SESSION_IMPL *session; + WT_DECL_RET; + va_list ap; + + session = (WT_SESSION_IMPL *)wt_session; + SESSION_API_CALL_NOCONF(session, log_printf); + + va_start(ap, fmt); + ret = __wt_log_vprintf(session, fmt, ap); + va_end(ap); + +err: API_END_RET(session, ret); +} + +/* + * __session_rename -- + * WT_SESSION->rename method. + */ +static int +__session_rename(WT_SESSION *wt_session, + const char *uri, const char *newuri, const char *config) +{ + WT_DECL_RET; + WT_SESSION_IMPL *session; + + session = (WT_SESSION_IMPL *)wt_session; + SESSION_API_CALL(session, rename, config, cfg); + + /* Disallow objects in the WiredTiger name space. */ + WT_ERR(__wt_str_name_check(session, uri)); + WT_ERR(__wt_str_name_check(session, newuri)); + + WT_WITH_SCHEMA_LOCK(session, + ret = __wt_schema_rename(session, uri, newuri, cfg)); + +err: API_END_RET_NOTFOUND_MAP(session, ret); +} + +/* + * __session_compact -- + * WT_SESSION->compact method. + */ +static int +__session_compact(WT_SESSION *wt_session, const char *uri, const char *config) +{ + WT_SESSION_IMPL *session; + + session = (WT_SESSION_IMPL *)wt_session; + + /* Disallow objects in the WiredTiger name space. */ + WT_RET(__wt_str_name_check(session, uri)); + + if (!WT_PREFIX_MATCH(uri, "colgroup:") && + !WT_PREFIX_MATCH(uri, "file:") && + !WT_PREFIX_MATCH(uri, "index:") && + !WT_PREFIX_MATCH(uri, "lsm:") && + !WT_PREFIX_MATCH(uri, "table:")) + return (__wt_bad_object_type(session, uri)); + + return (__wt_session_compact(wt_session, uri, config)); +} + +/* + * __session_drop -- + * WT_SESSION->drop method. + */ +static int +__session_drop(WT_SESSION *wt_session, const char *uri, const char *config) +{ + WT_DECL_RET; + WT_SESSION_IMPL *session; + + session = (WT_SESSION_IMPL *)wt_session; + SESSION_API_CALL(session, drop, config, cfg); + + /* Disallow objects in the WiredTiger name space. */ + WT_ERR(__wt_str_name_check(session, uri)); + + WT_WITH_SCHEMA_LOCK(session, + ret = __wt_schema_drop(session, uri, cfg)); + +err: /* Note: drop operations cannot be unrolled (yet?). */ + API_END_RET_NOTFOUND_MAP(session, ret); +} + +/* + * __session_salvage -- + * WT_SESSION->salvage method. + */ +static int +__session_salvage(WT_SESSION *wt_session, const char *uri, const char *config) +{ + WT_DECL_RET; + WT_SESSION_IMPL *session; + + session = (WT_SESSION_IMPL *)wt_session; + + SESSION_API_CALL(session, salvage, config, cfg); + WT_WITH_SCHEMA_LOCK(session, + ret = __wt_schema_worker(session, uri, __wt_salvage, + NULL, cfg, WT_DHANDLE_EXCLUSIVE | WT_BTREE_SALVAGE)); + +err: API_END_RET_NOTFOUND_MAP(session, ret); +} + +/* + * __session_truncate -- + * WT_SESSION->truncate method. + */ +static int +__session_truncate(WT_SESSION *wt_session, + const char *uri, WT_CURSOR *start, WT_CURSOR *stop, const char *config) +{ + WT_DECL_RET; + WT_SESSION_IMPL *session; + WT_CURSOR *cursor; + int cmp; + + session = (WT_SESSION_IMPL *)wt_session; + SESSION_TXN_API_CALL(session, truncate, config, cfg); + + /* + * If the URI is specified, we don't need a start/stop, if start/stop + * is specified, we don't need a URI. + * + * If no URI is specified, and both cursors are specified, start/stop + * must reference the same object. + * + * Any specified cursor must have been initialized. + */ + if ((uri == NULL && start == NULL && stop == NULL) || + (uri != NULL && (start != NULL || stop != NULL))) + WT_ERR_MSG(session, EINVAL, + "the truncate method should be passed either a URI or " + "start/stop cursors, but not both"); + + if (uri != NULL) { + /* Disallow objects in the WiredTiger name space. */ + WT_ERR(__wt_str_name_check(session, uri)); + + WT_WITH_SCHEMA_LOCK(session, + ret = __wt_schema_truncate(session, uri, cfg)); + goto done; + } + + /* + * Cursor truncate is only supported for some objects, check for the + * supporting methods we need, range_truncate and compare. + */ + cursor = start == NULL ? stop : start; + if (cursor->compare == NULL) + WT_ERR(__wt_bad_object_type(session, cursor->uri)); + + /* + * If both cursors set, check they're correctly ordered with respect to + * each other. We have to test this before any search, the search can + * change the initial cursor position. + * + * Rather happily, the compare routine will also confirm the cursors + * reference the same object and the keys are set. + */ + if (start != NULL && stop != NULL) { + WT_ERR(start->compare(start, stop, &cmp)); + if (cmp > 0) + WT_ERR_MSG(session, EINVAL, + "the start cursor position is after the stop " + "cursor position"); + } + + /* + * Truncate does not require keys actually exist so that applications + * can discard parts of the object's name space without knowing exactly + * what records currently appear in the object. For this reason, do a + * search-near, rather than a search. Additionally, we have to correct + * after calling search-near, to position the start/stop cursors on the + * next record greater than/less than the original key. If the cursors + * hit the beginning/end of the object, or the start/stop keys cross, + * we're done, the range must be empty. + */ + if (start != NULL) { + WT_ERR(start->search_near(start, &cmp)); + if (cmp < 0 && (ret = start->next(start)) != 0) { + WT_ERR_NOTFOUND_OK(ret); + goto done; + } + } + if (stop != NULL) { + WT_ERR(stop->search_near(stop, &cmp)); + if (cmp > 0 && (ret = stop->prev(stop)) != 0) { + WT_ERR_NOTFOUND_OK(ret); + goto done; + } + + if (start != NULL) { + WT_ERR(start->compare(start, stop, &cmp)); + if (cmp > 0) + goto done; + } + } + + WT_ERR(__wt_schema_range_truncate(session, start, stop)); + +done: +err: TXN_API_END_RETRY(session, ret, 0); + return ((ret) == WT_NOTFOUND ? ENOENT : (ret)); +} + +/* + * __session_upgrade -- + * WT_SESSION->upgrade method. + */ +static int +__session_upgrade(WT_SESSION *wt_session, const char *uri, const char *config) +{ + WT_DECL_RET; + WT_SESSION_IMPL *session; + + session = (WT_SESSION_IMPL *)wt_session; + + SESSION_API_CALL(session, upgrade, config, cfg); + WT_WITH_SCHEMA_LOCK(session, + ret = __wt_schema_worker(session, uri, __wt_upgrade, + NULL, cfg, WT_DHANDLE_EXCLUSIVE | WT_BTREE_UPGRADE)); + +err: API_END_RET_NOTFOUND_MAP(session, ret); +} + +/* + * __session_verify -- + * WT_SESSION->verify method. + */ +static int +__session_verify(WT_SESSION *wt_session, const char *uri, const char *config) +{ + WT_DECL_RET; + WT_SESSION_IMPL *session; + + session = (WT_SESSION_IMPL *)wt_session; + + SESSION_API_CALL(session, verify, config, cfg); + WT_WITH_SCHEMA_LOCK(session, + ret = __wt_schema_worker(session, uri, __wt_verify, + NULL, cfg, WT_DHANDLE_EXCLUSIVE | WT_BTREE_VERIFY)); + +err: API_END_RET_NOTFOUND_MAP(session, ret); +} + +/* + * __session_begin_transaction -- + * WT_SESSION->begin_transaction method. + */ +static int +__session_begin_transaction(WT_SESSION *wt_session, const char *config) +{ + WT_DECL_RET; + WT_SESSION_IMPL *session; + + session = (WT_SESSION_IMPL *)wt_session; + SESSION_API_CALL(session, begin_transaction, config, cfg); + WT_STAT_FAST_CONN_INCR(session, txn_begin); + + if (F_ISSET(&session->txn, TXN_RUNNING)) + WT_ERR_MSG(session, EINVAL, "Transaction already running"); + + /* + * There is no transaction active in this thread; check if the cache is + * full, if we have to block for eviction, this is the best time to do + * it. + */ + WT_ERR(__wt_cache_full_check(session)); + + ret = __wt_txn_begin(session, cfg); + +err: API_END_RET(session, ret); +} + +/* + * __session_commit_transaction -- + * WT_SESSION->commit_transaction method. + */ +static int +__session_commit_transaction(WT_SESSION *wt_session, const char *config) +{ + WT_DECL_RET; + WT_SESSION_IMPL *session; + WT_TXN *txn; + + session = (WT_SESSION_IMPL *)wt_session; + SESSION_API_CALL(session, commit_transaction, config, cfg); + WT_STAT_FAST_CONN_INCR(session, txn_commit); + + txn = &session->txn; + if (F_ISSET(txn, TXN_ERROR)) { + __wt_errx(session, "failed transaction requires rollback"); + ret = EINVAL; + } + + if (ret == 0) + ret = __wt_txn_commit(session, cfg); + else { + WT_TRET(__wt_session_reset_cursors(session)); + WT_TRET(__wt_txn_rollback(session, cfg)); + } + +err: API_END_RET(session, ret); +} + +/* + * __session_rollback_transaction -- + * WT_SESSION->rollback_transaction method. + */ +static int +__session_rollback_transaction(WT_SESSION *wt_session, const char *config) +{ + WT_DECL_RET; + WT_SESSION_IMPL *session; + + session = (WT_SESSION_IMPL *)wt_session; + SESSION_API_CALL(session, rollback_transaction, config, cfg); + WT_STAT_FAST_CONN_INCR(session, txn_rollback); + + WT_TRET(__wt_session_reset_cursors(session)); + + WT_TRET(__wt_txn_rollback(session, cfg)); + +err: API_END_RET(session, ret); +} + +/* + * __session_transaction_pinned_range -- + * WT_SESSION->transaction_pinned_range method. + */ +static int +__session_transaction_pinned_range(WT_SESSION *wt_session, uint64_t *prange) +{ + WT_DECL_RET; + WT_SESSION_IMPL *session; + WT_TXN_STATE *txn_state; + uint64_t pinned; + + session = (WT_SESSION_IMPL *)wt_session; + SESSION_API_CALL_NOCONF(session, pinned_range); + + txn_state = WT_SESSION_TXN_STATE(session); + + /* Assign pinned to the lesser of id or snap_min */ + if (txn_state->id != WT_TXN_NONE && + TXNID_LT(txn_state->id, txn_state->snap_min)) + pinned = txn_state->id; + else + pinned = txn_state->snap_min; + + if (pinned == WT_TXN_NONE) + *prange = 0; + else + *prange = S2C(session)->txn_global.current - pinned; + +err: API_END_RET(session, ret); +} + +/* + * __session_checkpoint -- + * WT_SESSION->checkpoint method. + */ +static int +__session_checkpoint(WT_SESSION *wt_session, const char *config) +{ + WT_DECL_RET; + WT_SESSION_IMPL *session; + WT_TXN *txn; + + session = (WT_SESSION_IMPL *)wt_session; + + txn = &session->txn; + + WT_STAT_FAST_CONN_INCR(session, txn_checkpoint); + SESSION_API_CALL(session, checkpoint, config, cfg); + + /* + * Checkpoints require a snapshot to write a transactionally consistent + * snapshot of the data. + * + * We can't use an application's transaction: if it has uncommitted + * changes, they will be written in the checkpoint and may appear after + * a crash. + * + * Use a real snapshot transaction: we don't want any chance of the + * snapshot being updated during the checkpoint. Eviction is prevented + * from evicting anything newer than this because we track the oldest + * transaction ID in the system that is not visible to all readers. + */ + if (F_ISSET(txn, TXN_RUNNING)) + WT_ERR_MSG(session, EINVAL, + "Checkpoint not permitted in a transaction"); + + /* + * Reset open cursors. Do this explicitly, even though it will happen + * implicitly in the call to begin_transaction for the checkpoint, the + * checkpoint code will acquire the schema lock before we do that, and + * some implementation of WT_CURSOR::reset might need the schema lock. + */ + WT_ERR(__wt_session_reset_cursors(session)); + + /* + * Don't highjack the session checkpoint thread for eviction. + * + * Application threads are not generally available for potentially slow + * operations, but checkpoint does enough I/O it may be called upon to + * perform slow operations for the block manager. + */ + F_SET(session, WT_SESSION_CAN_WAIT | WT_SESSION_NO_CACHE_CHECK); + + /* + * Only one checkpoint can be active at a time, and checkpoints must run + * in the same order as they update the metadata. It's probably a bad + * idea to run checkpoints out of multiple threads, but serialize them + * here to ensure we don't get into trouble. + */ + WT_STAT_FAST_CONN_SET(session, txn_checkpoint_running, 1); + __wt_spin_lock(session, &S2C(session)->checkpoint_lock); + + ret = __wt_txn_checkpoint(session, cfg); + + WT_STAT_FAST_CONN_SET(session, txn_checkpoint_running, 0); + __wt_spin_unlock(session, &S2C(session)->checkpoint_lock); + +err: F_CLR(session, WT_SESSION_CAN_WAIT | WT_SESSION_NO_CACHE_CHECK); + + API_END_RET_NOTFOUND_MAP(session, ret); +} + +/* + * __wt_open_internal_session -- + * Allocate a session for WiredTiger's use. + */ +int +__wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name, + int uses_dhandles, int open_metadata, WT_SESSION_IMPL **sessionp) +{ + WT_SESSION_IMPL *session; + + *sessionp = NULL; + + WT_RET(__wt_open_session(conn, NULL, NULL, &session)); + session->name = name; + + /* + * Public sessions are automatically closed during WT_CONNECTION->close. + * If the session handles for internal threads were to go on the public + * list, there would be complex ordering issues during close. Set a + * flag to avoid this: internal sessions are not closed automatically. + */ + F_SET(session, WT_SESSION_INTERNAL); + + /* + * Some internal threads must keep running after we close all data + * handles. Make sure these threads don't open their own handles. + */ + if (!uses_dhandles) + F_SET(session, WT_SESSION_NO_DATA_HANDLES); + + /* + * Acquiring the metadata handle requires the schema lock; we've seen + * problems in the past where a worker thread has acquired the schema + * lock unexpectedly, relatively late in the run, and deadlocked. Be + * defensive, get it now. The metadata file may not exist when the + * connection first creates its default session or the shared cache + * pool creates its sessions, let our caller decline this work. + */ + if (open_metadata) { + WT_ASSERT(session, !F_ISSET(session, WT_SESSION_SCHEMA_LOCKED)); + WT_RET(__wt_metadata_open(session)); + } + + *sessionp = session; + return (0); +} + +/* + * __wt_open_session -- + * Allocate a session handle. The internal parameter is used for sessions + * opened by WiredTiger for its own use. + */ +int +__wt_open_session(WT_CONNECTION_IMPL *conn, + WT_EVENT_HANDLER *event_handler, const char *config, + WT_SESSION_IMPL **sessionp) +{ + static const WT_SESSION stds = { + NULL, + __session_close, + __session_reconfigure, + __session_open_cursor, + __session_create, + __session_compact, + __session_drop, + __session_log_printf, + __session_rename, + __session_salvage, + __session_truncate, + __session_upgrade, + __session_verify, + __session_begin_transaction, + __session_commit_transaction, + __session_rollback_transaction, + __session_checkpoint, + __session_transaction_pinned_range + }; + WT_DECL_RET; + WT_SESSION_IMPL *session, *session_ret; + uint32_t i; + + *sessionp = NULL; + + session = conn->default_session; + session_ret = NULL; + + __wt_spin_lock(session, &conn->api_lock); + + /* + * Make sure we don't try to open a new session after the application + * closes the connection. This is particularly intended to catch + * cases where server threads open sessions. + */ + WT_ASSERT(session, F_ISSET(conn, WT_CONN_SERVER_RUN)); + + /* Find the first inactive session slot. */ + for (session_ret = conn->sessions, + i = 0; i < conn->session_size; ++session_ret, ++i) + if (!session_ret->active) + break; + if (i == conn->session_size) + WT_ERR_MSG(session, ENOMEM, + "only configured to support %" PRIu32 " sessions" + " (including %" PRIu32 " internal)", + conn->session_size, WT_NUM_INTERNAL_SESSIONS); + + /* + * If the active session count is increasing, update it. We don't worry + * about correcting the session count on error, as long as we don't mark + * this session as active, we'll clean it up on close. + */ + if (i >= conn->session_cnt) /* Defend against off-by-one errors. */ + conn->session_cnt = i + 1; + + session_ret->id = i; + session_ret->iface = stds; + session_ret->iface.connection = &conn->iface; + + WT_ERR(__wt_cond_alloc(session, "session", 0, &session_ret->cond)); + + __wt_random_init(session_ret->rnd); + + __wt_event_handler_set(session_ret, + event_handler == NULL ? session->event_handler : event_handler); + + TAILQ_INIT(&session_ret->cursors); + SLIST_INIT(&session_ret->dhandles); + + /* Initialize transaction support: default to read-committed. */ + session_ret->isolation = TXN_ISO_READ_COMMITTED; + WT_ERR(__wt_txn_init(session_ret)); + + /* + * The session's hazard pointer memory isn't discarded during normal + * session close because access to it isn't serialized. Allocate the + * first time we open this session. + */ + if (session_ret->hazard == NULL) + WT_ERR(__wt_calloc_def( + session, conn->hazard_max, &session_ret->hazard)); + + /* + * Set an initial size for the hazard array. It will be grown as + * required up to hazard_max. The hazard_size is reset on close, since + * __wt_hazard_close ensures the array is cleared - so it is safe to + * reset the starting size on each open. + */ + session_ret->hazard_size = WT_HAZARD_INCR; + + /* + * Configuration: currently, the configuration for open_session is the + * same as session.reconfigure, so use that function. + */ + if (config != NULL) + WT_ERR( + __session_reconfigure((WT_SESSION *)session_ret, config)); + + session_ret->name = NULL; + + /* + * Publish: make the entry visible to server threads. There must be a + * barrier for two reasons, to ensure structure fields are set before + * any other thread will consider the session, and to push the session + * count to ensure the eviction thread can't review too few slots. + */ + WT_PUBLISH(session_ret->active, 1); + + WT_STATIC_ASSERT(offsetof(WT_SESSION_IMPL, iface) == 0); + *sessionp = session_ret; + + WT_STAT_FAST_CONN_INCR(session, session_open); + +err: __wt_spin_unlock(session, &conn->api_lock); + return (ret); +} |