/*- * Copyright (c) 2014-2015 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * * See the file LICENSE for redistribution information. */ #include "wt_internal.h" static int __session_checkpoint(WT_SESSION *, const char *); static int __session_rollback_transaction(WT_SESSION *, const char *); /* * __wt_session_reset_cursors -- * Reset all open cursors. */ int __wt_session_reset_cursors(WT_SESSION_IMPL *session) { WT_CURSOR *cursor; WT_DECL_RET; TAILQ_FOREACH(cursor, &session->cursors, q) { /* Stop when there are no positioned cursors. */ if (session->ncursors == 0) break; WT_TRET(cursor->reset(cursor)); } return (ret); } /* * __wt_session_copy_values -- * Copy values into all positioned cursors, so that they don't keep * transaction IDs pinned. */ int __wt_session_copy_values(WT_SESSION_IMPL *session) { WT_CURSOR *cursor; WT_DECL_RET; TAILQ_FOREACH(cursor, &session->cursors, q) if (F_ISSET(cursor, WT_CURSTD_VALUE_INT)) { F_CLR(cursor, WT_CURSTD_VALUE_INT); WT_RET(__wt_buf_set(session, &cursor->value, cursor->value.data, cursor->value.size)); F_SET(cursor, WT_CURSTD_VALUE_EXT); } return (ret); } /* * __session_clear -- * Clear a session structure. */ static void __session_clear(WT_SESSION_IMPL *session) { /* * There's no serialization support around the review of the hazard * array, which means threads checking for hazard pointers first check * the active field (which may be 0) and then use the hazard pointer * (which cannot be NULL). * * Additionally, the session structure can include information that * persists past the session's end-of-life, stored as part of page * splits. * * For these reasons, be careful when clearing the session structure. */ memset(session, 0, WT_SESSION_CLEAR_SIZE(session)); session->hazard_size = 0; session->nhazard = 0; } /* * __session_close -- * WT_SESSION->close method. */ static int __session_close(WT_SESSION *wt_session, const char *config) { WT_CONNECTION_IMPL *conn; WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION_IMPL *session; conn = (WT_CONNECTION_IMPL *)wt_session->connection; session = (WT_SESSION_IMPL *)wt_session; SESSION_API_CALL(session, close, config, cfg); WT_UNUSED(cfg); /* Rollback any active transaction. */ if (F_ISSET(&session->txn, WT_TXN_RUNNING)) WT_TRET(__session_rollback_transaction(wt_session, NULL)); /* * Also release any pinned transaction ID from a non-transactional * operation. */ if (conn->txn_global.states != NULL) __wt_txn_release_snapshot(session); /* Close all open cursors. */ while ((cursor = TAILQ_FIRST(&session->cursors)) != NULL) { /* * Notify the user that we are closing the cursor handle * via the registered close callback. */ if (session->event_handler->handle_close != NULL) WT_TRET(session->event_handler->handle_close( session->event_handler, wt_session, cursor)); WT_TRET(cursor->close(cursor)); } WT_ASSERT(session, session->ncursors == 0); /* Discard cached handles. */ __wt_session_close_cache(session); /* Close all tables. */ WT_TRET(__wt_schema_close_tables(session)); /* Discard metadata tracking. */ __wt_meta_track_discard(session); /* Discard scratch buffers, error memory. */ __wt_scr_discard(session); __wt_buf_free(session, &session->err); /* Free transaction information. */ __wt_txn_destroy(session); /* Confirm we're not holding any hazard pointers. */ __wt_hazard_close(session); /* Cleanup */ if (session->block_manager_cleanup != NULL) WT_TRET(session->block_manager_cleanup(session)); if (session->reconcile_cleanup != NULL) WT_TRET(session->reconcile_cleanup(session)); /* Destroy the thread's mutex. */ WT_TRET(__wt_cond_destroy(session, &session->cond)); /* The API lock protects opening and closing of sessions. */ __wt_spin_lock(session, &conn->api_lock); /* Decrement the count of open sessions. */ WT_STAT_FAST_CONN_DECR(session, session_open); /* * Sessions are re-used, clear the structure: the clear sets the active * field to 0, which will exclude the hazard array from review by the * eviction thread. Because some session fields are accessed by other * threads, the structure must be cleared carefully. * * We don't need to publish here, because regardless of the active field * being non-zero, the hazard pointer is always valid. */ __session_clear(session); session = conn->default_session; /* * Decrement the count of active sessions if that's possible: a session * being closed may or may not be at the end of the array, step toward * the beginning of the array until we reach an active session. */ while (conn->sessions[conn->session_cnt - 1].active == 0) if (--conn->session_cnt == 0) break; __wt_spin_unlock(session, &conn->api_lock); err: API_END_RET_NOTFOUND_MAP(session, ret); } /* * __session_reconfigure -- * WT_SESSION->reconfigure method. */ static int __session_reconfigure(WT_SESSION *wt_session, const char *config) { WT_CONFIG_ITEM cval; WT_DECL_RET; WT_SESSION_IMPL *session; session = (WT_SESSION_IMPL *)wt_session; SESSION_API_CALL(session, reconfigure, config, cfg); if (F_ISSET(&session->txn, WT_TXN_RUNNING)) WT_ERR_MSG(session, EINVAL, "transaction in progress"); WT_TRET(__wt_session_reset_cursors(session)); WT_ERR(__wt_config_gets_def(session, cfg, "isolation", 0, &cval)); if (cval.len != 0) session->isolation = session->txn.isolation = WT_STRING_MATCH("snapshot", cval.str, cval.len) ? WT_ISO_SNAPSHOT : WT_STRING_MATCH("read-uncommitted", cval.str, cval.len) ? WT_ISO_READ_UNCOMMITTED : WT_ISO_READ_COMMITTED; err: API_END_RET_NOTFOUND_MAP(session, ret); } /* * __wt_open_cursor -- * Internal version of WT_SESSION::open_cursor. */ int __wt_open_cursor(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) { WT_COLGROUP *colgroup; WT_DATA_SOURCE *dsrc; WT_DECL_RET; *cursorp = NULL; /* * Open specific cursor types we know about, or call the generic data * source open function. * * Unwind a set of string comparisons into a switch statement hoping * the compiler can make it fast, but list the common choices first * instead of sorting so if/else patterns are still fast. */ switch (uri[0]) { /* * Common cursor types. */ case 't': if (WT_PREFIX_MATCH(uri, "table:")) WT_RET(__wt_curtable_open(session, uri, cfg, cursorp)); break; case 'c': if (WT_PREFIX_MATCH(uri, "colgroup:")) { /* * Column groups are a special case: open a cursor on * the underlying data source. */ WT_RET(__wt_schema_get_colgroup( session, uri, 0, NULL, &colgroup)); WT_RET(__wt_open_cursor( session, colgroup->source, owner, cfg, cursorp)); } else if (WT_PREFIX_MATCH(uri, "config:")) WT_RET(__wt_curconfig_open( session, uri, cfg, cursorp)); break; case 'i': if (WT_PREFIX_MATCH(uri, "index:")) WT_RET(__wt_curindex_open( session, uri, owner, cfg, cursorp)); break; case 'l': if (WT_PREFIX_MATCH(uri, "lsm:")) WT_RET(__wt_clsm_open( session, uri, owner, cfg, cursorp)); else if (WT_PREFIX_MATCH(uri, "log:")) WT_RET(__wt_curlog_open(session, uri, cfg, cursorp)); break; /* * Less common cursor types. */ case 'f': if (WT_PREFIX_MATCH(uri, "file:")) WT_RET(__wt_curfile_open( session, uri, owner, cfg, cursorp)); break; case 'm': if (WT_PREFIX_MATCH(uri, WT_METADATA_URI)) WT_RET(__wt_curmetadata_open( session, uri, owner, cfg, cursorp)); break; case 'b': if (WT_PREFIX_MATCH(uri, "backup:")) WT_RET(__wt_curbackup_open( session, uri, cfg, cursorp)); break; case 's': if (WT_PREFIX_MATCH(uri, "statistics:")) WT_RET(__wt_curstat_open(session, uri, cfg, cursorp)); break; default: break; } if (*cursorp == NULL && (dsrc = __wt_schema_get_source(session, uri)) != NULL) WT_RET(dsrc->open_cursor == NULL ? __wt_object_unsupported(session, uri) : __wt_curds_open(session, uri, owner, cfg, dsrc, cursorp)); if (*cursorp == NULL) return (__wt_bad_object_type(session, uri)); /* * When opening simple tables, the table code calls this function on the * underlying data source, in which case the application's URI has been * copied. */ if ((*cursorp)->uri == NULL && (ret = __wt_strdup(session, uri, &(*cursorp)->uri)) != 0) WT_TRET((*cursorp)->close(*cursorp)); return (ret); } /* * __session_open_cursor -- * WT_SESSION->open_cursor method. */ static int __session_open_cursor(WT_SESSION *wt_session, const char *uri, WT_CURSOR *to_dup, const char *config, WT_CURSOR **cursorp) { WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION_IMPL *session; cursor = *cursorp = NULL; session = (WT_SESSION_IMPL *)wt_session; SESSION_API_CALL(session, open_cursor, config, cfg); if ((to_dup == NULL && uri == NULL) || (to_dup != NULL && uri != NULL)) WT_ERR_MSG(session, EINVAL, "should be passed either a URI or a cursor to duplicate, " "but not both"); if (to_dup != NULL) { uri = to_dup->uri; if (!WT_PREFIX_MATCH(uri, "colgroup:") && !WT_PREFIX_MATCH(uri, "index:") && !WT_PREFIX_MATCH(uri, "file:") && !WT_PREFIX_MATCH(uri, "lsm:") && !WT_PREFIX_MATCH(uri, WT_METADATA_URI) && !WT_PREFIX_MATCH(uri, "table:") && __wt_schema_get_source(session, uri) == NULL) WT_ERR(__wt_bad_object_type(session, uri)); } WT_ERR(__wt_open_cursor(session, uri, NULL, cfg, &cursor)); if (to_dup != NULL) WT_ERR(__wt_cursor_dup_position(to_dup, cursor)); *cursorp = cursor; if (0) { err: if (cursor != NULL) WT_TRET(cursor->close(cursor)); } /* * Opening a cursor on a non-existent data source will set ret to * either of ENOENT or WT_NOTFOUND at this point. However, * applications may reasonably do this inside a transaction to check * for the existence of a table or index. * * Prefer WT_NOTFOUND here: that does not force running transactions to * roll back. It will be mapped back to ENOENT. */ if (ret == ENOENT) ret = WT_NOTFOUND; API_END_RET_NOTFOUND_MAP(session, ret); } /* * __wt_session_create_strip -- * Discard any configuration information from a schema entry that is not * applicable to an session.create call, here for the wt dump command utility, * which only wants to dump the schema information needed for load. */ int __wt_session_create_strip(WT_SESSION *wt_session, const char *v1, const char *v2, char **value_ret) { WT_SESSION_IMPL *session = (WT_SESSION_IMPL *)wt_session; const char *cfg[] = { WT_CONFIG_BASE(session, session_create), v1, v2, NULL }; return (__wt_config_collapse(session, cfg, value_ret)); } /* * __session_create -- * WT_SESSION->create method. */ static int __session_create(WT_SESSION *wt_session, const char *uri, const char *config) { WT_CONFIG_ITEM cval; WT_DECL_RET; WT_SESSION_IMPL *session; session = (WT_SESSION_IMPL *)wt_session; SESSION_API_CALL(session, create, config, cfg); WT_UNUSED(cfg); /* Disallow objects in the WiredTiger name space. */ WT_ERR(__wt_str_name_check(session, uri)); /* * Type configuration only applies to tables, column groups and indexes. * We don't want applications to attempt to layer LSM on top of their * extended data-sources, and the fact we allow LSM as a valid URI is an * invitation to that mistake: nip it in the bud. */ if (!WT_PREFIX_MATCH(uri, "colgroup:") && !WT_PREFIX_MATCH(uri, "index:") && !WT_PREFIX_MATCH(uri, "table:")) { /* * We can't disallow type entirely, a configuration string might * innocently include it, for example, a dump/load pair. If the * URI type prefix and the type are the same, let it go. */ if ((ret = __wt_config_getones(session, config, "type", &cval)) == 0 && (strncmp(uri, cval.str, cval.len) != 0 || uri[cval.len] != ':')) WT_ERR_MSG(session, EINVAL, "%s: unsupported type configuration", uri); WT_ERR_NOTFOUND_OK(ret); } WT_WITH_SCHEMA_LOCK(session, WT_WITH_TABLE_LOCK(session, ret = __wt_schema_create(session, uri, config))); err: API_END_RET_NOTFOUND_MAP(session, ret); } /* * __session_log_flush -- * WT_SESSION->log_flush method. */ static int __session_log_flush(WT_SESSION *wt_session, const char *config) { WT_CONFIG_ITEM cval; WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_SESSION_IMPL *session; uint32_t flags; session = (WT_SESSION_IMPL *)wt_session; SESSION_API_CALL(session, log_flush, config, cfg); WT_STAT_FAST_CONN_INCR(session, log_flush); conn = S2C(session); flags = 0; /* * If logging is not enabled there is nothing to do. */ if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) WT_ERR_MSG(session, EINVAL, "logging not enabled"); WT_ERR(__wt_config_gets_def(session, cfg, "sync", 0, &cval)); if (WT_STRING_MATCH("on", cval.str, cval.len)) flags = WT_LOG_FSYNC; else if (WT_STRING_MATCH("off", cval.str, cval.len)) flags = WT_LOG_FLUSH; ret = __wt_log_flush(session, flags); err: API_END_RET(session, ret); } /* * __session_log_printf -- * WT_SESSION->log_printf method. */ static int __session_log_printf(WT_SESSION *wt_session, const char *fmt, ...) WT_GCC_FUNC_ATTRIBUTE((format (printf, 2, 3))) { WT_SESSION_IMPL *session; WT_DECL_RET; va_list ap; session = (WT_SESSION_IMPL *)wt_session; SESSION_API_CALL_NOCONF(session, log_printf); va_start(ap, fmt); ret = __wt_log_vprintf(session, fmt, ap); va_end(ap); err: API_END_RET(session, ret); } /* * __session_rename -- * WT_SESSION->rename method. */ static int __session_rename(WT_SESSION *wt_session, const char *uri, const char *newuri, const char *config) { WT_DECL_RET; WT_SESSION_IMPL *session; session = (WT_SESSION_IMPL *)wt_session; SESSION_API_CALL(session, rename, config, cfg); /* Disallow objects in the WiredTiger name space. */ WT_ERR(__wt_str_name_check(session, uri)); WT_ERR(__wt_str_name_check(session, newuri)); WT_WITH_SCHEMA_LOCK(session, WT_WITH_TABLE_LOCK(session, ret = __wt_schema_rename(session, uri, newuri, cfg))); err: API_END_RET_NOTFOUND_MAP(session, ret); } /* * __session_compact -- * WT_SESSION->compact method. */ static int __session_compact(WT_SESSION *wt_session, const char *uri, const char *config) { WT_SESSION_IMPL *session; session = (WT_SESSION_IMPL *)wt_session; /* Disallow objects in the WiredTiger name space. */ WT_RET(__wt_str_name_check(session, uri)); if (!WT_PREFIX_MATCH(uri, "colgroup:") && !WT_PREFIX_MATCH(uri, "file:") && !WT_PREFIX_MATCH(uri, "index:") && !WT_PREFIX_MATCH(uri, "lsm:") && !WT_PREFIX_MATCH(uri, "table:")) return (__wt_bad_object_type(session, uri)); return (__wt_session_compact(wt_session, uri, config)); } /* * __session_drop -- * WT_SESSION->drop method. */ static int __session_drop(WT_SESSION *wt_session, const char *uri, const char *config) { WT_DECL_RET; WT_SESSION_IMPL *session; session = (WT_SESSION_IMPL *)wt_session; SESSION_API_CALL(session, drop, config, cfg); /* Disallow objects in the WiredTiger name space. */ WT_ERR(__wt_str_name_check(session, uri)); WT_WITH_SCHEMA_LOCK(session, WT_WITH_TABLE_LOCK(session, ret = __wt_schema_drop(session, uri, cfg))); err: /* Note: drop operations cannot be unrolled (yet?). */ API_END_RET_NOTFOUND_MAP(session, ret); } /* * __session_salvage -- * WT_SESSION->salvage method. */ static int __session_salvage(WT_SESSION *wt_session, const char *uri, const char *config) { WT_DECL_RET; WT_SESSION_IMPL *session; session = (WT_SESSION_IMPL *)wt_session; SESSION_API_CALL(session, salvage, config, cfg); /* Block out checkpoints to avoid spurious EBUSY errors. */ __wt_spin_lock(session, &S2C(session)->checkpoint_lock); WT_WITH_SCHEMA_LOCK(session, ret = __wt_schema_worker(session, uri, __wt_salvage, NULL, cfg, WT_DHANDLE_EXCLUSIVE | WT_BTREE_SALVAGE)); __wt_spin_unlock(session, &S2C(session)->checkpoint_lock); err: API_END_RET_NOTFOUND_MAP(session, ret); } /* * __session_truncate -- * WT_SESSION->truncate method. */ static int __session_truncate(WT_SESSION *wt_session, const char *uri, WT_CURSOR *start, WT_CURSOR *stop, const char *config) { WT_DECL_RET; WT_SESSION_IMPL *session; WT_CURSOR *cursor; int cmp, local_start; local_start = 0; session = (WT_SESSION_IMPL *)wt_session; SESSION_TXN_API_CALL(session, truncate, config, cfg); /* * If the URI is specified, we don't need a start/stop, if start/stop * is specified, we don't need a URI. One exception is the log URI * which may truncate (archive) log files for a backup cursor. * * If no URI is specified, and both cursors are specified, start/stop * must reference the same object. * * Any specified cursor must have been initialized. */ if ((uri == NULL && start == NULL && stop == NULL) || (uri != NULL && !WT_PREFIX_MATCH(uri, "log:") && (start != NULL || stop != NULL))) WT_ERR_MSG(session, EINVAL, "the truncate method should be passed either a URI or " "start/stop cursors, but not both"); if (uri != NULL) { /* Disallow objects in the WiredTiger name space. */ WT_ERR(__wt_str_name_check(session, uri)); if (WT_PREFIX_MATCH(uri, "log:")) { /* * Verify the user only gave the URI prefix and not * a specific target name after that. */ if (!WT_STREQ(uri, "log:")) WT_ERR_MSG(session, EINVAL, "the truncate method should not specify any" "target after the log: URI prefix."); ret = __wt_log_truncate_files(session, start, cfg); } else { /* Wait for checkpoints to avoid EBUSY errors. */ __wt_spin_lock(session, &S2C(session)->checkpoint_lock); WT_WITH_SCHEMA_LOCK(session, ret = __wt_schema_truncate(session, uri, cfg)); __wt_spin_unlock(session, &S2C(session)->checkpoint_lock); } goto done; } /* * Cursor truncate is only supported for some objects, check for the * supporting methods we need, range_truncate and compare. */ cursor = start == NULL ? stop : start; if (cursor->compare == NULL) WT_ERR(__wt_bad_object_type(session, cursor->uri)); /* * If both cursors set, check they're correctly ordered with respect to * each other. We have to test this before any search, the search can * change the initial cursor position. * * Rather happily, the compare routine will also confirm the cursors * reference the same object and the keys are set. */ if (start != NULL && stop != NULL) { WT_ERR(start->compare(start, stop, &cmp)); if (cmp > 0) WT_ERR_MSG(session, EINVAL, "the start cursor position is after the stop " "cursor position"); } /* * Truncate does not require keys actually exist so that applications * can discard parts of the object's name space without knowing exactly * what records currently appear in the object. For this reason, do a * search-near, rather than a search. Additionally, we have to correct * after calling search-near, to position the start/stop cursors on the * next record greater than/less than the original key. */ if (start != NULL) { WT_ERR(start->search_near(start, &cmp)); if (cmp < 0 && (ret = start->next(start)) != 0) { WT_ERR_NOTFOUND_OK(ret); goto done; } } if (stop != NULL) { WT_ERR(stop->search_near(stop, &cmp)); if (cmp > 0 && (ret = stop->prev(stop)) != 0) { WT_ERR_NOTFOUND_OK(ret); goto done; } } /* * We always truncate in the forward direction because the underlying * data structures can move through pages faster forward than backward. * If we don't have a start cursor, create one and position it at the * first record. */ if (start == NULL) { WT_ERR(__session_open_cursor( wt_session, stop->uri, NULL, NULL, &start)); local_start = 1; WT_ERR(start->next(start)); } /* * If the start/stop keys cross, we're done, the range must be empty. */ if (stop != NULL) { WT_ERR(start->compare(start, stop, &cmp)); if (cmp > 0) goto done; } WT_ERR(__wt_schema_range_truncate(session, start, stop)); done: err: TXN_API_END_RETRY(session, ret, 0); /* * Close any locally-opened start cursor. */ if (local_start) WT_TRET(start->close(start)); /* * Only map WT_NOTFOUND to ENOENT if a URI was specified. */ return (ret == WT_NOTFOUND && uri != NULL ? ENOENT : ret); } /* * __session_upgrade -- * WT_SESSION->upgrade method. */ static int __session_upgrade(WT_SESSION *wt_session, const char *uri, const char *config) { WT_DECL_RET; WT_SESSION_IMPL *session; session = (WT_SESSION_IMPL *)wt_session; SESSION_API_CALL(session, upgrade, config, cfg); /* Block out checkpoints to avoid spurious EBUSY errors. */ __wt_spin_lock(session, &S2C(session)->checkpoint_lock); WT_WITH_SCHEMA_LOCK(session, ret = __wt_schema_worker(session, uri, __wt_upgrade, NULL, cfg, WT_DHANDLE_EXCLUSIVE | WT_BTREE_UPGRADE)); __wt_spin_unlock(session, &S2C(session)->checkpoint_lock); err: API_END_RET_NOTFOUND_MAP(session, ret); } /* * __session_verify -- * WT_SESSION->verify method. */ static int __session_verify(WT_SESSION *wt_session, const char *uri, const char *config) { WT_DECL_RET; WT_SESSION_IMPL *session; session = (WT_SESSION_IMPL *)wt_session; SESSION_API_CALL(session, verify, config, cfg); /* Block out checkpoints to avoid spurious EBUSY errors. */ __wt_spin_lock(session, &S2C(session)->checkpoint_lock); WT_WITH_SCHEMA_LOCK(session, ret = __wt_schema_worker(session, uri, __wt_verify, NULL, cfg, WT_DHANDLE_EXCLUSIVE | WT_BTREE_VERIFY)); __wt_spin_unlock(session, &S2C(session)->checkpoint_lock); err: API_END_RET_NOTFOUND_MAP(session, ret); } /* * __session_begin_transaction -- * WT_SESSION->begin_transaction method. */ static int __session_begin_transaction(WT_SESSION *wt_session, const char *config) { WT_DECL_RET; WT_SESSION_IMPL *session; session = (WT_SESSION_IMPL *)wt_session; SESSION_API_CALL(session, begin_transaction, config, cfg); WT_STAT_FAST_CONN_INCR(session, txn_begin); if (F_ISSET(&session->txn, WT_TXN_RUNNING)) WT_ERR_MSG(session, EINVAL, "Transaction already running"); ret = __wt_txn_begin(session, cfg); err: API_END_RET(session, ret); } /* * __session_commit_transaction -- * WT_SESSION->commit_transaction method. */ static int __session_commit_transaction(WT_SESSION *wt_session, const char *config) { WT_DECL_RET; WT_SESSION_IMPL *session; WT_TXN *txn; session = (WT_SESSION_IMPL *)wt_session; SESSION_API_CALL(session, commit_transaction, config, cfg); WT_STAT_FAST_CONN_INCR(session, txn_commit); txn = &session->txn; if (F_ISSET(txn, WT_TXN_ERROR)) { __wt_errx(session, "failed transaction requires rollback"); ret = EINVAL; } if (ret == 0) ret = __wt_txn_commit(session, cfg); else { WT_TRET(__wt_session_reset_cursors(session)); WT_TRET(__wt_txn_rollback(session, cfg)); } err: API_END_RET(session, ret); } /* * __session_rollback_transaction -- * WT_SESSION->rollback_transaction method. */ static int __session_rollback_transaction(WT_SESSION *wt_session, const char *config) { WT_DECL_RET; WT_SESSION_IMPL *session; session = (WT_SESSION_IMPL *)wt_session; SESSION_API_CALL(session, rollback_transaction, config, cfg); WT_STAT_FAST_CONN_INCR(session, txn_rollback); WT_TRET(__wt_session_reset_cursors(session)); WT_TRET(__wt_txn_rollback(session, cfg)); err: API_END_RET(session, ret); } /* * __session_transaction_pinned_range -- * WT_SESSION->transaction_pinned_range method. */ static int __session_transaction_pinned_range(WT_SESSION *wt_session, uint64_t *prange) { WT_DECL_RET; WT_SESSION_IMPL *session; WT_TXN_STATE *txn_state; uint64_t pinned; session = (WT_SESSION_IMPL *)wt_session; SESSION_API_CALL_NOCONF(session, pinned_range); txn_state = WT_SESSION_TXN_STATE(session); /* Assign pinned to the lesser of id or snap_min */ if (txn_state->id != WT_TXN_NONE && WT_TXNID_LT(txn_state->id, txn_state->snap_min)) pinned = txn_state->id; else pinned = txn_state->snap_min; if (pinned == WT_TXN_NONE) *prange = 0; else *prange = S2C(session)->txn_global.current - pinned; err: API_END_RET(session, ret); } /* * __session_checkpoint -- * WT_SESSION->checkpoint method. */ static int __session_checkpoint(WT_SESSION *wt_session, const char *config) { WT_DECL_RET; WT_SESSION_IMPL *session; WT_TXN *txn; session = (WT_SESSION_IMPL *)wt_session; txn = &session->txn; WT_STAT_FAST_CONN_INCR(session, txn_checkpoint); SESSION_API_CALL(session, checkpoint, config, cfg); /* * Checkpoints require a snapshot to write a transactionally consistent * snapshot of the data. * * We can't use an application's transaction: if it has uncommitted * changes, they will be written in the checkpoint and may appear after * a crash. * * Use a real snapshot transaction: we don't want any chance of the * snapshot being updated during the checkpoint. Eviction is prevented * from evicting anything newer than this because we track the oldest * transaction ID in the system that is not visible to all readers. */ if (F_ISSET(txn, WT_TXN_RUNNING)) WT_ERR_MSG(session, EINVAL, "Checkpoint not permitted in a transaction"); /* * Reset open cursors. Do this explicitly, even though it will happen * implicitly in the call to begin_transaction for the checkpoint, the * checkpoint code will acquire the schema lock before we do that, and * some implementation of WT_CURSOR::reset might need the schema lock. */ WT_ERR(__wt_session_reset_cursors(session)); /* * Don't highjack the session checkpoint thread for eviction. * * Application threads are not generally available for potentially slow * operations, but checkpoint does enough I/O it may be called upon to * perform slow operations for the block manager. */ F_SET(session, WT_SESSION_CAN_WAIT | WT_SESSION_NO_CACHE_CHECK); /* * Only one checkpoint can be active at a time, and checkpoints must run * in the same order as they update the metadata. It's probably a bad * idea to run checkpoints out of multiple threads, but serialize them * here to ensure we don't get into trouble. */ WT_STAT_FAST_CONN_SET(session, txn_checkpoint_running, 1); __wt_spin_lock(session, &S2C(session)->checkpoint_lock); ret = __wt_txn_checkpoint(session, cfg); WT_STAT_FAST_CONN_SET(session, txn_checkpoint_running, 0); __wt_spin_unlock(session, &S2C(session)->checkpoint_lock); err: F_CLR(session, WT_SESSION_CAN_WAIT | WT_SESSION_NO_CACHE_CHECK); API_END_RET_NOTFOUND_MAP(session, ret); } /* * __session_strerror -- * WT_SESSION->strerror method. */ static const char * __session_strerror(WT_SESSION *wt_session, int error) { WT_SESSION_IMPL *session; session = (WT_SESSION_IMPL *)wt_session; return (__wt_strerror(session, error, NULL, 0)); } /* * __wt_open_internal_session -- * Allocate a session for WiredTiger's use. */ int __wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name, int uses_dhandles, int open_metadata, WT_SESSION_IMPL **sessionp) { WT_SESSION_IMPL *session; *sessionp = NULL; WT_RET(__wt_open_session(conn, NULL, NULL, &session)); session->name = name; /* * Public sessions are automatically closed during WT_CONNECTION->close. * If the session handles for internal threads were to go on the public * list, there would be complex ordering issues during close. Set a * flag to avoid this: internal sessions are not closed automatically. */ F_SET(session, WT_SESSION_INTERNAL); /* * Some internal threads must keep running after we close all data * handles. Make sure these threads don't open their own handles. */ if (!uses_dhandles) F_SET(session, WT_SESSION_NO_DATA_HANDLES); /* * Acquiring the metadata handle requires the schema lock; we've seen * problems in the past where a worker thread has acquired the schema * lock unexpectedly, relatively late in the run, and deadlocked. Be * defensive, get it now. The metadata file may not exist when the * connection first creates its default session or the shared cache * pool creates its sessions, let our caller decline this work. */ if (open_metadata) { WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_SCHEMA)); WT_RET(__wt_metadata_open(session)); } *sessionp = session; return (0); } /* * __wt_open_session -- * Allocate a session handle. The internal parameter is used for sessions * opened by WiredTiger for its own use. */ int __wt_open_session(WT_CONNECTION_IMPL *conn, WT_EVENT_HANDLER *event_handler, const char *config, WT_SESSION_IMPL **sessionp) { static const WT_SESSION stds = { NULL, NULL, __session_close, __session_reconfigure, __session_strerror, __session_open_cursor, __session_create, __session_compact, __session_drop, __session_log_flush, __session_log_printf, __session_rename, __session_salvage, __session_truncate, __session_upgrade, __session_verify, __session_begin_transaction, __session_commit_transaction, __session_rollback_transaction, __session_checkpoint, __session_transaction_pinned_range }; WT_DECL_RET; WT_SESSION_IMPL *session, *session_ret; uint32_t i; *sessionp = NULL; session = conn->default_session; session_ret = NULL; __wt_spin_lock(session, &conn->api_lock); /* * Make sure we don't try to open a new session after the application * closes the connection. This is particularly intended to catch * cases where server threads open sessions. */ WT_ASSERT(session, F_ISSET(conn, WT_CONN_SERVER_RUN)); /* Find the first inactive session slot. */ for (session_ret = conn->sessions, i = 0; i < conn->session_size; ++session_ret, ++i) if (!session_ret->active) break; if (i == conn->session_size) WT_ERR_MSG(session, ENOMEM, "only configured to support %" PRIu32 " sessions" " (including %" PRIu32 " internal)", conn->session_size, WT_NUM_INTERNAL_SESSIONS); /* * If the active session count is increasing, update it. We don't worry * about correcting the session count on error, as long as we don't mark * this session as active, we'll clean it up on close. */ if (i >= conn->session_cnt) /* Defend against off-by-one errors. */ conn->session_cnt = i + 1; session_ret->id = i; session_ret->iface = stds; session_ret->iface.connection = &conn->iface; WT_ERR(__wt_cond_alloc(session, "session", 0, &session_ret->cond)); if (WT_SESSION_FIRST_USE(session_ret)) __wt_random_init(&session_ret->rnd); __wt_event_handler_set(session_ret, event_handler == NULL ? session->event_handler : event_handler); TAILQ_INIT(&session_ret->cursors); TAILQ_INIT(&session_ret->dhandles); /* * If we don't have one, allocate the dhandle hash array. * Allocate the table hash array as well. */ if (session_ret->dhhash == NULL) WT_ERR(__wt_calloc(session_ret, WT_HASH_ARRAY_SIZE, sizeof(struct __dhandles_hash), &session_ret->dhhash)); if (session_ret->tablehash == NULL) WT_ERR(__wt_calloc(session_ret, WT_HASH_ARRAY_SIZE, sizeof(struct __tables_hash), &session_ret->tablehash)); for (i = 0; i < WT_HASH_ARRAY_SIZE; i++) { TAILQ_INIT(&session_ret->dhhash[i]); TAILQ_INIT(&session_ret->tablehash[i]); } /* Initialize transaction support: default to read-committed. */ session_ret->isolation = WT_ISO_READ_COMMITTED; WT_ERR(__wt_txn_init(session_ret)); /* * The session's hazard pointer memory isn't discarded during normal * session close because access to it isn't serialized. Allocate the * first time we open this session. */ if (WT_SESSION_FIRST_USE(session_ret)) WT_ERR(__wt_calloc_def( session, conn->hazard_max, &session_ret->hazard)); /* * Set an initial size for the hazard array. It will be grown as * required up to hazard_max. The hazard_size is reset on close, since * __wt_hazard_close ensures the array is cleared - so it is safe to * reset the starting size on each open. */ session_ret->hazard_size = WT_HAZARD_INCR; /* * Configuration: currently, the configuration for open_session is the * same as session.reconfigure, so use that function. */ if (config != NULL) WT_ERR( __session_reconfigure((WT_SESSION *)session_ret, config)); session_ret->name = NULL; /* * Publish: make the entry visible to server threads. There must be a * barrier for two reasons, to ensure structure fields are set before * any other thread will consider the session, and to push the session * count to ensure the eviction thread can't review too few slots. */ WT_PUBLISH(session_ret->active, 1); WT_STATIC_ASSERT(offsetof(WT_SESSION_IMPL, iface) == 0); *sessionp = session_ret; WT_STAT_FAST_CONN_INCR(session, session_open); err: __wt_spin_unlock(session, &conn->api_lock); return (ret); }