/*- * Copyright (c) 2014-present MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * * See the file LICENSE for redistribution information. */ /* * WT_DATA_HANDLE_CACHE -- * Per-session cache of handles to avoid synchronization when opening * cursors. */ struct __wt_data_handle_cache { WT_DATA_HANDLE *dhandle; TAILQ_ENTRY(__wt_data_handle_cache) q; TAILQ_ENTRY(__wt_data_handle_cache) hashq; }; /* * WT_HAZARD -- * A hazard pointer. */ struct __wt_hazard { WT_REF *ref; /* Page reference */ #ifdef HAVE_DIAGNOSTIC const char *func; /* Function/line hazard acquired */ int line; #endif }; /* Get the connection implementation for a session */ #define S2C(session) ((WT_CONNECTION_IMPL *)((WT_SESSION_IMPL *)(session))->iface.connection) /* Get the btree for a session */ #define S2BT(session) ((WT_BTREE *)(session)->dhandle->handle) #define S2BT_SAFE(session) ((session)->dhandle == NULL ? NULL : S2BT(session)) /* Get the file system for a session */ #define S2FS(session) \ ((session)->bucket_storage == NULL ? S2C(session)->file_system : \ (session)->bucket_storage->file_system) typedef TAILQ_HEAD(__wt_cursor_list, __wt_cursor) WT_CURSOR_LIST; /* Number of cursors cached to trigger cursor sweep. */ #define WT_SESSION_CURSOR_SWEEP_COUNTDOWN 40 /* Minimum number of buckets to visit during a regular cursor sweep. */ #define WT_SESSION_CURSOR_SWEEP_MIN 5 /* Maximum number of buckets to visit during a regular cursor sweep. */ #define WT_SESSION_CURSOR_SWEEP_MAX 64 /* * WT_SESSION_IMPL -- * Implementation of WT_SESSION. */ struct __wt_session_impl { WT_SESSION iface; WT_EVENT_HANDLER *event_handler; /* Application's event handlers */ void *lang_private; /* Language specific private storage */ void (*format_private)(WT_CURSOR *, int, void *); /* Format test program private callback. */ void *format_private_arg; u_int active; /* Non-zero if the session is in-use */ const char *name; /* Name */ const char *lastop; /* Last operation */ uint32_t id; /* UID, offset in session array */ uint64_t cache_wait_us; /* Wait time for cache for current operation */ uint64_t operation_start_us; /* Operation start */ uint64_t operation_timeout_us; /* Maximum operation period before rollback */ u_int api_call_counter; /* Depth of api calls */ WT_DATA_HANDLE *dhandle; /* Current data handle */ WT_BUCKET_STORAGE *bucket_storage; /* Current bucket storage and file system */ /* * Each session keeps a cache of data handles. The set of handles can grow quite large so we * maintain both a simple list and a hash table of lists. The hash table key is based on a hash * of the data handle's URI. Though all hash entries are discarded on session close, the hash * table list itself is kept in allocated memory that lives across session close - so it is * declared further down. */ /* Session handle reference list */ TAILQ_HEAD(__dhandles, __wt_data_handle_cache) dhandles; uint64_t last_sweep; /* Last sweep for dead handles */ struct timespec last_epoch; /* Last epoch time returned */ WT_CURSOR_LIST cursors; /* Cursors closed with the session */ u_int ncursors; /* Count of active file cursors. */ uint32_t cursor_sweep_countdown; /* Countdown to cursor sweep */ uint32_t cursor_sweep_position; /* Position in cursor_cache for sweep */ uint64_t last_cursor_big_sweep; /* Last big sweep for dead cursors */ uint64_t last_cursor_sweep; /* Last regular sweep for dead cursors */ u_int sweep_warning_5min; /* Whether the session was without sweep for 5 min. */ u_int sweep_warning_60min; /* Whether the session was without sweep for 60 min. */ WT_CURSOR_BACKUP *bkp_cursor; /* Hot backup cursor */ WT_COMPACT_STATE *compact; /* Compaction information */ enum { WT_COMPACT_NONE = 0, WT_COMPACT_RUNNING, WT_COMPACT_SUCCESS } compact_state; WT_IMPORT_LIST *import_list; /* List of metadata entries to import from file. */ u_int hs_cursor_counter; /* Number of open history store cursors */ WT_CURSOR *meta_cursor; /* Metadata file */ void *meta_track; /* Metadata operation tracking */ void *meta_track_next; /* Current position */ void *meta_track_sub; /* Child transaction / save point */ size_t meta_track_alloc; /* Currently allocated */ int meta_track_nest; /* Nesting level of meta transaction */ #define WT_META_TRACKING(session) ((session)->meta_track_next != NULL) /* Current rwlock for callback. */ WT_RWLOCK *current_rwlock; uint8_t current_rwticket; WT_ITEM **scratch; /* Temporary memory for any function */ u_int scratch_alloc; /* Currently allocated */ size_t scratch_cached; /* Scratch bytes cached */ #ifdef HAVE_DIAGNOSTIC /* * Variables used to look for violations of the contract that a session is only used by a single * session at once. */ volatile uintmax_t api_tid; volatile uint32_t api_enter_refcnt; /* * It's hard to figure out from where a buffer was allocated after it's leaked, so in diagnostic * mode we track them; DIAGNOSTIC can't simply add additional fields to WT_ITEM structures * because they are visible to applications, create a parallel structure instead. */ struct __wt_scratch_track { const char *func; /* Allocating function, line */ int line; } * scratch_track; #endif /* Record the important timestamps of each stage in an reconciliation. */ struct __wt_reconcile_timeline { uint64_t reconcile_start; uint64_t image_build_start; uint64_t image_build_finish; uint64_t hs_wrapup_start; uint64_t hs_wrapup_finish; uint64_t reconcile_finish; uint64_t total_reentry_hs_eviction_time; } reconcile_timeline; /* * Record the important timestamps of each stage in an eviction. If an eviction takes a long * time and times out, we can trace the time usage of each stage from this information. */ struct __wt_evict_timeline { uint64_t evict_start; uint64_t reentry_hs_evict_start; uint64_t reentry_hs_evict_finish; uint64_t evict_finish; bool reentry_hs_eviction; } evict_timeline; WT_ITEM err; /* Error buffer */ WT_TXN_ISOLATION isolation; WT_TXN *txn; /* Transaction state */ void *block_manager; /* Block-manager support */ int (*block_manager_cleanup)(WT_SESSION_IMPL *); const char *hs_checkpoint; /* History store checkpoint name, during checkpoint cursor ops */ uint64_t checkpoint_write_gen; /* Write generation override, during checkpoint cursor ops */ /* Checkpoint handles */ WT_DATA_HANDLE **ckpt_handle; /* Handle list */ u_int ckpt_handle_next; /* Next empty slot */ size_t ckpt_handle_allocated; /* Bytes allocated */ /* Named checkpoint drop list, during a checkpoint */ WT_ITEM *ckpt_drop_list; /* Checkpoint time of current checkpoint, during a checkpoint */ uint64_t current_ckpt_sec; /* * Operations acting on handles. * * The preferred pattern is to gather all of the required handles at the beginning of an * operation, then drop any other locks, perform the operation, then release the handles. This * cannot be easily merged with the list of checkpoint handles because some operations (such as * compact) do checkpoints internally. */ WT_DATA_HANDLE **op_handle; /* Handle list */ u_int op_handle_next; /* Next empty slot */ size_t op_handle_allocated; /* Bytes allocated */ void *reconcile; /* Reconciliation support */ int (*reconcile_cleanup)(WT_SESSION_IMPL *); /* Salvage support. */ void *salvage_track; /* Sessions have an associated statistics bucket based on its ID. */ u_int stat_bucket; /* Statistics bucket offset */ uint64_t cache_max_wait_us; /* Maximum time an operation waits for space in cache */ #ifdef HAVE_DIAGNOSTIC uint8_t dump_raw; /* Configure debugging page dump */ #endif #ifdef HAVE_UNITTEST_ASSERTS /* * Unit testing assertions requires overriding abort logic and instead capturing this information to * be checked by the unit test. */ #define WT_SESSION_UNITTEST_BUF_LEN 100 bool unittest_assert_hit; char unittest_assert_msg[WT_SESSION_UNITTEST_BUF_LEN]; #endif /* AUTOMATIC FLAG VALUE GENERATION START 0 */ #define WT_SESSION_LOCKED_CHECKPOINT 0x0001u #define WT_SESSION_LOCKED_HANDLE_LIST_READ 0x0002u #define WT_SESSION_LOCKED_HANDLE_LIST_WRITE 0x0004u #define WT_SESSION_LOCKED_HOTBACKUP_READ 0x0008u #define WT_SESSION_LOCKED_HOTBACKUP_WRITE 0x0010u #define WT_SESSION_LOCKED_METADATA 0x0020u #define WT_SESSION_LOCKED_PASS 0x0040u #define WT_SESSION_LOCKED_SCHEMA 0x0080u #define WT_SESSION_LOCKED_SLOT 0x0100u #define WT_SESSION_LOCKED_TABLE_READ 0x0200u #define WT_SESSION_LOCKED_TABLE_WRITE 0x0400u #define WT_SESSION_LOCKED_TURTLE 0x0800u #define WT_SESSION_NO_SCHEMA_LOCK 0x1000u /*AUTOMATIC FLAG VALUE GENERATION STOP 32 */ uint32_t lock_flags; /* AUTOMATIC FLAG VALUE GENERATION START 0 */ #define WT_SESSION_BACKUP_CURSOR 0x00001u #define WT_SESSION_BACKUP_DUP 0x00002u #define WT_SESSION_CACHE_CURSORS 0x00004u #define WT_SESSION_CAN_WAIT 0x00008u #define WT_SESSION_DEBUG_DO_NOT_CLEAR_TXN_ID 0x00010u #define WT_SESSION_DEBUG_RELEASE_EVICT 0x00020u #define WT_SESSION_EVICTION 0x00040u #define WT_SESSION_IGNORE_CACHE_SIZE 0x00080u #define WT_SESSION_IMPORT 0x00100u #define WT_SESSION_IMPORT_REPAIR 0x00200u #define WT_SESSION_INTERNAL 0x00400u #define WT_SESSION_LOGGING_INMEM 0x00800u #define WT_SESSION_NO_DATA_HANDLES 0x01000u #define WT_SESSION_NO_RECONCILE 0x02000u #define WT_SESSION_QUIET_CORRUPT_FILE 0x04000u #define WT_SESSION_READ_WONT_NEED 0x08000u #define WT_SESSION_RESOLVING_TXN 0x10000u #define WT_SESSION_ROLLBACK_TO_STABLE 0x20000u #define WT_SESSION_SCHEMA_TXN 0x40000u /* AUTOMATIC FLAG VALUE GENERATION STOP 32 */ uint32_t flags; /* * All of the following fields live at the end of the structure so it's easier to clear everything * but the fields that persist. */ #define WT_SESSION_CLEAR_SIZE (offsetof(WT_SESSION_IMPL, rnd)) /* * The random number state persists past session close because we don't want to repeatedly use * the same values for skiplist depth when the application isn't caching sessions. */ WT_RAND_STATE rnd; /* Random number generation state */ /* * Hash tables are allocated lazily as sessions are used to keep the size of this structure from * growing too large. */ WT_CURSOR_LIST *cursor_cache; /* Hash table of cached cursors */ /* Hashed handle reference list array */ TAILQ_HEAD(__dhandles_hash, __wt_data_handle_cache) * dhhash; /* Generations manager */ #define WT_GEN_CHECKPOINT 0 /* Checkpoint generation */ #define WT_GEN_COMMIT 1 /* Commit generation */ #define WT_GEN_EVICT 2 /* Eviction generation */ #define WT_GEN_HAZARD 3 /* Hazard pointer */ #define WT_GEN_SPLIT 4 /* Page splits */ #define WT_GENERATIONS 5 /* Total generation manager entries */ volatile uint64_t generations[WT_GENERATIONS]; /* * Session memory persists past session close because it's accessed by threads of control other * than the thread owning the session. For example, btree splits and hazard pointers can "free" * memory that's still in use. In order to eventually free it, it's stashed here with its * generation number; when no thread is reading in generation, the memory can be freed for real. */ struct __wt_session_stash { struct __wt_stash { void *p; /* Memory, length */ size_t len; uint64_t gen; /* Generation */ } * list; size_t cnt; /* Array entries */ size_t alloc; /* Allocated bytes */ } stash[WT_GENERATIONS]; /* * Hazard pointers. * * Hazard information persists past session close because it's accessed by threads of control other * than the thread owning the session. * * Use the non-NULL state of the hazard field to know if the session has previously been * initialized. */ #define WT_SESSION_FIRST_USE(s) ((s)->hazard == NULL) /* * The hazard pointer array grows as necessary, initialize with 250 slots. */ #define WT_SESSION_INITIAL_HAZARD_SLOTS 250 uint32_t hazard_size; /* Hazard pointer array slots */ uint32_t hazard_inuse; /* Hazard pointer array slots in-use */ uint32_t nhazard; /* Count of active hazard pointers */ WT_HAZARD *hazard; /* Hazard pointer array */ /* * Operation tracking. */ WT_OPTRACK_RECORD *optrack_buf; u_int optrackbuf_ptr; uint64_t optrack_offset; WT_FH *optrack_fh; WT_SESSION_STATS stats; }; /* Consider moving this to session_inline.h if it ever appears. */ #define WT_READING_CHECKPOINT(s) \ ((s)->dhandle != NULL && F_ISSET((s)->dhandle, WT_DHANDLE_OPEN) && \ WT_DHANDLE_IS_CHECKPOINT((s)->dhandle))