summaryrefslogtreecommitdiff
path: root/src/third_party/wiredtiger/src/include/session.h
blob: 39e24cfa8b533ddf990458b5baeb754b0c68d150 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
/*-
 * Copyright (c) 2014-2018 MongoDB, Inc.
 * Copyright (c) 2008-2014 WiredTiger, Inc.
 *	All rights reserved.
 *
 * See the file LICENSE for redistribution information.
 */

/*
 * WT_DATA_HANDLE_CACHE --
 *	Per-session cache of handles to avoid synchronization when opening
 *	cursors.
 */
struct __wt_data_handle_cache {
	WT_DATA_HANDLE *dhandle;

	TAILQ_ENTRY(__wt_data_handle_cache) q;
	TAILQ_ENTRY(__wt_data_handle_cache) hashq;
};

/*
 * WT_HAZARD --
 *	A hazard pointer.
 */
struct __wt_hazard {
	WT_REF *ref;			/* Page reference */
#ifdef HAVE_DIAGNOSTIC
	const char *file;		/* File/line where hazard acquired */
	int	    line;
#endif
};

/* Get the connection implementation for a session */
#define	S2C(session)	  ((WT_CONNECTION_IMPL *)(session)->iface.connection)

/* Get the btree for a session */
#define	S2BT(session)	   ((WT_BTREE *)(session)->dhandle->handle)
#define	S2BT_SAFE(session) ((session)->dhandle == NULL ? NULL : S2BT(session))

typedef TAILQ_HEAD(__wt_cursor_list, __wt_cursor)	WT_CURSOR_LIST;

/* Number of cursors cached to trigger cursor sweep. */
#define	WT_SESSION_CURSOR_SWEEP_COUNTDOWN	20

/* Minimum number of buckets to visit during cursor sweep. */
#define	WT_SESSION_CURSOR_SWEEP_MIN		5

/* Maximum number of buckets to visit during cursor sweep. */
#define	WT_SESSION_CURSOR_SWEEP_MAX		32
/*
 * WT_SESSION_IMPL --
 *	Implementation of WT_SESSION.
 */
struct __wt_session_impl {
	WT_SESSION iface;

	void	*lang_private;		/* Language specific private storage */

	u_int active;			/* Non-zero if the session is in-use */

	const char *name;		/* Name */
	const char *lastop;		/* Last operation */
	uint32_t id;			/* UID, offset in session array */

	WT_EVENT_HANDLER *event_handler;/* Application's event handlers */

	WT_DATA_HANDLE *dhandle;	/* Current data handle */

	/*
	 * Each session keeps a cache of data handles. The set of handles can
	 * grow quite large so we maintain both a simple list and a hash table
	 * of lists. The hash table key is based on a hash of the data handle's
	 * URI.  The hash table list is kept in allocated memory that lives
	 * across session close - so it is declared further down.
	 */
					/* Session handle reference list */
	TAILQ_HEAD(__dhandles, __wt_data_handle_cache) dhandles;
	time_t last_sweep;		/* Last sweep for dead handles */
	struct timespec last_epoch;	/* Last epoch time returned */

	WT_CURSOR_LIST cursors;		/* Cursors closed with the session */
	uint32_t cursor_sweep_position;	/* Position in cursor_cache for sweep */
	uint32_t cursor_sweep_countdown;/* Countdown to cursor sweep */
	time_t last_cursor_sweep;	/* Last sweep for dead cursors */

	WT_CURSOR_BACKUP *bkp_cursor;	/* Hot backup cursor */

	WT_COMPACT_STATE *compact;	/* Compaction information */
	enum { WT_COMPACT_NONE=0,
	    WT_COMPACT_RUNNING, WT_COMPACT_SUCCESS } compact_state;

	WT_CURSOR	*las_cursor;	/* Lookaside table cursor */

	WT_CURSOR *meta_cursor;		/* Metadata file */
	void	  *meta_track;		/* Metadata operation tracking */
	void	  *meta_track_next;	/* Current position */
	void	  *meta_track_sub;	/* Child transaction / save point */
	size_t	   meta_track_alloc;	/* Currently allocated */
	int	   meta_track_nest;	/* Nesting level of meta transaction */
#define	WT_META_TRACKING(session)	((session)->meta_track_next != NULL)

	/* Current rwlock for callback. */
	WT_RWLOCK *current_rwlock;
	uint8_t current_rwticket;

	WT_ITEM	**scratch;		/* Temporary memory for any function */
	u_int	  scratch_alloc;	/* Currently allocated */
	size_t	  scratch_cached;	/* Scratch bytes cached */
#ifdef HAVE_DIAGNOSTIC
	/*
	 * Variables used to look for violations of the contract that a
	 * session is only used by a single session at once.
	 */
	volatile uintmax_t api_tid;
	volatile uint32_t api_enter_refcnt;
	/*
	 * It's hard to figure out from where a buffer was allocated after it's
	 * leaked, so in diagnostic mode we track them; DIAGNOSTIC can't simply
	 * add additional fields to WT_ITEM structures because they are visible
	 * to applications, create a parallel structure instead.
	 */
	struct __wt_scratch_track {
		const char *file;	/* Allocating file, line */
		int line;
	} *scratch_track;
#endif

	WT_ITEM err;			/* Error buffer */

	WT_TXN_ISOLATION isolation;
	WT_TXN	txn;			/* Transaction state */
#define	WT_SESSION_BG_SYNC_MSEC		1200000
	WT_LSN	bg_sync_lsn;		/* Background sync operation LSN. */
	u_int	ncursors;		/* Count of active file cursors. */

	void	*block_manager;		/* Block-manager support */
	int	(*block_manager_cleanup)(WT_SESSION_IMPL *);

					/* Checkpoint handles */
	WT_DATA_HANDLE **ckpt_handle;	/* Handle list */
	u_int   ckpt_handle_next;	/* Next empty slot */
	size_t  ckpt_handle_allocated;	/* Bytes allocated */

	/*
	 * Operations acting on handles.
	 *
	 * The preferred pattern is to gather all of the required handles at
	 * the beginning of an operation, then drop any other locks, perform
	 * the operation, then release the handles.  This cannot be easily
	 * merged with the list of checkpoint handles because some operations
	 * (such as compact) do checkpoints internally.
	 */
	WT_DATA_HANDLE **op_handle;	/* Handle list */
	u_int   op_handle_next;		/* Next empty slot */
	size_t  op_handle_allocated;	/* Bytes allocated */

	void	*reconcile;		/* Reconciliation support */
	int	(*reconcile_cleanup)(WT_SESSION_IMPL *);

	/* Sessions have an associated statistics bucket based on its ID. */
	u_int	stat_bucket;		/* Statistics bucket offset */

/* AUTOMATIC FLAG VALUE GENERATION START */
#define	WT_SESSION_CACHE_CURSORS		0x000001u
#define	WT_SESSION_CAN_WAIT			0x000002u
#define	WT_SESSION_IGNORE_CACHE_SIZE		0x000004u
#define	WT_SESSION_INTERNAL			0x000008u
#define	WT_SESSION_LOCKED_CHECKPOINT		0x000010u
#define	WT_SESSION_LOCKED_HANDLE_LIST_READ	0x000020u
#define	WT_SESSION_LOCKED_HANDLE_LIST_WRITE	0x000040u
#define	WT_SESSION_LOCKED_METADATA		0x000080u
#define	WT_SESSION_LOCKED_PASS			0x000100u
#define	WT_SESSION_LOCKED_SCHEMA		0x000200u
#define	WT_SESSION_LOCKED_SLOT			0x000400u
#define	WT_SESSION_LOCKED_TABLE_READ		0x000800u
#define	WT_SESSION_LOCKED_TABLE_WRITE		0x001000u
#define	WT_SESSION_LOCKED_TURTLE		0x002000u
#define	WT_SESSION_LOGGING_INMEM		0x004000u
#define	WT_SESSION_LOOKASIDE_CURSOR		0x008000u
#define	WT_SESSION_NO_DATA_HANDLES		0x010000u
#define	WT_SESSION_NO_LOGGING			0x020000u
#define	WT_SESSION_NO_RECONCILE			0x040000u
#define	WT_SESSION_NO_SCHEMA_LOCK		0x080000u
#define	WT_SESSION_QUIET_CORRUPT_FILE		0x100000u
#define	WT_SESSION_READ_WONT_NEED		0x200000u
#define	WT_SESSION_SCHEMA_TXN			0x400000u
#define	WT_SESSION_SERVER_ASYNC			0x800000u
/* AUTOMATIC FLAG VALUE GENERATION STOP */
	uint32_t flags;

	/*
	 * All of the following fields live at the end of the structure so it's
	 * easier to clear everything but the fields that persist.
	 */
#define	WT_SESSION_CLEAR_SIZE	(offsetof(WT_SESSION_IMPL, rnd))

	/*
	 * The random number state persists past session close because we don't
	 * want to repeatedly use the same values for skiplist depth when the
	 * application isn't caching sessions.
	 */
	WT_RAND_STATE rnd;		/* Random number generation state */

	/*
	 * Hash tables are allocated lazily as sessions are used to keep the
	 * size of this structure from growing too large.
	 */
	WT_CURSOR_LIST *cursor_cache;	/* Hash table of cached cursors */

					/* Hashed handle reference list array */
	TAILQ_HEAD(__dhandles_hash, __wt_data_handle_cache) *dhhash;

					/* Generations manager */
#define	WT_GEN_CHECKPOINT	0	/* Checkpoint generation */
#define	WT_GEN_EVICT		1	/* Eviction generation */
#define	WT_GEN_HAZARD		2	/* Hazard pointer */
#define	WT_GEN_SPLIT		3	/* Page splits */
#define	WT_GENERATIONS		4	/* Total generation manager entries */
	volatile uint64_t generations[WT_GENERATIONS];

	/*
	 * Session memory persists past session close because it's accessed by
	 * threads of control other than the thread owning the session. For
	 * example, btree splits and hazard pointers can "free" memory that's
	 * still in use. In order to eventually free it, it's stashed here with
	 * with its generation number; when no thread is reading in generation,
	 * the memory can be freed for real.
	 */
	struct __wt_session_stash {
		struct __wt_stash {
			void	*p;	/* Memory, length */
			size_t	 len;
			uint64_t gen;	/* Generation */
		} *list;
		size_t  cnt;		/* Array entries */
		size_t  alloc;		/* Allocated bytes */
	} stash[WT_GENERATIONS];

	/*
	 * Hazard pointers.
	 *
	 * Hazard information persists past session close because it's accessed
	 * by threads of control other than the thread owning the session.
	 *
	 * Use the non-NULL state of the hazard field to know if the session has
	 * previously been initialized.
	 */
#define	WT_SESSION_FIRST_USE(s)						\
	((s)->hazard == NULL)

	/*
	 * The hazard pointer array grows as necessary, initialize with 250
	 * slots.
	 */
#define	WT_SESSION_INITIAL_HAZARD_SLOTS	250
	uint32_t   hazard_size;		/* Hazard pointer array slots */
	uint32_t   hazard_inuse;	/* Hazard pointer array slots in-use */
	uint32_t   nhazard;		/* Count of active hazard pointers */
	WT_HAZARD *hazard;		/* Hazard pointer array */

	/*
	 * Operation tracking.
	 */
	WT_OPTRACK_RECORD *optrack_buf;
	u_int optrackbuf_ptr;
	uint64_t optrack_offset;
	WT_FH *optrack_fh;
};