/*-
* Copyright (c) 2014-2017 MongoDB, Inc.
* Copyright (c) 2008-2014 WiredTiger, Inc.
* All rights reserved.
*
* See the file LICENSE for redistribution information.
*/
/*
 * Default hash table size; we don't need a prime number of buckets
 * because we always use a good hash function. Used to size the data
 * handle, file handle, block manager and keyed-encryptor hash arrays
 * below.
 */
#define WT_HASH_ARRAY_SIZE 512
/*******************************************
* Global per-process structure.
*******************************************/
/*
 * WT_PROCESS --
 * Per-process information for the library: state shared across all
 * connections opened in this process.
 */
struct __wt_process {
WT_SPINLOCK spinlock; /* Per-process spinlock */
/* Locked: connection queue (all open connections in the process) */
TAILQ_HEAD(__wt_connection_impl_qh, __wt_connection_impl) connqh;
WT_CACHE_POOL *cache_pool; /* Shared cache pool, if configured */
/*
 * Checksum function: all checksum calls go through this per-process
 * function pointer, so the implementation can be selected once
 * (presumably by CPU capability at startup -- confirm at init site).
 */
#define __wt_checksum(chunk, len) __wt_process.checksum(chunk, len)
uint32_t (*checksum)(const void *, size_t);
};
extern WT_PROCESS __wt_process; /* Single per-process instance */
/*
 * WT_KEYED_ENCRYPTOR --
 * A list entry for an encryptor with a unique (name, keyid).
 */
struct __wt_keyed_encryptor {
const char *keyid; /* Key id of encryptor */
int owned; /* Encryptor needs to be terminated */
size_t size_const; /* The result of the sizing callback */
WT_ENCRYPTOR *encryptor; /* User supplied callbacks */
/* Linked list of encryptors: hash-bucket chain and full list */
TAILQ_ENTRY(__wt_keyed_encryptor) hashq;
TAILQ_ENTRY(__wt_keyed_encryptor) q;
};
/*
 * WT_NAMED_COLLATOR --
 * A collator list entry: maps a registered name to the
 * application-supplied collator object.
 */
struct __wt_named_collator {
const char *name; /* Name of collator */
WT_COLLATOR *collator; /* User supplied object */
TAILQ_ENTRY(__wt_named_collator) q; /* Linked list of collators */
};
/*
 * WT_NAMED_COMPRESSOR --
 * A compressor list entry: maps a registered name to the
 * application-supplied compressor callbacks.
 */
struct __wt_named_compressor {
const char *name; /* Name of compressor */
WT_COMPRESSOR *compressor; /* User supplied callbacks */
/* Linked list of compressors */
TAILQ_ENTRY(__wt_named_compressor) q;
};
/*
 * WT_NAMED_DATA_SOURCE --
 * A data source list entry: maps a URI prefix to the
 * application-supplied data source callbacks.
 */
struct __wt_named_data_source {
const char *prefix; /* Name of data source */
WT_DATA_SOURCE *dsrc; /* User supplied callbacks */
/* Linked list of data sources */
TAILQ_ENTRY(__wt_named_data_source) q;
};
/*
 * WT_NAMED_ENCRYPTOR --
 * An encryptor list entry. Each named encryptor also owns the set of
 * keyed (name, keyid) instances created from it, kept both in a hash
 * table and a plain list.
 */
struct __wt_named_encryptor {
const char *name; /* Name of encryptor */
WT_ENCRYPTOR *encryptor; /* User supplied callbacks */
/* Locked: list of encryptors by key */
TAILQ_HEAD(__wt_keyedhash, __wt_keyed_encryptor)
keyedhashqh[WT_HASH_ARRAY_SIZE];
TAILQ_HEAD(__wt_keyed_qh, __wt_keyed_encryptor) keyedqh;
/* Linked list of encryptors */
TAILQ_ENTRY(__wt_named_encryptor) q;
};
/*
 * WT_NAMED_EXTRACTOR --
 * An extractor list entry: maps a registered name to the
 * application-supplied extractor object.
 */
struct __wt_named_extractor {
const char *name; /* Name of extractor */
WT_EXTRACTOR *extractor; /* User supplied object */
TAILQ_ENTRY(__wt_named_extractor) q; /* Linked list of extractors */
};
/*
 * Allocate some additional slots for internal sessions so the user cannot
 * configure too few sessions for us to run: this count is reserved on top
 * of the application's configured session count.
 */
#define WT_EXTRA_INTERNAL_SESSIONS 20
/*
 * WT_CONN_CHECK_PANIC --
 * Check if we've panicked and return the appropriate error: WT_PANIC
 * once the connection's panic flag is set, otherwise 0.
 */
#define WT_CONN_CHECK_PANIC(conn) \
(F_ISSET(conn, WT_CONN_PANIC) ? WT_PANIC : 0)
/* Session variant: map the session to its connection and check. */
#define WT_SESSION_CHECK_PANIC(session) \
WT_CONN_CHECK_PANIC(S2C(session))
/*
 * Macros to ensure the dhandle is inserted or removed from both the
 * main queue and the hashed queue, keeping dhandle_count in sync.
 * Both assert the caller holds the handle-list lock in write mode;
 * a "session" variable must be in scope at the expansion site.
 */
#define WT_CONN_DHANDLE_INSERT(conn, dhandle, bucket) do { \
WT_ASSERT(session, \
F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); \
TAILQ_INSERT_HEAD(&(conn)->dhqh, dhandle, q); \
TAILQ_INSERT_HEAD(&(conn)->dhhash[bucket], dhandle, hashq); \
++(conn)->dhandle_count; \
} while (0)
#define WT_CONN_DHANDLE_REMOVE(conn, dhandle, bucket) do { \
WT_ASSERT(session, \
F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); \
TAILQ_REMOVE(&(conn)->dhqh, dhandle, q); \
TAILQ_REMOVE(&(conn)->dhhash[bucket], dhandle, hashq); \
--(conn)->dhandle_count; \
} while (0)
/*
 * Macros to ensure the block is inserted or removed from both the
 * main queue and the hashed queue. No lock is asserted here;
 * presumably the caller holds the connection's block_lock -- verify
 * at the call sites.
 */
#define WT_CONN_BLOCK_INSERT(conn, block, bucket) do { \
TAILQ_INSERT_HEAD(&(conn)->blockqh, block, q); \
TAILQ_INSERT_HEAD(&(conn)->blockhash[bucket], block, hashq); \
} while (0)
#define WT_CONN_BLOCK_REMOVE(conn, block, bucket) do { \
TAILQ_REMOVE(&(conn)->blockqh, block, q); \
TAILQ_REMOVE(&(conn)->blockhash[bucket], block, hashq); \
} while (0)
/*
 * WT_CONNECTION_IMPL --
 * Implementation of WT_CONNECTION: all library-internal state backing a
 * single connection handle -- locks, session array, cache, handle and
 * extension lists, and the per-subsystem (checkpoint, eviction, logging,
 * statistics, sweep) thread state.
 */
struct __wt_connection_impl {
WT_CONNECTION iface; /* Public interface */
/* For operations without an application-supplied session */
WT_SESSION_IMPL *default_session;
/*
 * NOTE(review): placeholder session, presumably used while
 * bootstrapping the connection before default_session is
 * available -- confirm at the open/close code.
 */
WT_SESSION_IMPL dummy_session;
const char *cfg; /* Connection configuration */
WT_SPINLOCK api_lock; /* Connection API spinlock */
WT_SPINLOCK checkpoint_lock; /* Checkpoint spinlock */
WT_SPINLOCK fh_lock; /* File handle queue spinlock */
WT_SPINLOCK metadata_lock; /* Metadata update spinlock */
WT_SPINLOCK reconfig_lock; /* Single thread reconfigure */
WT_SPINLOCK schema_lock; /* Schema operation spinlock */
WT_RWLOCK table_lock; /* Table list lock */
WT_SPINLOCK turtle_lock; /* Turtle file spinlock */
WT_RWLOCK dhandle_lock; /* Data handle list lock */
/* Connection queue (entry on the per-process list) */
TAILQ_ENTRY(__wt_connection_impl) q;
/* Cache pool queue */
TAILQ_ENTRY(__wt_connection_impl) cpq;
const char *home; /* Database home */
const char *error_prefix; /* Database error prefix */
int is_new; /* Connection created database */
uint16_t compat_major; /* Compatibility major version */
uint16_t compat_minor; /* Compatibility minor version */
WT_EXTENSION_API extension_api; /* Extension API */
/* Configuration */
const WT_CONFIG_ENTRY **config_entries;
void **foc; /* Free-on-close array */
size_t foc_cnt; /* Array entries */
size_t foc_size; /* Array size */
WT_FH *lock_fh; /* Lock file handle */
/*
 * The connection keeps a cache of data handles. The set of handles
 * can grow quite large so we maintain both a simple list and a hash
 * table of lists. The hash table key is based on a hash of the table
 * URI.
 */
/* Locked: data handle hash array */
TAILQ_HEAD(__wt_dhhash, __wt_data_handle) dhhash[WT_HASH_ARRAY_SIZE];
/* Locked: data handle list */
TAILQ_HEAD(__wt_dhandle_qh, __wt_data_handle) dhqh;
/* Locked: LSM handle list. */
TAILQ_HEAD(__wt_lsm_qh, __wt_lsm_tree) lsmqh;
/* Locked: file list (hash array plus simple list, like dhandles) */
TAILQ_HEAD(__wt_fhhash, __wt_fh) fhhash[WT_HASH_ARRAY_SIZE];
TAILQ_HEAD(__wt_fh_qh, __wt_fh) fhqh;
/* Locked: library list */
TAILQ_HEAD(__wt_dlh_qh, __wt_dlh) dlhqh;
WT_SPINLOCK block_lock; /* Locked: block manager list */
TAILQ_HEAD(__wt_blockhash, __wt_block) blockhash[WT_HASH_ARRAY_SIZE];
TAILQ_HEAD(__wt_block_qh, __wt_block) blockqh;
u_int dhandle_count; /* Locked: handles in the queue */
u_int open_btree_count; /* Locked: open writable btree count */
uint32_t next_file_id; /* Locked: file ID counter */
uint32_t open_file_count; /* Atomic: open file handle count */
uint32_t open_cursor_count; /* Atomic: open cursor handle count */
/*
 * WiredTiger allocates space for 50 simultaneous sessions (threads of
 * control) by default. Growing the number of threads dynamically is
 * possible, but tricky since server threads are walking the array
 * without locking it.
 *
 * There's an array of WT_SESSION_IMPL pointers that reference the
 * allocated array; we do it that way because we want an easy way for
 * the server thread code to avoid walking the entire array when only a
 * few threads are running.
 */
WT_SESSION_IMPL *sessions; /* Session reference */
uint32_t session_size; /* Session array size */
uint32_t session_cnt; /* Session count */
size_t session_scratch_max; /* Max scratch memory per session */
WT_CACHE *cache; /* Page cache */
volatile uint64_t cache_size; /* Cache size (either statically
configured or the current size
within a cache pool). */
WT_TXN_GLOBAL txn_global; /* Global transaction state */
WT_RWLOCK hot_backup_lock; /* Hot backup serialization */
bool hot_backup; /* Hot backup in progress */
char **hot_backup_list; /* Hot backup file list */
WT_SESSION_IMPL *ckpt_session; /* Checkpoint thread session */
wt_thread_t ckpt_tid; /* Checkpoint thread */
bool ckpt_tid_set; /* Checkpoint thread set */
WT_CONDVAR *ckpt_cond; /* Checkpoint wait mutex */
#define WT_CKPT_LOGSIZE(conn) ((conn)->ckpt_logsize != 0)
wt_off_t ckpt_logsize; /* Checkpoint log size period */
bool ckpt_signalled;/* Checkpoint signalled */
uint64_t ckpt_usecs; /* Checkpoint timer */
uint64_t ckpt_time_max; /* Checkpoint time min/max */
uint64_t ckpt_time_min;
uint64_t ckpt_time_recent; /* Checkpoint time recent/total */
uint64_t ckpt_time_total;
uint32_t stat_flags; /* Options declared in flags.py */
/* Connection statistics */
WT_CONNECTION_STATS *stats[WT_COUNTER_SLOTS];
WT_CONNECTION_STATS *stat_array;
WT_ASYNC *async; /* Async structure */
bool async_cfg; /* Global async configuration */
uint32_t async_size; /* Async op array size */
uint32_t async_workers; /* Number of async workers */
WT_LSM_MANAGER lsm_manager; /* LSM worker thread information */
WT_KEYED_ENCRYPTOR *kencryptor; /* Encryptor for metadata and log */
bool evict_server_running;/* Eviction server operating */
WT_THREAD_GROUP evict_threads; /* Eviction thread group */
uint32_t evict_threads_max;/* Max eviction threads */
uint32_t evict_threads_min;/* Min eviction threads */
uint32_t evict_tune_datapts_needed;/* Data needed to tune */
struct timespec evict_tune_last_action_time;/* Time of last action */
struct timespec evict_tune_last_time; /* Time of last check */
uint32_t evict_tune_num_points; /* Number of values tried */
uint64_t evict_tune_pgs_last; /* Number of pages evicted */
uint64_t evict_tune_pg_sec_max; /* Max throughput encountered */
bool evict_tune_stable; /* Are we stable? */
uint32_t evict_tune_workers_best;/* Best performing value */
#define WT_STATLOG_FILENAME "WiredTigerStat.%d.%H"
WT_SESSION_IMPL *stat_session; /* Statistics log session */
wt_thread_t stat_tid; /* Statistics log thread */
bool stat_tid_set; /* Statistics log thread set */
WT_CONDVAR *stat_cond; /* Statistics log wait mutex */
const char *stat_format; /* Statistics log timestamp format */
WT_FSTREAM *stat_fs; /* Statistics log stream */
/* Statistics log json table printing state flag */
bool stat_json_tables;
char *stat_path; /* Statistics log path format */
char **stat_sources; /* Statistics log list of objects */
const char *stat_stamp; /* Statistics log entry timestamp */
uint64_t stat_usecs; /* Statistics log period */
#define WT_CONN_LOG_ARCHIVE 0x001 /* Archive is enabled */
#define WT_CONN_LOG_DOWNGRADED 0x002 /* Running older version */
#define WT_CONN_LOG_ENABLED 0x004 /* Logging is enabled */
#define WT_CONN_LOG_EXISTED 0x008 /* Log files found */
#define WT_CONN_LOG_FORCE_DOWNGRADE 0x010 /* Force downgrade */
#define WT_CONN_LOG_RECOVER_DIRTY 0x020 /* Recovering unclean */
#define WT_CONN_LOG_RECOVER_DONE 0x040 /* Recovery completed */
#define WT_CONN_LOG_RECOVER_ERR 0x080 /* Error if recovery required */
#define WT_CONN_LOG_ZERO_FILL 0x100 /* Manually zero files */
uint32_t log_flags; /* Global logging configuration */
WT_CONDVAR *log_cond; /* Log server wait mutex */
WT_SESSION_IMPL *log_session; /* Log server session */
wt_thread_t log_tid; /* Log server thread */
bool log_tid_set; /* Log server thread set */
WT_CONDVAR *log_file_cond; /* Log file thread wait mutex */
WT_SESSION_IMPL *log_file_session;/* Log file thread session */
wt_thread_t log_file_tid; /* Log file thread */
bool log_file_tid_set;/* Log file thread set */
WT_CONDVAR *log_wrlsn_cond;/* Log write lsn thread wait mutex */
WT_SESSION_IMPL *log_wrlsn_session;/* Log write lsn thread session */
wt_thread_t log_wrlsn_tid; /* Log write lsn thread */
bool log_wrlsn_tid_set;/* Log write lsn thread set */
WT_LOG *log; /* Logging structure */
WT_COMPRESSOR *log_compressor;/* Logging compressor */
uint32_t log_cursors; /* Log cursor count */
wt_off_t log_file_max; /* Log file max size */
const char *log_path; /* Logging path format */
uint32_t log_prealloc; /* Log file pre-allocation */
uint32_t txn_logsync; /* Log sync configuration */
WT_SESSION_IMPL *meta_ckpt_session;/* Metadata checkpoint session */
/*
 * Is there a data/schema change that needs to be the part of a
 * checkpoint.
 */
bool modified;
WT_SESSION_IMPL *sweep_session; /* Handle sweep session */
wt_thread_t sweep_tid; /* Handle sweep thread */
int sweep_tid_set; /* Handle sweep thread set */
WT_CONDVAR *sweep_cond; /* Handle sweep wait mutex */
uint64_t sweep_idle_time; /* Handle sweep idle time */
uint64_t sweep_interval; /* Handle sweep interval */
uint64_t sweep_handles_min;/* Handle sweep minimum open */
/*
 * Shared lookaside lock, session and cursor, used by threads accessing
 * the lookaside table (other than eviction server and worker threads
 * and the sweep thread, all of which have their own lookaside cursors).
 */
WT_SPINLOCK las_lock; /* Lookaside table spinlock */
WT_SESSION_IMPL *las_session; /* Lookaside table session */
uint32_t las_fileid; /* Lookaside table file ID */
/*
 * The "lookaside_activity" verbose messages are throttled to once per
 * checkpoint. To accomplish this we track the checkpoint generation
 * for the most recent read and write verbose messages.
 */
uint64_t las_verb_gen_read;
uint64_t las_verb_gen_write;
/* Set of btree IDs not being rolled back */
uint8_t *stable_rollback_bitstring;
uint32_t stable_rollback_maxfile;
/* Locked: collator list */
TAILQ_HEAD(__wt_coll_qh, __wt_named_collator) collqh;
/* Locked: compressor list */
TAILQ_HEAD(__wt_comp_qh, __wt_named_compressor) compqh;
/* Locked: data source list */
TAILQ_HEAD(__wt_dsrc_qh, __wt_named_data_source) dsrcqh;
/* Locked: encryptor list */
WT_SPINLOCK encryptor_lock; /* Encryptor list lock */
TAILQ_HEAD(__wt_encrypt_qh, __wt_named_encryptor) encryptqh;
/* Locked: extractor list */
TAILQ_HEAD(__wt_extractor_qh, __wt_named_extractor) extractorqh;
void *lang_private; /* Language specific private storage */
/* If non-zero, all buffers used for I/O will be aligned to this. */
size_t buffer_alignment;
uint64_t stashed_bytes; /* Atomic: stashed memory statistics */
uint64_t stashed_objects;
/* Generations manager */
volatile uint64_t generations[WT_GENERATIONS];
wt_off_t data_extend_len; /* file_extend data length */
wt_off_t log_extend_len; /* file_extend log length */
#define WT_DIRECT_IO_CHECKPOINT 0x01 /* Checkpoints */
#define WT_DIRECT_IO_DATA 0x02 /* Data files */
#define WT_DIRECT_IO_LOG 0x04 /* Log files */
uint32_t direct_io; /* O_DIRECT, FILE_FLAG_NO_BUFFERING */
uint32_t write_through; /* FILE_FLAG_WRITE_THROUGH */
bool mmap; /* mmap configuration */
int page_size; /* OS page size for mmap alignment */
uint32_t verbose; /* Verbose messages (bit mask; values defined elsewhere) */
/*
 * Variable with flags for which subsystems the diagnostic stress timing
 * delays have been requested.
 */
uint32_t timing_stress_flags;
#define WT_STDERR(s) (&S2C(s)->wt_stderr)
#define WT_STDOUT(s) (&S2C(s)->wt_stdout)
WT_FSTREAM wt_stderr, wt_stdout;
/*
 * File system interface abstracted to support alternative file system
 * implementations.
 */
WT_FILE_SYSTEM *file_system;
uint32_t flags; /* Connection flags (WT_CONN_* values defined elsewhere) */
};
|