1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
|
/*-
* Copyright (c) 2014-2015 MongoDB, Inc.
* Copyright (c) 2008-2014 WiredTiger, Inc.
* All rights reserved.
*
* See the file LICENSE for redistribution information.
*/
/*
 * String constants naming the files used by the logging subsystem.
 * NOTE(review): presumably these are prefixes to which a log file number is
 * appended -- confirm against the code that builds the file names.
 */
#define WT_LOG_FILENAME "WiredTigerLog" /* Log file name */
#define WT_LOG_PREPNAME "WiredTigerPreplog" /* Log pre-allocated name */
#define WT_LOG_TMPNAME "WiredTigerTmplog" /* Log temporary name */
/* Logging subsystem declarations. */

/* Alignment constant used by the logging code. */
#define WT_LOG_ALIGN 128

/*
 * Initial size for a slot buffer: 64 * 1024 (presumably bytes).
 *
 * The replacement list is parenthesized so the macro behaves as a single
 * value inside a larger expression; without the parentheses an expression
 * such as "x / WT_LOG_SLOT_BUF_INIT_SIZE" would expand to "x / 64 * 1024".
 */
#define WT_LOG_SLOT_BUF_INIT_SIZE (64 * 1024)
/*
 * LSN helpers.
 *
 * Every macro below takes a pointer to a structure with "file" and "offset"
 * fields. The argument is evaluated more than once, so it must not have side
 * effects.
 */

/* Store a (file, offset) pair into an LSN; shared by the setters below. */
#define WT_LSN_SET_FIELDS(l, f, o) do { \
	(l)->file = (f); \
	(l)->offset = (o); \
} while (0)

/* Non-zero if an LSN holds exactly the given (file, offset) pair. */
#define WT_LSN_FIELDS_EQ(l, f, o) \
	((l)->file == (f) && (l)->offset == (o))

/* Set an LSN to the initial value: file 1, offset 0. */
#define WT_INIT_LSN(l) WT_LSN_SET_FIELDS((l), 1, 0)

/* Set an LSN to the largest representable value. */
#define WT_MAX_LSN(l) WT_LSN_SET_FIELDS((l), UINT32_MAX, INT64_MAX)

/* Clear an LSN: file 0, offset 0. */
#define WT_ZERO_LSN(l) WT_LSN_SET_FIELDS((l), 0, 0)

/* Test whether an LSN still holds the initial value. */
#define WT_IS_INIT_LSN(l) WT_LSN_FIELDS_EQ((l), 1, 0)

/* Test whether an LSN holds the maximum value. */
#define WT_IS_MAX_LSN(l) WT_LSN_FIELDS_EQ((l), UINT32_MAX, INT64_MAX)
/*
 * Both of the macros below need to change if the content of __wt_lsn
 * ever changes. The value is the following:
 * txnid, record type, operation type, file id, operation key, operation value
 *
 * The value format has six format characters, one per value field listed
 * above, in the same order. The key format has three.
 * NOTE(review): the first two key fields presumably carry the LSN's file and
 * offset; the meaning of the third is not visible here -- confirm.
 */
#define WT_LOGC_KEY_FORMAT WT_UNCHECKED_STRING(IqI)
#define WT_LOGC_VALUE_FORMAT WT_UNCHECKED_STRING(qIIIuu)
/*
 * WT_LOG_SKIP_HEADER --
 *	Given a pointer to the start of a log record, yield a const byte
 * pointer to the first byte past the WT_LOG_RECORD header, that is, to the
 * "record" member.
 */
#define WT_LOG_SKIP_HEADER(data) \
	(&((const uint8_t *)(data))[offsetof(WT_LOG_RECORD, record)])

/*
 * WT_LOG_REC_SIZE --
 *	Given a total record size that includes the WT_LOG_RECORD header,
 * yield the size of the data that follows the header.
 */
#define WT_LOG_REC_SIZE(size) \
	((size) - offsetof(WT_LOG_RECORD, record))
/*
 * WT_LOG_CMP --
 *	Compare two LSNs, ordering first by file and then by offset.
 * Evaluates to -1 if lsn1 < lsn2, 0 if lsn1 == lsn2 and 1 if lsn1 > lsn2.
 * Both arguments are pointers and are evaluated more than once.
 */
#define WT_LOG_CMP(lsn1, lsn2) \
	((lsn1)->file == (lsn2)->file ? \
	(((lsn1)->offset > (lsn2)->offset) - \
	((lsn1)->offset < (lsn2)->offset)) : \
	((lsn1)->file < (lsn2)->file ? -1 : 1))
/*
 * Possible values for the consolidation array slot states:
 * (NOTE: Any new states must be > WT_LOG_SLOT_DONE and < WT_LOG_SLOT_READY.)
 *
 * < WT_LOG_SLOT_DONE - threads are actively writing to the log.
 * WT_LOG_SLOT_DONE - all activity on this slot is complete.
 * WT_LOG_SLOT_FREE - slot is available for allocation.
 * WT_LOG_SLOT_PENDING - slot is transitioning from ready to active.
 * WT_LOG_SLOT_WRITTEN - slot is written and should be processed by worker.
 * WT_LOG_SLOT_READY - slot is ready for threads to join.
 * > WT_LOG_SLOT_READY - threads are actively consolidating on this slot.
 *
 * The numeric ordering is significant: values outside the named range
 * (below WT_LOG_SLOT_DONE or above WT_LOG_SLOT_READY) are themselves
 * meaningful states, so the named constants must stay contiguous between
 * those two bounds.
 *
 * The slot state must be volatile: threads loop checking the state and can't
 * cache the first value they see.
 */
#define WT_LOG_SLOT_DONE 0 /* Lower bound of the named states */
#define WT_LOG_SLOT_FREE 1
#define WT_LOG_SLOT_PENDING 2
#define WT_LOG_SLOT_WRITTEN 3
#define WT_LOG_SLOT_READY 4 /* Upper bound of the named states */
/*
 * WT_LOGSLOT --
 *	A slot in the log consolidation array. A slot carries a state, the
 * LSNs and file offset for the group of writes it represents, the file
 * handle for the group and a buffer for the grouped writes.
 *
 * The type is declared with WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT).
 * The state field is volatile because it is re-read in loops by multiple
 * threads; its values are the WT_LOG_SLOT_* constants (or values outside
 * their range).
 */
typedef WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) struct {
volatile int64_t slot_state; /* Slot state */
uint64_t slot_group_size; /* Group size */
int32_t slot_error; /* Error value */
/*
 * Sentinel value for slot_index.
 * NOTE(review): presumably marks a slot that is not currently in the active
 * slot array -- confirm against the slot management code.
 */
#define WT_SLOT_INVALID_INDEX 0xffffffff
uint32_t slot_index; /* Active slot index */
wt_off_t slot_start_offset; /* Starting file offset */
WT_LSN slot_release_lsn; /* Slot release LSN */
WT_LSN slot_start_lsn; /* Slot starting LSN */
WT_LSN slot_end_lsn; /* Slot ending LSN */
WT_FH *slot_fh; /* File handle for this group */
WT_ITEM slot_buf; /* Buffer for grouped writes */
int32_t slot_churn; /* Active slots are scarce. */
/* Bit values for the flags field below. */
#define WT_SLOT_BUF_GROW 0x01 /* Grow buffer on release */
#define WT_SLOT_BUFFERED 0x02 /* Buffer writes */
#define WT_SLOT_CLOSEFH 0x04 /* Close old fh on release */
#define WT_SLOT_SYNC 0x08 /* Needs sync on release */
#define WT_SLOT_SYNC_DIR 0x10 /* Directory sync on release */
uint32_t flags; /* Flags */
} WT_LOGSLOT;
/* Initial flag set for a slot (per the name): buffered writes only. */
#define WT_SLOT_INIT_FLAGS (WT_SLOT_BUFFERED)
/*
 * WT_MYSLOT --
 *	Pairs a pointer to a consolidation slot with a file offset.
 * NOTE(review): presumably a thread's handle on the slot it joined together
 * with that thread's offset within the slot's group -- confirm against the
 * slot join/release code.
 */
typedef struct {
WT_LOGSLOT *slot; /* The slot */
wt_off_t offset; /* Offset associated with the slot */
} WT_MYSLOT;
/*
 * Offset of first record.
 *
 * The expansion references a variable named "log" that must be in scope at
 * the point of use, and that must be a pointer to a structure with an
 * allocsize field (such as WT_LOG): the offset is the log's allocation size.
 */
#define WT_LOG_FIRST_RECORD log->allocsize
/*
 * WT_LOG --
 *	State of the logging subsystem: the current log file handles and file
 * numbers, the set of system LSNs, the locks and condition variables that
 * protect and signal changes to them, and the slot consolidation array.
 */
typedef struct {
uint32_t allocsize; /* Allocation alignment size */
wt_off_t log_written; /* Amount of log written this period */
/*
 * Log file information
 */
uint32_t fileid; /* Current log file number */
uint32_t prep_fileid; /* Pre-allocated file number */
uint32_t prep_missed; /* Pre-allocated file misses */
WT_FH *log_fh; /* Logging file handle */
WT_FH *log_close_fh; /* Logging file handle to close */
WT_FH *log_dir_fh; /* Log directory file handle */
/*
 * System LSNs
 */
WT_LSN alloc_lsn; /* Next LSN for allocation */
WT_LSN bg_sync_lsn; /* Latest background sync LSN */
WT_LSN ckpt_lsn; /* Last checkpoint LSN */
WT_LSN first_lsn; /* First LSN */
WT_LSN sync_dir_lsn; /* LSN of the last directory sync */
WT_LSN sync_lsn; /* LSN of the last sync */
WT_LSN trunc_lsn; /* End LSN for recovery truncation */
WT_LSN write_lsn; /* End of last LSN written */
WT_LSN write_start_lsn;/* Beginning of last LSN written */
/*
 * Synchronization resources
 */
WT_SPINLOCK log_lock; /* Locked: Logging fields */
WT_SPINLOCK log_slot_lock; /* Locked: Consolidation array */
WT_SPINLOCK log_sync_lock; /* Locked: Single-thread fsync */
WT_RWLOCK *log_archive_lock; /* Archive and log cursors */
/* Notify any waiting threads when sync_lsn is updated. */
WT_CONDVAR *log_sync_cond;
/* Notify any waiting threads when write_lsn is updated. */
WT_CONDVAR *log_write_cond;
/*
 * Consolidation array information
 * WT_SLOT_ACTIVE must be less than WT_SLOT_POOL.
 * Our testing shows that the more consolidation we generate the
 * better the performance we see, which equates to an active slot
 * count of one.
 */
#define WT_SLOT_ACTIVE 1
#define WT_SLOT_POOL 16
uint32_t pool_index; /* Global pool index */
WT_LOGSLOT *slot_array[WT_SLOT_ACTIVE]; /* Active slots */
WT_LOGSLOT slot_pool[WT_SLOT_POOL]; /* Pool of all slots */
/* Bit values for the flags field below. */
#define WT_LOG_FORCE_CONSOLIDATE 0x01 /* Disable direct writes */
uint32_t flags;
} WT_LOG;
/*
 * WT_LOG_RECORD --
 *	The header that precedes every log record, followed immediately by
 * the record's data. The byte ranges in the field comments give each field's
 * offset within the header; the header is 16 bytes long and the data starts
 * at offset 16.
 *
 * The trailing data is declared as a C99 flexible array member rather than a
 * zero-length array (a compiler extension); the structure's size and the
 * offset of "record" are the same either way.
 */
typedef struct {
	uint32_t len;		/* 00-03: Record length including hdr */
	uint32_t checksum;	/* 04-07: Checksum of the record */

#define WT_LOG_RECORD_COMPRESSED 0x01 /* Compressed except hdr */
#define WT_LOG_RECORD_ENCRYPTED 0x02 /* Encrypted except hdr */
	uint16_t flags;		/* 08-09: Flags */
	uint8_t unused[2];	/* 10-11: Padding */
	uint32_t mem_len;	/* 12-15: Uncompressed len if needed */
	uint8_t record[];	/* Beginning of actual data */
} WT_LOG_RECORD;
/*
 * WT_LOG_DESC --
 *	The log file's description.
 *
 * The byte ranges in the field comments give each field's offset within the
 * structure, 16 bytes in total. The constants defined next to the magic and
 * version fields are the values associated with those fields.
 */
struct __wt_log_desc {
#define WT_LOG_MAGIC 0x101064
uint32_t log_magic; /* 00-03: Magic number */
#define WT_LOG_MAJOR_VERSION 1
uint16_t majorv; /* 04-05: Major version */
#define WT_LOG_MINOR_VERSION 0
uint16_t minorv; /* 06-07: Minor version */
uint64_t log_size; /* 08-15: Log file size */
};
/*
 * WT_LOG_REC_DESC --
 *	A descriptor for a log record type.
 *
 * fmt is a format string for the record type and print is a callback that
 * takes the session, a pointer to a byte pointer (pp) and an end pointer, and
 * returns an int.
 * NOTE(review): presumably fmt is a pack-format string and print consumes the
 * record at *pp, advancing it no further than end, returning 0 on success --
 * confirm against the implementations.
 */
struct __wt_log_rec_desc {
const char *fmt;
int (*print)(WT_SESSION_IMPL *session, uint8_t **pp, uint8_t *end);
};
/*
 * WT_LOG_OP_DESC --
 *	A descriptor for a log operation type.
 *
 * fmt is a format string for the operation type and print is a callback that
 * takes the session, a pointer to a byte pointer (pp) and an end pointer, and
 * returns an int.
 * NOTE(review): presumably fmt is a pack-format string and print consumes the
 * operation at *pp, advancing it no further than end, returning 0 on success
 * -- confirm against the implementations.
 */
struct __wt_log_op_desc {
const char *fmt;
int (*print)(WT_SESSION_IMPL *session, uint8_t **pp, uint8_t *end);
};
|