1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
|
/*-
* Copyright (c) 2008-2014 WiredTiger, Inc.
* All rights reserved.
*
* See the file LICENSE for redistribution information.
*/
#define WT_LOG_FILENAME "WiredTigerLog" /* Log file name */
/* Logging subsystem declarations. */
#define LOG_ALIGN 128
/*
 * Initial slot buffer size: 64KB.
 *
 * The replacement list is parenthesized so the macro expands as a single
 * value when it appears next to a higher- or equal-precedence operator
 * (for example "x / WT_LOG_SLOT_BUF_INIT_SIZE" or
 * "WT_LOG_SLOT_BUF_INIT_SIZE % y").
 */
#define WT_LOG_SLOT_BUF_INIT_SIZE (64 * 1024)
/*
 * INIT_LSN --
 *    Set the LSN referenced by l to the initial position: file 1, offset 0.
 *    The argument is evaluated more than once.
 */
#define INIT_LSN(l)                                                     \
    do {                                                                \
        (l)->file = 1;                                                  \
        (l)->offset = 0;                                                \
    } while (0)

/*
 * IS_INIT_LSN --
 *    Evaluate to non-zero when the LSN referenced by l is at the initial
 *    position (file 1, offset 0), and to zero otherwise.
 */
#define IS_INIT_LSN(l)                                                  \
    ((l)->file == 1 && (l)->offset == 0)
/*
 * Key and value format strings (LOGC presumably stands for "log cursor" --
 * confirm against the users of these macros).
 *
 * Both of the macros below need to change if the content of __wt_lsn
 * ever changes. The value format lists, in order:
 *    txnid, record type, operation type, file id, operation key,
 *    operation value
 */
#define LOGC_KEY_FORMAT WT_UNCHECKED_STRING(IqI)
#define LOGC_VALUE_FORMAT WT_UNCHECKED_STRING(qIIIuu)
/*
 * LOG_SKIP_HEADER --
 *    Given a pointer to the start of a log record, yield a const byte
 *    pointer advanced by the offset of the record field in WT_LOG_RECORD.
 */
#define LOG_SKIP_HEADER(data)                                           \
    ((const uint8_t *)(data) + offsetof(WT_LOG_RECORD, record))

/*
 * LOG_REC_SIZE --
 *    Given a size, yield that size less the offset of the record field in
 *    WT_LOG_RECORD.
 */
#define LOG_REC_SIZE(size)                                              \
    ((size) - offsetof(WT_LOG_RECORD, record))
/*
 * MAX_LSN --
 *    Set the LSN referenced by l to the largest values the macro knows:
 *    file UINT32_MAX, offset INT64_MAX. The argument is evaluated more
 *    than once.
 */
#define MAX_LSN(l)                                                      \
    do {                                                                \
        (l)->file = UINT32_MAX;                                         \
        (l)->offset = INT64_MAX;                                        \
    } while (0)
/*
 * LOG_CMP --
 *    Three-way comparison of two LSNs: evaluates to -1 if lsn1 < lsn2,
 *    0 if lsn1 == lsn2 and 1 if lsn1 > lsn2. The file fields are compared
 *    first; the offset fields only decide the result when the files match.
 *    Both arguments are evaluated more than once.
 */
#define LOG_CMP(lsn1, lsn2)                                             \
    ((lsn1)->file < (lsn2)->file ? -1 :                                 \
    ((lsn1)->file > (lsn2)->file ? 1 :                                  \
    ((lsn1)->offset < (lsn2)->offset ? -1 :                             \
    ((lsn1)->offset > (lsn2)->offset ? 1 : 0))))
/*
 * Consolidation array slot states. A slot's state is either one of the
 * named values below or lies outside their range:
 *
 *    state < WT_LOG_SLOT_DONE     threads are actively writing to the log
 *    WT_LOG_SLOT_DONE             all activity on this slot is complete
 *    WT_LOG_SLOT_FREE             slot is available for allocation
 *    WT_LOG_SLOT_PENDING          slot is transitioning from ready to active
 *    WT_LOG_SLOT_READY            slot is ready for threads to join
 *    state > WT_LOG_SLOT_READY    threads are actively consolidating on
 *                                 this slot
 */
#define WT_LOG_SLOT_DONE        0
#define WT_LOG_SLOT_FREE        1
#define WT_LOG_SLOT_PENDING     2
#define WT_LOG_SLOT_READY       3
/*
 * WT_LOGSLOT --
 *    One slot of the log consolidation array. slot_state holds either one
 *    of the WT_LOG_SLOT_* values or a value below WT_LOG_SLOT_DONE / above
 *    WT_LOG_SLOT_READY. The type is declared with an alignment attribute of
 *    WT_CACHE_LINE_ALIGNMENT.
 */
typedef struct {
int64_t slot_state; /* Slot state */
uint64_t slot_group_size; /* Group size */
int32_t slot_error; /* Error value */
/*
 * NOTE(review): presumably the slot_index value for a slot that has no
 * position in the active array -- confirm against the users.
 */
#define SLOT_INVALID_INDEX 0xffffffff
uint32_t slot_index; /* Active slot index */
wt_off_t slot_start_offset; /* Starting file offset */
WT_LSN slot_release_lsn; /* Slot release LSN */
WT_LSN slot_start_lsn; /* Slot starting LSN */
WT_LSN slot_end_lsn; /* Slot ending LSN */
WT_FH *slot_fh; /* File handle for this group */
WT_ITEM slot_buf; /* Buffer for grouped writes */
int32_t slot_churn; /* Active slots are scarce. */
/* Bit values for the flags field below. */
#define SLOT_BUF_GROW 0x01 /* Grow buffer on release */
#define SLOT_BUFFERED 0x02 /* Buffer writes */
#define SLOT_CLOSEFH 0x04 /* Close old fh on release */
#define SLOT_SYNC 0x08 /* Needs sync on release */
uint32_t flags; /* Flags */
} WT_LOGSLOT WT_GCC_ATTRIBUTE((aligned(WT_CACHE_LINE_ALIGNMENT)));
/*
 * WT_MYSLOT --
 *    Pairs a pointer to a consolidation slot with a file offset.
 *    NOTE(review): presumably records one thread's position within the
 *    slot it joined -- confirm against the slot join/release code.
 */
typedef struct {
WT_LOGSLOT *slot; /* Slot being referenced */
wt_off_t offset; /* Offset associated with the slot */
} WT_MYSLOT;
/*
 * LOG_FIRST_RECORD --
 *    Offset of the first record: the allocsize field of the log structure.
 *    The macro takes no argument and expands to a reference to a variable
 *    named "log", so a pointer with that name must be in scope wherever
 *    the macro is used.
 */
#define LOG_FIRST_RECORD (log->allocsize)
/*
 * WT_LOG --
 *    State of the logging subsystem: the current log file and its handles,
 *    the system LSNs, the synchronization resources and the slot
 *    consolidation arrays.
 */
typedef struct {
uint32_t allocsize; /* Allocation alignment size */
wt_off_t log_written; /* Amount of log written this period */
/*
 * Log file information
 */
uint32_t fileid; /* Current log file number */
WT_FH *log_fh; /* Logging file handle */
WT_FH *log_close_fh; /* Logging file handle to close */
/*
 * System LSNs
 */
WT_LSN alloc_lsn; /* Next LSN for allocation */
WT_LSN ckpt_lsn; /* Last checkpoint LSN */
WT_LSN first_lsn; /* First LSN */
WT_LSN sync_lsn; /* LSN of the last sync */
WT_LSN trunc_lsn; /* End LSN for recovery truncation */
WT_LSN write_lsn; /* Last LSN written to log file */
/*
 * Synchronization resources
 */
WT_SPINLOCK log_lock; /* Locked: Logging fields */
WT_SPINLOCK log_slot_lock; /* Locked: Consolidation array */
WT_SPINLOCK log_sync_lock; /* Locked: Single-thread fsync */
WT_RWLOCK *log_archive_lock; /* Archive and log cursors */
/* Notify any waiting threads when sync_lsn is updated. */
WT_CONDVAR *log_sync_cond;
/*
 * Consolidation array information
 * SLOT_ACTIVE must be less than SLOT_POOL.
 * Our testing shows that the more consolidation we generate, the
 * better the performance we see, which equates to an active slot
 * count of one.
 */
#define SLOT_ACTIVE 1
#define SLOT_POOL 16
uint32_t pool_index; /* Global pool index */
WT_LOGSLOT *slot_array[SLOT_ACTIVE]; /* Active slots */
WT_LOGSLOT slot_pool[SLOT_POOL]; /* Pool of all slots */
/* Bit values for the flags field below. */
#define WT_LOG_FORCE_CONSOLIDATE 0x01 /* Disable direct writes */
uint32_t flags;
} WT_LOG;
/*
 * WT_LOG_RECORD --
 *    Header of a log record, followed directly by the record's data.
 *
 *    The trailing data is declared as a C99 flexible array member rather
 *    than the GNU zero-length array extension ("record[0]"). The size of
 *    the structure and offsetof(WT_LOG_RECORD, record) are unchanged: the
 *    data begins at byte 16.
 */
typedef struct {
    uint32_t len;           /* 00-03: Record length including hdr */
    uint32_t checksum;      /* 04-07: Checksum of the record */
    uint8_t unused[8];      /* 08-15: Padding */
    uint8_t record[];       /* Beginning of actual data */
} WT_LOG_RECORD;
/*
 * WT_LOG_DESC --
 *    The log file's description: a magic number, a major/minor version
 *    pair and the log file size, laid out at the byte offsets noted on
 *    each field. The expected magic and version values are defined next
 *    to the fields they describe.
 */
struct __wt_log_desc {
#define WT_LOG_MAGIC 0x101064
uint32_t log_magic; /* 00-03: Magic number */
#define WT_LOG_MAJOR_VERSION 1
uint16_t majorv; /* 04-05: Major version */
#define WT_LOG_MINOR_VERSION 0
uint16_t minorv; /* 06-07: Minor version */
uint64_t log_size; /* 08-15: Log file size */
};
/*
 * WT_LOG_REC_DESC --
 *    A descriptor for a log record type: a format string plus a print
 *    callback.
 */
struct __wt_log_rec_desc {
/* Format string (presumably a pack/unpack format -- confirm). */
const char *fmt;
/*
 * Print callback: takes the session, a pointer to a byte pointer and an
 * end pointer, and returns an int.
 * NOTE(review): presumably *pp is advanced past the printed record and
 * the return value is an error code -- confirm against implementations.
 */
int (*print)(WT_SESSION_IMPL *session, uint8_t **pp, uint8_t *end);
};
/*
 * WT_LOG_OP_DESC --
 *    A descriptor for a log operation type: a format string plus a print
 *    callback.
 */
struct __wt_log_op_desc {
/* Format string (presumably a pack/unpack format -- confirm). */
const char *fmt;
/*
 * Print callback: takes the session, a pointer to a byte pointer and an
 * end pointer, and returns an int.
 * NOTE(review): presumably *pp is advanced past the printed operation and
 * the return value is an error code -- confirm against implementations.
 */
int (*print)(WT_SESSION_IMPL *session, uint8_t **pp, uint8_t *end);
};
|