/* Copyright (c) 2013, Kristian Nielsen and MariaDB Services Ab.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
#ifndef RPL_GTID_H
#define RPL_GTID_H
#include "hash.h"
#include "queues.h"
/* Definitions for MariaDB global transaction ID (GTID). */
extern const LEX_CSTRING rpl_gtid_slave_state_table_name;
class String;
#define PARAM_GTID(G) G.domain_id, G.server_id, G.seq_no
struct rpl_gtid
{
uint32 domain_id;
uint32 server_id;
uint64 seq_no;
};
inline bool operator==(const rpl_gtid& lhs, const rpl_gtid& rhs)
{
return
lhs.domain_id == rhs.domain_id &&
lhs.server_id == rhs.server_id &&
lhs.seq_no == rhs.seq_no;
};
enum enum_gtid_skip_type {
GTID_SKIP_NOT, GTID_SKIP_STANDALONE, GTID_SKIP_TRANSACTION
};
/*
Structure to keep track of threads waiting in MASTER_GTID_WAIT().
Since replication is (mostly) single-threaded, we want to minimise the
performance impact of MASTER_GTID_WAIT() on it. To achieve this, we are
careful to hold the lock shared between replication threads and
MASTER_GTID_WAIT() threads for as short a time as possible. We keep only
a single thread waiting to be notified by the replication threads; this
thread then handles all the (potentially heavy) lifting of dealing with
all currently waiting threads.
*/
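/*
Illustrative usage sketch: a client waits for the slave to catch up with
  SELECT MASTER_GTID_WAIT('0-1-100', 1.5);  -- GTID position, timeout in seconds
which returns 0 once the position has been applied, or -1 on timeout. The call
enters wait_for_pos() below; roughly, one waiter per domain performs the
"small wait" directly against the slave state (it registers itself as
rpl_slave_state::element::gtid_waiter), and that waiter is responsible for
waking up and promoting the remaining waiters, so the replication threads only
ever have to signal a single thread.
*/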
struct gtid_waiting {
/* Elements in the hash, basically a priority queue for each domain. */
struct hash_element {
QUEUE queue;
uint32 domain_id;
};
/* A priority queue to handle waiters in one domain in seq_no order. */
struct queue_element {
uint64 wait_seq_no;
THD *thd;
int queue_idx;
/*
do_small_wait is true if we have responsibility for ensuring that there
is a small waiter.
*/
bool do_small_wait;
/*
The flag `done' is set when the wait is completed (either due to reaching
the position waited for, or due to timeout or kill). The queue_element
is in the queue if and only if `done' is false.
*/
bool done;
};
mysql_mutex_t LOCK_gtid_waiting;
HASH hash;
void init();
void destroy();
hash_element *get_entry(uint32 domain_id);
int wait_for_pos(THD *thd, String *gtid_str, longlong timeout_us);
void promote_new_waiter(gtid_waiting::hash_element *he);
int wait_for_gtid(THD *thd, rpl_gtid *wait_gtid, struct timespec *wait_until);
void process_wait_hash(uint64 wakeup_seq_no, gtid_waiting::hash_element *he);
int register_in_wait_queue(THD *thd, rpl_gtid *wait_gtid, hash_element *he,
queue_element *elem);
void remove_from_wait_queue(hash_element *he, queue_element *elem);
};
class Relay_log_info;
struct rpl_group_info;
class Gtid_list_log_event;
/*
Replication slave state.
For every independent replication stream (identified by domain_id), this
remembers the last gtid applied on the slave within this domain.
Since events are always committed in-order within a single domain, this is
sufficient to maintain the state of the replication slave.
*/
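/*
For example, the GTID 0-1-100 denotes domain_id=0, server_id=1, seq_no=100,
and a slave position such as "0-1-100,1-2-50" records exactly one
last-applied GTID per replication domain.
*/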
struct rpl_slave_state
{
/* Elements in the list of GTIDs kept for each domain_id. */
struct list_element
{
struct list_element *next;
uint64 sub_id;
uint64 seq_no;
uint32 server_id;
/*
hton of mysql.gtid_slave_pos* table used to record this GTID.
Can be NULL if the gtid table failed to load (e.g. a missing
mysql.gtid_slave_pos table following an upgrade).
*/
void *hton;
};
/* Elements in the HASH that hold the state for one domain_id. */
struct element
{
struct list_element *list;
uint32 domain_id;
/* Highest seq_no seen so far in this domain. */
uint64 highest_seq_no;
/*
If this is non-NULL, then it is the waiter responsible for the small
wait in MASTER_GTID_WAIT().
*/
gtid_waiting::queue_element *gtid_waiter;
/*
If gtid_waiter is non-NULL, then this is the seq_no that its
MASTER_GTID_WAIT() is waiting on. When we reach this seq_no, we need to
signal the waiter on COND_wait_gtid.
*/
uint64 min_wait_seq_no;
mysql_cond_t COND_wait_gtid;
/*
For --gtid-ignore-duplicates. The Relay_log_info that currently owns
this domain, and the number of worker threads that are active in it.
The idea is that only one of multiple master connections is allowed to
actively apply events for a given domain. Other connections must either
discard the events (if the seq_no in the GTID shows they have already been
applied), or wait to see if the current owner will apply them.
*/
const Relay_log_info *owner_rli;
uint32 owner_count;
mysql_cond_t COND_gtid_ignore_duplicates;
list_element *grab_list() { list_element *l= list; list= NULL; return l; }
void add(list_element *l)
{
l->next= list;
list= l;
}
};
/* Descriptor for a mysql.gtid_slave_posXXX table in a specific engine. */
enum gtid_pos_table_state {
GTID_POS_AUTO_CREATE,
GTID_POS_CREATE_REQUESTED,
GTID_POS_CREATE_IN_PROGRESS,
GTID_POS_AVAILABLE
};
struct gtid_pos_table {
struct gtid_pos_table *next;
/*
Use a void * here, rather than handlerton *, to make explicit that we
are not using the value to access any functionality in the engine. It
is just used as an opaque value to identify which engine we are using
for each GTID row.
*/
void *table_hton;
LEX_CSTRING table_name;
uint8 state;
};
/* Mapping from domain_id to its element. */
HASH hash;
/* Mutex protecting access to the state. */
mysql_mutex_t LOCK_slave_state;
/* Auxiliary buffer to sort gtid list. */
DYNAMIC_ARRAY gtid_sort_array;
uint64 last_sub_id;
/*
List of tables available for durably storing the slave GTID position.
Accesses to this list are protected by LOCK_slave_state. However, for
efficiency, there is also a provision for read access to it from a running
slave without taking the lock.
An element can be added at the head of the list by storing the new
gtid_pos_tables pointer atomically with release semantics, to ensure that
the next pointer of the new element is visible to readers of the new list.
Other changes (like deleting or replacing elements) must happen only while
all SQL driver threads are stopped. LOCK_slave_state must be held in any
case.
The list can be read without lock by an SQL driver thread or worker thread
by reading the gtid_pos_tables pointer atomically with acquire semantics,
to ensure that it will see the correct next pointer of a new head element.
The type is struct gtid_pos_table *, but needs to be void * to allow using
my_atomic operations without violating C strict aliasing semantics.
*/
void * volatile gtid_pos_tables;
/* The default entry in gtid_pos_tables, mysql.gtid_slave_pos. */
void * volatile default_gtid_pos_table;
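/*
Sketch of the publish/read pattern described above, written with std::atomic
for clarity (the actual code uses the my_atomic wrappers on the void *
members above, but the memory ordering is the same idea):

  // Writer, LOCK_slave_state held: publish a new head element.
  static void publish_head(std::atomic<gtid_pos_table *> &head,
                           gtid_pos_table *e)
  {
    e->next= head.load(std::memory_order_relaxed);
    // The release store makes e->next visible before readers can see e.
    head.store(e, std::memory_order_release);
  }

  // Reader, no lock: the acquire load pairs with the release store above.
  static gtid_pos_table *find_table(std::atomic<gtid_pos_table *> &head,
                                    void *hton)
  {
    for (gtid_pos_table *p= head.load(std::memory_order_acquire);
         p != NULL; p= p->next)
      if (p->table_hton == hton)
        return p;
    return NULL;
  }
*/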
bool loaded;
rpl_slave_state();
~rpl_slave_state();
void truncate_hash();
ulong count() const { return hash.records; }
int update(uint32 domain_id, uint32 server_id, uint64 sub_id,
uint64 seq_no, void *hton, rpl_group_info *rgi);
int truncate_state_table(THD *thd);
void select_gtid_pos_table(THD *thd, LEX_CSTRING *out_tablename);
int record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id,
rpl_group_info *rgi, bool in_statement, void **out_hton);
uint64 next_sub_id(uint32 domain_id);
int iterate(int (*cb)(rpl_gtid *, void *), void *data,
rpl_gtid *extra_gtids, uint32 num_extra,
bool sort);
int tostring(String *dest, rpl_gtid *extra_gtids, uint32 num_extra);
bool domain_to_gtid(uint32 domain_id, rpl_gtid *out_gtid);
int load(THD *thd, const char *state_from_master, size_t len, bool reset,
bool in_statement);
bool is_empty();
element *get_element(uint32 domain_id);
int put_back_list(uint32 domain_id, list_element *list);
void update_state_hash(uint64 sub_id, rpl_gtid *gtid, void *hton,
rpl_group_info *rgi);
int record_and_update_gtid(THD *thd, struct rpl_group_info *rgi);
int check_duplicate_gtid(rpl_gtid *gtid, rpl_group_info *rgi);
void release_domain_owner(rpl_group_info *rgi);
void set_gtid_pos_tables_list(gtid_pos_table *new_list,
gtid_pos_table *default_entry);
void add_gtid_pos_table(gtid_pos_table *entry);
struct gtid_pos_table *alloc_gtid_pos_table(LEX_CSTRING *table_name,
void *hton, rpl_slave_state::gtid_pos_table_state state);
void free_gtid_pos_tables(struct gtid_pos_table *list);
};
/*
Binlog state.
This keeps the last GTID written to the binlog for every distinct
(domain_id, server_id) pair.
This will be logged at the start of the next binlog file as a
Gtid_list_log_event; this way, it is easy to find the binlog file
containing a given GTID, by simply scanning backwards from the newest
one until a lower seq_no is found in the Gtid_list_log_event at the
start of a binlog for the given domain_id and server_id.
We also remember the last logged GTID for every domain_id. This is used
to know where to start when a master is changed to a slave. As a side
effect, it also allows skipping a hash lookup in the very common case of
logging a new GTID with the same server_id as the last GTID.
*/
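/*
Sketch of that lookup (simplified pseudo-code, not the actual implementation):
to locate the binlog file containing GTID D-S-N, walk the binlog files from
newest to oldest and inspect the Gtid_list_log_event at the start of each:

  for each binlog file F, newest first:
    glev= Gtid_list_log_event at the start of F
    if glev has no entry for (D, S), or its entry has seq_no < N:
      return F   // D-S-N was logged after F was created, i.e. inside F

since the event records the binlog state as of the moment F was created, and
every newer file already listed a seq_no >= N for (D, S).
*/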
struct rpl_binlog_state
{
struct element {
uint32 domain_id;
HASH hash; /* Containing all server_id for one domain_id */
/* The most recent entry in the hash. */
rpl_gtid *last_gtid;
/* Counter to allocate next seq_no for this domain. */
uint64 seq_no_counter;
int update_element(const rpl_gtid *gtid);
};
/* Mapping from domain_id to collection of elements. */
HASH hash;
/* Mutex protecting access to the state. */
mysql_mutex_t LOCK_binlog_state;
my_bool initialized;
/* Auxiliary buffer to sort gtid list. */
DYNAMIC_ARRAY gtid_sort_array;
rpl_binlog_state() :initialized(0) {}
~rpl_binlog_state();
void init();
void reset_nolock();
void reset();
void free();
bool load(struct rpl_gtid *list, uint32 count);
bool load(rpl_slave_state *slave_pos);
int update_nolock(const struct rpl_gtid *gtid, bool strict);
int update(const struct rpl_gtid *gtid, bool strict);
int update_with_next_gtid(uint32 domain_id, uint32 server_id,
rpl_gtid *gtid);
int alloc_element_nolock(const rpl_gtid *gtid);
bool check_strict_sequence(uint32 domain_id, uint32 server_id, uint64 seq_no);
int bump_seq_no_if_needed(uint32 domain_id, uint64 seq_no);
int write_to_iocache(IO_CACHE *dest);
int read_from_iocache(IO_CACHE *src);
uint32 count();
int get_gtid_list(rpl_gtid *gtid_list, uint32 list_size);
int get_most_recent_gtid_list(rpl_gtid **list, uint32 *size);
bool append_pos(String *str);
bool append_state(String *str);
rpl_gtid *find_nolock(uint32 domain_id, uint32 server_id);
rpl_gtid *find(uint32 domain_id, uint32 server_id);
rpl_gtid *find_most_recent(uint32 domain_id);
const char* drop_domain(DYNAMIC_ARRAY *ids, Gtid_list_log_event *glev, char*);
};
/*
Represents the GTID state from which a slave connection requests the
master to start sending binlog events.
*/
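/*
For example, a slave connecting with the requested state "0-2-20,1-5-300"
asks the master to send, within domain 0, only events after GTID 0-2-20 and,
within domain 1, only events after GTID 1-5-300.
*/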
struct slave_connection_state
{
struct entry {
rpl_gtid gtid;
uint32 flags;
};
/* Bits for 'flags' */
enum start_flags
{
START_OWN_SLAVE_POS= 0x1,
START_ON_EMPTY_DOMAIN= 0x2
};
/* Mapping from domain_id to the entry with GTID requested for that domain. */
HASH hash;
/* Auxiliary buffer to sort gtid list. */
DYNAMIC_ARRAY gtid_sort_array;
slave_connection_state();
~slave_connection_state();
void reset() { my_hash_reset(&hash); }
int load(const char *slave_request, size_t len);
int load(const rpl_gtid *gtid_list, uint32 count);
int load(rpl_slave_state *state, rpl_gtid *extra_gtids, uint32 num_extra);
rpl_gtid *find(uint32 domain_id);
entry *find_entry(uint32 domain_id);
int update(const rpl_gtid *in_gtid);
void remove(const rpl_gtid *gtid);
void remove_if_present(const rpl_gtid *in_gtid);
ulong count() const { return hash.records; }
int to_string(String *out_str);
int append_to_string(String *out_str);
int get_gtid_list(rpl_gtid *gtid_list, uint32 list_size);
bool is_pos_reached();
};
extern bool rpl_slave_state_tostring_helper(String *dest, const rpl_gtid *gtid,
bool *first);
extern int gtid_check_rpl_slave_state_table(TABLE *table);
extern rpl_gtid *gtid_parse_string_to_list(const char *p, size_t len,
uint32 *out_len);
#endif /* RPL_GTID_H */