1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
|
/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 2001-2002
* Sleepycat Software. All rights reserved.
*/
#ifndef _REP_H_
#define _REP_H_
#define REP_ALIVE 1 /* I am alive message. */
#define REP_ALIVE_REQ 2 /* Request for alive messages. */
#define REP_ALL_REQ 3 /* Request all log records greater than LSN. */
#define REP_ELECT 4 /* Indicates that all listeners should */
/* begin master election */
#define REP_FILE 6 /* Page of a database file. */
#define REP_FILE_REQ 7 /* Request for a database file. */
#define REP_LOG 8 /* Log record. */
#define REP_LOG_MORE 9 /* There are more log records to request. */
#define REP_LOG_REQ 10 /* Request for a log record. */
#define REP_MASTER_REQ 11 /* Who is the master */
#define REP_NEWCLIENT 12 /* Announces the presence of a new client. */
#define REP_NEWFILE 13 /* Announce a log file change. */
#define REP_NEWMASTER 14 /* Announces who the master is. */
#define REP_NEWSITE 15 /* Announces that a site has heard from a new
* site; like NEWCLIENT, but indirect. A
* NEWCLIENT message comes directly from the new
* client while a NEWSITE comes indirectly from
* someone who heard about a NEWSITE.
*/
#define REP_PAGE 16 /* Database page. */
#define REP_PAGE_REQ 17 /* Request for a database page. */
#define REP_PLIST 18 /* Database page list. */
#define REP_PLIST_REQ 19 /* Request for a page list. */
#define REP_VERIFY 20 /* A log record for verification. */
#define REP_VERIFY_FAIL 21 /* The client is outdated. */
#define REP_VERIFY_REQ 22 /* Request for a log record to verify. */
#define REP_VOTE1 23 /* Send out your information for an election. */
#define REP_VOTE2 24 /* Send a "you are master" vote. */
/*
 * Used to consistently designate which messages ought to be received where.
 *
 * MASTER_ONLY(dbenv) and CLIENT_ONLY(dbenv) make the *enclosing function*
 * return EINVAL when the DB_ENV_REP_MASTER / DB_ENV_REP_CLIENT flag,
 * respectively, is not set on dbenv (as tested by F_ISSET); otherwise they
 * do nothing.  ANYSITE(dbenv) expands to nothing and only marks, for the
 * reader, a message that may be received at any site.
 *
 * The guards are wrapped in do { } while (0) so that each one behaves as a
 * single statement.  A bare "if (...) return" expansion would capture a
 * following "else" when the macro is used as the body of an unbraced
 * if/else.  Invoke them with a trailing semicolon: MASTER_ONLY(dbenv);
 */
#define	MASTER_ONLY(dbenv) do {						\
	if (!F_ISSET(dbenv, DB_ENV_REP_MASTER))				\
		return (EINVAL);					\
} while (0)
#define	CLIENT_ONLY(dbenv) do {						\
	if (!F_ISSET(dbenv, DB_ENV_REP_CLIENT))				\
		return (EINVAL);					\
} while (0)
#define	ANYSITE(dbenv)
/*
 * Shared replication structure (REP).
 *
 * Holds the replication state kept in shalloc'ed memory: site identity,
 * generation, message-size limits, log-gap thresholds, election vote
 * tallying, statistics and the REP_F_* state flags.
 */
typedef struct __rep {
/*
 * Due to alignment constraints on some architectures (e.g. HP-UX),
 * DB_MUTEXes must be the first element of shalloced structures,
 * and as a corollary there can be only one per structure. Thus,
 * db_mutex_off points to a mutex in a separately-allocated chunk.
 */
DB_MUTEX mutex; /* Region lock. */
roff_t db_mutex_off; /* Offset of the client database mutex. */
u_int32_t tally_off; /* Offset of the tally region. */
int eid; /* Environment id. */
int master_id; /* ID of the master site. */
u_int32_t gen; /* Replication generation number. */
int asites; /* Space allocated for sites. */
int nsites; /* Number of sites in group. */
int priority; /* My priority in an election. */
/*
 * gbytes and bytes together express the limit on data sent in a single
 * __rep_process_message call.
 */
u_int32_t gbytes; /* Limit on data sent in single... */
u_int32_t bytes; /* __rep_process_message call. */
/*
 * NOTE(review): presumably the default values for request_gap and max_gap
 * below -- confirm where the region is initialized.
 */
#define DB_REP_REQUEST_GAP 4
#define DB_REP_MAX_GAP 128
u_int32_t request_gap; /* # of records to receive before we
 * request a missing log record. */
u_int32_t max_gap; /* Maximum number of records before
 * requesting a missing log record. */
/* Vote tallying information. */
int sites; /* Sites heard from. */
int winner; /* Current winner. */
int w_priority; /* Winner priority. */
u_int32_t w_gen; /* Winner generation. */
DB_LSN w_lsn; /* Winner LSN. */
int w_tiebreaker; /* Winner tiebreaking value. */
int votes; /* Number of votes for this site. */
/* Statistics. */
DB_REP_STAT stat;
/* Bit values for the flags field below. */
#define REP_F_EPHASE1 0x01 /* In phase 1 of election. */
#define REP_F_EPHASE2 0x02 /* In phase 2 of election. */
#define REP_F_LOGSONLY 0x04 /* Log-site only; cannot be upgraded. */
#define REP_F_MASTER 0x08 /* Master replica. */
#define REP_F_RECOVER 0x10 /* NOTE(review): undocumented; presumably
 * set while recovery is in progress
 * -- confirm. */
#define REP_F_UPGRADE 0x20 /* Upgradeable replica. */
/* Mask of the two non-master flags: upgradeable or log-site only. */
#define REP_ISCLIENT (REP_F_UPGRADE | REP_F_LOGSONLY)
u_int32_t flags; /* Bitwise OR of REP_F_* values. */
} REP;
/*
 * Election-state helpers.  R is passed to F_ISSET/F_CLR, which are defined
 * elsewhere.
 * NOTE(review): presumably R is a REP * and these macros operate on its
 * flags field -- confirm against the F_ISSET/F_CLR definitions.
 *
 * IN_ELECTION tests R for either election-phase flag (REP_F_EPHASE1 or
 * REP_F_EPHASE2); ELECTION_DONE clears both phase flags on R.
 */
#define IN_ELECTION(R) F_ISSET((R), REP_F_EPHASE1 | REP_F_EPHASE2)
#define ELECTION_DONE(R) F_CLR((R), REP_F_EPHASE1 | REP_F_EPHASE2)
/*
 * Per-process replication structure.
 *
 * Groups the per-process handles used for replication: mutex pointers, the
 * bookkeeping database handle, a pointer to the REP structure and the
 * message-send callback.  Only the struct tag is declared here; no typedef
 * name is introduced in this header.
 */
struct __db_rep {
DB_MUTEX *mutexp; /* NOTE(review): undocumented; presumably the
 * region mutex (REP.mutex) -- confirm. */
DB_MUTEX *db_mutexp; /* Mutex for bookkeeping database. */
DB *rep_db; /* Bookkeeping database. */
REP *region; /* In memory structure. */
/*
 * NOTE(review): the parameter meanings are not visible in this header;
 * presumably the two DBTs are the control and record parts of a message,
 * the int identifies the destination and the u_int32_t carries flags --
 * confirm against the callers.
 */
int (*rep_send) /* Send function. */
__P((DB_ENV *,
const DBT *, const DBT *, int, u_int32_t));
};
/*
 * Control structure for replication communication infrastructure.
 *
 * Note that the version information should be at the beginning of the
 * structure, so that we can rearrange the rest of it while letting the
 * version checks continue to work. DB_REPVERSION should be revved any time
 * the rest of the structure changes.
 */
typedef struct __rep_control {
/*
 * NOTE(review): presumably the value stored in rep_version -- confirm
 * where control structures are filled in.
 */
#define DB_REPVERSION 1
u_int32_t rep_version; /* Replication version number. */
u_int32_t log_version; /* Log version number. */
DB_LSN lsn; /* Log sequence number. */
u_int32_t rectype; /* Message type. */
u_int32_t gen; /* Generation number. */
u_int32_t flags; /* log_put flag value. */
} REP_CONTROL;
/*
 * Election vote information (REP_VOTE_INFO): the priority, site count and
 * tie-breaking value describing one site.
 * NOTE(review): presumably the payload of the REP_VOTE1 message -- confirm
 * against the election code.
 */
typedef struct __rep_vote {
int priority; /* My site's priority. */
int nsites; /* Number of sites I've been in
 * communication with. */
int tiebreaker; /* Tie-breaking quasi-random int. */
} REP_VOTE_INFO;
/*
 * This structure takes care of representing a transaction.
 * It holds all the records, sorted by page number so that
 * we can obtain locks and apply updates in a deadlock free
 * order.
 *
 * NOTE(review): the description above appears to fit TXN_RECS, which holds
 * an array of LSN_PAGE, rather than a single LSN_PAGE; presumably each
 * LSN_PAGE is one entry (a log record and the page it touches) -- confirm.
 */
typedef struct __lsn_page {
DB_LSN lsn; /* LSN; presumably of the log record -- confirm. */
u_int32_t fid; /* Presumably a file id -- confirm. */
DB_LOCK_ILOCK pgdesc; /* Presumably the page's lock object -- confirm. */
#define LSN_PAGE_NOLOCK 0x0001 /* No lock necessary for log rec. */
u_int32_t flags; /* LSN_PAGE_* flag bits. */
} LSN_PAGE;
/*
 * TXN_RECS: an array of LSN_PAGE entries together with a transaction id
 * and a lock id.
 * NOTE(review): presumably npages counts the entries in use and nalloc the
 * entries allocated in array -- confirm against the code that fills it.
 */
typedef struct __txn_recs {
int npages; /* Presumably # of entries used in array. */
int nalloc; /* Presumably # of entries allocated in array. */
LSN_PAGE *array; /* The LSN_PAGE entries. */
u_int32_t txnid; /* Transaction id. */
u_int32_t lockid; /* Lock id; presumably the locker -- confirm. */
} TXN_RECS;
/*
 * LSN_COLLECTION: an array of DB_LSN values with two counters.
 * NOTE(review): presumably nlsns counts the entries in use and nalloc the
 * entries allocated in array -- confirm against the code that fills it.
 */
typedef struct __lsn_collection {
int nlsns; /* Presumably # of LSNs stored in array. */
int nalloc; /* Presumably # of LSNs allocated in array. */
DB_LSN *array; /* The LSN values. */
} LSN_COLLECTION;
/*
 * This is used by the page-prep routines to do the lock_vec call to
 * apply the updates for a single transaction or a collection of
 * transactions.
 *
 * NOTE(review): presumably n is the number of entries in both reqs and
 * objs -- confirm against the page-prep routines.
 */
typedef struct _linfo {
int n; /* Presumably # of lock requests. */
DB_LOCKREQ *reqs; /* Lock request array. */
DBT *objs; /* DBT array; presumably the lock objects. */
} linfo_t;
#include "dbinc_auto/rep_ext.h"
#endif /* !_REP_H_ */
|