summaryrefslogtreecommitdiff
path: root/test/tcl/rep034.tcl
blob: cab018c442284915e15b0ce8f3e589bf97e58cb8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
# See the file LICENSE for redistribution information.
#
# Copyright (c) 2004, 2015 Oracle and/or its affiliates.  All rights reserved.
#
# $Id$
#
# TEST	rep034
# TEST	Test of STARTUPDONE notification.
# TEST
# TEST	STARTUPDONE can now be recognized without the need for new "live" log
# TEST  records from the master (under favorable conditions).  The response to
# TEST  the ALL_REQ at the end of synchronization includes an end-of-log marker
# TEST  that now triggers it.  However, the message containing that end marker
# TEST  could get lost, so live log records still serve as a back-up mechanism.
# TEST  The end marker may also be set under c2c sync, but only if the serving
# TEST  client has itself achieved STARTUPDONE.
#
proc rep034 { method { niter 2 } { tnum "034" } args } {
	# Entry point for the test.
	#
	#   method  access method under test (handed to convert_args,
	#           is_queueext and rep034_sub)
	#   niter   iteration count forwarded to rep034_sub (default 2)
	#   tnum    test number used in the printed messages (default "034")
	#   args    extra arguments; run through convert_args and forwarded
	#
	# Returns "ALL" when only the list of valid methods is being
	# queried; otherwise runs rep034_sub once per logset.

	source ./include.tcl
	global databases_in_memory repfiles_in_memory env_private

	# Valid for all access methods.
	if { $checking_valid_methods } {
		return "ALL"
	}

	# Describe where the databases live.  Queue-extent methods are
	# skipped when named in-memory databases are in use.
	if { $databases_in_memory } {
		if { [is_queueext $method] } {
			puts -nonewline "Skipping rep$tnum for method "
			puts "$method with named in-memory databases."
			return
		}
		set db_desc "using named in-memory databases"
	} else {
		set db_desc "using on-disk databases"
	}

	# Describe where the replication files live.
	if { $repfiles_in_memory } {
		set rep_desc "and in-memory replication files"
	} else {
		set rep_desc "and on-disk replication files"
	}

	# Mention private environments only when they are in use.
	if { $env_private } {
		set priv_desc "with private env"
	} else {
		set priv_desc ""
	}

	set args [convert_args $method $args]

	# One run per logset: master, client 0, client 1.
	foreach logset [create_logsets 3] {
		# Pull out the first three elements (missing ones become "").
		foreach {m_logs c0_logs c1_logs} $logset break

		puts "Rep$tnum ($method $args): Test of\
		    startup synchronization detection\
		    $db_desc $rep_desc $priv_desc."
		puts "Rep$tnum: Master logs are $m_logs"
		puts "Rep$tnum: Client 0 logs are $c0_logs"
		puts "Rep$tnum: Client 1 logs are $c1_logs"
		rep034_sub $method $niter $tnum $logset $args
	}
}

# This test manages on its own the decision of whether or not to open an
# environment with recovery.  (It varies throughout the test.)  Therefore there
# is no need to run it twice (as we often do with a loop in the main proc).
# 
proc rep034_sub { method niter tnum logset largs } {
	# Run the STARTUPDONE scenarios for one combination of log types.
	#
	#   method  access method, passed through to rep_test
	#   niter   iteration count, passed through to rep_test
	#   tnum    test number used in the printed messages
	#   logset  three-element list of log types, in the order
	#           master, client 1 (CLIENTDIR), client 2 (CLIENTDIR2)
	#   largs   extra arguments appended to every rep_test call
	#
	# The global "anywhere" flag is switched between 0 (master serves
	# requests) and 1 (client-to-client sync) as the test proceeds.  It
	# is restored to 0 on every exit path, including the early "skip"
	# returns, so the setting does not leak into later tests.
	global anywhere
	global testdir
	global databases_in_memory
	global repfiles_in_memory
	global env_private
	global rep_verbose
	global verbose_type
	global rep034_got_allreq

	set verbargs ""
	if { $rep_verbose == 1 } {
		set verbargs " -verbose {$verbose_type on} "
	}

	set repmemargs ""
	if { $repfiles_in_memory } {
		set repmemargs "-rep_inmem_files "
	}

	set privargs ""
	if { $env_private == 1 } {
		set privargs " -private "
	}

	env_cleanup $testdir

	replsetup $testdir/MSGQUEUEDIR

	set masterdir $testdir/MASTERDIR
	set clientdir $testdir/CLIENTDIR
	set clientdir2 $testdir/CLIENTDIR2

	file mkdir $masterdir
	file mkdir $clientdir
	file mkdir $clientdir2

	set m_logtype [lindex $logset 0]
	set c_logtype [lindex $logset 1]
	set c2_logtype [lindex $logset 2]

	# In-memory logs require a large log buffer, and cannot
	# be used with -txn nosync.
	set m_logargs [adjust_logargs $m_logtype]
	set c_logargs [adjust_logargs $c_logtype]
	set c2_logargs [adjust_logargs $c2_logtype]
	set m_txnargs [adjust_txnargs $m_logtype]
	set c_txnargs [adjust_txnargs $c_logtype]
	set c2_txnargs [adjust_txnargs $c2_logtype]

	# In first part of test master serves requests.
	#
	set anywhere 0

	# Create a master; add some data.
	#
	repladd 1
	set ma_envcmd "berkdb_env_noerr -create $m_txnargs $m_logargs \
	    -event $verbargs -errpfx MASTER $repmemargs $privargs \
	    -home $masterdir -rep_master -rep_transport \[list 1 replsend\]"
	set masterenv [eval $ma_envcmd]
	puts "\tRep$tnum.a: Create master; add some data."
	eval rep_test $method $masterenv NULL $niter 0 0 0 $largs

	# Bring up a new client, and see that it can get STARTUPDONE with no new
	# live transactions at the master.
	#
	puts "\tRep$tnum.b: Bring up client; check STARTUPDONE."
	repladd 2
	set cl_envcmd "berkdb_env_noerr -create $c_txnargs $c_logargs \
	    -event $verbargs -errpfx CLIENT $repmemargs $privargs \
	    -home $clientdir -rep_client -rep_transport \[list 2 replsend\]"
	set clientenv [eval $cl_envcmd]
	set envlist "{$masterenv 1} {$clientenv 2}"
	process_msgs $envlist

	error_check_good done_without_live_txns \
	    [stat_field $clientenv rep_stat "Startup complete"] 1

	# Test that the event got fired as well.  In the rest of the test things
	# get a little complex (what with having two clients), so only check the
	# event part here.  The important point is the various ways that
	# STARTUPDONE can be computed, so testing the event firing mechanism
	# just this once is enough.
	#
	error_check_good done_event_too [is_startup_done $clientenv] 1

	#
	# Bring up another client.  Do additional new txns at master, ensure
	# that STARTUPDONE is not triggered at NEWMASTER LSN.
	#
	puts "\tRep$tnum.c: Another client; no STARTUPDONE at NEWMASTER LSN."
	set newmaster_lsn [next_expected_lsn $masterenv]
	repladd 3
	#
	# !!! Please note that we're giving client2 a special customized version
	# of the replication transport call-back function.
	#
	set cl2_envcmd "berkdb_env_noerr -create $c2_txnargs $c2_logargs \
	    -event $verbargs -errpfx CLIENT2 $repmemargs $privargs \
	    -home $clientdir2 -rep_client -rep_transport \[list 3 rep034_send\]"
	set client2env [eval $cl2_envcmd]

	set envlist "{$masterenv 1} {$clientenv 2} {$client2env 3}"
	set verified false
	for {set i 0} {$i < 10} {incr i} {
		proc_msgs_once $envlist
		set client2lsn [next_expected_lsn $client2env]

		# Get to the point where we've gone past where the master's LSN
		# was at NEWMASTER time, and make sure we haven't yet gotten
		# STARTUPDONE.  Ten loop iterations should be plenty.
		#
		if {[$client2env log_compare $client2lsn $newmaster_lsn] > 0} {
			if {![stat_field \
			    $client2env rep_stat "Startup complete"]} {
				set verified true
			}
			break;
		}
		eval rep_test $method $masterenv NULL $niter 0 0 0 $largs
	}
	error_check_good no_newmaster_trigger $verified true

	process_msgs $envlist
	error_check_good done_during_live_txns \
	    [stat_field $client2env rep_stat "Startup complete"] 1

	#
	# From here on out we use client-to-client sync.
	#
	set anywhere 1

	# Here we rely on recovery at client 1.  If that client is running with
	# in-memory logs or in-memory databases, forgo the remainder of the test.
	#
	if {$c_logtype eq "in-mem" || $databases_in_memory } {
		puts "\tRep$tnum.d: Skip the rest of the test for\
		     in-memory logging or databases."
		$masterenv close
		$clientenv close
		$client2env close
		replclose $testdir/MSGQUEUEDIR
		# Undo the "set anywhere 1" above, exactly as the normal
		# exit path does, so the flag does not leak out of this test.
		set anywhere 0
		return
	}

	# Shut down client 1.  Bring it back, with recovery.  Verify that it can
	# get STARTUPDONE by syncing to other client, even with no new master
	# txns.
	#
	puts "\tRep$tnum.d: Verify STARTUPDONE using c2c sync."
	$clientenv close
	set clientenv [eval $cl_envcmd -recover]
	set envlist "{$masterenv 1} {$clientenv 2} {$client2env 3}"

	# Clear counters at client2, so that we can check "Client service
	# requests" in a moment.
	#
	$client2env rep_stat -clear
	process_msgs $envlist
	error_check_good done_via_c2c \
	    [stat_field $clientenv rep_stat "Startup complete"] 1
	#
	# Make sure our request was served by client2.  This isn't a test of c2c
	# sync per se, but if this fails it indicates that we're not really
	# testing what we thought we were testing.
	#
	error_check_bad c2c_served_by_master \
	    [stat_field $client2env rep_stat "Client service requests"] 0

	# Verify that we don't get STARTUPDONE if we are using c2c sync to
	# another client, and the serving client has not itself reached
	# STARTUPDONE, because that suggests that the serving client could be
	# way far behind.   But that we can still eventually get STARTUPDONE, as
	# a fall-back, once the master starts generating new txns again.
	#
	# To do so, we'll need to restart both clients.  Start with the client
	# that will serve the request.  Turn off "anywhere" process for a moment
	# so that we can get this client set up without having the other one
	# running.
	#
	# Now it's client 2 that needs recovery.  Forgo the rest of the test if
	# it is logging in memory.  (We could get this far in mixed mode, with
	# client 1 logging on disk.)
	#
	if {$c2_logtype eq "in-mem"} {
		puts "\tRep$tnum.e: Skip rest of test for in-memory logging."
		$masterenv close
		$clientenv close
		$client2env close
		replclose $testdir/MSGQUEUEDIR
		# "anywhere" is still 1 at this point; restore it before
		# leaving, as the normal exit path does.
		set anywhere 0
		return
	}
	puts "\tRep$tnum.e: Check no STARTUPDONE when c2c server is behind."
	$clientenv log_flush
	$clientenv close
	$client2env log_flush
	$client2env close

	set anywhere 0
	set client2env [eval $cl2_envcmd -recover]
	set envlist "{$masterenv 1} {$client2env 3}"

	# We want client2 to get partway through initialization, but once it
	# sends the ALL_REQ to the master, we want to cut things off there.
	# Recall that we gave client2 a special "wrapper" version of the
	# replication transport call-back function: that function will set a
	# flag when it sees an ALL_REQ message go by.
	#
	# NOTE(review): this loop has no iteration bound; it relies on
	# client2 always sending an ALL_REQ during resynchronization.
	#
	set rep034_got_allreq false
	while { !$rep034_got_allreq } {
		proc_msgs_once $envlist
	}

	#
	# To make sure we're doing a valid test, verify that we really did
	# succeed in getting the serving client into the state we intended.
	#
	error_check_good serve_from_notstarted \
	    [stat_field $client2env rep_stat "Startup complete"] 0

	# Start up the client to be tested.  Make sure it doesn't get
	# STARTUPDONE (yet).  Again, the checking of service request stats is
	# just for test debugging, to make sure we have a valid test.
	#
	# To add insult to injury, not only do we not get STARTUPDONE from the
	# "behind" client, we also don't even get all the log records we need
	# (because we didn't allow client2's ALL_REQ to get to the master).
	# And no mechanism to let us know that.  The only resolution is to wait
	# for gap detection to rerequest (which would then go to the master).
	# So, set a small rep_request upper bound, so that it doesn't take a ton
	# of new live txns to reach the trigger.
	#
	set anywhere 1
	$client2env rep_stat -clear
	replclear 2
	set clientenv [eval $cl_envcmd -recover]
	#
	# Set to 400 usecs.  An average ping to localhost should
	# be a few 10s usecs.
	#
	$clientenv rep_request 400 400
	set envlist "{$masterenv 1} {$clientenv 2} {$client2env 3}"

	# Here we're expecting that the master isn't generating any new log
	# records, which is normally the case since we're not generating any new
	# transactions there.  This is important, because otherwise the client
	# could notice its log gap and request the missing records, resulting in
	# STARTUPDONE before we're ready for it.  When debug_rop is on, just
	# scanning the data-dir during UPDATE_REQ processing (which, remember,
	# now happens just to check for potential NIMDB re-materialization)
	# generates log records, as we open each file we find to see if it's a
	# database.  So, filter out LOG messages (simulating them being "lost")
	# temporarily.
	#
	if {[is_substr [berkdb getconfig] "debug_rop"]} {
		$masterenv rep_transport {1 rep034_send_nolog}
	}
	while {[rep034_proc_msgs_once $masterenv $clientenv $client2env] > 0} {}
	$masterenv rep_transport {1 replsend}

	error_check_good not_from_undone_c2c_client \
	    [stat_field $clientenv rep_stat "Startup complete"] 0

	error_check_bad c2c_served_by_master \
	    [stat_field $client2env rep_stat "Client service requests"] 0

	# Verify that we nevertheless *do* get STARTUPDONE after the master
	# starts generating new txns again.  Generate two sets of transactions,
	# with an unmistakable pause between, to ensure that we trigger the
	# client's rerequest timer, which we need in order to pick up the
	# missing transactions.  The 400 usec is a nice short time; but on
	# Windows sometimes it's possible to blast through a single process_msgs
	# cycle so quickly that its low-resolution timer reflects no elapsed
	# time at all!
	#
	puts "\tRep$tnum.f: Check STARTUPDONE via fall-back to live txns."
	eval rep_test $method $masterenv NULL $niter 0 0 0 $largs
	process_msgs $envlist
	tclsleep 1
	eval rep_test $method $masterenv NULL $niter 0 0 0 $largs
	process_msgs $envlist
	error_check_good fallback_live_txns \
	    [stat_field $clientenv rep_stat "Startup complete"] 1

	$masterenv close
	$clientenv close
	$client2env close
	replclose $testdir/MSGQUEUEDIR
	set anywhere 0
}

# Do a round of message processing, but juggle things such that client2 can
# never receive a message from the master.
#
# Assumes the usual "{$masterenv 1} {$clientenv 2} {$client2env 3}" structure.
# 
proc rep034_proc_msgs_once { masterenv clientenv client2env } {
	# One round of message processing, done in two steps so that
	# nothing the master sends ever reaches client2 (site 3).
	#
	#   masterenv   master environment handle (site 1)
	#   clientenv   first client environment handle (site 2)
	#   client2env  second client environment handle (site 3)
	#
	# Returns the sum of the counts reported by the two
	# proc_msgs_once calls.

	# Step 1: the master alone processes its messages; then whatever
	# is queued for site 3 is thrown away.
	set master_count [proc_msgs_once "{$masterenv 1}" NONE err]
	error_check_good pmonce_1 $err 0
	replclear 3

	# Step 2: both clients process their messages.
	set client_count \
	    [proc_msgs_once "{$clientenv 2} {$client2env 3}" NONE err]
	error_check_good pmonce_2 $err 0

	return [expr {$master_count + $client_count}]
}

# Wrapper for replsend.  Mostly just a pass-through to the real replsend, except
# we watch for an ALL_REQ, and just set a flag when we see it.
# 
proc rep034_send { control rec fromid toid flags lsn } {
	# Transport callback that forwards every message to replsend
	# unchanged and returns replsend's result.  As a side effect, the
	# global rep034_got_allreq is set to true when the outgoing
	# message is of type "all_req".
	set msgtype [berkdb msgtype $control]
	if { [string equal $msgtype "all_req"] } {
		set ::rep034_got_allreq true
	}

	set status [replsend $control $rec $fromid $toid $flags $lsn]
	return $status
}

# Another slightly different wrapper for replsend.  This one simulates losing
# any broadcast LOG messages from the master.
# 
proc rep034_send_nolog { control rec fromid toid flags lsn } {
	# Transport callback that pretends broadcast LOG messages from
	# site 1 were lost: for a message of type "log" with fromid 1 and
	# toid -1 it returns 0 without calling replsend.  Every other
	# message is forwarded to replsend and its result is returned.
	set is_log [string equal [berkdb msgtype $control] "log"]
	if { $is_log && $fromid == 1 && $toid == -1 } {
		return 0
	}
	return [replsend $control $rec $fromid $toid $flags $lsn]
}