summaryrefslogtreecommitdiff
path: root/bdb/test/reputils.tcl
diff options
context:
space:
mode:
Diffstat (limited to 'bdb/test/reputils.tcl')
-rw-r--r--bdb/test/reputils.tcl659
1 files changed, 659 insertions, 0 deletions
diff --git a/bdb/test/reputils.tcl b/bdb/test/reputils.tcl
new file mode 100644
index 00000000000..340e359f26d
--- /dev/null
+++ b/bdb/test/reputils.tcl
@@ -0,0 +1,659 @@
+# See the file LICENSE for redistribution information.
+#
+# Copyright (c) 2001-2002
+# Sleepycat Software. All rights reserved.
+#
+# $Id: reputils.tcl,v 11.34 2002/08/12 17:54:18 sandstro Exp $
+#
+# Replication testing utilities
+
+# Environment handle for the env containing the replication "communications
+# structure" (really a CDB environment).
+
+# The test environment consists of a queue and a directory (environment)
+# per replication site. The queue is used to hold messages destined for a
+# particular site and the directory will contain the environment for the
+# site. So the environment looks like:
+# $testdir
+# ___________|______________________________
+# / | \ \
+# MSGQUEUEDIR MASTERDIR CLIENTDIR.0 ... CLIENTDIR.N-1
+# | | ... |
+# 1 2 .. N+1
+#
+# The master is site 1 in the MSGQUEUEDIR and clients 1-N map to message
+# queues 2 - N+1.
+#
+# The global array repenv contains the environment handles for the sites:
+# repenv(master) is the master's environment, repenv(0) .. repenv(N-1) are
+# the clients' environments, and repenv(N) holds the marker "NULL".
+
+# Handle of the message-queue environment; assigned by replsetup.
+global queueenv
+
+# Array of DB handles, one per machine ID, for the databases that contain
+# messages.
+global queuedbs
+# List of the machine IDs that have been registered with repladd.
+global machids
+
+# Election timeout used by the replication election helpers.
+global elect_timeout
+set elect_timeout 50000000
+# Message-drop switch consulted by replsend; 0 means nothing is dropped.
+set drop 0
+
+# repl_envsetup --
+#    Build a clean replication test area under $testdir and open every site.
+#
+#    envargs   extra arguments spliced into each berkdb_env command
+#    largs     argument string that is extended and handed back to the caller
+#    tnum      test number, forwarded to repl_envprocq
+#    nclients  number of client sites to create (default 1)
+#    droppct   when nonzero, message dropping is switched on and drop_msg
+#              is set to 100 / droppct
+#    oob       forwarded to repl_envprocq (out-of-order delivery)
+#
+#    Environment handles are stored in the global repenv array as
+#    repenv(master) and repenv(0) .. repenv(nclients-1); the entry after the
+#    last client is set to "NULL".
+#    Returns largs with " -env <master env handle> " appended.
+#
+proc repl_envsetup { envargs largs tnum {nclients 1} {droppct 0} { oob 0 } } {
+    source ./include.tcl
+    global clientdir masterdir testdir
+    global drop drop_msg
+    global repenv
+
+    env_cleanup $testdir
+    replsetup $testdir/MSGQUEUEDIR
+
+    set masterdir $testdir/MASTERDIR
+    file mkdir $masterdir
+
+    # Dropping stays off unless a drop percentage was requested.
+    set drop 0
+    if { $droppct != 0 } {
+        set drop 1
+        set drop_msg [expr 100 / $droppct]
+    }
+
+    # One home directory per client.
+    set i 0
+    while { $i < $nclients } {
+        set clientdir($i) $testdir/CLIENTDIR.$i
+        file mkdir $clientdir($i)
+        incr i
+    }
+
+    # The master is site 1.  Its log file size is set below the default to
+    # force log file changes, but large enough for the tests that use
+    # binary files as keys/data.
+    repladd 1
+    set lmax [expr 3 * 1024 * 1024]
+    set masterenv [eval {berkdb_env -create -log_max $lmax} $envargs \
+        {-home $masterdir -txn -rep_master -rep_transport \
+        [list 1 replsend]}]
+    error_check_good master_env [is_valid_env $masterenv] TRUE
+    set repenv(master) $masterenv
+
+    # Client i uses site id i + 2.
+    for { set i 0 } { $i < $nclients } { incr i } {
+        set envid [expr $i + 2]
+        repladd $envid
+        set clientenv [eval {berkdb_env -create} $envargs -txn \
+            {-cachesize { 0 10000000 0 }} -lock_max 10000 \
+            {-home $clientdir($i) -rep_client -rep_transport \
+            [list $envid replsend]}]
+        error_check_good client_env [is_valid_env $clientenv] TRUE
+        set repenv($i) $clientenv
+    }
+    # Mark the end of the client handles.
+    set repenv($i) NULL
+
+    append largs " -env $masterenv "
+
+    # Work through the start-up message exchange before returning.
+    repl_envprocq $tnum $nclients $oob
+
+    return $largs
+}
+
+# repl_envprocq --
+#    Drain the message queues of the master and of every client, looping
+#    until one complete pass processes no message at all.
+#
+#    tnum      test number, used in progress messages
+#    nclients  expected number of clients; checked against the number of
+#              handles found in repenv before the "NULL" marker
+#    oob       when true, each queue is processed with a random skip
+#              interval so that messages are delivered out of order
+#
+#    replprocessqueue does the per-queue work; this proc only drives it.
+#    If message dropping is active, it is switched off once the queues run
+#    dry, the master is flushed, and processing continues; the previous
+#    drop setting is restored before returning.
+
+proc repl_envprocq { tnum { nclients 1 } { oob 0 }} {
+    global repenv
+    global drop
+
+    set masterenv $repenv(master)
+
+    # Count client handles up to the "NULL" marker.
+    set i 0
+    while { $repenv($i) != "NULL" } {
+        incr i
+    }
+    error_check_good i_nclients $nclients $i
+
+    set name [format "Repl%03d" $tnum]
+    berkdb debug_check
+    puts -nonewline "\t$name: Processing master/$i client queues"
+    set rand_skip 0
+    if { $oob } {
+        puts " out-of-order"
+    } else {
+        puts " in order"
+    }
+
+    set do_check 1
+    set droprestore $drop
+    while { 1 } {
+        # One pass: the master's queue first, then each client's.
+        if { $oob } {
+            set rand_skip [berkdb random_int 2 10]
+        }
+        set nproced [replprocessqueue $masterenv 1 $rand_skip]
+        for { set i 0 } { $i < $nclients } { incr i } {
+            if { $oob } {
+                set rand_skip [berkdb random_int 2 10]
+            }
+            incr nproced \
+                [replprocessqueue $repenv($i) [expr $i + 2] $rand_skip]
+        }
+        if { $nproced != 0 } {
+            continue
+        }
+
+        # Everything is quiet.  Because record requests are delayed
+        # until a few records have gone by, each client should have
+        # requested fewer records than it queued.
+        for { set i 0 } { $i < $nclients } { incr i } {
+            set clientenv $repenv($i)
+            set stats [$clientenv rep_stat]
+            set queued [getstats $stats {Total log records queued}]
+            error_check_bad queued_stats $queued -1
+            set requested [getstats $stats {Log records requested}]
+            error_check_bad requested_stats $requested -1
+            if { $queued != 0 && $do_check != 0 } {
+                error_check_good num_requested \
+                    [expr $requested < $queued] 1
+            }
+
+            $clientenv rep_request 1 1
+        }
+
+        if { $drop == 0 } {
+            break
+        }
+
+        # Messages were being dropped: stop dropping and flush the
+        # master so that the clients get everything and end up in
+        # the right state, then go round again.
+        set drop 0
+        set do_check 0
+        $masterenv rep_flush
+        berkdb debug_check
+        puts "\t$name: Flushing Master"
+    }
+
+    # Put the clients back into their default request state in case
+    # there is more processing to do.
+    for { set i 0 } { $i < $nclients } { incr i } {
+        $repenv($i) rep_request 4 128
+    }
+    set drop $droprestore
+}
+
+# repl_envver0 --
+#    Verify that every test*.db database found in the master's directory
+#    has the same contents in each client environment.
+#
+#    tnum      test number, used in progress messages
+#    method    access method name; converted with convert_method
+#    nclients  number of clients to compare against the master (default 1)
+#
+#    Each database is dumped through open_and_dump_file with the no-op
+#    check function repl_noop, because the expected contents are unknown
+#    here; only master/client equality is checked.  Dumps are sorted before
+#    comparison unless the converted method is -recno.  Returns without
+#    doing anything when the glob for test*.db fails.
+
+proc repl_envver0 { tnum method { nclients 1 } } {
+    global masterdir
+    global repenv
+
+    # Scratch files: t1 = client dump, t2 = master dump, t3 = sort output.
+    set t1 $masterdir/t1
+    set t2 $masterdir/t2
+    set t3 $masterdir/t3
+    set name [format "Repl%03d" $tnum]
+
+    # The method does not change between databases, so test it once.
+    set omethod [convert_method $method]
+    set is_recno [expr {[string compare $omethod "-recno"] == 0}]
+
+    # Collect the database names relative to the master directory.
+    set cwd [pwd]
+    cd $masterdir
+    set stat [catch {glob test*.db} dbs]
+    cd $cwd
+    if { $stat == 1 } {
+        return
+    }
+
+    foreach testfile $dbs {
+        # Dump the master first.
+        open_and_dump_file $testfile $repenv(master) $t2 \
+            repl_noop dump_file_direction "-first" "-next"
+        if { !$is_recno } {
+            filesort $t2 $t3
+            file rename -force $t3 $t2
+        }
+
+        for { set i 0 } { $i < $nclients } { incr i } {
+            puts "\t$name: Verifying client $i database \
+                $testfile contents."
+            open_and_dump_file $testfile $repenv($i) \
+                $t1 repl_noop dump_file_direction "-first" "-next"
+
+            if { !$is_recno } {
+                filesort $t1 $t3
+            } else {
+                # Best effort, as before: a failed copy is not
+                # reported here and shows up in the comparison.
+                catch {file copy -force $t1 $t3} ret
+            }
+            error_check_good diff_files($t2,$t3) [filecmp $t2 $t3] 0
+        }
+    }
+}
+
+# repl_verdel --
+#    For every test*.db database in the master's directory: delete all of
+#    its items on the master inside one transaction, process the
+#    replication queues, and check that the database is empty on every
+#    client.
+#
+#    tnum      test number, used in progress messages
+#    method    not used by this proc
+#    nclients  number of clients to check (default 1)
+#
+#    Returns without doing anything when the glob for test*.db fails.
+
+proc repl_verdel { tnum method { nclients 1 } } {
+    global masterdir
+    global repenv
+
+    set name [format "Repl%03d" $tnum]
+
+    # Collect the database names relative to the master directory.
+    set cwd [pwd]
+    cd $masterdir
+    set stat [catch {glob test*.db} dbs]
+    cd $cwd
+    if { $stat == 1 } {
+        return
+    }
+
+    set menv $repenv(master)
+    foreach testfile $dbs {
+        puts "\t$name: Deleting all items from the master."
+        set txn [$menv txn]
+        error_check_good txn_begin [is_valid_txn $txn $menv] TRUE
+        set db [berkdb_open -txn $txn -env $menv $testfile]
+        error_check_good reopen_master [is_valid_db $db] TRUE
+        set dbc [$db cursor -txn $txn]
+        error_check_good reopen_master_cursor \
+            [is_valid_cursor $dbc $db] TRUE
+
+        # Walk the whole database, deleting under the cursor.
+        set pair [$dbc get -first]
+        while { [llength $pair] > 0 } {
+            error_check_good del_item [$dbc del] 0
+            set pair [$dbc get -next]
+        }
+        error_check_good dbc_close [$dbc close] 0
+        error_check_good txn_commit [$txn commit] 0
+        error_check_good db_close [$db close] 0
+
+        # Let the deletions reach the clients.
+        repl_envprocq $tnum $nclients
+
+        for { set i 0 } { $i < $nclients } { incr i } {
+            puts "\t$name: Verifying emptiness of client database $i."
+
+            set db [berkdb_open -env $repenv($i) $testfile]
+            error_check_good reopen_client($i) [is_valid_db $db] TRUE
+            set dbc [$db cursor]
+            error_check_good reopen_client_cursor($i) \
+                [is_valid_cursor $dbc $db] TRUE
+
+            # An empty database yields nothing for the first record.
+            error_check_good client($i)_empty \
+                [llength [$dbc get -first]] 0
+
+            error_check_good dbc_close [$dbc close] 0
+            error_check_good db_close [$db close] 0
+        }
+    }
+}
+
+# repl_noop --
+#    Key/data "check" callback for the dump procs that expect to be able
+#    to verify keys and data.  It accepts any pair and returns an empty
+#    result.
+proc repl_noop { k d } {}
+
+# repl_envclose --
+#    Close all the master and client environments in a replication test
+#    directory, verifying each directory after its environment is closed,
+#    and finally close the message-queue group.
+#
+#    tnum      test number, used in progress messages
+#    envargs   the environment arguments used for the test; if they contain
+#              an element matching "-encrypta*" the global encrypt flag is
+#              set to 1
+proc repl_envclose { tnum envargs } {
+    source ./include.tcl
+    global clientdir
+    global encrypt
+    global masterdir
+    global repenv
+    global testdir
+
+    if { [lsearch $envargs "-encrypta*"] != -1 } {
+        set encrypt 1
+    }
+
+    # In order to make sure that we have fully-synced and ready-to-verify
+    # databases on all the clients, do a checkpoint on the master and
+    # process messages in order to flush all the clients.
+    #
+    # The global message-drop setting is deliberately left alone here:
+    # repl_envprocq switches dropping off by itself and flushes the master
+    # once the queues have drained.
+    set name [format "Repl%03d" $tnum]
+    berkdb debug_check
+    puts "\t$name: Checkpointing master."
+    error_check_good masterenv_ckp [$repenv(master) txn_checkpoint] 0
+
+    # Count clients: the handles run from index 0 up to the "NULL" marker.
+    for { set ncli 0 } { 1 } { incr ncli } {
+        if { $repenv($ncli) == "NULL" } {
+            break
+        }
+    }
+    repl_envprocq $tnum $ncli
+
+    error_check_good masterenv_close [$repenv(master) close] 0
+    verify_dir $masterdir "\t$name: " 0 0 1
+    for { set i 0 } { $i < $ncli } { incr i } {
+        error_check_good client($i)_close [$repenv($i) close] 0
+        verify_dir $clientdir($i) "\t$name: " 0 0 1
+    }
+    replclose $testdir/MSGQUEUEDIR
+}
+
+# replclose --
+#    Shut down a replication group: close the message database of every
+#    registered machine ID, close the queue environment, and empty the
+#    machids list.  The queuedir argument is accepted but not used.
+proc replclose { queuedir } {
+    global queueenv queuedbs machids
+
+    foreach site $machids {
+        error_check_good dbr_close [$queuedbs($site) close] 0
+    }
+    error_check_good qenv_close [$queueenv close] 0
+    set machids [list]
+}
+
+# replsetup --
+#    Create a replication group for testing: make the queue directory,
+#    open a transactional environment in it, and reset the bookkeeping
+#    (the queuedbs array and the machids list).
+#
+#    queuedir  home directory for the message-queue environment
+#
+#    Returns the queue environment handle, which is also stored in the
+#    global queueenv.
+proc replsetup { queuedir } {
+    global queueenv queuedbs machids
+
+    file mkdir $queuedir
+    set queueenv [berkdb_env -create -txn -lock_max 20000 -home $queuedir]
+    error_check_good queueenv [is_valid_env $queueenv] TRUE
+
+    # Forget any sites registered for an earlier group.
+    catch {unset queuedbs}
+    set machids {}
+
+    return $queueenv
+}
+
+# replsend --
+#    Send function for replication.  Appends the message to the queue
+#    database of the destination site, or of every registered site except
+#    the sender when toid is -1.
+#
+#    control, rec  message parts; stored together with fromid as one
+#                  list record {control rec fromid}
+#    fromid        machine ID of the sender
+#    toid          machine ID of the receiver, or -1 to broadcast
+#
+#    Always returns 0, including when the message is deliberately dropped.
+#    Raises an error if toid is not -1 and has no queue database.
+proc replsend { control rec fromid toid } {
+    global queuedbs queueenv machids
+    global drop drop_msg
+
+    #
+    # If we are testing with dropped messages, then we drop every
+    # $drop_msg-th message.  If we do that just return 0 and don't do
+    # anything.
+    #
+    # drop doubles as the on/off switch and the counter, so it counts
+    # from 1: after the increment, drop - 1 is the number of messages
+    # seen since the last dropped one.  Comparing with ">" therefore drops
+    # exactly one message in every $drop_msg.
+    #
+    if { $drop != 0 } {
+        incr drop
+        if { $drop > $drop_msg } {
+            set drop 1
+            return 0
+        }
+    }
+    # XXX
+    # -1 is DB_BROADCAST_MID
+    if { $toid == -1 } {
+        set machlist $machids
+    } else {
+        if { [info exists queuedbs($toid)] != 1 } {
+            error "replsend: machid $toid not found"
+        }
+        set machlist [list $toid]
+    }
+
+    foreach m $machlist {
+        # XXX should a broadcast include to "self"?
+        if { $m == $fromid } {
+            continue
+        }
+
+        # Each message is queued in its own transaction.
+        set db $queuedbs($m)
+        set txn [$queueenv txn]
+        $db put -txn $txn -append [list $control $rec $fromid]
+        error_check_good replsend_commit [$txn commit] 0
+    }
+
+    return 0
+}
+
+# replclear --
+#    Discard every pending message for one site by deleting all records
+#    from its queue database inside a single transaction.
+#
+#    machid  machine ID whose queue is emptied
+#
+#    Raises an error if machid has no queue database.
+proc replclear { machid } {
+    global queuedbs queueenv
+
+    if { [info exists queuedbs($machid)] != 1 } {
+        error "FAIL: replclear: machid $machid not found"
+    }
+
+    set txn [$queueenv txn]
+    set dbc [$queuedbs($machid) cursor -txn $txn]
+
+    # Walk the queue from the front, deleting under the cursor.
+    set rec [$dbc get -rmw -first]
+    while { [llength $rec] > 0 } {
+        error_check_good replclear($machid)_del [$dbc del] 0
+        set rec [$dbc get -rmw -next]
+    }
+
+    error_check_good replclear($machid)_dbc_close [$dbc close] 0
+    error_check_good replclear($machid)_txn_commit [$txn commit] 0
+}
+
+# repladd --
+#    Register a machine with the replication group: create (or open) its
+#    message-queue database repqueue<machid>.db in the queue environment
+#    and append the ID to the machids list.
+#
+#    machid  machine ID to add
+#
+#    Raises an error if the ID has already been added.
+proc repladd { machid } {
+    global queueenv queuedbs machids
+
+    if { [info exists queuedbs($machid)] } {
+        error "FAIL: repladd: machid $machid already exists"
+    }
+
+    set dbname repqueue$machid.db
+    set queuedbs($machid) [berkdb open -auto_commit -env $queueenv \
+        -create -recno -renumber $dbname]
+    error_check_good repqueue_create [is_valid_db $queuedbs($machid)] TRUE
+
+    lappend machids $machid
+}
+
+# Process a queue of messages, skipping every "skip_interval" entry.
+# We traverse the entire queue, but since we skip some messages, we
+# may end up leaving things in the queue, which should get picked up
+# on a later run.
+#
+# Arguments:
+#	dbenv		environment handle whose rep_process_message is
+#			called for each message
+#	machid		machine ID; selects the queue database queuedbs(machid)
+#	skip_interval	0 to process every message, otherwise the interval
+#			used to skip messages (see below)
+#	hold_electp	name of a caller variable that is set to 1 when a
+#			DB_REP_HOLDELECTION result is seen, or NONE
+#	newmasterp	name of a caller variable that receives the result
+#			of a message that is neither 0 nor DB_REP_NEWSITE,
+#			or NONE
+#
+# Returns the count kept in nproced, which includes skipped messages.
+
+proc replprocessqueue { dbenv machid { skip_interval 0 } \
+ { hold_electp NONE } { newmasterp NONE } } {
+ global queuedbs queueenv errorCode
+
+ # hold_electp is a call-by-reference variable which lets our caller
+ # know we need to hold an election.
+ if { [string compare $hold_electp NONE] != 0 } {
+ upvar $hold_electp hold_elect
+ }
+ # Without the upvar above this is just a local variable.
+ set hold_elect 0
+
+ # newmasterp is the same idea, only returning the ID of a master
+ # given in a DB_REP_NEWMASTER return.
+ if { [string compare $newmasterp NONE] != 0 } {
+ upvar $newmasterp newmaster
+ }
+ # Likewise a plain local when no variable name was supplied.
+ set newmaster 0
+
+ set nproced 0
+
+ set txn [$queueenv txn]
+ set dbc [$queuedbs($machid) cursor -txn $txn]
+
+ error_check_good process_dbc($machid) \
+ [is_valid_cursor $dbc $queuedbs($machid)] TRUE
+
+ for { set dbt [$dbc get -first] } \
+ { [llength $dbt] != 0 } \
+ { set dbt [$dbc get -next] } {
+ # The first element of dbt is a {recno data} pair; data is the
+ # {control rec fromid} list that replsend stored.
+ set data [lindex [lindex $dbt 0] 1]
+
+ # If skip_interval is nonzero, we want to process messages
+ # out of order. We do this in a simple but slimy way--
+ # continue walking with the cursor without processing the
+ # message or deleting it from the queue, but do increment
+ # "nproced". The way this proc is normally used, the
+ # precise value of nproced doesn't matter--we just don't
+ # assume the queues are empty if it's nonzero. Thus,
+ # if we contrive to make sure it's nonzero, we'll always
+ # come back to records we've skipped on a later call
+ # to replprocessqueue. (If there really are no records,
+ # we'll never get here.)
+ #
+ # Skip every skip_interval'th record (and use a remainder other
+ # than zero so that we're guaranteed to really process at least
+ # one record on every call).
+ if { $skip_interval != 0 } {
+ if { $nproced % $skip_interval == 1 } {
+ incr nproced
+ continue
+ }
+ }
+
+ # We have to play an ugly cursor game here: we currently
+ # hold a lock on the page of messages, but rep_process_message
+ # might need to lock the page with a different cursor in
+ # order to send a response. So save our recno, close
+ # the cursor, and then reopen and reset the cursor.
+ set recno [lindex [lindex $dbt 0] 0]
+ error_check_good dbc_process_close [$dbc close] 0
+ error_check_good txn_commit [$txn commit] 0
+ # The message is handed over as: sender id, control, rec.
+ set ret [catch {$dbenv rep_process_message \
+ [lindex $data 2] [lindex $data 0] [lindex $data 1]} res]
+ # Reopen the cursor in a new transaction and reposition it on the
+ # record that was just processed, so that it can be deleted below.
+ set txn [$queueenv txn]
+ set dbc [$queuedbs($machid) cursor -txn $txn]
+ set dbt [$dbc get -set $recno]
+
+ # A failed call is only tolerated when it asks for an election.
+ if { $ret != 0 } {
+ if { [is_substr $res DB_REP_HOLDELECTION] } {
+ set hold_elect 1
+ } else {
+ error "FAIL:[timestamp]\
+ rep_process_message returned $res"
+ }
+ }
+
+ incr nproced
+
+ # NOTE(review): the result of this delete is not checked.
+ $dbc del
+
+ if { $ret == 0 && $res != 0 } {
+ if { [is_substr $res DB_REP_NEWSITE] } {
+ # NEWSITE; do nothing.
+ } else {
+ set newmaster $res
+ # Break as soon as we get a NEWMASTER message;
+ # our caller needs to handle it.
+ break
+ }
+ }
+
+ if { $hold_elect == 1 } {
+ # Break also on a HOLDELECTION, for the same reason.
+ break
+ }
+
+ }
+
+ error_check_good dbc_close [$dbc close] 0
+ error_check_good txn_commit [$txn commit] 0
+
+ # Return the number of messages processed.
+ return $nproced
+}
+
+# Name of the command-line flag that extract_repl_args and delete_repl_args
+# look for in an argument list.
+set run_repl_flag {-run_repl}
+
+# extract_repl_args --
+#    Return the argument that follows the first occurrence of the flag
+#    named by the global run_repl_flag, or an empty string when the flag
+#    is absent (or is the last argument).
+#
+#    The whole argument list is searched, so an empty-string argument
+#    in front of the flag does not hide it.
+proc extract_repl_args { args } {
+    global run_repl_flag
+
+    set idx [lsearch -exact $args $run_repl_flag]
+    if { $idx < 0 } {
+        return ""
+    }
+    # lindex yields an empty string when the flag has no value after it.
+    return [lindex $args [expr {$idx + 1}]]
+}
+
+# delete_repl_args --
+#    Return the argument list with every occurrence of the flag named by
+#    the global run_repl_flag removed, together with the argument that
+#    follows each occurrence.
+#
+#    The list is walked by index up to its real length, so empty-string
+#    arguments are preserved and do not cut the result short.
+proc delete_repl_args { args } {
+    global run_repl_flag
+
+    set ret {}
+    set nargs [llength $args]
+    for { set i 0 } { $i < $nargs } { incr i } {
+        set arg [lindex $args $i]
+        if { [string compare $arg $run_repl_flag] == 0 } {
+            # Step over the flag's value as well.
+            incr i
+        } else {
+            lappend ret $arg
+        }
+    }
+    return $ret
+}
+
+# Serial number of the most recently started election; start_election
+# increments it and cleanup_elections resets it to 0.
+global elect_serial
+# Array indexed by election serial number; each entry is the pipe to the
+# sub-process that start_election opened for that election.
+global elections_in_progress
+set elect_serial 0
+
+# start_election --
+#    Start an election in a sub-process.  A tclsh is opened on a pipe and
+#    fed a script that sources test.tcl, rebuilds the queue bookkeeping
+#    (replsetup plus one repladd per known machine ID), opens the
+#    environment described by envstring and calls rep_elect on it.
+#
+#    qdir       queue directory passed to replsetup in the sub-process
+#    envstring  command that opens the environment in the sub-process
+#    nsites     passed to rep_elect
+#    pri        passed to rep_elect
+#    timeout    passed to rep_elect
+#    err        value given to "$dbenv test abort" before the election;
+#               when it is not "none", the abort setting is reset to none
+#               afterwards and a second election is run
+#
+#    The sub-process output goes to $testdir/ELECTION_OUTPUT.<serial> and
+#    the environment's error file is $testdir/ELECTION_ERRFILE.<serial>.
+#    The pipe is recorded in elections_in_progress(<serial>).
+#    Returns the serial number of the election.
+proc start_election { qdir envstring nsites pri timeout {err "none"}} {
+    source ./include.tcl
+    global elect_serial elections_in_progress machids
+
+    incr elect_serial
+
+    set t [open "|$tclsh_path >& $testdir/ELECTION_OUTPUT.$elect_serial" w]
+
+    puts $t "source $test_path/test.tcl"
+    puts $t "replsetup $qdir"
+    foreach i $machids { puts $t "repladd $i" }
+    puts $t "set env_cmd \{$envstring\}"
+    puts $t "set dbenv \[eval \$env_cmd -errfile \
+        $testdir/ELECTION_ERRFILE.$elect_serial -errpfx FAIL: \]"
+#   puts "Start election err $err, env $envstring"
+    puts $t "\$dbenv test abort $err"
+    # Use the caller's timeout argument for the election.
+    puts $t "set res \[catch \{\$dbenv rep_elect $nsites $pri \
+        $timeout\} ret\]"
+    if { $err != "none" } {
+        puts $t "\$dbenv test abort none"
+        puts $t "set res \[catch \{\$dbenv rep_elect $nsites $pri \
+            $timeout\} ret\]"
+    }
+    flush $t
+
+    set elections_in_progress($elect_serial) $t
+    return $elect_serial
+}
+
+# close_election --
+#    Finish election number i: tell its sub-process to close the
+#    environment, close the pipe, and drop the entry from
+#    elections_in_progress.
+proc close_election { i } {
+    global elections_in_progress
+
+    set pipe $elections_in_progress($i)
+    puts $pipe "\$dbenv close"
+    close $pipe
+    unset elections_in_progress($i)
+}
+
+# cleanup_elections --
+#    Close every election that is still recorded in elections_in_progress,
+#    in ascending serial order from 0 through elect_serial, then reset
+#    elect_serial to 0.
+proc cleanup_elections { } {
+    global elect_serial elections_in_progress
+
+    set serial 0
+    while { $serial <= $elect_serial } {
+        if { [info exists elections_in_progress($serial)] } {
+            close_election $serial
+        }
+        incr serial
+    }
+
+    set elect_serial 0
+}