Diffstat (limited to 'bdb/test/reputils.tcl')
-rw-r--r-- | bdb/test/reputils.tcl | 659
1 files changed, 659 insertions, 0 deletions
diff --git a/bdb/test/reputils.tcl b/bdb/test/reputils.tcl
new file mode 100644
index 00000000000..340e359f26d
--- /dev/null
+++ b/bdb/test/reputils.tcl
@@ -0,0 +1,659 @@
+# See the file LICENSE for redistribution information.
+#
+# Copyright (c) 2001-2002
+#	Sleepycat Software.  All rights reserved.
+#
+# $Id: reputils.tcl,v 11.34 2002/08/12 17:54:18 sandstro Exp $
+#
+# Replication testing utilities
+
+# Environment handle for the env containing the replication "communications
+# structure" (really a CDB environment).
+
+# The test environment consists of a queue and a # directory (environment)
+# per replication site.  The queue is used to hold messages destined for a
+# particular site and the directory will contain the environment for the
+# site.  So the environment looks like:
+#				$testdir
+#	   ___________|______________________________
+#	  /	      |		     \		      \
+#	MSGQUEUEDIR MASTERDIR	CLIENTDIR.0  ...  CLIENTDIR.N-1
+#	    |		 |	     |	  ...	      |
+#	    1		 2	     ..		     N+1
+#
+# The master is site 1 in the MSGQUEUEDIR and clients 1-N map to message
+# queues 2 - N+1.
+#
+# The globals repenv(1-N) contain the environment handles for the sites
+# with a given id (i.e., repenv(1) is the master's environment.
+
+global queueenv
+
+# Array of DB handles, one per machine ID, for the databases that contain
+# messages.
+global queuedbs
+global machids
+
+global elect_timeout
+set elect_timeout 50000000
+set drop 0
+
+# Create the directory structure for replication testing.
+# Open the master and client environments; store these in the global repenv
+# Return the master's environment: "-env masterenv"
+#
+proc repl_envsetup { envargs largs tnum {nclients 1} {droppct 0} { oob 0 } } {
+	source ./include.tcl
+	global clientdir
+	global drop drop_msg
+	global masterdir
+	global repenv
+	global testdir
+
+	env_cleanup $testdir
+
+	replsetup $testdir/MSGQUEUEDIR
+
+	set masterdir $testdir/MASTERDIR
+	file mkdir $masterdir
+	if { $droppct != 0 } {
+		set drop 1
+		set drop_msg [expr 100 / $droppct]
+	} else {
+		set drop 0
+	}
+
+	for { set i 0 } { $i < $nclients } { incr i } {
+		set clientdir($i) $testdir/CLIENTDIR.$i
+		file mkdir $clientdir($i)
+	}
+
+	# Open a master.
+	repladd 1
+	#
+	# Set log smaller than default to force changing files,
+	# but big enough so that the tests that use binary files
+	# as keys/data can run.
+	#
+	set lmax [expr 3 * 1024 * 1024]
+	set masterenv [eval {berkdb_env -create -log_max $lmax} $envargs \
+	    {-home $masterdir -txn -rep_master -rep_transport \
+	    [list 1 replsend]}]
+	error_check_good master_env [is_valid_env $masterenv] TRUE
+	set repenv(master) $masterenv
+
+	# Open clients
+	for { set i 0 } { $i < $nclients } { incr i } {
+		set envid [expr $i + 2]
+		repladd $envid
+		set clientenv [eval {berkdb_env -create} $envargs -txn \
+		    {-cachesize { 0 10000000 0 }} -lock_max 10000 \
+		    {-home $clientdir($i) -rep_client -rep_transport \
+		    [list $envid replsend]}]
+		error_check_good client_env [is_valid_env $clientenv] TRUE
+		set repenv($i) $clientenv
+	}
+	set repenv($i) NULL
+	append largs " -env $masterenv "
+
+	# Process startup messages
+	repl_envprocq $tnum $nclients $oob
+
+	return $largs
+}
+
+# Process all incoming messages.  Iterate until there are no messages left
+# in anyone's queue so that we capture all message exchanges.  We verify that
+# the requested number of clients matches the number of client environments
+# we have.  The oob parameter indicates if we should process the queue
+# with out-of-order delivery.  The replprocess procedure actually does
+# the real work of processing the queue -- this routine simply iterates
+# over the various queues and does the initial setup.
+
+proc repl_envprocq { tnum { nclients 1 } { oob 0 }} {
+	global repenv
+	global drop
+
+	set masterenv $repenv(master)
+	for { set i 0 } { 1 } { incr i } {
+		if { $repenv($i) == "NULL"} {
+			break
+		}
+	}
+	error_check_good i_nclients $nclients $i
+
+	set name [format "Repl%03d" $tnum]
+	berkdb debug_check
+	puts -nonewline "\t$name: Processing master/$i client queues"
+	set rand_skip 0
+	if { $oob } {
+		puts " out-of-order"
+	} else {
+		puts " in order"
+	}
+	set do_check 1
+	set droprestore $drop
+	while { 1 } {
+		set nproced 0
+
+		if { $oob } {
+			set rand_skip [berkdb random_int 2 10]
+		}
+		incr nproced [replprocessqueue $masterenv 1 $rand_skip]
+		for { set i 0 } { $i < $nclients } { incr i } {
+			set envid [expr $i + 2]
+			if { $oob } {
+				set rand_skip [berkdb random_int 2 10]
+			}
+			set n [replprocessqueue $repenv($i) \
+			    $envid $rand_skip]
+			incr nproced $n
+		}
+
+		if { $nproced == 0 } {
+			# Now that we delay requesting records until
+			# we've had a few records go by, we should always
+			# see that the number of requests is lower than the
+			# number of messages that were enqueued.
+			for { set i 0 } { $i < $nclients } { incr i } {
+				set clientenv $repenv($i)
+				set stats [$clientenv rep_stat]
+				set queued [getstats $stats \
+				    {Total log records queued}]
+				error_check_bad queued_stats \
+				    $queued -1
+				set requested [getstats $stats \
+				    {Log records requested}]
+				error_check_bad requested_stats \
+				    $requested -1
+				if { $queued != 0 && $do_check != 0 } {
+					error_check_good num_requested \
+					    [expr $requested < $queued] 1
+				}
+
+				$clientenv rep_request 1 1
+			}
+
+			# If we were dropping messages, we might need
+			# to flush the log so that we get everything
+			# and end up in the right state.
+			if { $drop != 0 } {
+				set drop 0
+				set do_check 0
+				$masterenv rep_flush
+				berkdb debug_check
+				puts "\t$name: Flushing Master"
+			} else {
+				break
+			}
+		}
+	}
+
+	# Reset the clients back to the default state in case we
+	# have more processing to do.
+	for { set i 0 } { $i < $nclients } { incr i } {
+		set clientenv $repenv($i)
+		$clientenv rep_request 4 128
+	}
+	set drop $droprestore
+}
+
+# Verify that the directories in the master are exactly replicated in
+# each of the client environments.
+
+proc repl_envver0 { tnum method { nclients 1 } } {
+	global clientdir
+	global masterdir
+	global repenv
+
+	# Verify the database in the client dir.
+	# First dump the master.
+	set t1 $masterdir/t1
+	set t2 $masterdir/t2
+	set t3 $masterdir/t3
+	set omethod [convert_method $method]
+	set name [format "Repl%03d" $tnum]
+
+	#
+	# We are interested in the keys of whatever databases are present
+	# in the master environment, so we just call a no-op check function
+	# since we have no idea what the contents of this database really is.
+	# We just need to walk the master and the clients and make sure they
+	# have the same contents.
+	#
+	set cwd [pwd]
+	cd $masterdir
+	set stat [catch {glob test*.db} dbs]
+	cd $cwd
+	if { $stat == 1 } {
+		return
+	}
+	foreach testfile $dbs {
+		open_and_dump_file $testfile $repenv(master) $masterdir/t2 \
+		    repl_noop dump_file_direction "-first" "-next"
+
+		if { [string compare [convert_method $method] -recno] != 0 } {
+			filesort $t2 $t3
+			file rename -force $t3 $t2
+		}
+		for { set i 0 } { $i < $nclients } { incr i } {
+			puts "\t$name: Verifying client $i database \
+			    $testfile contents."
+			open_and_dump_file $testfile $repenv($i) \
+			    $t1 repl_noop dump_file_direction "-first" "-next"
+
+			if { [string compare $omethod "-recno"] != 0 } {
+				filesort $t1 $t3
+			} else {
+				catch {file copy -force $t1 $t3} ret
+			}
+			error_check_good diff_files($t2,$t3) [filecmp $t2 $t3] 0
+		}
+	}
+}
+
+# Remove all the elements from the master and verify that these
+# deletions properly propagated to the clients.
+
+proc repl_verdel { tnum method { nclients 1 } } {
+	global clientdir
+	global masterdir
+	global repenv
+
+	# Delete all items in the master.
+	set name [format "Repl%03d" $tnum]
+	set cwd [pwd]
+	cd $masterdir
+	set stat [catch {glob test*.db} dbs]
+	cd $cwd
+	if { $stat == 1 } {
+		return
+	}
+	foreach testfile $dbs {
+		puts "\t$name: Deleting all items from the master."
+		set txn [$repenv(master) txn]
+		error_check_good txn_begin [is_valid_txn $txn \
+		    $repenv(master)] TRUE
+		set db [berkdb_open -txn $txn -env $repenv(master) $testfile]
+		error_check_good reopen_master [is_valid_db $db] TRUE
+		set dbc [$db cursor -txn $txn]
+		error_check_good reopen_master_cursor \
+		    [is_valid_cursor $dbc $db] TRUE
+		for { set dbt [$dbc get -first] } { [llength $dbt] > 0 } \
+		    { set dbt [$dbc get -next] } {
+			error_check_good del_item [$dbc del] 0
+		}
+		error_check_good dbc_close [$dbc close] 0
+		error_check_good txn_commit [$txn commit] 0
+		error_check_good db_close [$db close] 0
+
+		repl_envprocq $tnum $nclients
+
+		# Check clients.
+		for { set i 0 } { $i < $nclients } { incr i } {
+			puts "\t$name: Verifying emptiness of client database $i."
+
+			set db [berkdb_open -env $repenv($i) $testfile]
+			error_check_good reopen_client($i) \
+			    [is_valid_db $db] TRUE
+			set dbc [$db cursor]
+			error_check_good reopen_client_cursor($i) \
+			    [is_valid_cursor $dbc $db] TRUE
+
+			error_check_good client($i)_empty \
+			    [llength [$dbc get -first]] 0
+
+			error_check_good dbc_close [$dbc close] 0
+			error_check_good db_close [$db close] 0
+		}
+	}
+}
+
+# Replication "check" function for the dump procs that expect to
+# be able to verify the keys and data.
+proc repl_noop { k d } {
+	return
+}
+
+# Close all the master and client environments in a replication test directory.
+proc repl_envclose { tnum envargs } {
+	source ./include.tcl
+	global clientdir
+	global encrypt
+	global masterdir
+	global repenv
+	global testdir
+
+	if { [lsearch $envargs "-encrypta*"] !=-1 } {
+		set encrypt 1
+	}
+
+	# In order to make sure that we have fully-synced and ready-to-verify
+	# databases on all the clients, do a checkpoint on the master and
+	# process messages in order to flush all the clients.
+	set drop 0
+	set do_check 0
+	set name [format "Repl%03d" $tnum]
+	berkdb debug_check
+	puts "\t$name: Checkpointing master."
+	error_check_good masterenv_ckp [$repenv(master) txn_checkpoint] 0
+
+	# Count clients.
+	for { set ncli 0 } { 1 } { incr ncli } {
+		if { $repenv($ncli) == "NULL" } {
+			break
+		}
+	}
+	repl_envprocq $tnum $ncli
+
+	error_check_good masterenv_close [$repenv(master) close] 0
+	verify_dir $masterdir "\t$name: " 0 0 1
+	for { set i 0 } { $i < $ncli } { incr i } {
+		error_check_good client($i)_close [$repenv($i) close] 0
+		verify_dir $clientdir($i) "\t$name: " 0 0 1
+	}
+	replclose $testdir/MSGQUEUEDIR
+
+}
+
+# Close up a replication group
+proc replclose { queuedir } {
+	global queueenv queuedbs machids
+
+	foreach m $machids {
+		set db $queuedbs($m)
+		error_check_good dbr_close [$db close] 0
+	}
+	error_check_good qenv_close [$queueenv close] 0
+	set machids {}
+}
+
+# Create a replication group for testing.
+proc replsetup { queuedir } {
+	global queueenv queuedbs machids
+
+	file mkdir $queuedir
+	set queueenv \
+	    [berkdb_env -create -txn -lock_max 20000 -home $queuedir]
+	error_check_good queueenv [is_valid_env $queueenv] TRUE
+
+	if { [info exists queuedbs] } {
+		unset queuedbs
+	}
+	set machids {}
+
+	return $queueenv
+}
+
+# Send function for replication.
+proc replsend { control rec fromid toid } {
+	global queuedbs queueenv machids
+	global drop drop_msg
+
+	#
+	# If we are testing with dropped messages, then we drop every
+	# $drop_msg time.  If we do that just return 0 and don't do
+	# anything.
+	#
+	if { $drop != 0 } {
+		incr drop
+		if { $drop == $drop_msg } {
+			set drop 1
+			return 0
+		}
+	}
+	# XXX
+	# -1 is DB_BROADCAST_MID
+	if { $toid == -1 } {
+		set machlist $machids
+	} else {
+		if { [info exists queuedbs($toid)] != 1 } {
+			error "replsend: machid $toid not found"
+		}
+		set machlist [list $toid]
+	}
+
+	foreach m $machlist {
+		# XXX should a broadcast include to "self"?
+		if { $m == $fromid } {
+			continue
+		}
+
+		set db $queuedbs($m)
+		set txn [$queueenv txn]
+		$db put -txn $txn -append [list $control $rec $fromid]
+		error_check_good replsend_commit [$txn commit] 0
+	}
+
+	return 0
+}
+
+# Nuke all the pending messages for a particular site.
+proc replclear { machid } {
+	global queuedbs queueenv
+
+	if { [info exists queuedbs($machid)] != 1 } {
+		error "FAIL: replclear: machid $machid not found"
+	}
+
+	set db $queuedbs($machid)
+	set txn [$queueenv txn]
+	set dbc [$db cursor -txn $txn]
+	for { set dbt [$dbc get -rmw -first] } { [llength $dbt] > 0 } \
+	    { set dbt [$dbc get -rmw -next] } {
+		error_check_good replclear($machid)_del [$dbc del] 0
+	}
+	error_check_good replclear($machid)_dbc_close [$dbc close] 0
+	error_check_good replclear($machid)_txn_commit [$txn commit] 0
+}
+
+# Add a machine to a replication environment.
+proc repladd { machid } {
+	global queueenv queuedbs machids
+
+	if { [info exists queuedbs($machid)] == 1 } {
+		error "FAIL: repladd: machid $machid already exists"
+	}
+
+	set queuedbs($machid) [berkdb open -auto_commit \
+	    -env $queueenv -create -recno -renumber repqueue$machid.db]
+	error_check_good repqueue_create [is_valid_db $queuedbs($machid)] TRUE
+
+	lappend machids $machid
+}
+
+# Process a queue of messages, skipping every "skip_interval" entry.
+# We traverse the entire queue, but since we skip some messages, we
+# may end up leaving things in the queue, which should get picked up
+# on a later run.
+
+proc replprocessqueue { dbenv machid { skip_interval 0 } \
+    { hold_electp NONE } { newmasterp NONE } } {
+	global queuedbs queueenv errorCode
+
+	# hold_electp is a call-by-reference variable which lets our caller
+	# know we need to hold an election.
+	if { [string compare $hold_electp NONE] != 0 } {
+		upvar $hold_electp hold_elect
+	}
+	set hold_elect 0
+
+	# newmasterp is the same idea, only returning the ID of a master
+	# given in a DB_REP_NEWMASTER return.
+	if { [string compare $newmasterp NONE] != 0 } {
+		upvar $newmasterp newmaster
+	}
+	set newmaster 0
+
+	set nproced 0
+
+	set txn [$queueenv txn]
+	set dbc [$queuedbs($machid) cursor -txn $txn]
+
+	error_check_good process_dbc($machid) \
+	    [is_valid_cursor $dbc $queuedbs($machid)] TRUE
+
+	for { set dbt [$dbc get -first] } \
+	    { [llength $dbt] != 0 } \
+	    { set dbt [$dbc get -next] } {
+		set data [lindex [lindex $dbt 0] 1]
+
+		# If skip_interval is nonzero, we want to process messages
+		# out of order.  We do this in a simple but slimy way--
+		# continue walking with the cursor without processing the
+		# message or deleting it from the queue, but do increment
+		# "nproced".  The way this proc is normally used, the
+		# precise value of nproced doesn't matter--we just don't
+		# assume the queues are empty if it's nonzero.  Thus,
+		# if we contrive to make sure it's nonzero, we'll always
+		# come back to records we've skipped on a later call
+		# to replprocessqueue.  (If there really are no records,
+		# we'll never get here.)
+		#
+		# Skip every skip_interval'th record (and use a remainder other
+		# than zero so that we're guaranteed to really process at least
+		# one record on every call).
+		if { $skip_interval != 0 } {
+			if { $nproced % $skip_interval == 1 } {
+				incr nproced
+				continue
+			}
+		}
+
+		# We have to play an ugly cursor game here:  we currently
+		# hold a lock on the page of messages, but rep_process_message
+		# might need to lock the page with a different cursor in
+		# order to send a response.  So save our recno, close
+		# the cursor, and then reopen and reset the cursor.
+		set recno [lindex [lindex $dbt 0] 0]
+		error_check_good dbc_process_close [$dbc close] 0
+		error_check_good txn_commit [$txn commit] 0
+		set ret [catch {$dbenv rep_process_message \
+		    [lindex $data 2] [lindex $data 0] [lindex $data 1]} res]
+		set txn [$queueenv txn]
+		set dbc [$queuedbs($machid) cursor -txn $txn]
+		set dbt [$dbc get -set $recno]
+
+		if { $ret != 0 } {
+			if { [is_substr $res DB_REP_HOLDELECTION] } {
+				set hold_elect 1
+			} else {
+				error "FAIL:[timestamp]\
+				    rep_process_message returned $res"
+			}
+		}
+
+		incr nproced
+
+		$dbc del
+
+		if { $ret == 0 && $res != 0 } {
+			if { [is_substr $res DB_REP_NEWSITE] } {
+				# NEWSITE; do nothing.
+			} else {
+				set newmaster $res
+				# Break as soon as we get a NEWMASTER message;
+				# our caller needs to handle it.
+				break
+			}
+		}
+
+		if { $hold_elect == 1 } {
+			# Break also on a HOLDELECTION, for the same reason.
+			break
+		}
+
+	}
+
+	error_check_good dbc_close [$dbc close] 0
+	error_check_good txn_commit [$txn commit] 0
+
+	# Return the number of messages processed.
+	return $nproced
+}
+
+set run_repl_flag "-run_repl"
+
+proc extract_repl_args { args } {
+	global run_repl_flag
+
+	for { set arg [lindex $args [set i 0]] } \
+	    { [string length $arg] > 0 } \
+	    { set arg [lindex $args [incr i]] } {
+		if { [string compare $arg $run_repl_flag] == 0 } {
+			return [lindex $args [expr $i + 1]]
+		}
+	}
+	return ""
+}
+
+proc delete_repl_args { args } {
+	global run_repl_flag
+
+	set ret {}
+
+	for { set arg [lindex $args [set i 0]] } \
+	    { [string length $arg] > 0 } \
+	    { set arg [lindex $args [incr i]] } {
+		if { [string compare $arg $run_repl_flag] != 0 } {
+			lappend ret $arg
+		} else {
+			incr i
+		}
+	}
+	return $ret
+}
+
+global elect_serial
+global elections_in_progress
+set elect_serial 0
+
+# Start an election in a sub-process.
+proc start_election { qdir envstring nsites pri timeout {err "none"}} {
+	source ./include.tcl
+	global elect_serial elect_timeout elections_in_progress machids
+
+	incr elect_serial
+
+	set t [open "|$tclsh_path >& $testdir/ELECTION_OUTPUT.$elect_serial" w]
+
+	puts $t "source $test_path/test.tcl"
+	puts $t "replsetup $qdir"
+	foreach i $machids { puts $t "repladd $i" }
+	puts $t "set env_cmd \{$envstring\}"
+	puts $t "set dbenv \[eval \$env_cmd -errfile \
+	    $testdir/ELECTION_ERRFILE.$elect_serial -errpfx FAIL: \]"
+#	puts "Start election err $err, env $envstring"
+	puts $t "\$dbenv test abort $err"
+	puts $t "set res \[catch \{\$dbenv rep_elect $nsites $pri \
+	    $elect_timeout\} ret\]"
+	if { $err != "none" } {
+		puts $t "\$dbenv test abort none"
+		puts $t "set res \[catch \{\$dbenv rep_elect $nsites $pri \
+		    $elect_timeout\} ret\]"
+	}
+	flush $t
+
+	set elections_in_progress($elect_serial) $t
+	return $elect_serial
+}
+
+proc close_election { i } {
+	global elections_in_progress
+	set t $elections_in_progress($i)
+	puts $t "\$dbenv close"
+	close $t
+	unset elections_in_progress($i)
+}
+
+proc cleanup_elections { } {
+	global elect_serial elections_in_progress
+
+	for { set i 0 } { $i <= $elect_serial } { incr i } {
+		if { [info exists elections_in_progress($i)] != 0 } {
+			close_election $i
+		}
+	}
+
+	set elect_serial 0
+}
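For orientation, here is how a replication test typically strings these utilities together. This is a hypothetical driver sketch, not part of the diff above: the proc names and argument orders (repl_envsetup, repl_envprocq, repl_envver0, repl_verdel, repl_envclose) come from the file, while the test number (55), the btree method, and the omitted workload step are placeholders.

# Hypothetical driver sketch; test number, method and workload are
# placeholders, the utility procs are the ones defined in reputils.tcl.
proc repl_usage_sketch { } {
	set envargs ""
	set largs ""

	# Create MSGQUEUEDIR, MASTERDIR and CLIENTDIR.0, open the master and
	# one client (handles land in the global repenv array), and get back
	# largs with " -env $masterenv " appended.
	set largs [repl_envsetup $envargs $largs 55 1]

	# ... run an access-method workload against the master env here,
	# e.g. by handing $largs to one of the existing test procs ...

	# Drain the message queues, then verify that the client database
	# matches the master and that deletions propagate.
	repl_envprocq 55 1
	repl_envver0 55 btree 1
	repl_verdel 55 btree 1

	# Checkpoint the master, flush the clients, close all environments.
	repl_envclose 55 $envargs
}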
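The hold_electp and newmasterp arguments of replprocessqueue are call-by-reference outputs, so a caller that wants to react to DB_REP_HOLDELECTION or DB_REP_NEWMASTER passes variable names and tests them after each call. A minimal sketch of such a loop, assuming a master with machine ID 1 and a single client with machine ID 2:

# Sketch only: drain both queues until empty, watching for election-related
# returns.  The two environment handles and machine IDs are assumptions.
proc drain_queues_sketch { masterenv clientenv } {
	while { 1 } {
		set nproced 0

		# Process the master's queue; "he" and "newmaster" are set
		# by replprocessqueue via upvar.
		incr nproced [replprocessqueue $masterenv 1 0 he newmaster]
		if { $he == 1 || $newmaster != 0 } {
			# A real test would hold an election here or switch
			# its notion of which site is master.
			break
		}

		# Process the client's queue the same way.
		incr nproced [replprocessqueue $clientenv 2 0 he newmaster]
		if { $he == 1 || $newmaster != 0 } {
			break
		}

		# Nothing processed on this pass: all queues are empty.
		if { $nproced == 0 } {
			break
		}
	}
}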
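start_election runs the blocking rep_elect call in a piped tclsh so the parent can keep processing messages; close_election and cleanup_elections tear those children down. A rough sketch of the calling pattern, assuming a client environment in $clientdir with machine ID 2 and a two-site group (the env_cmd string is an assumption modeled on how the file evaluates envstring in the child):

# Sketch only: spawn an election sub-process for site 2, then clean up.
proc election_sketch { qdir clientdir } {
	global elect_timeout

	# Environment command evaluated inside the child; brackets are
	# escaped so [list 2 replsend] expands there, not here.
	set env_cmd "berkdb_env -create -txn -home $clientdir \
	    -rep_transport \[list 2 replsend\]"

	# Two sites, priority 1, default timeout from this file.
	set eid [start_election $qdir $env_cmd 2 1 $elect_timeout]

	# ... the caller would now run replprocessqueue on the other sites
	# until the election messages have been exchanged ...

	close_election $eid
}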