diff options
author | unknown <msvensson@shellback.(none)> | 2006-10-04 12:47:32 +0200 |
---|---|---|
committer | unknown <msvensson@shellback.(none)> | 2006-10-04 12:47:32 +0200 |
commit | dccc6b10c34cc111e6a587642b4e7eb93bd3b6a1 (patch) | |
tree | ae15dc31f5918c502b4313aa7d8cd7f32ce8cca3 /mysql-test/lib/mtr_process.pl | |
parent | 99cfe38ae7d6da2dca8a3b377254d0bfba8b6ac4 (diff) | |
download | mariadb-git-dccc6b10c34cc111e6a587642b4e7eb93bd3b6a1.tar.gz |
Backport from 5.1
-Add support for detecting version and features from mysqld binary
- Autodetect netware
- Disable some features not available below 5.0
- Cleanup executable_setup to look for one executable at a time, only llok for the ones that are needed based on the selected testcases and settings
mysql-test/lib/mtr_cases.pl:
Backport from 5.1
mysql-test/lib/mtr_io.pl:
Backport from 5.1
Add new function mtr_appendfile_to_file
mysql-test/lib/mtr_misc.pl:
Backport from 5.1
mysql-test/lib/mtr_process.pl:
Backport from 5.1
mysql-test/lib/mtr_report.pl:
Backport from 5.1
mysql-test/mysql-test-run.pl:
Add support for detecting version and features from mysqld binary
Autodetect netware
Disable some features not available below 5.0
Cleanup executable_setup to look for one executable at a time, only llok for the ones that are needed based on the selected testcases and settings
mysql-test/r/mysqltest.result:
Update result
mysql-test/lib/mtr_im.pl:
New BitKeeper file ``mysql-test/lib/mtr_im.pl''
mysql-test/lib/mtr_stress.pl:
New BitKeeper file ``mysql-test/lib/mtr_stress.pl''
Diffstat (limited to 'mysql-test/lib/mtr_process.pl')
-rw-r--r-- | mysql-test/lib/mtr_process.pl | 675 |
1 files changed, 452 insertions, 223 deletions
diff --git a/mysql-test/lib/mtr_process.pl b/mysql-test/lib/mtr_process.pl index 662b70a4fee..5e21248790e 100644 --- a/mysql-test/lib/mtr_process.pl +++ b/mysql-test/lib/mtr_process.pl @@ -4,7 +4,6 @@ # and is part of the translation of the Bourne shell script with the # same name. -#use Carp qw(cluck); use Socket; use Errno; use strict; @@ -14,12 +13,17 @@ use POSIX 'WNOHANG'; sub mtr_run ($$$$$$;$); sub mtr_spawn ($$$$$$;$); -sub mtr_stop_mysqld_servers ($); +sub mtr_check_stop_servers ($); sub mtr_kill_leftovers (); +sub mtr_wait_blocking ($); sub mtr_record_dead_children (); +sub mtr_ndbmgm_start($$); +sub mtr_mysqladmin_start($$$); sub mtr_exit ($); sub sleep_until_file_created ($$$); sub mtr_kill_processes ($); +sub mtr_ping_with_timeout($); +sub mtr_ping_port ($); # static in C sub spawn_impl ($$$$$$$$); @@ -31,7 +35,6 @@ sub spawn_impl ($$$$$$$$); ############################################################################## # This function try to mimic the C version used in "netware/mysql_test_run.c" -# FIXME learn it to handle append mode as well, a "new" flag or a "append" sub mtr_run ($$$$$$;$) { my $path= shift; @@ -112,6 +115,9 @@ sub spawn_impl ($$$$$$$$) { print STDERR "#### ", "-" x 78, "\n"; } + mtr_error("Can't spawn with empty \"path\"") unless defined $path; + + FORK: { my $pid= fork(); @@ -144,17 +150,6 @@ sub spawn_impl ($$$$$$$$) { $SIG{INT}= 'DEFAULT'; # Parent do some stuff, we don't - if ( $::glob_cygwin_shell and $mode eq 'test' ) - { - # Programs started from mysqltest under Cygwin, are to - # execute them within Cygwin. Else simple things in test - # files like - # --system "echo 1 > file" - # will fail. - # FIXME not working :-( -# $ENV{'COMSPEC'}= "$::glob_cygwin_shell -c"; - } - my $log_file_open_mode = '>'; if ($spawn_opts and $spawn_opts->{'append_log_file'}) @@ -164,7 +159,15 @@ sub spawn_impl ($$$$$$$$) { if ( $output ) { - if ( ! open(STDOUT,$log_file_open_mode,$output) ) + if ( $::glob_win32_perl ) + { + # Don't redirect stdout on ActiveState perl since this is + # just another thread in the same process. + # Should be fixed so that the thread that is created with fork + # executes the exe in another process and wait's for it to return. + # In the meanwhile, we get all the output from mysqld's to screen + } + elsif ( ! open(STDOUT,$log_file_open_mode,$output) ) { mtr_child_error("can't redirect STDOUT to \"$output\": $!"); } @@ -216,8 +219,7 @@ sub spawn_parent_impl { { # Simple run of command, we wait for it to return my $ret_pid= waitpid($pid,0); - - if ( $ret_pid <= 0 ) + if ( $ret_pid != $pid ) { mtr_error("$path ($pid) got lost somehow"); } @@ -245,7 +247,6 @@ sub spawn_parent_impl { # Someone terminated, don't know who. Collect # status info first before $? is lost, # but not $exit_value, this is flagged from - # my $timer_name= mtr_timer_timeout($::glob_timers, $ret_pid); if ( $timer_name ) @@ -272,45 +273,22 @@ sub spawn_parent_impl { last; } - # If one of the mysqld processes died, we want to - # mark this, and kill the mysqltest process. - - foreach my $idx (0..1) - { - if ( $::master->[$idx]->{'pid'} eq $ret_pid ) - { - mtr_debug("child $ret_pid was master[$idx], " . - "exit during mysqltest run"); - $::master->[$idx]->{'pid'}= 0; - last; - } - } - - foreach my $idx (0..2) - { - if ( $::slave->[$idx]->{'pid'} eq $ret_pid ) - { - mtr_debug("child $ret_pid was slave[$idx], " . - "exit during mysqltest run"); - $::slave->[$idx]->{'pid'}= 0; - last; - } - } + # One of the child processes died, unless this was expected + # mysqltest should be killed and test aborted - mtr_debug("waitpid() catched exit of unknown child $ret_pid, " . - "exit during mysqltest run"); + check_expected_crash_and_restart($ret_pid); } if ( $ret_pid != $pid ) { # We terminated the waiting because a "mysqld" process died. # Kill the mysqltest process. - + mtr_verbose("Kill mysqltest because another process died"); kill(9,$pid); $ret_pid= waitpid($pid,0); - if ( $ret_pid == -1 ) + if ( $ret_pid != $pid ) { mtr_error("$path ($pid) got lost somehow"); } @@ -351,39 +329,101 @@ sub mtr_process_exit_status { # ############################################################################## -# We just "ping" on the ports, and if we can't do a socket connect -# we assume the server is dead. So we don't *really* know a server -# is dead, we just hope that it after letting the listen port go, -# it is dead enough for us to start a new server. +# Kill all processes(mysqld, ndbd, ndb_mgmd and im) that would conflict with +# this run +# Make sure to remove the PID file, if any. +# kill IM manager first, else it will restart the servers sub mtr_kill_leftovers () { - # First, kill all masters and slaves that would conflict with - # this run. Make sure to remove the PID file, if any. + mtr_report("Killing Possible Leftover Processes"); + mtr_debug("mtr_kill_leftovers(): started."); - my @args; + mkpath("$::opt_vardir/log"); # Needed for mysqladmin log - for ( my $idx; $idx < 2; $idx++ ) + # Stop or kill Instance Manager and all its children. If we failed to do + # that, we can only abort -- there is nothing left to do. + +# mtr_error("Failed to stop Instance Manager.") +# unless mtr_im_stop($::instance_manager); + + # Start shutdown of masters and slaves. Don't touch IM-managed mysqld + # instances -- they should be stopped by mtr_im_stop(). + + mtr_debug("Shutting down mysqld-instances..."); + + my @kill_pids; + my %admin_pids; + + foreach my $srv (@{$::master}, @{$::slave}) { - push(@args,{ - pid => 0, # We don't know the PID - pidfile => $::master->[$idx]->{'path_mypid'}, - sockfile => $::master->[$idx]->{'path_mysock'}, - port => $::master->[$idx]->{'path_myport'}, - }); + mtr_debug(" - mysqld " . + "(pid: $srv->{pid}; " . + "pid file: '$srv->{path_pid}'; " . + "socket: '$srv->{path_sock}'; ". + "port: $srv->{port})"); + + my $pid= mtr_mysqladmin_start($srv, "shutdown", 70); + + # Save the pid of the mysqladmin process + $admin_pids{$pid}= 1; + + push(@kill_pids,{ + pid => $srv->{'pid'}, + pidfile => $srv->{'path_pid'}, + sockfile => $srv->{'path_sock'}, + port => $srv->{'port'}, + }); + $srv->{'pid'}= 0; # Assume we are done with it } - for ( my $idx; $idx < 3; $idx++ ) + if ( ! $::opt_skip_ndbcluster ) { - push(@args,{ - pid => 0, # We don't know the PID - pidfile => $::slave->[$idx]->{'path_mypid'}, - sockfile => $::slave->[$idx]->{'path_mysock'}, - port => $::slave->[$idx]->{'path_myport'}, - }); + # Start shutdown of clusters. + mtr_debug("Shutting down cluster..."); + + foreach my $cluster (@{$::clusters}) + { + mtr_debug(" - cluster " . + "(pid: $cluster->{pid}; " . + "pid file: '$cluster->{path_pid})"); + + my $pid= mtr_ndbmgm_start($cluster, "shutdown"); + + # Save the pid of the ndb_mgm process + $admin_pids{$pid}= 1; + + push(@kill_pids,{ + pid => $cluster->{'pid'}, + pidfile => $cluster->{'path_pid'} + }); + + $cluster->{'pid'}= 0; # Assume we are done with it + + foreach my $ndbd (@{$cluster->{'ndbds'}}) + { + mtr_debug(" - ndbd " . + "(pid: $ndbd->{pid}; " . + "pid file: '$ndbd->{path_pid})"); + + push(@kill_pids,{ + pid => $ndbd->{'pid'}, + pidfile => $ndbd->{'path_pid'}, + }); + $ndbd->{'pid'}= 0; # Assume we are done with it + } + } } - mtr_mysqladmin_shutdown(\@args, 20); + # Wait for all the admin processes to complete + mtr_wait_blocking(\%admin_pids); + + # If we trusted "mysqladmin --shutdown_timeout= ..." we could just + # terminate now, but we don't (FIXME should be debugged). + # So we try again to ping and at least wait the same amount of time + # mysqladmin would for all to die. + + mtr_ping_with_timeout(\@kill_pids); # We now have tried to terminate nice. We have waited for the listen # port to be free, but can't really tell if the mysqld process died @@ -401,6 +441,8 @@ sub mtr_kill_leftovers () { # FIXME $path_run_dir or something my $rundir= "$::opt_vardir/run"; + mtr_debug("Processing PID files in directory '$rundir'..."); + if ( -d $rundir ) { opendir(RUNDIR, $rundir) @@ -414,26 +456,32 @@ sub mtr_kill_leftovers () { if ( -f $pidfile ) { - my $pid= mtr_get_pid_from_file($pidfile); + mtr_debug("Processing PID file: '$pidfile'..."); - # Race, could have been removed between I tested with -f - # and the unlink() below, so I better check again with -f + my $pid= mtr_get_pid_from_file($pidfile); - if ( ! unlink($pidfile) and -f $pidfile ) - { - mtr_error("can't remove $pidfile"); - } + mtr_debug("Got pid: $pid from file '$pidfile'"); if ( $::glob_cygwin_perl or kill(0, $pid) ) { + mtr_debug("There is process with pid $pid -- scheduling for kill."); push(@pids, $pid); # We know (cygwin guess) it exists } + else + { + mtr_debug("There is no process with pid $pid -- skipping."); + } } } closedir(RUNDIR); if ( @pids ) { + mtr_debug("Killing the following processes with PID files: " . + join(' ', @pids) . "..."); + + start_reap_all(); + if ( $::glob_cygwin_perl ) { # We have no (easy) way of knowing the Cygwin controlling @@ -447,8 +495,9 @@ sub mtr_kill_leftovers () { my $retries= 10; # 10 seconds do { + mtr_debug("Sending SIGKILL to pids: " . join(' ', @pids)); kill(9, @pids); - mtr_debug("Sleep 1 second waiting for processes to die"); + mtr_report("Sleep 1 second waiting for processes to die"); sleep(1) # Wait one second } while ( $retries-- and kill(0, @pids) ); @@ -457,56 +506,74 @@ sub mtr_kill_leftovers () { mtr_warning("can't kill process(es) " . join(" ", @pids)); } } + + stop_reap_all(); } } + else + { + mtr_debug("Directory for PID files ($rundir) does not exist."); + } - # We may have failed everything, bug we now check again if we have + # We may have failed everything, but we now check again if we have # the listen ports free to use, and if they are free, just go for it. - foreach my $srv ( @args ) + mtr_debug("Checking known mysqld servers..."); + + foreach my $srv ( @kill_pids ) { - if ( mtr_ping_mysqld_server($srv->{'port'}, $srv->{'sockfile'}) ) + if ( defined $srv->{'port'} and mtr_ping_port($srv->{'port'}) ) { - mtr_warning("can't kill old mysqld holding port $srv->{'port'}"); + mtr_warning("can't kill old process holding port $srv->{'port'}"); } } -} -############################################################################## -# -# Shut down mysqld servers we have started from this run of this script -# -############################################################################## + mtr_debug("mtr_kill_leftovers(): finished."); +} -# To speed things we kill servers in parallel. The argument is a list -# of 'ports', 'pids', 'pidfiles' and 'socketfiles'. +# Check that all processes in list are killed +# The argument is a list of 'ports', 'pids', 'pidfiles' and 'socketfiles' +# for which shutdown has been started. Make sure they all get killed +# in one way or the other. +# # FIXME On Cygwin, and maybe some other platforms, $srv->{'pid'} and -# $srv->{'pidfile'} will not be the same PID. We need to try to kill +# the pid in $srv->{'pidfile'} will not be the same PID. We need to try to kill # both I think. -sub mtr_stop_mysqld_servers ($) { +sub mtr_check_stop_servers ($) { my $spec= shift; - # ---------------------------------------------------------------------- - # First try nice normal shutdown using 'mysqladmin' - # ---------------------------------------------------------------------- + # Return if no processes are defined + return if ! @$spec; - # Shutdown time must be high as slave may be in reconnect - mtr_mysqladmin_shutdown($spec, 70); + #mtr_report("mtr_check_stop_servers"); + + mtr_ping_with_timeout(\@$spec); # ---------------------------------------------------------------------- # We loop with waitpid() nonblocking to see how many of the ones we - # are to kill, actually got killed by mtr_mysqladmin_shutdown(). - # Note that we don't rely on this, the mysqld server might have stop + # are to kill, actually got killed by mysqladmin or ndb_mgm + # + # Note that we don't rely on this, the mysqld server might have stopped # listening to the port, but still be alive. But it is a start. # ---------------------------------------------------------------------- foreach my $srv ( @$spec ) { - if ( $srv->{'pid'} and (waitpid($srv->{'pid'},&WNOHANG) == $srv->{'pid'}) ) + my $ret_pid; + if ( $srv->{'pid'} ) { - $srv->{'pid'}= 0; + $ret_pid= waitpid($srv->{'pid'},&WNOHANG); + if ($ret_pid == $srv->{'pid'}) + { + mtr_verbose("Caught exit of process $ret_pid"); + $srv->{'pid'}= 0; + } + else + { + # mtr_warning("caught exit of unknown child $ret_pid"); + } } } @@ -540,13 +607,12 @@ sub mtr_stop_mysqld_servers ($) { } # ---------------------------------------------------------------------- - # If the processes where started from this script, and we had no PIDS + # If all the processes in list already have been killed, # then we don't have to do anything. # ---------------------------------------------------------------------- if ( ! keys %mysqld_pids ) { - # cluck "This is how we got here!"; return; } @@ -595,139 +661,288 @@ sub mtr_stop_mysqld_servers ($) { foreach my $file ($srv->{'pidfile'}, $srv->{'sockfile'}) { # Know it is dead so should be no race, careful anyway - if ( -f $file and ! unlink($file) and -f $file ) + if ( defined $file and -f $file and ! unlink($file) and -f $file ) { $errors++; mtr_warning("couldn't delete $file"); } } + $srv->{'pid'}= 0; } } } if ( $errors ) { - # We are in trouble, just die.... - mtr_error("we could not kill or clean up all processes"); + # There where errors killing processes + # do one last attempt to ping the servers + # and if they can't be pinged, assume they are dead + if ( ! mtr_ping_with_timeout( \@$spec ) ) + { + mtr_error("we could not kill or clean up all processes"); + } + else + { + mtr_verbose("All ports were free, continuing"); + } } } # FIXME We just assume they are all dead, for Cygwin we are not # really sure - + } +# Wait for all the process in the list to terminate +sub mtr_wait_blocking($) { + my $admin_pids= shift; -############################################################################## -# -# Shut down mysqld servers using "mysqladmin ... shutdown". -# To speed this up, we start them in parallel and use waitpid() to -# catch their termination. Note that this doesn't say the servers -# are terminated, just that 'mysqladmin' is terminated. -# -# Note that mysqladmin will ask the server about what PID file it uses, -# and mysqladmin will wait for it to be removed before it terminates -# (unless passes timeout). -# -# This function will take at most about 20 seconds, and we still are not -# sure we killed them all. If none is responding to ping, we return 1, -# else we return 0. -# -############################################################################## -sub mtr_mysqladmin_shutdown { - my $spec= shift; + # Return if no processes defined + return if ! %$admin_pids; + + mtr_verbose("mtr_wait_blocking"); + + # Wait for all the started processes to exit + # As mysqladmin is such a simple program, we trust it to terminate itself. + # I.e. we wait blocking, and wait for them all before we go on. + foreach my $pid (keys %{$admin_pids}) + { + my $ret_pid= waitpid($pid,0); + + } +} + +# Start "mysqladmin shutdown" for a specific mysqld +sub mtr_mysqladmin_start($$$) { + my $srv= shift; + my $command= shift; my $adm_shutdown_tmo= shift; - my %mysql_admin_pids; - my @to_kill_specs; + my $args; + mtr_init_args(\$args); - foreach my $srv ( @$spec ) + mtr_add_arg($args, "--no-defaults"); + mtr_add_arg($args, "--user=%s", $::opt_user); + mtr_add_arg($args, "--password="); + mtr_add_arg($args, "--silent"); + if ( -e $srv->{'path_sock'} ) + { + mtr_add_arg($args, "--socket=%s", $srv->{'path_sock'}); + } + if ( $srv->{'port'} ) { - if ( mtr_ping_mysqld_server($srv->{'port'}, $srv->{'sockfile'}) ) + mtr_add_arg($args, "--port=%s", $srv->{'port'}); + } + if ( $srv->{'port'} and ! -e $srv->{'path_sock'} ) + { + mtr_add_arg($args, "--protocol=tcp"); # Needed if no --socket + } + mtr_add_arg($args, "--connect_timeout=5"); + + # Shutdown time must be high as slave may be in reconnect + mtr_add_arg($args, "--shutdown_timeout=$adm_shutdown_tmo"); + mtr_add_arg($args, "$command"); + my $path_mysqladmin_log= "$::opt_vardir/log/mysqladmin.log"; + my $pid= mtr_spawn($::exe_mysqladmin, $args, + "", $path_mysqladmin_log, $path_mysqladmin_log, "", + { append_log_file => 1 }); + mtr_verbose("mtr_mysqladmin_start, pid: $pid"); + return $pid; + +} + +# Start "ndb_mgm shutdown" for a specific cluster, it will +# shutdown all data nodes and leave the ndb_mgmd running +sub mtr_ndbmgm_start($$) { + my $cluster= shift; + my $command= shift; + + my $args; + + mtr_init_args(\$args); + + mtr_add_arg($args, "--no-defaults"); + mtr_add_arg($args, "--core"); + mtr_add_arg($args, "--try-reconnect=1"); + mtr_add_arg($args, "--ndb_connectstring=%s", $cluster->{'connect_string'}); + mtr_add_arg($args, "-e"); + mtr_add_arg($args, "$command"); + + my $pid= mtr_spawn($::exe_ndb_mgm, $args, + "", "/dev/null", "/dev/null", "", + {}); + mtr_verbose("mtr_ndbmgm_start, pid: $pid"); + return $pid; + +} + + +# Ping all servers in list, exit when none of them answers +# or when timeout has passed +sub mtr_ping_with_timeout($) { + my $spec= shift; + my $timeout= 200; # 20 seconds max + my $res= 1; # If we just fall through, we are done + # in the sense that the servers don't + # listen to their ports any longer + + mtr_debug("Waiting for mysqld servers to stop..."); + + TIME: + while ( $timeout-- ) + { + foreach my $srv ( @$spec ) { - push(@to_kill_specs, $srv); + $res= 1; # We are optimistic + if ( $srv->{'pid'} and defined $srv->{'port'} ) + { + if ( mtr_ping_port($srv->{'port'}) ) + { + mtr_verbose("waiting for process $srv->{'pid'} to stop ". + "using port $srv->{'port'}"); + + # Millisceond sleep emulated with select + select(undef, undef, undef, (0.1)); + $res= 0; + next TIME; + } + else + { + # Process was not using port + } + } } + last; # If we got here, we are done } - - foreach my $srv ( @to_kill_specs ) + if ($res) + { + mtr_debug("mtr_ping_with_timeout(): All mysqld instances are down."); + } + else { - # FIXME wrong log..... - # FIXME, stderr..... - # Shutdown time must be high as slave may be in reconnect - my $args; + mtr_report("mtr_ping_with_timeout(): At least one server is alive."); + } - mtr_init_args(\$args); + return $res; +} + + +# +# Loop through our list of processes and look for and entry +# with the provided pid +# Set the pid of that process to 0 if found +# +sub mark_process_dead($) +{ + my $ret_pid= shift; - mtr_add_arg($args, "--no-defaults"); - mtr_add_arg($args, "--user=%s", $::opt_user); - mtr_add_arg($args, "--password="); - if ( -e $srv->{'sockfile'} ) + foreach my $mysqld (@{$::master}, @{$::slave}) + { + if ( $mysqld->{'pid'} eq $ret_pid ) { - mtr_add_arg($args, "--socket=%s", $srv->{'sockfile'}); + mtr_verbose("$mysqld->{'type'} $mysqld->{'idx'} exited, pid: $ret_pid"); + $mysqld->{'pid'}= 0; + return; } - if ( $srv->{'port'} ) + } + + foreach my $cluster (@{$::clusters}) + { + if ( $cluster->{'pid'} eq $ret_pid ) { - mtr_add_arg($args, "--port=%s", $srv->{'port'}); + mtr_verbose("$cluster->{'name'} cluster ndb_mgmd exited, pid: $ret_pid"); + $cluster->{'pid'}= 0; + return; } - if ( $srv->{'port'} and ! -e $srv->{'sockfile'} ) + + foreach my $ndbd (@{$cluster->{'ndbds'}}) { - mtr_add_arg($args, "--protocol=tcp"); # Needed if no --socket + if ( $ndbd->{'pid'} eq $ret_pid ) + { + mtr_verbose("$cluster->{'name'} cluster ndbd exited, pid: $ret_pid"); + $ndbd->{'pid'}= 0; + return; + } } - mtr_add_arg($args, "--connect_timeout=5"); - mtr_add_arg($args, "--shutdown_timeout=$adm_shutdown_tmo"); - mtr_add_arg($args, "shutdown"); - # We don't wait for termination of mysqladmin - my $pid= mtr_spawn($::exe_mysqladmin, $args, - "", $::path_manager_log, $::path_manager_log, "", - { append_log_file => 1 }); - $mysql_admin_pids{$pid}= 1; } + mtr_warning("mark_process_dead couldn't find an entry for pid: $ret_pid"); + +} + +# +# Loop through our list of processes and look for and entry +# with the provided pid, if found check for the file indicating +# expected crash and restart it. +# +sub check_expected_crash_and_restart($) +{ + my $ret_pid= shift; - # As mysqladmin is such a simple program, we trust it to terminate. - # I.e. we wait blocking, and wait wait for them all before we go on. - while (keys %mysql_admin_pids) + foreach my $mysqld (@{$::master}, @{$::slave}) { - foreach my $pid (keys %mysql_admin_pids) + if ( $mysqld->{'pid'} eq $ret_pid ) { - if ( waitpid($pid,0) > 0 ) + mtr_verbose("$mysqld->{'type'} $mysqld->{'idx'} exited, pid: $ret_pid"); + $mysqld->{'pid'}= 0; + + # Check if crash expected and restart if it was + my $expect_file= "$::opt_vardir/tmp/" . "$mysqld->{'type'}" . + "$mysqld->{'idx'}" . ".expect"; + if ( -f $expect_file ) { - delete $mysql_admin_pids{$pid}; + mtr_verbose("Crash was expected, file $expect_file exists"); + mysqld_start($mysqld, $mysqld->{'start_opts'}, + $mysqld->{'start_slave_master_info'}); + unlink($expect_file); } + + return; } } - # If we trusted "mysqladmin --shutdown_timeout= ..." we could just - # terminate now, but we don't (FIXME should be debugged). - # So we try again to ping and at least wait the same amount of time - # mysqladmin would for all to die. - - my $timeout= 20; # 20 seconds max - my $res= 1; # If we just fall through, we are done - # in the sense that the servers don't - # listen to their ports any longer - TIME: - while ( $timeout-- ) + foreach my $cluster (@{$::clusters}) { - foreach my $srv ( @to_kill_specs ) + if ( $cluster->{'pid'} eq $ret_pid ) { - $res= 1; # We are optimistic - if ( mtr_ping_mysqld_server($srv->{'port'}, $srv->{'sockfile'}) ) + mtr_verbose("$cluster->{'name'} cluster ndb_mgmd exited, pid: $ret_pid"); + $cluster->{'pid'}= 0; + + # Check if crash expected and restart if it was + my $expect_file= "$::opt_vardir/tmp/ndb_mgmd_" . "$cluster->{'type'}" . + ".expect"; + if ( -f $expect_file ) { - mtr_debug("Sleep 1 second waiting for processes to stop using port"); - sleep(1); # One second - $res= 0; - next TIME; + mtr_verbose("Crash was expected, file $expect_file exists"); + ndbmgmd_start($cluster); + unlink($expect_file); } + return; } - last; # If we got here, we are done - } - - $timeout or mtr_debug("At least one server is still listening to its port"); - sleep(5) if $::glob_win32; # FIXME next startup fails if no sleep + foreach my $ndbd (@{$cluster->{'ndbds'}}) + { + if ( $ndbd->{'pid'} eq $ret_pid ) + { + mtr_verbose("$cluster->{'name'} cluster ndbd exited, pid: $ret_pid"); + $ndbd->{'pid'}= 0; + + # Check if crash expected and restart if it was + my $expect_file= "$::opt_vardir/tmp/ndbd_" . "$cluster->{'type'}" . + "$ndbd->{'idx'}" . ".expect"; + if ( -f $expect_file ) + { + mtr_verbose("Crash was expected, file $expect_file exists"); + ndbd_start($cluster, $ndbd->{'idx'}, + $ndbd->{'start_extra_args'}); + unlink($expect_file); + } + return; + } + } + } + mtr_warning("check_expected_crash_and_restart couldn't find an entry for pid: $ret_pid"); - return $res; } ############################################################################## @@ -740,32 +955,18 @@ sub mtr_mysqladmin_shutdown { sub mtr_record_dead_children () { + my $process_died= 0; my $ret_pid; - # FIXME the man page says to wait for -1 to terminate, - # but on OS X we get '0' all the time... - while ( ($ret_pid= waitpid(-1,&WNOHANG)) > 0 ) + # Wait without blockinng to see if any processes had died + # -1 or 0 means there are no more procesess to wait for + while ( ($ret_pid= waitpid(-1,&WNOHANG)) != 0 and $ret_pid != -1) { - mtr_debug("waitpid() catched exit of child $ret_pid"); - foreach my $idx (0..1) - { - if ( $::master->[$idx]->{'pid'} eq $ret_pid ) - { - mtr_debug("child $ret_pid was master[$idx]"); - $::master->[$idx]->{'pid'}= 0; - } - } - - foreach my $idx (0..2) - { - if ( $::slave->[$idx]->{'pid'} eq $ret_pid ) - { - mtr_debug("child $ret_pid was slave[$idx]"); - $::slave->[$idx]->{'pid'}= 0; - last; - } - } + mtr_warning("mtr_record_dead_children: $ret_pid"); + mark_process_dead($ret_pid); + $process_died= 1; } + return $process_died; } sub start_reap_all { @@ -777,16 +978,24 @@ sub start_reap_all { # here. If a process terminated before setting $SIG{CHLD} (but after # any attempt to waitpid() it), it will still be a zombie. So we # have to handle any such process here. - while(waitpid(-1, &WNOHANG) > 0) { }; + my $pid; + while(($pid= waitpid(-1, &WNOHANG)) != 0 and $pid != -1) + { + mtr_warning("start_reap_all pid: $pid"); + mark_process_dead($pid); + }; } sub stop_reap_all { $SIG{CHLD}= 'DEFAULT'; } -sub mtr_ping_mysqld_server () { + +sub mtr_ping_port ($) { my $port= shift; + mtr_verbose("mtr_ping_port: $port"); + my $remote= "localhost"; my $iaddr= inet_aton($remote); if ( ! $iaddr ) @@ -799,13 +1008,18 @@ sub mtr_ping_mysqld_server () { { mtr_error("can't create socket: $!"); } + + mtr_debug("Pinging server (port: $port)..."); + if ( connect(SOCK, $paddr) ) { close(SOCK); # FIXME check error? + mtr_verbose("USED"); return 1; } else { + mtr_verbose("FREE"); return 0; } } @@ -822,30 +1036,36 @@ sub sleep_until_file_created ($$$) { my $pidfile= shift; my $timeout= shift; my $pid= shift; + my $sleeptime= 100; # Milliseconds + my $loops= ($timeout * 1000) / $sleeptime; - for ( my $loop= 1; $loop <= $timeout; $loop++ ) + for ( my $loop= 1; $loop <= $loops; $loop++ ) { if ( -r $pidfile ) { return $pid; } - # Check if it died after the fork() was successful - if ( waitpid($pid,&WNOHANG) == $pid ) + # Check if it died after the fork() was successful + if ( $pid != 0 && waitpid($pid,&WNOHANG) == $pid ) { + mtr_warning("Process $pid died"); return 0; } - mtr_debug("Sleep 1 second waiting for creation of $pidfile"); + mtr_debug("Sleep $sleeptime milliseconds waiting for $pidfile"); - if ( $loop % 60 == 0 ) + # Print extra message every 60 seconds + my $seconds= ($loop * $sleeptime) / 1000; + if ( $seconds > 1 and int($seconds) % 60 == 0 ) { - my $left= $timeout - $loop; - mtr_warning("Waited $loop seconds for $pidfile to be created, " . + my $left= $timeout - $seconds; + mtr_warning("Waited $seconds seconds for $pidfile to be created, " . "still waiting for $left seconds..."); } - sleep(1); + # Millisceond sleep emulated with select + select(undef, undef, undef, ($sleeptime/1000)); } return 0; @@ -855,18 +1075,18 @@ sub sleep_until_file_created ($$$) { sub mtr_kill_processes ($) { my $pids = shift; - foreach my $sig (15,9) + mtr_verbose("mtr_kill_processes " . join(" ", @$pids)); + + foreach my $pid (@$pids) { - my $retries= 20; # FIXME 20 seconds, this is silly! - kill($sig, @{$pids}); - while ( $retries-- and kill(0, @{$pids}) ) + foreach my $sig (15, 9) { - mtr_debug("Sleep 1 second waiting for processes to die"); - sleep(1) # Wait one second + last if mtr_im_kill_process([ $pid ], $sig, 10, 1); } } } + ############################################################################## # # When we exit, we kill off all children @@ -876,7 +1096,7 @@ sub mtr_kill_processes ($) { # FIXME something is wrong, we sometimes terminate with "Hangup" written # to tty, and no STDERR output telling us why. -# FIXME for some readon, setting HUP to 'IGNORE' will cause exit() to +# FIXME for some reason, setting HUP to 'IGNORE' will cause exit() to # write out "Hangup", and maybe loose some output. We insert a sleep... sub mtr_exit ($) { @@ -884,9 +1104,18 @@ sub mtr_exit ($) { # cluck("Called mtr_exit()"); mtr_timer_stop_all($::glob_timers); local $SIG{HUP} = 'IGNORE'; - kill('HUP', -$$); - sleep 2; + # ToDo: Signalling -$$ will only work if we are the process group + # leader (in fact on QNX it will signal our session group leader, + # which might be Do-compile or Pushbuild, causing tests to be + # aborted). So we only do it if we are the group leader. We might + # set ourselves as the group leader at startup (with + # POSIX::setpgrp(0,0)), but then care must be needed to always do + # proper child process cleanup. + kill('HUP', -$$) if !$::glob_win32_perl and $$ == getpgrp(); + exit($code); } +########################################################################### + 1; |