summaryrefslogtreecommitdiff
path: root/mysql-test/lib/mtr_process.pl
diff options
context:
space:
mode:
authorunknown <kent@mysql.com>2005-02-03 21:13:27 +0100
committerunknown <kent@mysql.com>2005-02-03 21:13:27 +0100
commit044f9e8227d1aa1e41e83a3a89e162b429a6646b (patch)
tree955cc0efc6c8a116ae643539da0919be43195ade /mysql-test/lib/mtr_process.pl
parente244a3a9b4a9d9fd9dad63338ead667cf4b9a959 (diff)
downloadmariadb-git-044f9e8227d1aa1e41e83a3a89e162b429a6646b.tar.gz
mysql-test-run.pl:
Let --start-and-exit actually start a server Added that test case names can be specified on the comman line Added embedded server support Added environment variables UMASK, UMASK_DIR Added missing MASTER_MYSOCK1, MASTER_MYPORT1, USE_RUNNING_SERVER Added missing CHARSETSDIR, MYSQL_FIX_SYSTEM_TABLES, MYSQL_CLIENT_TEST Pass on return value from sleep_until_file_created(), to fail test Fail test if early termination of mysqld servers Create intial databases for the second master, and two additional slaves mtr_process.pl: Find out if port is still in use, using simple TCP connect Use non blocking waitpid() to catch terminations early Make a special case spawning the 'mysqltest' application Redo the fork() if it returns EAGAIN Make sure to record if master or slave terminated Improved debugging output Improved code that remove PID files to avoid race Abort if we can't stop all mysqld servers using our ports Many improvements in killing mysqld servers Let sleep_until_file_created() catch if server died early mtr_report.pl: Added option to disable test cases using <testcase>.disabled file If --timer, only try to open file with time data if it exists mtr_io.pl: Remove starting/ending space reading server options from file mysql-test/lib/mtr_io.pl: Remove starting/ending space reading server options from file mysql-test/lib/mtr_report.pl: Added option to disable test cases using <testcase>.disabled file If --timer, only try to open file with time data if it exists mysql-test/lib/mtr_process.pl: Find out if port is still in use, using simple TCP connect Use non blocking waitpid() to catch terminations early Make a special case spawning the 'mysqltest' application Redo the fork() if it returns EAGAIN Make sure to record if master or slave terminated Improved debugging output Improved code that remove PID files to avoid race Abort if we can't stop all mysqld servers using our ports Many improvements in killing mysqld servers Let sleep_until_file_created() catch if server died early mysql-test/mysql-test-run.pl: Let --start-and-exit actually start a server Added that test case names can be specified on the comman line Added embedded server support Added environment variables UMASK, UMASK_DIR Added missing MASTER_MYSOCK1, MASTER_MYPORT1, USE_RUNNING_SERVER Added missing CHARSETSDIR, MYSQL_FIX_SYSTEM_TABLES, MYSQL_CLIENT_TEST Pass on return value from sleep_until_file_created(), to fail test Fail test if early termination of mysqld servers Create intial databases for the second master, and two additional slaves
Diffstat (limited to 'mysql-test/lib/mtr_process.pl')
-rw-r--r--mysql-test/lib/mtr_process.pl672
1 files changed, 465 insertions, 207 deletions
diff --git a/mysql-test/lib/mtr_process.pl b/mysql-test/lib/mtr_process.pl
index e832468d0cb..e1461a9730c 100644
--- a/mysql-test/lib/mtr_process.pl
+++ b/mysql-test/lib/mtr_process.pl
@@ -5,14 +5,19 @@
# same name.
#use Carp qw(cluck);
+use Socket;
+use Errno;
use strict;
-use POSIX ":sys_wait_h";
+#use POSIX ":sys_wait_h";
+use POSIX 'WNOHANG';
sub mtr_run ($$$$$$);
sub mtr_spawn ($$$$$$);
-sub mtr_stop_mysqld_servers ($$);
+sub mtr_stop_mysqld_servers ($);
sub mtr_kill_leftovers ();
+sub mtr_record_dead_children ();
+sub sleep_until_file_created ($$$);
# static in C
sub spawn_impl ($$$$$$$);
@@ -34,7 +39,18 @@ sub mtr_run ($$$$$$) {
my $error= shift;
my $pid_file= shift;
- return spawn_impl($path,$arg_list_t,1,$input,$output,$error,$pid_file);
+ return spawn_impl($path,$arg_list_t,'run',$input,$output,$error,$pid_file);
+}
+
+sub mtr_run_test ($$$$$$) {
+ my $path= shift;
+ my $arg_list_t= shift;
+ my $input= shift;
+ my $output= shift;
+ my $error= shift;
+ my $pid_file= shift;
+
+ return spawn_impl($path,$arg_list_t,'test',$input,$output,$error,$pid_file);
}
sub mtr_spawn ($$$$$$) {
@@ -45,7 +61,7 @@ sub mtr_spawn ($$$$$$) {
my $error= shift;
my $pid_file= shift;
- return spawn_impl($path,$arg_list_t,0,$input,$output,$error,$pid_file);
+ return spawn_impl($path,$arg_list_t,'spawn',$input,$output,$error,$pid_file);
}
@@ -58,7 +74,7 @@ sub mtr_spawn ($$$$$$) {
sub spawn_impl ($$$$$$$) {
my $path= shift;
my $arg_list_t= shift;
- my $join= shift;
+ my $mode= shift;
my $input= shift;
my $output= shift;
my $error= shift;
@@ -71,107 +87,203 @@ sub spawn_impl ($$$$$$$) {
print STDERR "#### ", "STDIN $input\n" if $input;
print STDERR "#### ", "STDOUT $output\n" if $output;
print STDERR "#### ", "STDERR $error\n" if $error;
- if ( $join )
- {
- print STDERR "#### ", "RUN ";
- }
- else
- {
- print STDERR "#### ", "SPAWN ";
- }
- print STDERR "$path ", join(" ",@$arg_list_t), "\n";
+ print STDERR "#### ", "$mode : $path ", join(" ",@$arg_list_t), "\n";
print STDERR "#### ", "-" x 78, "\n";
}
- my $pid= fork();
- if ( ! defined $pid )
+ FORK:
{
- mtr_error("$path ($pid) can't be forked");
- }
+ my $pid= fork();
- if ( $pid )
- {
- # Parent, i.e. the main script
- if ( $join )
+ if ( ! defined $pid )
{
- # We run a command and wait for the result
- # FIXME this need to be improved
- my $res= waitpid($pid,0);
-
- if ( $res == -1 )
+ if ( $! == $!{EAGAIN} ) # See "perldoc Errno"
{
- mtr_error("$path ($pid) got lost somehow");
+ mtr_debug("Got EAGAIN from fork(), sleep 1 second and redo");
+ sleep(1);
+ redo FORK;
}
- my $exit_value= $? >> 8;
- my $signal_num= $? & 127;
- my $dumped_core= $? & 128;
- if ( $signal_num )
- {
- mtr_error("$path ($pid) got signal $signal_num");
- }
- if ( $dumped_core )
+ else
{
- mtr_error("$path ($pid) dumped core");
+ mtr_error("$path ($pid) can't be forked");
}
- return $exit_value;
+ }
+
+ if ( $pid )
+ {
+ spawn_parent_impl($pid,$mode,$path);
}
else
{
- # We spawned a process we don't wait for
- return $pid;
+ # Child, redirect output and exec
+ # FIXME I tried POSIX::setsid() here to detach and, I hoped,
+ # avoid zombies. But everything went wild, somehow the parent
+ # became a deamon as well, and was hard to kill ;-)
+ # Need to catch SIGCHLD and do waitpid or something instead......
+
+ $SIG{INT}= 'DEFAULT'; # Parent do some stuff, we don't
+
+ if ( $output )
+ {
+ if ( ! open(STDOUT,">",$output) )
+ {
+ mtr_error("can't redirect STDOUT to \"$output\": $!");
+ }
+ }
+ if ( $error )
+ {
+ if ( $output eq $error )
+ {
+ if ( ! open(STDERR,">&STDOUT") )
+ {
+ mtr_error("can't dup STDOUT: $!");
+ }
+ }
+ else
+ {
+ if ( ! open(STDERR,">",$error) )
+ {
+ mtr_error("can't redirect STDERR to \"$output\": $!");
+ }
+ }
+ }
+ if ( $input )
+ {
+ if ( ! open(STDIN,"<",$input) )
+ {
+ mtr_error("can't redirect STDIN to \"$input\": $!");
+ }
+ }
+ exec($path,@$arg_list_t);
}
}
- else
- {
- # Child, redirect output and exec
- # FIXME I tried POSIX::setsid() here to detach and, I hoped,
- # avoid zombies. But everything went wild, somehow the parent
- # became a deamon as well, and was hard to kill ;-)
- # Need to catch SIGCHLD and do waitpid or something instead......
+}
+
+
+sub spawn_parent_impl {
+ my $pid= shift;
+ my $mode= shift;
+ my $path= shift;
- $SIG{INT}= 'DEFAULT'; # Parent do some stuff, we don't
+ if ( $mode eq 'run' or $mode eq 'test' )
+ {
+ my $exit_value= -1;
+ my $signal_num= 0;
+ my $dumped_core= 0;
- if ( $output )
+ if ( $mode eq 'run' )
{
- if ( ! open(STDOUT,">",$output) )
+ # Simple run of command, we wait for it to return
+ my $ret_pid= waitpid($pid,0);
+
+ if ( $ret_pid <= 0 )
{
- mtr_error("can't redirect STDOUT to \"$output\": $!");
+ mtr_error("$path ($pid) got lost somehow");
}
+
+ $exit_value= $? >> 8;
+ $signal_num= $? & 127;
+ $dumped_core= $? & 128;
+
+ return $exit_value;
}
- if ( $error )
+ else
{
- if ( $output eq $error )
+ # We run mysqltest and wait for it to return. But we try to
+ # catch dying mysqld processes as well.
+ #
+ # We do blocking waitpid() until we get the return from the
+ # "mysqltest" call. But if a mysqld process dies that we
+ # started, we take this as an error, and kill mysqltest.
+ #
+ # FIXME is this as it should be? Can't mysqld terminate
+ # normally from running a test case?
+
+ my $ret_pid; # What waitpid() returns
+
+ while ( ($ret_pid= waitpid(-1,0)) != -1 )
{
- if ( ! open(STDERR,">&STDOUT") )
+ # Someone terminated, don't know who. Collect
+ # status info first before $? is lost,
+ # but not $exit_value, this is flagged from
+ #
+
+ if ( $ret_pid == $pid )
{
- mtr_error("can't dup STDOUT: $!");
+ # We got termination of mysqltest, we are done
+ $exit_value= $? >> 8;
+ $signal_num= $? & 127;
+ $dumped_core= $? & 128;
+ last;
}
- }
- else
- {
- if ( ! open(STDERR,">",$error) )
+
+ # If one of the mysqld processes died, we want to
+ # mark this, and kill the mysqltest process.
+
+ foreach my $idx (0..1)
{
- mtr_error("can't redirect STDERR to \"$output\": $!");
+ if ( $::master->[$idx]->{'pid'} eq $ret_pid )
+ {
+ mtr_debug("child $ret_pid was master[$idx], " .
+ "exit during mysqltest run");
+ $::master->[$idx]->{'pid'}= 0;
+ last;
+ }
}
+
+ foreach my $idx (0..2)
+ {
+ if ( $::slave->[$idx]->{'pid'} eq $ret_pid )
+ {
+ mtr_debug("child $ret_pid was slave[$idx], " .
+ "exit during mysqltest run");
+ $::slave->[$idx]->{'pid'}= 0;
+ last;
+ }
+ }
+
+ mtr_debug("waitpid() catched exit of unknown child $ret_pid, " .
+ "exit during mysqltest run");
}
- }
- if ( $input )
- {
- if ( ! open(STDIN,"<",$input) )
+
+ if ( $ret_pid != $pid )
{
- mtr_error("can't redirect STDIN to \"$input\": $!");
+ # We terminated the waiting because a "mysqld" process died.
+ # Kill the mysqltest process.
+
+ kill(9,$pid);
+
+ $ret_pid= waitpid($pid,0);
+
+ if ( $ret_pid == -1 )
+ {
+ mtr_error("$path ($pid) got lost somehow");
+ }
}
+
+ return $exit_value;
}
- exec($path,@$arg_list_t);
+ }
+ else
+ {
+ # We spawned a process we don't wait for
+ return $pid;
}
}
+
+
##############################################################################
#
# Kill processes left from previous runs
#
##############################################################################
+# We just "ping" on the ports, and if we can't do a socket connect
+# we assume the server is dead. So we don't *really* know a server
+# is dead, we just hope that it after letting the listen port go,
+# it is dead enough for us to start a new server.
+
sub mtr_kill_leftovers () {
# First, kill all masters and slaves that would conflict with
@@ -199,10 +311,23 @@ sub mtr_kill_leftovers () {
});
}
- mtr_stop_mysqld_servers(\@args, 1);
+ mtr_mysqladmin_shutdown(\@args);
+
+ # We now have tried to terminate nice. We have waited for the listen
+ # port to be free, but can't really tell if the mysqld process died
+ # or not. We now try to find the process PID from the PID file, and
+ # send a kill to that process. Note that Perl let kill(0,@pids) be
+ # a way to just return the numer of processes the kernel can send
+ # signals to. So this can be used (except on Cygwin) to determine
+ # if there are processes left running that we cound out might exists.
+ #
+ # But still after all this work, all we know is that we have
+ # the ports free.
# We scan the "var/run/" directory for other process id's to kill
- my $rundir= "$::glob_mysql_test_dir/var/run"; # FIXME $path_run_dir or something
+
+ # FIXME $path_run_dir or something
+ my $rundir= "$::glob_mysql_test_dir/var/run";
if ( -d $rundir )
{
@@ -218,193 +343,157 @@ sub mtr_kill_leftovers () {
if ( -f $pidfile )
{
my $pid= mtr_get_pid_from_file($pidfile);
- if ( ! unlink($pidfile) )
+
+ # Race, could have been removed between I tested with -f
+ # and the unlink() below, so I better check again with -f
+
+ if ( ! unlink($pidfile) and -f $pidfile )
{
mtr_error("can't remove $pidfile");
}
- push(@pids, $pid);
+
+ if ( $::glob_cygwin_perl or kill(0, $pid) )
+ {
+ push(@pids, $pid); # We know (cygwin guess) it exists
+ }
}
}
closedir(RUNDIR);
- start_reap_all();
-
- if ( $::glob_cygwin_perl )
+ if ( @pids )
{
- # We have no (easy) way of knowing the Cygwin controlling
- # process, in the PID file we only have the Windows process id.
- system("kill -f " . join(" ",@pids)); # Hope for the best....
- }
- else
- {
- my $retries= 10; # 10 seconds
- do
+ if ( $::glob_cygwin_perl )
{
- kill(9, @pids);
- } while ( $retries-- and kill(0, @pids) );
-
- if ( kill(0, @pids) )
+ # We have no (easy) way of knowing the Cygwin controlling
+ # process, in the PID file we only have the Windows process id.
+ system("kill -f " . join(" ",@pids)); # Hope for the best....
+ mtr_debug("Sleep 5 seconds waiting for processes to die");
+ sleep(5);
+ }
+ else
{
- mtr_error("can't kill processes " . join(" ", @pids));
+ my $retries= 10; # 10 seconds
+ do
+ {
+ kill(9, @pids);
+ mtr_debug("Sleep 1 second waiting for processes to die");
+ sleep(1) # Wait one second
+ } while ( $retries-- and kill(0, @pids) );
+
+ if ( kill(0, @pids) ) # Check if some left
+ {
+ # FIXME maybe just mtr_warning() ?
+ mtr_error("can't kill process(es) " . join(" ", @pids));
+ }
}
}
+ }
+
+ # We may have failed everything, bug we now check again if we have
+ # the listen ports free to use, and if they are free, just go for it.
- stop_reap_all();
+ foreach my $srv ( @args )
+ {
+ if ( mtr_ping_mysqld_server($srv->{'port'}, $srv->{'sockfile'}) )
+ {
+ mtr_error("can't kill old mysqld holding port $srv->{'port'}");
+ }
}
}
##############################################################################
#
-# Shut down mysqld servers
+# Shut down mysqld servers we have started from this run of this script
#
##############################################################################
-# To speed things we kill servers in parallel.
-# The argument is a list of 'pidfiles' and 'socketfiles'.
-# We use the pidfiles and socketfiles to try to terminate the servers.
-# This is not perfect, there could still be other server processes
-# left.
-
-# Force flag is to be set only for killing mysqld servers this script
-# didn't create in this run, i.e. initial cleanup before we start working.
-# If force flag is set, we try to kill all with mysqladmin, and
-# give up if we have no PIDs.
+# To speed things we kill servers in parallel. The argument is a list
+# of 'ports', 'pids', 'pidfiles' and 'socketfiles'.
-# FIXME On some operating systems, $srv->{'pid'} and $srv->{'pidfile'}
-# will not be the same PID. We need to try to kill both I think.
+# FIXME On Cygwin, and maybe some other platforms, $srv->{'pid'} and
+# $srv->{'pidfile'} will not be the same PID. We need to try to kill
+# both I think.
-sub mtr_stop_mysqld_servers ($$) {
+sub mtr_stop_mysqld_servers ($) {
my $spec= shift;
- my $force= shift;
# ----------------------------------------------------------------------
- # If the process was not started from this file, we got no PID,
- # we try to find it in the PID file.
+ # First try nice normal shutdown using 'mysqladmin'
# ----------------------------------------------------------------------
- my $any_pid= 0; # If we have any PIDs
+ mtr_mysqladmin_shutdown($spec);
+
+ # ----------------------------------------------------------------------
+ # We loop with waitpid() nonblocking to see how many of the ones we
+ # are to kill, actually got killed by mtr_mysqladmin_shutdown().
+ # Note that we don't rely on this, the mysqld server might have stop
+ # listening to the port, but still be alive. But it is a start.
+ # ----------------------------------------------------------------------
foreach my $srv ( @$spec )
{
- if ( ! $srv->{'pid'} and -f $srv->{'pidfile'} )
- {
- $srv->{'pid'}= mtr_get_pid_from_file($srv->{'pidfile'});
- }
- if ( $srv->{'pid'} )
+ if ( $srv->{'pid'} and (waitpid($srv->{'pid'},&WNOHANG) == $srv->{'pid'}) )
{
- $any_pid= 1;
+ $srv->{'pid'}= 0;
}
}
- # If the processes where started from this script, and we know
- # no PIDs, then we don't have to do anything.
-
- if ( ! $any_pid and ! $force )
- {
- # cluck "This is how we got here!";
- return;
- }
-
# ----------------------------------------------------------------------
- # First try nice normal shutdown using 'mysqladmin'
+ # We know the process was started from this file, so there is a PID
+ # saved, or else we have nothing to do.
+ # Might be that is is recorded to be missing, but we failed to
+ # take away the PID file earlier, then we do it now.
# ----------------------------------------------------------------------
- start_reap_all(); # Don't require waitpid() of children
+ my %mysqld_pids;
foreach my $srv ( @$spec )
{
- if ( -e $srv->{'sockfile'} or $srv->{'port'} )
+ if ( $srv->{'pid'} )
{
- # FIXME wrong log.....
- # FIXME, stderr.....
- # Shutdown time must be high as slave may be in reconnect
- my $args;
-
- mtr_init_args(\$args);
-
- mtr_add_arg($args, "--no-defaults");
- mtr_add_arg($args, "--user=%s", $::opt_user);
- mtr_add_arg($args, "--password=");
- if ( -e $srv->{'sockfile'} )
- {
- mtr_add_arg($args, "--socket=%s", $srv->{'sockfile'});
- }
- if ( $srv->{'port'} )
- {
- mtr_add_arg($args, "--port=%s", $srv->{'port'});
- }
- mtr_add_arg($args, "--connect_timeout=5");
- mtr_add_arg($args, "--shutdown_timeout=20");
- mtr_add_arg($args, "--protocol=tcp"); # FIXME new thing, will it help?!
- mtr_add_arg($args, "shutdown");
- # We don't wait for termination of mysqladmin
- mtr_spawn($::exe_mysqladmin, $args,
- "", $::path_manager_log, $::path_manager_log, "");
+ $mysqld_pids{$srv->{'pid'}}= 1;
}
- }
-
- # Wait for them all to remove their pid and socket file
-
- PIDSOCKFILEREMOVED:
- for (my $loop= $::opt_sleep_time_for_delete; $loop; $loop--)
- {
- my $pidsockfiles_left= 0;
- foreach my $srv ( @$spec )
+ else
{
- if ( -e $srv->{'sockfile'} or -f $srv->{'pidfile'} )
+ # Race, could have been removed between I tested with -f
+ # and the unlink() below, so I better check again with -f
+
+ if ( -f $srv->{'pidfile'} and ! unlink($srv->{'pidfile'}) and
+ -f $srv->{'pidfile'} )
{
- $pidsockfiles_left++; # Could be that pidfile is left
+ mtr_error("can't remove $srv->{'pidfile'}");
}
}
- if ( ! $pidsockfiles_left )
- {
- last PIDSOCKFILEREMOVED;
- }
- if ( $loop % 20 == 1 )
- {
- mtr_warning("Still processes alive after 10 seconds, retrying for $loop seconds...");
- }
- mtr_debug("Sleep for 1 second waiting for pid and socket file removal");
- sleep(1); # One second
}
# ----------------------------------------------------------------------
- # If no known PIDs, we have nothing more to try
+ # If the processes where started from this script, and we had no PIDS
+ # then we don't have to do anything.
# ----------------------------------------------------------------------
- if ( ! $any_pid )
+ if ( ! keys %mysqld_pids )
{
- stop_reap_all();
+ # cluck "This is how we got here!";
return;
}
# ----------------------------------------------------------------------
- # We may have killed all that left a socket, but we are not sure we got
- # them all killed. If we suspect it lives, try nice kill with SIG_TERM.
- # Note that for true Win32 processes, kill(0,$pid) will not return 1.
+ # In mtr_mysqladmin_shutdown() we only waited for the mysqld servers
+ # not to listen to the port. But we are not sure we got them all
+ # killed. If we suspect it lives, try nice kill with SIG_TERM. Note
+ # that for true Win32 processes, kill(0,$pid) will not return 1.
# ----------------------------------------------------------------------
SIGNAL:
foreach my $sig (15,9)
{
- my $process_left= 0;
- foreach my $srv ( @$spec )
+ my $retries= 10; # 10 seconds
+ kill($sig, keys %mysqld_pids);
+ while ( $retries-- and kill(0, keys %mysqld_pids) )
{
- if ( $srv->{'pid'} and
- ( -f $srv->{'pidfile'} or kill(0,$srv->{'pid'}) ) )
- {
- $process_left++;
- mtr_warning("process $srv->{'pid'} not cooperating, " .
- "will send signal $sig to process");
- kill($sig,$srv->{'pid'}); # SIG_TERM
- }
- if ( ! $process_left )
- {
- last SIGNAL;
- }
+ mtr_debug("Sleep 1 second waiting for processes to die");
+ sleep(1) # Wait one second
}
- mtr_debug("Sleep for 5 seconds waiting for processes to die");
- sleep(5); # We wait longer than usual
}
# ----------------------------------------------------------------------
@@ -437,8 +526,8 @@ sub mtr_stop_mysqld_servers ($$) {
foreach my $file ($srv->{'pidfile'}, $srv->{'sockfile'})
{
- unlink($file);
- if ( -e $file )
+ # Know it is dead so should be no race, careful anyway
+ if ( -f $file and ! unlink($file) and -f $file )
{
$errors++;
mtr_warning("couldn't delete $file");
@@ -454,9 +543,147 @@ sub mtr_stop_mysqld_servers ($$) {
}
}
- stop_reap_all();
+ # FIXME We just assume they are all dead, for Cygwin we are not
+ # really sure
+
+}
+
+
+##############################################################################
+#
+# Shut down mysqld servers using "mysqladmin ... shutdown".
+# To speed this up, we start them in parallel and use waitpid() to
+# catch their termination. Note that this doesn't say the servers
+# are terminated, just that 'mysqladmin' is terminated.
+#
+# Note that mysqladmin will ask the server about what PID file it uses,
+# and mysqladmin will wait for it to be removed before it terminates
+# (unless passes timeout).
+#
+# This function will take at most about 20 seconds, and we still are not
+# sure we killed them all. If none is responding to ping, we return 1,
+# else we return 0.
+#
+##############################################################################
+
+sub mtr_mysqladmin_shutdown () {
+ my $spec= shift;
+
+ my @mysql_admin_pids;
+ my @to_kill_specs;
+
+ foreach my $srv ( @$spec )
+ {
+ if ( mtr_ping_mysqld_server($srv->{'port'}, $srv->{'sockfile'}) )
+ {
+ push(@to_kill_specs, $srv);
+ }
+ }
+
+
+ foreach my $srv ( @to_kill_specs )
+ {
+ # FIXME wrong log.....
+ # FIXME, stderr.....
+ # Shutdown time must be high as slave may be in reconnect
+ my $args;
+
+ mtr_init_args(\$args);
+
+ mtr_add_arg($args, "--no-defaults");
+ mtr_add_arg($args, "--user=%s", $::opt_user);
+ mtr_add_arg($args, "--password=");
+ if ( -e $srv->{'sockfile'} )
+ {
+ mtr_add_arg($args, "--socket=%s", $srv->{'sockfile'});
+ }
+ if ( $srv->{'port'} )
+ {
+ mtr_add_arg($args, "--port=%s", $srv->{'port'});
+ }
+ if ( $srv->{'port'} and ! -e $srv->{'sockfile'} )
+ {
+ mtr_add_arg($args, "--protocol=tcp"); # Needed if no --socket
+ }
+ mtr_add_arg($args, "--connect_timeout=5");
+ mtr_add_arg($args, "--shutdown_timeout=20");
+ mtr_add_arg($args, "shutdown");
+ # We don't wait for termination of mysqladmin
+ my $pid= mtr_spawn($::exe_mysqladmin, $args,
+ "", $::path_manager_log, $::path_manager_log, "");
+ push(@mysql_admin_pids, $pid);
+ }
+
+ # We wait blocking, we wait for the last one anyway
+ foreach my $pid (@mysql_admin_pids)
+ {
+ waitpid($pid,0); # FIXME no need to check -1 or 0?
+ }
- # FIXME We just assume they are all dead, we don't know....
+ # If we trusted "mysqladmin --shutdown_timeout= ..." we could just
+ # terminate now, but we don't (FIXME should be debugged).
+ # So we try again to ping and at least wait the same amount of time
+ # mysqladmin would for all to die.
+
+ my $timeout= 20; # 20 seconds max
+ my $res= 1; # If we just fall through, we are done
+
+ TIME:
+ while ( $timeout-- )
+ {
+ foreach my $srv ( @to_kill_specs )
+ {
+ $res= 1; # We are optimistic
+ if ( mtr_ping_mysqld_server($srv->{'port'}, $srv->{'sockfile'}) )
+ {
+ mtr_debug("Sleep 1 second waiting for processes to stop using port");
+ sleep(1); # One second
+ $res= 0;
+ next TIME;
+ }
+ }
+ last; # If we got here, we are done
+ }
+
+ return $res;
+}
+
+##############################################################################
+#
+# The operating system will keep information about dead children,
+# we read this information here, and if we have records the process
+# is alive, we mark it as dead.
+#
+##############################################################################
+
+sub mtr_record_dead_children () {
+
+ my $ret_pid;
+
+ # FIXME the man page says to wait for -1 to terminate,
+ # but on OS X we get '0' all the time...
+ while ( ($ret_pid= waitpid(-1,&WNOHANG)) > 0 )
+ {
+ mtr_debug("waitpid() catched exit of child $ret_pid");
+ foreach my $idx (0..1)
+ {
+ if ( $::master->[$idx]->{'pid'} eq $ret_pid )
+ {
+ mtr_debug("child $ret_pid was master[$idx]");
+ $::master->[$idx]->{'pid'}= 0;
+ }
+ }
+
+ foreach my $idx (0..2)
+ {
+ if ( $::slave->[$idx]->{'pid'} eq $ret_pid )
+ {
+ mtr_debug("child $ret_pid was slave[$idx]");
+ $::slave->[$idx]->{'pid'}= 0;
+ last;
+ }
+ }
+ }
}
sub start_reap_all {
@@ -467,6 +694,32 @@ sub stop_reap_all {
$SIG{CHLD}= 'DEFAULT';
}
+sub mtr_ping_mysqld_server () {
+ my $port= shift;
+
+ my $remote= "localhost";
+ my $iaddr= inet_aton($remote);
+ if ( ! $iaddr )
+ {
+ mtr_error("can't find IP number for $remote");
+ }
+ my $paddr= sockaddr_in($port, $iaddr);
+ my $proto= getprotobyname('tcp');
+ if ( ! socket(SOCK, PF_INET, SOCK_STREAM, $proto) )
+ {
+ mtr_error("can't create socket: $!");
+ }
+ if ( connect(SOCK, $paddr) )
+ {
+ close(SOCK); # FIXME check error?
+ return 1;
+ }
+ else
+ {
+ return 0;
+ }
+}
+
##############################################################################
#
# Wait for a file to be created
@@ -474,33 +727,38 @@ sub stop_reap_all {
##############################################################################
-sub sleep_until_file_created ($$) {
+sub sleep_until_file_created ($$$) {
my $pidfile= shift;
my $timeout= shift;
+ my $pid= shift;
- my $loop= $timeout;
- while ( $loop-- )
+ for ( my $loop= 1; $loop <= $timeout; $loop++ )
{
if ( -r $pidfile )
{
- return;
+ return 1;
}
- mtr_debug("Sleep for 1 second waiting for creation of $pidfile");
- if ( $loop % 20 == 1 )
+ # Check if it died after the fork() was successful
+ if ( waitpid($pid,&WNOHANG) == $pid )
{
- mtr_warning("Waiting for $pidfile to be created, still trying for $loop seconds...");
+ return 0;
+ }
+
+ mtr_debug("Sleep 1 second waiting for creation of $pidfile");
+
+ if ( $loop % 60 == 0 )
+ {
+ my $left= $timeout - $loop;
+ mtr_warning("Waited $loop seconds for $pidfile to be created, " .
+ "still waiting for $left seconds...");
}
sleep(1);
}
- if ( ! -r $pidfile )
- {
- mtr_error("No $pidfile was created");
- }
+ return 0;
}
-
1;