Backport from 5.1

- Add support for detecting version and features from mysqld binary - Autodetect netware - Disable some features not available below 5.0 - Cleanup executable_setup to look for one executable at a time, only look for the ones that are needed based on the selected testcases and settings mysql-test/lib/mtr_cases.pl: Backport from 5.1 mysql-test/lib/mtr_io.pl: Backport from 5.1 Add new function mtr_appendfile_to_file mysql-test/lib/mtr_misc.pl: Backport from 5.1 mysql-test/lib/mtr_process.pl: Backport from 5.1 mysql-test/lib/mtr_report.pl: Backport from 5.1 mysql-test/mysql-test-run.pl: Add support for detecting version and features from mysqld binary Autodetect netware Disable some features not available below 5.0 Cleanup executable_setup to look for one executable at a time, only look for the ones that are needed based on the selected testcases and settings mysql-test/r/mysqltest.result: Update result mysql-test/lib/mtr_im.pl: New BitKeeper file ``mysql-test/lib/mtr_im.pl'' mysql-test/lib/mtr_stress.pl: New BitKeeper file ``mysql-test/lib/mtr_stress.pl''
author: unknown <msvensson@shellback.(none)> 2006-10-04 12:47:32 +0200
committer: unknown <msvensson@shellback.(none)> 2006-10-04 12:47:32 +0200
commit: dccc6b10c34cc111e6a587642b4e7eb93bd3b6a1 (patch)
tree: ae15dc31f5918c502b4313aa7d8cd7f32ce8cca3 /mysql-test/lib/mtr_process.pl
parent: 99cfe38ae7d6da2dca8a3b377254d0bfba8b6ac4 (diff)
download: mariadb-git-dccc6b10c34cc111e6a587642b4e7eb93bd3b6a1.tar.gz
1 files changed, 452 insertions, 223 deletions
diff --git a/mysql-test/lib/mtr_process.pl b/mysql-test/lib/mtr_process.pl
index 662b70a4fee..5e21248790e 100644
--- a/mysql-test/lib/mtr_process.pl
+++ b/mysql-test/lib/mtr_process.pl
@@ -4,7 +4,6 @@
 # and is part of the translation of the Bourne shell script with the
 # same name.
 
-#use Carp qw(cluck);
 use Socket;
 use Errno;
 use strict;
@@ -14,12 +13,17 @@ use POSIX 'WNOHANG';
 
 sub mtr_run ($$$$$$;$);
 sub mtr_spawn ($$$$$$;$);
-sub mtr_stop_mysqld_servers ($);
+sub mtr_check_stop_servers ($);
 sub mtr_kill_leftovers ();
+sub mtr_wait_blocking ($);
 sub mtr_record_dead_children ();
+sub mtr_ndbmgm_start($$);
+sub mtr_mysqladmin_start($$$);
 sub mtr_exit ($);
 sub sleep_until_file_created ($$$);
 sub mtr_kill_processes ($);
+sub mtr_ping_with_timeout($);
+sub mtr_ping_port ($);
 
 # static in C
 sub spawn_impl ($$$$$$$$);
@@ -31,7 +35,6 @@ sub spawn_impl ($$$$$$$$);
 ##############################################################################
 
 # This function try to mimic the C version used in "netware/mysql_test_run.c"
-# FIXME learn it to handle append mode as well, a "new" flag or a "append"
 
 sub mtr_run ($$$$$$;$) {
   my $path=       shift;
@@ -112,6 +115,9 @@ sub spawn_impl ($$$$$$$$) {
     print STDERR "#### ", "-" x 78, "\n";
   }
 
+  mtr_error("Can't spawn with empty \"path\"") unless defined $path;
+
+
  FORK:
   {
     my $pid= fork();
@@ -144,17 +150,6 @@ sub spawn_impl ($$$$$$$$) {
 
       $SIG{INT}= 'DEFAULT';         # Parent do some stuff, we don't
 
-      if ( $::glob_cygwin_shell and $mode eq 'test' )
-      {
-        # Programs started from mysqltest under Cygwin, are to
-        # execute them within Cygwin. Else simple things in test
-        # files like
-        # --system "echo 1 > file"
-        # will fail.
-        # FIXME not working :-(
-#       $ENV{'COMSPEC'}= "$::glob_cygwin_shell -c";
-      }
-
       my $log_file_open_mode = '>';
 
       if ($spawn_opts and $spawn_opts->{'append_log_file'})
@@ -164,7 +159,15 @@ sub spawn_impl ($$$$$$$$) {
 
       if ( $output )
       {
-        if ( ! open(STDOUT,$log_file_open_mode,$output) )
+	if ( $::glob_win32_perl )
+	{
+	  # Don't redirect stdout on ActiveState perl since this is
+          # just another thread in the same process.
+          # Should be fixed so that the thread that is created with fork
+          # executes the exe in another process and wait's for it to return.
+          # In the meanwhile, we get all the output from mysqld's to screen
+	}
+        elsif ( ! open(STDOUT,$log_file_open_mode,$output) )
         {
           mtr_child_error("can't redirect STDOUT to \"$output\": $!");
         }
@@ -216,8 +219,7 @@ sub spawn_parent_impl {
     {
       # Simple run of command, we wait for it to return
       my $ret_pid= waitpid($pid,0);
-
-      if ( $ret_pid <= 0 )
+      if ( $ret_pid != $pid )
       {
         mtr_error("$path ($pid) got lost somehow");
       }
@@ -245,7 +247,6 @@ sub spawn_parent_impl {
         # Someone terminated, don't know who. Collect
         # status info first before $? is lost,
         # but not $exit_value, this is flagged from
-        # 
 
         my $timer_name= mtr_timer_timeout($::glob_timers, $ret_pid);
         if ( $timer_name )
@@ -272,45 +273,22 @@ sub spawn_parent_impl {
           last;
         }
 
-        # If one of the mysqld processes died, we want to
-        # mark this, and kill the mysqltest process.
-
-        foreach my $idx (0..1)
-        {
-          if ( $::master->[$idx]->{'pid'} eq $ret_pid )
-          {
-            mtr_debug("child $ret_pid was master[$idx], " .
-                      "exit during mysqltest run");
-            $::master->[$idx]->{'pid'}= 0;
-            last;
-          }
-        }
-
-        foreach my $idx (0..2)
-        {
-          if ( $::slave->[$idx]->{'pid'} eq $ret_pid )
-          {
-            mtr_debug("child $ret_pid was slave[$idx], " .
-                      "exit during mysqltest run");
-            $::slave->[$idx]->{'pid'}= 0;
-            last;
-          }
-        }
+        # One of the child processes died, unless this was expected
+	# mysqltest should be killed and test aborted
 
-        mtr_debug("waitpid() catched exit of unknown child $ret_pid, " .
-                  "exit during mysqltest run");
+	check_expected_crash_and_restart($ret_pid);
       }
 
       if ( $ret_pid != $pid )
       {
         # We terminated the waiting because a "mysqld" process died.
         # Kill the mysqltest process.
-
+	mtr_verbose("Kill mysqltest because another process died");
         kill(9,$pid);
 
         $ret_pid= waitpid($pid,0);
 
-        if ( $ret_pid == -1 )
+        if ( $ret_pid != $pid )
         {
           mtr_error("$path ($pid) got lost somehow");
         }
@@ -351,39 +329,101 @@ sub mtr_process_exit_status {
 #
 ##############################################################################
 
-# We just "ping" on the ports, and if we can't do a socket connect
-# we assume the server is dead. So we don't *really* know a server
-# is dead, we just hope that it after letting the listen port go,
-# it is dead enough for us to start a new server.
 
+# Kill all processes(mysqld, ndbd, ndb_mgmd and im) that would conflict with
+# this run
+# Make sure to remove the PID file, if any.
+# kill IM manager first, else it will restart the servers
 sub mtr_kill_leftovers () {
 
-  # First, kill all masters and slaves that would conflict with
-  # this run. Make sure to remove the PID file, if any.
+  mtr_report("Killing Possible Leftover Processes");
+  mtr_debug("mtr_kill_leftovers(): started.");
 
-  my @args;
+  mkpath("$::opt_vardir/log"); # Needed for mysqladmin log
 
-  for ( my $idx; $idx < 2; $idx++ )
+  # Stop or kill Instance Manager and all its children. If we failed to do
+  # that, we can only abort -- there is nothing left to do.
+
+#  mtr_error("Failed to stop Instance Manager.")
+#    unless mtr_im_stop($::instance_manager);
+
+  # Start shutdown of masters and slaves. Don't touch IM-managed mysqld
+  # instances -- they should be stopped by mtr_im_stop().
+
+  mtr_debug("Shutting down mysqld-instances...");
+
+  my @kill_pids;
+  my %admin_pids;
+
+  foreach my $srv (@{$::master}, @{$::slave})
   {
-    push(@args,{
-                pid      => 0,          # We don't know the PID
-                pidfile  => $::master->[$idx]->{'path_mypid'},
-                sockfile => $::master->[$idx]->{'path_mysock'},
-                port     => $::master->[$idx]->{'path_myport'},
-               });
+    mtr_debug("  - mysqld " .
+              "(pid: $srv->{pid}; " .
+              "pid file: '$srv->{path_pid}'; " .
+              "socket: '$srv->{path_sock}'; ".
+              "port: $srv->{port})");
+
+    my $pid= mtr_mysqladmin_start($srv, "shutdown", 70);
+
+    # Save the pid of the mysqladmin process
+    $admin_pids{$pid}= 1;
+
+    push(@kill_pids,{
+		     pid      => $srv->{'pid'},
+		     pidfile  => $srv->{'path_pid'},
+		     sockfile => $srv->{'path_sock'},
+		     port     => $srv->{'port'},
+		    });
+    $srv->{'pid'}= 0; # Assume we are done with it
   }
 
-  for ( my $idx; $idx < 3; $idx++ )
+  if ( ! $::opt_skip_ndbcluster )
   {
-    push(@args,{
-                pid       => 0,         # We don't know the PID
-                pidfile   => $::slave->[$idx]->{'path_mypid'},
-                sockfile  => $::slave->[$idx]->{'path_mysock'},
-                port      => $::slave->[$idx]->{'path_myport'},
-               });
+    # Start shutdown of clusters.
+    mtr_debug("Shutting down cluster...");
+
+    foreach my $cluster (@{$::clusters})
+    {
+      mtr_debug("  - cluster " .
+		"(pid: $cluster->{pid}; " .
+		"pid file: '$cluster->{path_pid})");
+
+      my $pid= mtr_ndbmgm_start($cluster, "shutdown");
+
+      # Save the pid of the ndb_mgm process
+      $admin_pids{$pid}= 1;
+
+      push(@kill_pids,{
+		       pid      => $cluster->{'pid'},
+		       pidfile  => $cluster->{'path_pid'}
+		      });
+
+      $cluster->{'pid'}= 0; # Assume we are done with it
+
+      foreach my $ndbd (@{$cluster->{'ndbds'}})
+      {
+	mtr_debug("    - ndbd " .
+		  "(pid: $ndbd->{pid}; " .
+		  "pid file: '$ndbd->{path_pid})");
+
+	push(@kill_pids,{
+			 pid      => $ndbd->{'pid'},
+			 pidfile  => $ndbd->{'path_pid'},
+			});
+	$ndbd->{'pid'}= 0; # Assume we are done with it
+      }
+    }
   }
 
-  mtr_mysqladmin_shutdown(\@args, 20);
+  # Wait for all the admin processes to complete
+  mtr_wait_blocking(\%admin_pids);
+
+  # If we trusted "mysqladmin --shutdown_timeout= ..." we could just
+  # terminate now, but we don't (FIXME should be debugged).
+  # So we try again to ping and at least wait the same amount of time
+  # mysqladmin would for all to die.
+
+  mtr_ping_with_timeout(\@kill_pids);
 
   # We now have tried to terminate nice. We have waited for the listen
   # port to be free, but can't really tell if the mysqld process died
@@ -401,6 +441,8 @@ sub mtr_kill_leftovers () {
   # FIXME $path_run_dir or something
   my $rundir= "$::opt_vardir/run";
 
+  mtr_debug("Processing PID files in directory '$rundir'...");
+
   if ( -d $rundir )
   {
     opendir(RUNDIR, $rundir)
@@ -414,26 +456,32 @@ sub mtr_kill_leftovers () {
 
       if ( -f $pidfile )
       {
-        my $pid= mtr_get_pid_from_file($pidfile);
+        mtr_debug("Processing PID file: '$pidfile'...");
 
-        # Race, could have been removed between I tested with -f
-        # and the unlink() below, so I better check again with -f
+        my $pid= mtr_get_pid_from_file($pidfile);
 
-        if ( ! unlink($pidfile) and -f $pidfile )
-        {
-          mtr_error("can't remove $pidfile");
-        }
+        mtr_debug("Got pid: $pid from file '$pidfile'");
 
         if ( $::glob_cygwin_perl or kill(0, $pid) )
         {
+          mtr_debug("There is process with pid $pid -- scheduling for kill.");
           push(@pids, $pid);            # We know (cygwin guess) it exists
         }
+        else
+        {
+          mtr_debug("There is no process with pid $pid -- skipping.");
+        }
       }
     }
     closedir(RUNDIR);
 
     if ( @pids )
     {
+      mtr_debug("Killing the following processes with PID files: " .
+                join(' ', @pids) . "...");
+
+      start_reap_all();
+
       if ( $::glob_cygwin_perl )
       {
         # We have no (easy) way of knowing the Cygwin controlling
@@ -447,8 +495,9 @@ sub mtr_kill_leftovers () {
         my $retries= 10;                    # 10 seconds
         do
         {
+          mtr_debug("Sending SIGKILL to pids: " . join(' ', @pids));
           kill(9, @pids);
-          mtr_debug("Sleep 1 second waiting for processes to die");
+          mtr_report("Sleep 1 second waiting for processes to die");
           sleep(1)                      # Wait one second
         } while ( $retries-- and  kill(0, @pids) );
 
@@ -457,56 +506,74 @@ sub mtr_kill_leftovers () {
           mtr_warning("can't kill process(es) " . join(" ", @pids));
         }
       }
+
+      stop_reap_all();
     }
   }
+  else
+  {
+    mtr_debug("Directory for PID files ($rundir) does not exist.");
+  }
 
-  # We may have failed everything, bug we now check again if we have
+  # We may have failed everything, but we now check again if we have
   # the listen ports free to use, and if they are free, just go for it.
 
-  foreach my $srv ( @args )
+  mtr_debug("Checking known mysqld servers...");
+
+  foreach my $srv ( @kill_pids )
   {
-    if ( mtr_ping_mysqld_server($srv->{'port'}, $srv->{'sockfile'}) )
+    if ( defined $srv->{'port'} and mtr_ping_port($srv->{'port'}) )
     {
-      mtr_warning("can't kill old mysqld holding port $srv->{'port'}");
+      mtr_warning("can't kill old process holding port $srv->{'port'}");
     }
   }
-}
 
-##############################################################################
-#
-#  Shut down mysqld servers we have started from this run of this script
-#
-##############################################################################
+  mtr_debug("mtr_kill_leftovers(): finished.");
+}
 
-# To speed things we kill servers in parallel. The argument is a list
-# of 'ports', 'pids', 'pidfiles' and 'socketfiles'.
 
+# Check that all processes in list are killed
+# The argument is a list of 'ports', 'pids', 'pidfiles' and 'socketfiles'
+# for which shutdown has been started. Make sure they all get killed
+# in one way or the other.
+#
 # FIXME On Cygwin, and maybe some other platforms, $srv->{'pid'} and
-# $srv->{'pidfile'} will not be the same PID. We need to try to kill
+# the pid in $srv->{'pidfile'} will not be the same PID. We need to try to kill
 # both I think.
 
-sub mtr_stop_mysqld_servers ($) {
+sub mtr_check_stop_servers ($) {
   my $spec=  shift;
 
-  # ----------------------------------------------------------------------
-  # First try nice normal shutdown using 'mysqladmin'
-  # ----------------------------------------------------------------------
+  # Return if no processes are defined
+  return if ! @$spec;
 
-  # Shutdown time must be high as slave may be in reconnect
-  mtr_mysqladmin_shutdown($spec, 70);
+  #mtr_report("mtr_check_stop_servers");
+
+  mtr_ping_with_timeout(\@$spec);
 
   # ----------------------------------------------------------------------
   # We loop with waitpid() nonblocking to see how many of the ones we
-  # are to kill, actually got killed by mtr_mysqladmin_shutdown().
-  # Note that we don't rely on this, the mysqld server might have stop
+  # are to kill, actually got killed by mysqladmin or ndb_mgm
+  #
+  # Note that we don't rely on this, the mysqld server might have stopped
   # listening to the port, but still be alive. But it is a start.
   # ----------------------------------------------------------------------
 
   foreach my $srv ( @$spec )
   {
-    if ( $srv->{'pid'} and (waitpid($srv->{'pid'},&WNOHANG) == $srv->{'pid'}) )
+    my $ret_pid;
+    if ( $srv->{'pid'} )
     {
-      $srv->{'pid'}= 0;
+      $ret_pid= waitpid($srv->{'pid'},&WNOHANG);
+      if ($ret_pid == $srv->{'pid'})
+      {
+	mtr_verbose("Caught exit of process $ret_pid");
+	$srv->{'pid'}= 0;
+      }
+      else
+      {
+	# mtr_warning("caught exit of unknown child $ret_pid");
+      }
     }
   }
 
@@ -540,13 +607,12 @@ sub mtr_stop_mysqld_servers ($) {
   }
 
   # ----------------------------------------------------------------------
-  # If the processes where started from this script, and we had no PIDS
+  # If all the processes in list already have been killed,
   # then we don't have to do anything.
   # ----------------------------------------------------------------------
 
   if ( ! keys %mysqld_pids )
   {
-    # cluck "This is how we got here!";
     return;
   }
 
@@ -595,139 +661,288 @@ sub mtr_stop_mysqld_servers ($) {
           foreach my $file ($srv->{'pidfile'}, $srv->{'sockfile'})
           {
             # Know it is dead so should be no race, careful anyway
-            if ( -f $file and ! unlink($file) and -f $file )
+            if ( defined $file and -f $file and ! unlink($file) and -f $file )
             {
               $errors++;
               mtr_warning("couldn't delete $file");
             }
           }
+	  $srv->{'pid'}= 0;
         }
       }
     }
     if ( $errors )
     {
-      # We are in trouble, just die....
-      mtr_error("we could not kill or clean up all processes");
+      # There where errors killing processes
+      # do one last attempt to ping the servers
+      # and if they can't be pinged, assume they are dead
+      if ( ! mtr_ping_with_timeout( \@$spec ) )
+      {
+	mtr_error("we could not kill or clean up all processes");
+      }
+      else
+      {
+	mtr_verbose("All ports were free, continuing");
+      }
     }
   }
 
   # FIXME We just assume they are all dead, for Cygwin we are not
   # really sure
-    
+
 }
 
+# Wait for all the process in the list to terminate
+sub mtr_wait_blocking($) {
+  my $admin_pids= shift;
 
-##############################################################################
-#
-#  Shut down mysqld servers using "mysqladmin ... shutdown".
-#  To speed this up, we start them in parallel and use waitpid() to
-#  catch their termination. Note that this doesn't say the servers
-#  are terminated, just that 'mysqladmin' is terminated.
-#
-#  Note that mysqladmin will ask the server about what PID file it uses,
-#  and mysqladmin will wait for it to be removed before it terminates
-#  (unless passes timeout).
-#
-#  This function will take at most about 20 seconds, and we still are not
-#  sure we killed them all. If none is responding to ping, we return 1,
-#  else we return 0.
-#
-##############################################################################
 
-sub mtr_mysqladmin_shutdown {
-  my $spec= shift;
+  # Return if no processes defined
+  return if ! %$admin_pids;
+
+  mtr_verbose("mtr_wait_blocking");
+
+  # Wait for all the started processes to exit
+  # As mysqladmin is such a simple program, we trust it to terminate itself.
+  # I.e. we wait blocking, and wait for them all before we go on.
+  foreach my $pid (keys %{$admin_pids})
+  {
+    my $ret_pid= waitpid($pid,0);
+
+  }
+}
+
+# Start "mysqladmin shutdown" for a specific mysqld
+sub mtr_mysqladmin_start($$$) {
+  my $srv= shift;
+  my $command= shift;
   my $adm_shutdown_tmo= shift;
 
-  my %mysql_admin_pids;
-  my @to_kill_specs;
+  my $args;
+  mtr_init_args(\$args);
 
-  foreach my $srv ( @$spec )
+  mtr_add_arg($args, "--no-defaults");
+  mtr_add_arg($args, "--user=%s", $::opt_user);
+  mtr_add_arg($args, "--password=");
+  mtr_add_arg($args, "--silent");
+  if ( -e $srv->{'path_sock'} )
+  {
+    mtr_add_arg($args, "--socket=%s", $srv->{'path_sock'});
+  }
+  if ( $srv->{'port'} )
   {
-    if ( mtr_ping_mysqld_server($srv->{'port'}, $srv->{'sockfile'}) )
+    mtr_add_arg($args, "--port=%s", $srv->{'port'});
+  }
+  if ( $srv->{'port'} and ! -e $srv->{'path_sock'} )
+  {
+    mtr_add_arg($args, "--protocol=tcp"); # Needed if no --socket
+  }
+  mtr_add_arg($args, "--connect_timeout=5");
+
+  # Shutdown time must be high as slave may be in reconnect
+  mtr_add_arg($args, "--shutdown_timeout=$adm_shutdown_tmo");
+  mtr_add_arg($args, "$command");
+  my $path_mysqladmin_log= "$::opt_vardir/log/mysqladmin.log";
+  my $pid= mtr_spawn($::exe_mysqladmin, $args,
+		     "", $path_mysqladmin_log, $path_mysqladmin_log, "",
+		     { append_log_file => 1 });
+  mtr_verbose("mtr_mysqladmin_start, pid: $pid");
+  return $pid;
+
+}
+
+# Start "ndb_mgm shutdown" for a specific cluster, it will
+# shutdown all data nodes and leave the ndb_mgmd running
+sub mtr_ndbmgm_start($$) {
+  my $cluster= shift;
+  my $command= shift;
+
+  my $args;
+
+  mtr_init_args(\$args);
+
+  mtr_add_arg($args, "--no-defaults");
+  mtr_add_arg($args, "--core");
+  mtr_add_arg($args, "--try-reconnect=1");
+  mtr_add_arg($args, "--ndb_connectstring=%s", $cluster->{'connect_string'});
+  mtr_add_arg($args, "-e");
+  mtr_add_arg($args, "$command");
+
+  my $pid= mtr_spawn($::exe_ndb_mgm, $args,
+		     "", "/dev/null", "/dev/null", "",
+		     {});
+  mtr_verbose("mtr_ndbmgm_start, pid: $pid");
+  return $pid;
+
+}
+
+
+# Ping all servers in list, exit when none of them answers
+# or when timeout has passed
+sub mtr_ping_with_timeout($) {
+  my $spec= shift;
+  my $timeout= 200;                     # 20 seconds max
+  my $res= 1;                           # If we just fall through, we are done
+                                        # in the sense that the servers don't
+                                        # listen to their ports any longer
+
+  mtr_debug("Waiting for mysqld servers to stop...");
+
+ TIME:
+  while ( $timeout-- )
+  {
+    foreach my $srv ( @$spec )
     {
-      push(@to_kill_specs, $srv);
+      $res= 1;                          # We are optimistic
+      if ( $srv->{'pid'} and defined $srv->{'port'} )
+      {
+	if ( mtr_ping_port($srv->{'port'}) )
+	{
+	  mtr_verbose("waiting for process $srv->{'pid'} to stop ".
+		      "using port $srv->{'port'}");
+
+	  # Millisceond sleep emulated with select
+	  select(undef, undef, undef, (0.1));
+	  $res= 0;
+	  next TIME;
+	}
+	else
+	{
+	  # Process was not using port
+	}
+      }
     }
+    last;                               # If we got here, we are done
   }
 
-
-  foreach my $srv ( @to_kill_specs )
+  if ($res)
+  {
+    mtr_debug("mtr_ping_with_timeout(): All mysqld instances are down.");
+  }
+  else
   {
-    # FIXME wrong log.....
-    # FIXME, stderr.....
-    # Shutdown time must be high as slave may be in reconnect
-    my $args;
+    mtr_report("mtr_ping_with_timeout(): At least one server is alive.");
+  }
 
-    mtr_init_args(\$args);
+  return $res;
+}
+
+
+#
+# Loop through our list of processes and look for and entry
+# with the provided pid
+# Set the pid of that process to 0 if found
+#
+sub mark_process_dead($)
+{
+  my $ret_pid= shift;
 
-    mtr_add_arg($args, "--no-defaults");
-    mtr_add_arg($args, "--user=%s", $::opt_user);
-    mtr_add_arg($args, "--password=");
-    if ( -e $srv->{'sockfile'} )
+  foreach my $mysqld (@{$::master}, @{$::slave})
+  {
+    if ( $mysqld->{'pid'} eq $ret_pid )
     {
-      mtr_add_arg($args, "--socket=%s", $srv->{'sockfile'});
+      mtr_verbose("$mysqld->{'type'} $mysqld->{'idx'} exited, pid: $ret_pid");
+      $mysqld->{'pid'}= 0;
+      return;
     }
-    if ( $srv->{'port'} )
+  }
+
+  foreach my $cluster (@{$::clusters})
+  {
+    if ( $cluster->{'pid'} eq $ret_pid )
     {
-      mtr_add_arg($args, "--port=%s", $srv->{'port'});
+      mtr_verbose("$cluster->{'name'} cluster ndb_mgmd exited, pid: $ret_pid");
+      $cluster->{'pid'}= 0;
+      return;
     }
-    if ( $srv->{'port'} and ! -e $srv->{'sockfile'} )
+
+    foreach my $ndbd (@{$cluster->{'ndbds'}})
     {
-      mtr_add_arg($args, "--protocol=tcp"); # Needed if no --socket
+      if ( $ndbd->{'pid'} eq $ret_pid )
+      {
+	mtr_verbose("$cluster->{'name'} cluster ndbd exited, pid: $ret_pid");
+	$ndbd->{'pid'}= 0;
+	return;
+      }
     }
-    mtr_add_arg($args, "--connect_timeout=5");
-    mtr_add_arg($args, "--shutdown_timeout=$adm_shutdown_tmo");
-    mtr_add_arg($args, "shutdown");
-    # We don't wait for termination of mysqladmin
-    my $pid= mtr_spawn($::exe_mysqladmin, $args,
-                       "", $::path_manager_log, $::path_manager_log, "",
-                       { append_log_file => 1 });
-    $mysql_admin_pids{$pid}= 1;
   }
+  mtr_warning("mark_process_dead couldn't find an entry for pid: $ret_pid");
+
+}
+
+#
+# Loop through our list of processes and look for and entry
+# with the provided pid, if found check for the file indicating
+# expected crash and restart it.
+#
+sub check_expected_crash_and_restart($)
+{
+  my $ret_pid= shift;
 
-  # As mysqladmin is such a simple program, we trust it to terminate.
-  # I.e. we wait blocking, and wait wait for them all before we go on.
-  while (keys %mysql_admin_pids)
+  foreach my $mysqld (@{$::master}, @{$::slave})
   {
-    foreach my $pid (keys %mysql_admin_pids)
+    if ( $mysqld->{'pid'} eq $ret_pid )
     {
-      if ( waitpid($pid,0) > 0 )
+      mtr_verbose("$mysqld->{'type'} $mysqld->{'idx'} exited, pid: $ret_pid");
+      $mysqld->{'pid'}= 0;
+
+      # Check if crash expected and restart if it was
+      my $expect_file= "$::opt_vardir/tmp/" . "$mysqld->{'type'}" .
+	"$mysqld->{'idx'}" . ".expect";
+      if ( -f $expect_file )
       {
-        delete $mysql_admin_pids{$pid};
+	mtr_verbose("Crash was expected, file $expect_file exists");
+	mysqld_start($mysqld, $mysqld->{'start_opts'},
+		     $mysqld->{'start_slave_master_info'});
+	unlink($expect_file);
       }
+
+      return;
     }
   }
 
-  # If we trusted "mysqladmin --shutdown_timeout= ..." we could just
-  # terminate now, but we don't (FIXME should be debugged).
-  # So we try again to ping and at least wait the same amount of time
-  # mysqladmin would for all to die.
-
-  my $timeout= 20;                      # 20 seconds max
-  my $res= 1;                           # If we just fall through, we are done
-                                        # in the sense that the servers don't
-                                        # listen to their ports any longer
- TIME:
-  while ( $timeout-- )
+  foreach my $cluster (@{$::clusters})
   {
-    foreach my $srv ( @to_kill_specs )
+    if ( $cluster->{'pid'} eq $ret_pid )
     {
-      $res= 1;                          # We are optimistic
-      if ( mtr_ping_mysqld_server($srv->{'port'}, $srv->{'sockfile'}) )
+      mtr_verbose("$cluster->{'name'} cluster ndb_mgmd exited, pid: $ret_pid");
+      $cluster->{'pid'}= 0;
+
+      # Check if crash expected and restart if it was
+      my $expect_file= "$::opt_vardir/tmp/ndb_mgmd_" . "$cluster->{'type'}" .
+	".expect";
+      if ( -f $expect_file )
       {
-        mtr_debug("Sleep 1 second waiting for processes to stop using port");
-        sleep(1);                       # One second
-        $res= 0;
-        next TIME;
+	mtr_verbose("Crash was expected, file $expect_file exists");
+	ndbmgmd_start($cluster);
+	unlink($expect_file);
       }
+      return;
     }
-    last;                               # If we got here, we are done
-  }
-
-  $timeout or mtr_debug("At least one server is still listening to its port");
 
-  sleep(5) if $::glob_win32;            # FIXME next startup fails if no sleep
+    foreach my $ndbd (@{$cluster->{'ndbds'}})
+    {
+      if ( $ndbd->{'pid'} eq $ret_pid )
+      {
+	mtr_verbose("$cluster->{'name'} cluster ndbd exited, pid: $ret_pid");
+	$ndbd->{'pid'}= 0;
+
+	# Check if crash expected and restart if it was
+	my $expect_file= "$::opt_vardir/tmp/ndbd_" . "$cluster->{'type'}" .
+	  "$ndbd->{'idx'}" . ".expect";
+	if ( -f $expect_file )
+	{
+	  mtr_verbose("Crash was expected, file $expect_file exists");
+	  ndbd_start($cluster, $ndbd->{'idx'},
+		     $ndbd->{'start_extra_args'});
+	  unlink($expect_file);
+	}
+	return;
+      }
+    }
+  }
+  mtr_warning("check_expected_crash_and_restart couldn't find an entry for pid: $ret_pid");
 
-  return $res;
 }
 
 ##############################################################################
@@ -740,32 +955,18 @@ sub mtr_mysqladmin_shutdown {
 
 sub mtr_record_dead_children () {
 
+  my $process_died= 0;
   my $ret_pid;
 
-  # FIXME the man page says to wait for -1 to terminate,
-  # but on OS X we get '0' all the time...
-  while ( ($ret_pid= waitpid(-1,&WNOHANG)) > 0 )
+  # Wait without blockinng to see if any processes had died
+  # -1 or 0 means there are no more procesess to wait for
+  while ( ($ret_pid= waitpid(-1,&WNOHANG)) != 0 and $ret_pid != -1)
   {
-    mtr_debug("waitpid() catched exit of child $ret_pid");
-    foreach my $idx (0..1)
-    {
-      if ( $::master->[$idx]->{'pid'} eq $ret_pid )
-      {
-        mtr_debug("child $ret_pid was master[$idx]");
-        $::master->[$idx]->{'pid'}= 0;
-      }
-    }
-
-    foreach my $idx (0..2)
-    {
-      if ( $::slave->[$idx]->{'pid'} eq $ret_pid )
-      {
-        mtr_debug("child $ret_pid was slave[$idx]");
-        $::slave->[$idx]->{'pid'}= 0;
-        last;
-      }
-    }
+    mtr_warning("mtr_record_dead_children: $ret_pid");
+    mark_process_dead($ret_pid);
+    $process_died= 1;
   }
+  return $process_died;
 }
 
 sub start_reap_all {
@@ -777,16 +978,24 @@ sub start_reap_all {
   # here. If a process terminated before setting $SIG{CHLD} (but after
   # any attempt to waitpid() it), it will still be a zombie. So we
   # have to handle any such process here.
-  while(waitpid(-1, &WNOHANG) > 0) { };
+  my $pid;
+  while(($pid= waitpid(-1, &WNOHANG)) != 0 and $pid != -1)
+  {
+    mtr_warning("start_reap_all pid: $pid");
+    mark_process_dead($pid);
+  };
 }
 
 sub stop_reap_all {
   $SIG{CHLD}= 'DEFAULT';
 }
 
-sub mtr_ping_mysqld_server () {
+
+sub mtr_ping_port ($) {
   my $port= shift;
 
+  mtr_verbose("mtr_ping_port: $port");
+
   my $remote= "localhost";
   my $iaddr=  inet_aton($remote);
   if ( ! $iaddr )
@@ -799,13 +1008,18 @@ sub mtr_ping_mysqld_server () {
   {
     mtr_error("can't create socket: $!");
   }
+
+  mtr_debug("Pinging server (port: $port)...");
+
   if ( connect(SOCK, $paddr) )
   {
     close(SOCK);                        # FIXME check error?
+    mtr_verbose("USED");
     return 1;
   }
   else
   {
+    mtr_verbose("FREE");
     return 0;
   }
 }
@@ -822,30 +1036,36 @@ sub sleep_until_file_created ($$$) {
   my $pidfile= shift;
   my $timeout= shift;
   my $pid=     shift;
+  my $sleeptime= 100; # Milliseconds
+  my $loops= ($timeout * 1000) / $sleeptime;
 
-  for ( my $loop= 1; $loop <= $timeout; $loop++ )
+  for ( my $loop= 1; $loop <= $loops; $loop++ )
   {
     if ( -r $pidfile )
     {
       return $pid;
     }
 
-    # Check if it died after the fork() was successful 
-    if ( waitpid($pid,&WNOHANG) == $pid )
+    # Check if it died after the fork() was successful
+    if ( $pid != 0 && waitpid($pid,&WNOHANG) == $pid )
     {
+      mtr_warning("Process $pid died");
       return 0;
     }
 
-    mtr_debug("Sleep 1 second waiting for creation of $pidfile");
+    mtr_debug("Sleep $sleeptime milliseconds waiting for $pidfile");
 
-    if ( $loop % 60 == 0 )
+    # Print extra message every 60 seconds
+    my $seconds= ($loop * $sleeptime) / 1000;
+    if ( $seconds > 1 and int($seconds) % 60 == 0 )
     {
-      my $left= $timeout - $loop;
-      mtr_warning("Waited $loop seconds for $pidfile to be created, " .
+      my $left= $timeout - $seconds;
+      mtr_warning("Waited $seconds seconds for $pidfile to be created, " .
                   "still waiting for $left seconds...");
     }
 
-    sleep(1);
+    # Millisceond sleep emulated with select
+    select(undef, undef, undef, ($sleeptime/1000));
   }
 
   return 0;
@@ -855,18 +1075,18 @@ sub sleep_until_file_created ($$$) {
 sub mtr_kill_processes ($) {
   my $pids = shift;
 
-  foreach my $sig (15,9)
+  mtr_verbose("mtr_kill_processes " . join(" ", @$pids));
+
+  foreach my $pid (@$pids)
   {
-    my $retries= 20;                    # FIXME 20 seconds, this is silly!
-    kill($sig, @{$pids});
-    while ( $retries-- and  kill(0, @{$pids}) )
+    foreach my $sig (15, 9)
     {
-      mtr_debug("Sleep 1 second waiting for processes to die");
-      sleep(1)                      # Wait one second
+      last if mtr_im_kill_process([ $pid ], $sig, 10, 1);
     }
   }
 }
 
+
 ##############################################################################
 #
 #  When we exit, we kill off all children
@@ -876,7 +1096,7 @@ sub mtr_kill_processes ($) {
 # FIXME something is wrong, we sometimes terminate with "Hangup" written
 # to tty, and no STDERR output telling us why.
 
-# FIXME for some readon, setting HUP to 'IGNORE' will cause exit() to
+# FIXME for some reason, setting HUP to 'IGNORE' will cause exit() to
 # write out "Hangup", and maybe loose some output. We insert a sleep...
 
 sub mtr_exit ($) {
@@ -884,9 +1104,18 @@ sub mtr_exit ($) {
 #  cluck("Called mtr_exit()");
   mtr_timer_stop_all($::glob_timers);
   local $SIG{HUP} = 'IGNORE';
-  kill('HUP', -$$);
-  sleep 2;
+  # ToDo: Signalling -$$ will only work if we are the process group
+  # leader (in fact on QNX it will signal our session group leader,
+  # which might be Do-compile or Pushbuild, causing tests to be
+  # aborted). So we only do it if we are the group leader. We might
+  # set ourselves as the group leader at startup (with
+  # POSIX::setpgrp(0,0)), but then care must be needed to always do
+  # proper child process cleanup.
+  kill('HUP', -$$) if !$::glob_win32_perl and $$ == getpgrp();
+
   exit($code);
 }
 
+###########################################################################
+
 1;
author	unknown <msvensson@shellback.(none)>	2006-10-04 12:47:32 +0200
committer	unknown <msvensson@shellback.(none)>	2006-10-04 12:47:32 +0200
commit	dccc6b10c34cc111e6a587642b4e7eb93bd3b6a1 (patch)
tree	ae15dc31f5918c502b4313aa7d8cd7f32ce8cca3 /mysql-test/lib/mtr_process.pl
parent	99cfe38ae7d6da2dca8a3b377254d0bfba8b6ac4 (diff)
download	mariadb-git-dccc6b10c34cc111e6a587642b4e7eb93bd3b6a1.tar.gz