summary | refs | log | tree | commit | diff
path: root/mysql-test
diff options
context:
space:
mode:
author Aleksey Midenkov <midenok@gmail.com> 2022-07-18 23:16:18 +0300
committer Aleksey Midenkov <midenok@gmail.com> 2022-07-18 23:16:18 +0300
commit 1848804840f5595f982c4cd502ba2112f6dd7911 (patch)
tree bbb99fb47e51fa22650deb1638bbb62efa9bdaa1 /mysql-test
parent 7ca5c7d8f9c70067db36b76da31671fae430cc9d (diff)
download mariadb-git-1848804840f5595f982c4cd502ba2112f6dd7911.tar.gz
MDEV-29023 MTR hangs after multiple failures
Passing $opt_parallel as $childs is wrong: a child can be killed before it connects, and then $childs is never decremented for it. Another problem (and the cause of this bug) is that a child can be killed without ever closing the server socket. This can happen, e.g., after an unmaskable KILL signal. In that case the socket is closed by reaping the child, but that never happens inside the socket-reading loop in run_test_server(). The proper design is a non-blocking (WNOHANG) reap of children inside the socket loop; if there are no more children, we finish the socket loop. Since there is a Windows variation where we don't control the children via waitpid(), all the clients must normally close the socket, and only this can finish the socket loop. For the Unix variation we treat that case as "all children closed the socket but not all of them have died yet", and for that we do a final blocking waitpid() (as was done before the patch as well). To be more complete, we now handle 3 end-of-game scenarios on Unix: 1. all children closed the socket, all children died: everything is handled by the socket loop; 2. all children closed the socket, not all have died yet: we wait for the remaining live children to die after exiting the socket loop; 3. not all children closed the socket, all children died: everything is handled by the socket loop. On Windows there is only one end-of-game scenario: all children close the socket.
Diffstat (limited to 'mysql-test')
-rwxr-xr-x mysql-test/mysql-test-run.pl 47
1 files changed, 39 insertions, 8 deletions
diff --git a/mysql-test/mysql-test-run.pl b/mysql-test/mysql-test-run.pl
index bb044a1388b..6882ad3e7e4 100755
--- a/mysql-test/mysql-test-run.pl
+++ b/mysql-test/mysql-test-run.pl
@@ -78,6 +78,7 @@ use lib "lib";
use Cwd ;
use Cwd 'realpath';
+use POSIX ":sys_wait_h";
use Getopt::Long qw(:config bundling);
use My::File::Path; # Patched version of File::Path
use File::Basename;
@@ -649,7 +650,7 @@ sub main {
mark_time_used('init');
my ($prefix, $fail, $completed, $extra_warnings)=
- run_test_server($server, $tests, $opt_parallel);
+ run_test_server($server, $tests, \%children);
exit(0) if $opt_start_exit;
@@ -664,10 +665,12 @@ sub main {
if ($ret_pid == -1) {
# Child was automatically reaped. Probably not possible
# unless you $SIG{CHLD}= 'IGNORE'
- mtr_report("Child ${pid} was automatically reaped (this should never happend)");
+ mtr_warning("Child ${pid} was automatically reaped (this should never happen)");
} elsif ($ret_pid != $pid) {
confess("Unexpected PID ${ret_pid} instead of expected ${pid}");
}
+ my $exit_status= ($? >> 8);
+ mtr_verbose2("Child ${pid} exited with status ${exit_status}");
delete $children{$ret_pid};
}
}
@@ -727,7 +730,7 @@ sub main {
sub run_test_server ($$$) {
- my ($server, $tests, $childs) = @_;
+ my ($server, $tests, $children) = @_;
my $num_saved_datadir= 0; # Number of datadirs saved in vardir/log/ so far.
my $num_failed_test= 0; # Number of tests failed so far
@@ -742,6 +745,7 @@ sub run_test_server ($$$) {
my $suite_timeout= start_timer(suite_timeout());
my $s= IO::Select->new();
+ my $childs= 0;
$s->add($server);
while (1) {
if ($opt_stop_file)
@@ -755,12 +759,14 @@ sub run_test_server ($$$) {
mark_time_used('admin');
my @ready = $s->can_read(1); # Wake up once every second
+ mtr_debug("Got ". (0 + @ready). " connection(s)");
mark_time_idle();
foreach my $sock (@ready) {
if ($sock == $server) {
# New client connected
+ ++$childs;
my $child= $sock->accept();
- mtr_verbose2("Client connected");
+ mtr_verbose2("Client connected (got ${childs} childs)");
$s->add($child);
print $child "HELLO\n";
}
@@ -768,12 +774,10 @@ sub run_test_server ($$$) {
my $line= <$sock>;
if (!defined $line) {
# Client disconnected
- mtr_verbose2("Child closed socket");
+ --$childs;
+ mtr_verbose2("Child closed socket (left ${childs} childs)");
$s->remove($sock);
$sock->close;
- if (--$childs == 0){
- return ("Completed", $test_failure, $completed, $extra_warnings);
- }
next;
}
chomp($line);
@@ -1016,6 +1020,33 @@ sub run_test_server ($$$) {
}
}
+ if (!IS_WINDOWS) {
+ foreach my $pid (keys %$children)
+ {
+ my $res= waitpid($pid, WNOHANG);
+ if ($res == $pid || $res == -1) {
+ if ($res == -1) {
+ # Child was automatically reaped. Probably not possible
+ # unless you $SIG{CHLD}= 'IGNORE'
+ mtr_warning("Child ${pid} was automatically reaped (this should never happen)");
+ }
+ my $exit_status= ($? >> 8);
+ mtr_verbose2("Child ${pid} exited with status ${exit_status}");
+ delete $children->{$pid};
+ if (!%$children && $childs) {
+ mtr_verbose2("${childs} children didn't close socket before dying!");
+ $childs= 0;
+ }
+ } elsif ($res != 0) {
+ confess("Unexpected result ${res} on waitpid(${pid}, WNOHANG)");
+ }
+ }
+ }
+
+ if ($childs == 0){
+ return ("Completed", $test_failure, $completed, $extra_warnings);
+ }
+
# ----------------------------------------------------
# Check if test suite timer expired
# ----------------------------------------------------