Make PerlIO marginally reentrant

Currently if an operation on a file handle is interrupted, and if the signal handler accesses that same file handle (e.g. closes it), then perl will crash. See [perl #75556]. This commit provides some basic infrastructure to avoid segfaults. Basically it adds a lock count field to each handle (by re-purposing the unused flags field in the PL_perlio array), then each time a signal handler is called, the count is incremented. Then various parts of PerlIO use a positive count to change behaviour. Most importantly, when layers are popped, the PerlIOl structure is cleared, but not freed, and is left in the chain of layers. This means that callers still holding pointers to the various layers won't access freed structures. It does however mean that PerlIOl structs may be leaked, and possibly slots in PL_perlio. But this is better than crashing. Not much has been done to give sensible behaviour on re-entrancy; for example, a buffer that has already been written once might get written again. Fixing this sort of thing would require a large-scale audit of perlio.c.
author: David Mitchell <davem@iabyn.com> 2010-11-22 19:18:49 +0000
committer: David Mitchell <davem@iabyn.com> 2010-11-26 16:01:34 +0000
commit: abf9167d3fff002ddaed53abb44d638387bca978 (patch)
tree: f81a5b1c720adff647b3866f0a7b7890b444534b
parent: cc6623a84b782d30463b9046c2916f35064a7e3f (diff)
download: perl-abf9167d3fff002ddaed53abb44d638387bca978.tar.gz
6 files changed, 218 insertions, 16 deletions
diff --git a/MANIFEST b/MANIFEST
index 8a72a8ad67..63bf4297a9 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -4481,6 +4481,7 @@ t/io/crlf.t			See if :crlf works
 t/io/crlf_through.t		See if pipe passes data intact with :crlf
 t/io/defout.t			See if PL_defoutgv works
 t/io/dup.t			See if >& works right
+t/io/eintr.t			See if code called during EINTR is safe
 t/io/errnosig.t			Test case for restoration $! when leaving signal handlers
 t/io/errno.t			See if $! is correctly set
 t/io/fflush.t			See if auto-flush on fork/exec/system/qx works
diff --git a/perl.h b/perl.h
index 1be51b2253..4f66da2cc3 100644
--- a/perl.h
+++ b/perl.h
@@ -8,7 +8,6 @@
  *
  */
 
-
 #ifndef H_PERL
 #define H_PERL 1
 
diff --git a/perlio.c b/perlio.c
index 663715ad60..cd584480cc 100644
--- a/perlio.c
+++ b/perlio.c
@@ -70,6 +70,8 @@
 int mkstemp(char*);
 #endif
 
+#define PerlIO_lockcnt(f) (((PerlIOl*)(f))->head->flags)
+
 /* Call the callback or PerlIOBase, and return failure. */
 #define Perl_PerlIO_or_Base(f, callback, base, failure, args) 	\
 	if (PerlIOValid(f)) {					\
@@ -583,7 +585,7 @@ PerlIO_allocate(pTHX)
 	last = (PerlIOl **) (f);
 	for (i = 1; i < PERLIO_TABLE_SIZE; i++) {
 	    if (!((++f)->next)) {
-		f->flags = 0;
+		f->flags = 0; /* lockcnt */
 		f->tab = NULL;
 		f->head = f;
 		return (PerlIO *)f;
@@ -595,7 +597,7 @@ PerlIO_allocate(pTHX)
 	return NULL;
     }
     *last = (PerlIOl*) f++;
-    f->flags = 0;
+    f->flags = 0; /* lockcnt */
     f->tab = NULL;
     f->head = f;
     return (PerlIO*) f;
@@ -782,8 +784,16 @@ PerlIO_pop(pTHX_ PerlIO *f)
 	    if ((*l->tab->Popped) (aTHX_ f) != 0)
 		return;
 	}
-	*f = l->next;
-	Safefree(l);
+	if (PerlIO_lockcnt(f)) {
+	    /* we're in use; defer freeing the structure */
+	    PerlIOBase(f)->flags = PERLIO_F_CLEARED;
+	    PerlIOBase(f)->tab = NULL;
+	}
+	else {
+	    *f = l->next;
+	    Safefree(l);
+	}
+
     }
 }
 
@@ -1488,6 +1498,9 @@ Perl_PerlIO_close(pTHX_ PerlIO *f)
     const int code = PerlIO__close(aTHX_ f);
     while (PerlIOValid(f)) {
 	PerlIO_pop(aTHX_ f);
+	if (PerlIO_lockcnt(f))
+	    /* we're in use; the 'pop' deferred freeing the structure */
+	    f = PerlIONext(f);
     }
     return code;
 }
@@ -2518,6 +2531,38 @@ typedef struct {
     int oflags;                 /* open/fcntl flags */
 } PerlIOUnix;
 
+static void
+S_lockcnt_dec(pTHX_ const void* f)
+{
+    PerlIO_lockcnt((PerlIO*)f)--;
+}
+
+
+/* call the signal handler, and if that handler happens to clear
+ * this handle, free what we can and return true */
+
+static bool
+S_perlio_async_run(pTHX_ PerlIO* f) {
+    ENTER;
+    SAVEDESTRUCTOR_X(S_lockcnt_dec, (void*)f);
+    PerlIO_lockcnt(f)++;
+    PERL_ASYNC_CHECK();
+    if ( !(PerlIOBase(f)->flags & PERLIO_F_CLEARED) )
+	return 0;
+    /* we've just run some perl-level code that could have done
+     * anything, including closing the file or clearing this layer.
+     * If so, free any lower layers that have already been
+     * cleared, then return an error. */
+    while (PerlIOValid(f) &&
+	    (PerlIOBase(f)->flags & PERLIO_F_CLEARED))
+    {
+	const PerlIOl *l = *f;
+	*f = l->next;
+	Safefree(l);
+    }
+    return 1;
+}
+
 int
 PerlIOUnix_oflags(const char *mode)
 {
@@ -2721,7 +2766,10 @@ SSize_t
 PerlIOUnix_read(pTHX_ PerlIO *f, void *vbuf, Size_t count)
 {
     dVAR;
-    const int fd = PerlIOSelf(f, PerlIOUnix)->fd;
+    int fd;
+    if (PerlIO_lockcnt(f)) /* in use: abort ungracefully */
+	return -1;
+    fd = PerlIOSelf(f, PerlIOUnix)->fd;
 #ifdef PERLIO_STD_SPECIAL
     if (fd == 0)
         return PERLIO_STD_IN(fd, vbuf, count);
@@ -2744,7 +2792,9 @@ PerlIOUnix_read(pTHX_ PerlIO *f, void *vbuf, Size_t count)
 	    }
 	    return len;
 	}
-	PERL_ASYNC_CHECK();
+	/* EINTR */
+	if (PL_sig_pending && S_perlio_async_run(aTHX_ f))
+	    return -1;
     }
     /*NOTREACHED*/
 }
@@ -2753,7 +2803,10 @@ SSize_t
 PerlIOUnix_write(pTHX_ PerlIO *f, const void *vbuf, Size_t count)
 {
     dVAR;
-    const int fd = PerlIOSelf(f, PerlIOUnix)->fd;
+    int fd;
+    if (PerlIO_lockcnt(f)) /* in use: abort ungracefully */
+	return -1;
+    fd = PerlIOSelf(f, PerlIOUnix)->fd;
 #ifdef PERLIO_STD_SPECIAL
     if (fd == 1 || fd == 2)
         return PERLIO_STD_OUT(fd, vbuf, count);
@@ -2768,7 +2821,9 @@ PerlIOUnix_write(pTHX_ PerlIO *f, const void *vbuf, Size_t count)
 	    }
 	    return len;
 	}
-	PERL_ASYNC_CHECK();
+	/* EINTR */
+	if (PL_sig_pending && S_perlio_async_run(aTHX_ f))
+	    return -1;
     }
     /*NOTREACHED*/
 }
@@ -2803,7 +2858,9 @@ PerlIOUnix_close(pTHX_ PerlIO *f)
 	    code = -1;
 	    break;
 	}
-	PERL_ASYNC_CHECK();
+	/* EINTR */
+	if (PL_sig_pending && S_perlio_async_run(aTHX_ f))
+	    return -1;
     }
     if (code == 0) {
 	PerlIOBase(f)->flags &= ~PERLIO_F_OPEN;
@@ -3276,8 +3333,11 @@ SSize_t
 PerlIOStdio_read(pTHX_ PerlIO *f, void *vbuf, Size_t count)
 {
     dVAR;
-    FILE * const s = PerlIOSelf(f, PerlIOStdio)->stdio;
+    FILE * s;
     SSize_t got = 0;
+    if (PerlIO_lockcnt(f)) /* in use: abort ungracefully */
+	return -1;
+    s = PerlIOSelf(f, PerlIOStdio)->stdio;
     for (;;) {
 	if (count == 1) {
 	    STDCHAR *buf = (STDCHAR *) vbuf;
@@ -3297,7 +3357,8 @@ PerlIOStdio_read(pTHX_ PerlIO *f, void *vbuf, Size_t count)
 	    got = -1;
 	if (got >= 0 || errno != EINTR)
 	    break;
-	PERL_ASYNC_CHECK();
+	if (PL_sig_pending && S_perlio_async_run(aTHX_ f))
+	    return -1;
 	SETERRNO(0,0);	/* just in case */
     }
     return got;
@@ -3366,12 +3427,15 @@ PerlIOStdio_write(pTHX_ PerlIO *f, const void *vbuf, Size_t count)
 {
     dVAR;
     SSize_t got;
+    if (PerlIO_lockcnt(f)) /* in use: abort ungracefully */
+	return -1;
     for (;;) {
 	got = PerlSIO_fwrite(vbuf, 1, count,
 			      PerlIOSelf(f, PerlIOStdio)->stdio);
 	if (got >= 0 || errno != EINTR)
 	    break;
-	PERL_ASYNC_CHECK();
+	if (PL_sig_pending && S_perlio_async_run(aTHX_ f))
+	    return -1;
 	SETERRNO(0,0);	/* just in case */
     }
     return got;
@@ -3533,9 +3597,12 @@ PerlIOStdio_set_ptrcnt(pTHX_ PerlIO *f, STDCHAR * ptr, SSize_t cnt)
 IV
 PerlIOStdio_fill(pTHX_ PerlIO *f)
 {
-    FILE * const stdio = PerlIOSelf(f, PerlIOStdio)->stdio;
+    FILE * stdio;
     int c;
     PERL_UNUSED_CONTEXT;
+    if (PerlIO_lockcnt(f)) /* in use: abort ungracefully */
+	return -1;
+    stdio = PerlIOSelf(f, PerlIOStdio)->stdio;
 
     /*
      * fflush()ing read-only streams can cause trouble on some stdio-s
@@ -3550,7 +3617,8 @@ PerlIOStdio_fill(pTHX_ PerlIO *f)
 	    break;
 	if (! PerlSIO_ferror(stdio) || errno != EINTR)
 	    return EOF;
-	PERL_ASYNC_CHECK();
+	if (PL_sig_pending && S_perlio_async_run(aTHX_ f))
+	    return -1;
 	SETERRNO(0,0);
     }
 
@@ -4082,7 +4150,8 @@ PerlIOBuf_write(pTHX_ PerlIO *f, const void *vbuf, Size_t count)
 		PerlIO_flush(f);
 	}
 	if (b->ptr >= (b->buf + b->bufsiz))
-	    PerlIO_flush(f);
+	    if (PerlIO_flush(f) == -1)
+		return -1;
     }
     if (PerlIOBase(f)->flags & PERLIO_F_UNBUF)
 	PerlIO_flush(f);
diff --git a/perliol.h b/perliol.h
index 744ffc8870..019fa8ca04 100644
--- a/perliol.h
+++ b/perliol.h
@@ -90,6 +90,7 @@ struct _PerlIO {
 #define PERLIO_F_FASTGETS	0x00400000
 #define PERLIO_F_TTY		0x00800000
 #define PERLIO_F_NOTREG         0x01000000   
+#define PERLIO_F_CLEARED        0x02000000 /* layer cleared but not freed */
 
 #define PerlIOBase(f)      (*(f))
 #define PerlIOSelf(f,type) ((type *)PerlIOBase(f))
diff --git a/pod/perlipc.pod b/pod/perlipc.pod
index 8aa500584c..5e9f408a62 100644
--- a/pod/perlipc.pod
+++ b/pod/perlipc.pod
@@ -408,6 +408,12 @@ operation.)
 The default in Perl 5.7.3 and later is to automatically use
 the C<:perlio> layer.
 
+Note that it is not advisable to access a file handle within a signal
+handler where that signal has interrupted an I/O operation on that same
+handle. While perl will at least try hard not to crash, there are no
+guarantees of data integrity; for example, some data might get dropped or
+written twice.
+
 Some networking library functions like gethostbyname() are known to have
 their own implementations of timeouts which may conflict with your
 timeouts.  If you have problems with such functions, try using the POSIX
diff --git a/t/io/eintr.t b/t/io/eintr.t
new file mode 100644
index 0000000000..3b6b0a4368
--- /dev/null
+++ b/t/io/eintr.t
@@ -0,0 +1,126 @@
+#!./perl
+
+# If a read or write is interrupted by a signal, Perl will call the
+# signal handler and then attempt to restart the call. If the handler does
+# something nasty like close the handle or pop layers, make sure that the
+# read/write handles this gracefully (for some definition of 'graceful':
+# principally, don't segfault).
+
+BEGIN {
+    chdir 't' if -d 't';
+    @INC = '../lib';
+}
+
+use warnings;
+use strict;
+use Config;
+
+require './test.pl';
+
+my $piped;
+eval {
+	pipe my $in, my $out;
+	$piped = 1;
+};
+if (!$piped) {
+	skip_all('pipe not implemented');
+	exit 0;
+}
+unless (exists  $Config{'d_alarm'}) {
+	skip_all('alarm not implemented');
+	exit 0;
+}
+
+# XXX for some reason the stdio layer doesn't seem to interrupt
+# write system call when the alarm triggers.  This makes the tests
+# hang.
+
+if (exists $ENV{PERLIO} && $ENV{PERLIO} =~ /stdio/  ) {
+	skip_all('stdio not supported for this script');
+	exit 0;
+}
+
+my ($in, $out, $st, $sigst, $buf);
+
+plan(tests => 10);
+
+
+# make two handles that will always block
+
+sub fresh_io {
+	undef $in; undef $out; # use fresh handles each time
+	pipe $in, $out;
+	$sigst = "";
+}
+
+$SIG{PIPE} = 'IGNORE';
+
+# close during read
+
+fresh_io;
+$SIG{ALRM} = sub { $sigst = close($in) ? "ok" : "nok" };
+alarm(1);
+$st = read($in, $buf, 1);
+alarm(0);
+is($sigst, 'ok', 'read/close: sig handler close status');
+ok(!$st, 'read/close: read status');
+ok(!close($in), 'read/close: close status');
+
+# die during read
+
+fresh_io;
+$SIG{ALRM} = sub { die };
+alarm(1);
+$st = eval { read($in, $buf, 1) };
+alarm(0);
+ok(!$st, 'read/die: read status');
+ok(close($in), 'read/die: close status');
+
+# close during print
+
+fresh_io;
+$SIG{ALRM} = sub { $sigst = close($out) ? "ok" : "nok" };
+$buf = "a" x 1_000_000 . "\n"; # bigger than any pipe buffer hopefully
+select $out; $| = 1; select STDOUT;
+alarm(1);
+$st = print $out $buf;
+alarm(0);
+is($sigst, 'nok', 'print/close: sig handler close status');
+ok(!$st, 'print/close: print status');
+ok(!close($out), 'print/close: close status');
+
+# die during print
+
+fresh_io;
+$SIG{ALRM} = sub { die };
+$buf = "a" x 1_000_000 . "\n"; # bigger than any pipe buffer hopefully
+select $out; $| = 1; select STDOUT;
+alarm(1);
+$st = eval { print $out $buf };
+alarm(0);
+ok(!$st, 'print/die: print status');
+# the close will hang since there's data to flush, so use alarm
+alarm(1);
+ok(!eval {close($out)}, 'print/die: close status');
+alarm(0);
+
+# close during close
+
+# Apparently there's nothing in standard Linux that can cause an
+# EINTR in close(2); but run the code below just in case it does on some
+# platform, just to see if it segfaults.
+fresh_io;
+$SIG{ALRM} = sub { $sigst = close($in) ? "ok" : "nok" };
+alarm(1);
+close $in;
+alarm(0);
+
+# die during close
+
+fresh_io;
+$SIG{ALRM} = sub { die };
+alarm(1);
+eval { close $in };
+alarm(0);
+
+# vim: ts=4 sts=4 sw=4:
author	David Mitchell <davem@iabyn.com>	2010-11-22 19:18:49 +0000
committer	David Mitchell <davem@iabyn.com>	2010-11-26 16:01:34 +0000
commit	abf9167d3fff002ddaed53abb44d638387bca978 (patch)
tree	f81a5b1c720adff647b3866f0a7b7890b444534b
parent	cc6623a84b782d30463b9046c2916f35064a7e3f (diff)
download	perl-abf9167d3fff002ddaed53abb44d638387bca978.tar.gz