8 files changed, 65 insertions, 57 deletions
diff --git a/perl.c b/perl.c
index 7064e2b9eb..9a577fe4d0 100644
--- a/perl.c
+++ b/perl.c
@@ -788,12 +788,13 @@ perl_free(pTHXx)
 #  if defined(WIN32)
 #  if defined(PERL_IMPLICIT_SYS)
     void *host = w32_internal_host;
-    if (PerlProc_lasthost())
-	PerlIO_cleanup();
+    if (PerlProc_lasthost()) {
+	PerlIO_cleanup();     
+    }
     PerlMem_free(aTHXx);
     win32_delete_internal_host(host);
 #else
-    PerlIO_cleanup();
+    PerlIO_cleanup();     
     PerlMem_free(aTHXx);
 #endif
 #  else
diff --git a/perlio.c b/perlio.c
index 874dece319..3d7f2c18fd 100644
--- a/perlio.c
+++ b/perlio.c
@@ -28,12 +28,6 @@
 #define PERL_IN_PERLIO_C
 #include "perl.h"
 
-#undef PerlMemShared_calloc
-#define PerlMemShared_calloc(x,y) calloc(x,y)
-#undef PerlMemShared_free
-#define PerlMemShared_free(x) free(x)
-
-
 #ifndef PERLIO_LAYERS
 int
 PerlIO_apply_layers(pTHX_ PerlIO *f, const char *mode, const char *names)
diff --git a/pp_sys.c b/pp_sys.c
index 314b8851fd..c167336ef7 100644
--- a/pp_sys.c
+++ b/pp_sys.c
@@ -3724,9 +3724,6 @@ PP(pp_fork)
     if (childpid < 0)
 	RETSETUNDEF;
     if (!childpid) {
-#ifdef SOCKS_64BIT_BUG
-	Perl_do_s64_init_buffer();
-#endif
 	/*SUPPRESS 560*/
 	if ((tmpgv = gv_fetchpv("$", TRUE, SVt_PV)))
 	    sv_setiv(GvSV(tmpgv), (IV)PerlProc_getpid());
diff --git a/t/lib/tie-refhash.t b/t/lib/tie-refhash.t
index a82c19c743..d80b2e10fc 100644
--- a/t/lib/tie-refhash.t
+++ b/t/lib/tie-refhash.t
@@ -1,19 +1,19 @@
 #!/usr/bin/perl -w
-#
+# 
 # Basic test suite for Tie::RefHash and Tie::RefHash::Nestable.
-#
+# 
 # The testing is in two parts: first, run lots of tests on both a tied
 # hash and an ordinary un-tied hash, and check they give the same
 # answer.  Then there are tests for those cases where the tied hashes
 # should behave differently to normal hashes, that is, when using
 # references as keys.
-#
+# 
 
 BEGIN {
     chdir 't' if -d 't';
-    @INC = '.';
+    @INC = '.'; 
     push @INC, '../lib';
-}
+}    
 
 use strict;
 use Tie::RefHash;
@@ -28,7 +28,7 @@ my $ref = []; my $ref1 = [];
 # on a tied hash and on a normal hash, and checking that the results
 # are the same.  This does of course assume that Perl hashes are not
 # buggy :-)
-#
+# 
 my @tests = standard_hash_tests();
 
 my @ordinary_results = runtests(\@tests, undef);
@@ -40,13 +40,13 @@ foreach my $class ('Tie::RefHash', 'Tie::RefHash::Nestable') {
     foreach my $i (0 .. $#ordinary_results) {
         my ($or, $ow, $oe) = @{$ordinary_results[$i]};
         my ($tr, $tw, $te) = @{$tied_results[$i]};
-
+        
         my $ok = 1;
         local $^W = 0;
         $ok = 0 if (defined($or) != defined($tr)) or ($or ne $tr);
         $ok = 0 if (defined($ow) != defined($tw)) or ($ow ne $tw);
         $ok = 0 if (defined($oe) != defined($te)) or ($oe ne $te);
-
+        
         if (not $ok) {
             print STDERR
               "failed for $class: $tests[$i]\n",
@@ -127,7 +127,7 @@ exit();
 
 # Print 'ok X' if true, 'not ok X' if false
 # Uses global $currtest.
-#
+# 
 sub test {
     my $t = shift;
     print 'not ' if not $t;
@@ -135,7 +135,7 @@ sub test {
 }
 
 
-# Wrapper for Data::Dumper to 'dump' a scalar as an EXPR string.
+# Wrapper for Data::Dumper to 'dump' a scalar as an EXPR string. 
 sub dumped {
     my $s = shift;
     my $d = Dumper($s);
@@ -148,7 +148,7 @@ sub dumped {
 # Crudely dump a hash into a canonical string representation (because
 # hash keys can appear in any order, Data::Dumper may give different
 # strings for the same hash).
-#
+# 
 sub dumph {
     my $h = shift;
     my $r = '';
@@ -159,17 +159,17 @@ sub dumph {
 }
 
 # Run the tests and give results.
-#
+# 
 # Parameters: reference to list of tests to run
 #             name of class to use for tied hash, or undef if not tied
-#
+# 
 # Returns: list of [R, W, E] tuples, one for each test.
 # R is the return value from running the test, W any warnings it gave,
 # and E any exception raised with 'die'.  E and W will be tidied up a
 # little to remove irrelevant details like line numbers :-)
-#
+# 
 # Will also run a few of its own 'ok N' tests.
-#
+# 
 sub runtests {
     my ($tests, $class) = @_;
     my @r;
@@ -215,14 +215,14 @@ sub runtests {
 
 # Things that should work just the same for an ordinary hash and a
 # Tie::RefHash.
-#
+# 
 # Each test is a code string to be eval'd, it should do something with
 # %h and give a scalar return value.  The global $ref and $ref1 may
 # also be used.
-#
+# 
 # One thing we don't test is that the ordering from 'keys', 'values'
 # and 'each' is the same.  You can't reasonably expect that.
-#
+# 
 sub standard_hash_tests {
     my @r;
 
@@ -234,12 +234,12 @@ sub standard_hash_tests {
     { my ($k, $v, %tmp); $tmp{"$k$;$v"}++ while (($k, $v) = each %h); dumph(\%tmp) }
 END
   ;
-
+    
     # Tests on the existence of the element 'foo'
     my $FOO_TESTS = <<'END'
     defined $h{foo};
     exists $h{foo};
-    $h{foo};
+    $h{foo};    
 END
   ;
 
@@ -278,7 +278,7 @@ END
   ;
         }
     }
-
+    
     # Test hash slices
     my @slicetests;
     @slicetests = split /\n/, <<'END'
diff --git a/t/op/utf8decode.t b/t/op/utf8decode.t
index c631c0a7a9..ac42b85577 100644
--- a/t/op/utf8decode.t
+++ b/t/op/utf8decode.t
@@ -53,11 +53,11 @@ my @MK = split(/\n/, <<__EOMK__);
 3.1.8 n "�������"	-		7	80:bf:80:bf:80:bf:80	-	unexpected continuation byte 0x80
 3.1.9 n "����������������������������������������������������������������"				-	64	80:81:82:83:84:85:86:87:88:89:8a:8b:8c:8d:8e:8f:90:91:92:93:94:95:96:97:98:99:9a:9b:9c:9d:9e:9f:a0:a1:a2:a3:a4:a5:a6:a7:a8:a9:aa:ab:ac:ad:ae:af:b0:b1:b2:b3:b4:b5:b6:b7:b8:b9:ba:bb:bc:bd:be:bf	-	unexpected continuation byte 0x80
 3.2	Lonely start characters
-3.2.1 n "� � � � � � � � � � � � � � � � � � � � � � � � � � � � � � � � "	-	64 	c0:20:c1:20:c2:20:c3:20:c4:20:c5:20:c6:20:c7:20:c8:20:c9:20:ca:20:cb:20:cc:20:cd:20:ce:20:cf:20:d0:20:d1:20:d2:20:d3:20:d4:20:d5:20:d6:20:d7:20:d8:20:d9:20:da:20:db:20:dc:20:dd:20:de:20:df:20	-	unexpected non-continuation byte 0x20 after byte 0xc0
-3.2.2 n "� � � � � � � � � � � � � � � � "	-	32	e0:20:e1:20:e2:20:e3:20:e4:20:e5:20:e6:20:e7:20:e8:20:e9:20:ea:20:eb:20:ec:20:ed:20:ee:20:ef:20	-	unexpected non-continuation byte 0x20 after byte 0xe0
-3.2.3 n "� � � � � � � � "	-	16	f0:20:f1:20:f2:20:f3:20:f4:20:f5:20:f6:20:f7:20	-	unexpected non-continuation byte 0x20 after byte 0xf0
-3.2.4 n "� � � � "		-	8	f8:20:f9:20:fa:20:fb:20	-	unexpected non-continuation byte 0x20 after byte 0xf8
-3.2.5 n "� � "			-	4	fc:20:fd:20	-	unexpected non-continuation byte 0x20 after byte 0xfc
+3.2.1 n "� � � � � � � � � � � � � � � � � � � � � � � � � � � � � � � � "	-	64 	c0:20:c1:20:c2:20:c3:20:c4:20:c5:20:c6:20:c7:20:c8:20:c9:20:ca:20:cb:20:cc:20:cd:20:ce:20:cf:20:d0:20:d1:20:d2:20:d3:20:d4:20:d5:20:d6:20:d7:20:d8:20:d9:20:da:20:db:20:dc:20:dd:20:de:20:df:20	-	unexpected non-continuation byte 0x20 after start byte 0xc0
+3.2.2 n "� � � � � � � � � � � � � � � � "	-	32	e0:20:e1:20:e2:20:e3:20:e4:20:e5:20:e6:20:e7:20:e8:20:e9:20:ea:20:eb:20:ec:20:ed:20:ee:20:ef:20	-	unexpected non-continuation byte 0x20 after start byte 0xe0
+3.2.3 n "� � � � � � � � "	-	16	f0:20:f1:20:f2:20:f3:20:f4:20:f5:20:f6:20:f7:20	-	unexpected non-continuation byte 0x20 after start byte 0xf0
+3.2.4 n "� � � � "		-	8	f8:20:f9:20:fa:20:fb:20	-	unexpected non-continuation byte 0x20 after start byte 0xf8
+3.2.5 n "� � "			-	4	fc:20:fd:20	-	unexpected non-continuation byte 0x20 after start byte 0xfc
 3.3	Sequences with last continuation byte missing
 3.3.1 n "�"			-	1	c0	-	1 byte, need 2
 3.3.2 n "��"			-	2	e0:80	-	2 bytes, need 3
@@ -70,7 +70,7 @@ my @MK = split(/\n/, <<__EOMK__);
 3.3.9 n "����"			-	4	fb:bf:bf:bf	-	4 bytes, need 5
 3.3.10 n "�����"		-	5	fd:bf:bf:bf:bf	-	5 bytes, need 6
 3.4	Concatenation of incomplete sequences
-3.4.1 n "�����������������������������"	-	30	c0:e0:80:f0:80:80:f8:80:80:80:fc:80:80:80:80:df:ef:bf:f7:bf:bf:fb:bf:bf:bf:fd:bf:bf:bf:bf	-	unexpected continuation byte 0xe0
+3.4.1 n "�����������������������������"	-	30	c0:e0:80:f0:80:80:f8:80:80:80:fc:80:80:80:80:df:ef:bf:f7:bf:bf:fb:bf:bf:bf:fd:bf:bf:bf:bf	-	unexpected non-continuation byte 0xe0 after start byte 0xc0
 3.5	Impossible bytes
 3.5.1 n "�"			-	1	fe	-	byte 0xfe
 3.5.2 n "�"			-	1	ff	-	byte 0xff
@@ -125,7 +125,7 @@ __EOMK__
 
     local $SIG{__WARN__} =
 	sub {
-	    # print "# $id: @_";
+	    print "# $id: @_";
 	    $WARNCNT++;
 	    $WARNMSG = "@_";
 	};
diff --git a/t/pragma/warn/utf8 b/t/pragma/warn/utf8
index adc10c645e..9a7dbafdee 100644
--- a/t/pragma/warn/utf8
+++ b/t/pragma/warn/utf8
@@ -30,6 +30,6 @@ my $a = "sn�storm" ;
     my $a = "sn�storm";
 }
 EXPECT
-Malformed UTF-8 character (unexpected non-continuation byte 0x73 after byte 0xf8) at - line 9.
-Malformed UTF-8 character (unexpected non-continuation byte 0x73 after byte 0xf8) at - line 14.
+Malformed UTF-8 character (unexpected non-continuation byte 0x73 after start byte 0xf8) at - line 9.
+Malformed UTF-8 character (unexpected non-continuation byte 0x73 after start byte 0xf8) at - line 14.
 ########
diff --git a/utf8.c b/utf8.c
index 9ef7ce108b..98e13e865c 100644
--- a/utf8.c
+++ b/utf8.c
@@ -190,10 +190,10 @@ If C<s> does not point to a well-formed UTF8 character, the behaviour
 is dependent on the value of C<flags>: if it contains UTF8_CHECK_ONLY,
 it is assumed that the caller will raise a warning, and this function
 will set C<retlen> to C<-1> and return zero.  If the C<flags> does not
-contain UTF8_CHECK_ONLY, the UNICODE_REPLACEMENT_CHARACTER (0xFFFD)
-will be returned, and C<retlen> will be set to the expected length of
-the UTF-8 character in bytes.  The C<flags> can also contain various
-flags to allow deviations from the strict UTF-8 encoding (see F<utf8.h>).
+contain UTF8_CHECK_ONLY, the UNICODE_REPLACEMENT (0xFFFD) will be
+returned, and C<retlen> will be set to the expected length of the
+UTF-8 character in bytes.  The C<flags> can also contain various flags
+to allow deviations from the strict UTF-8 encoding (see F<utf8.h>).
 
 =cut */
 
@@ -216,13 +216,13 @@ Perl_utf8_to_uv(pTHX_ U8* s, STRLEN curlen, STRLEN* retlen, U32 flags)
 	goto malformed;
     }
 
-    if (uv <= 0x7f) { /* Pure ASCII. */
+    if (UTF8_IS_ASCII(uv)) {
 	if (retlen)
 	    *retlen = 1;
 	return *s;
     }
 
-    if ((uv >= 0x80 && uv <= 0xbf) &&
+    if (UTF8_IS_CONTINUATION(uv) &&
 	!(flags & UTF8_ALLOW_CONTINUATION)) {
 	if (dowarn)
 	    Perl_warner(aTHX_ WARN_UTF8,
@@ -231,11 +231,11 @@ Perl_utf8_to_uv(pTHX_ U8* s, STRLEN curlen, STRLEN* retlen, U32 flags)
 	goto malformed;
     }
 
-    if ((uv >= 0xc0 && uv <= 0xfd && curlen > 1 && s[1] < 0x80) &&
+    if (UTF8_IS_START(uv) && curlen > 1 && !UTF8_IS_CONTINUATION(s[1]) &&
 	!(flags & UTF8_ALLOW_NON_CONTINUATION)) {
 	if (dowarn)
 	    Perl_warner(aTHX_ WARN_UTF8,
-			"Malformed UTF-8 character (unexpected non-continuation byte 0x%02"UVxf" after byte 0x%02"UVxf")",
+			"Malformed UTF-8 character (unexpected non-continuation byte 0x%02"UVxf" after start byte 0x%02"UVxf")",
 			(UV)s[1], uv);
 	goto malformed;
     }
@@ -276,10 +276,11 @@ Perl_utf8_to_uv(pTHX_ U8* s, STRLEN curlen, STRLEN* retlen, U32 flags)
     ouv = uv;
 
     while (len--) {
-	if ((*s & 0xc0) != 0x80) {
+	if (!UTF8_IS_CONTINUATION(*s) &&
+	    !(flags & UTF8_ALLOW_NON_CONTINUATION)) {
 	    if (dowarn)
 		Perl_warner(aTHX_ WARN_UTF8,
-			    "Malformed UTF-8 character (unexpected continuation byte 0x%02x)",
+			    "Malformed UTF-8 character (unexpected non-continuation byte 0x%02x)",
 			    *s);
 	    goto malformed;
 	}
@@ -297,14 +298,14 @@ Perl_utf8_to_uv(pTHX_ U8* s, STRLEN curlen, STRLEN* retlen, U32 flags)
 	ouv = uv;
     }
 
-    if ((uv >= 0xd800 && uv <= 0xdfff) &&
+    if (UNICODE_IS_SURROGATE(uv) &&
 	!(flags & UTF8_ALLOW_SURROGATE)) {
 	if (dowarn)
 	    Perl_warner(aTHX_ WARN_UTF8,
 			"Malformed UTF-8 character (UTF-16 surrogate 0x%04"UVxf")",
 			uv);
 	goto malformed;
-    } else if ((uv == 0xfffe) &&
+    } else if (UNICODE_IS_BYTE_ORDER_MARK(uv) &&
 	       !(flags & UTF8_ALLOW_BOM)) {
 	if (dowarn)
 	    Perl_warner(aTHX_ WARN_UTF8,
@@ -318,7 +319,7 @@ Perl_utf8_to_uv(pTHX_ U8* s, STRLEN curlen, STRLEN* retlen, U32 flags)
 			"Malformed UTF-8 character (%d byte%s, need %d)",
 			expectlen, expectlen == 1 ? "": "s", UNISKIP(uv));
 	goto malformed;
-    } else if ((uv == 0xffff) &&
+    } else if (UNICODE_IS_ILLEGAL(uv) &&
 	       !(flags & UTF8_ALLOW_FFFF)) {
 	if (dowarn)
 	    Perl_warner(aTHX_ WARN_UTF8,
@@ -340,7 +341,7 @@ malformed:
     if (retlen)
 	*retlen = expectlen;
 
-    return UNICODE_REPLACEMENT_CHARACTER;
+    return UNICODE_REPLACEMENT;
 }
 
 /*
diff --git a/utf8.h b/utf8.h
index 25ddc14d09..bafdc57f97 100644
--- a/utf8.h
+++ b/utf8.h
@@ -46,10 +46,26 @@ END_EXTERN_C
 #define UTF8_ALLOW_ANY			0x00ff
 #define UTF8_CHECK_ONLY			0x0100
 
+#define UNICODE_SURROGATE_FIRST		0xd800
+#define UNICODE_SURROGATE_LAST		0xdfff
+#define UNICODE_REPLACEMENT		0xfffd
+#define UNICODE_BYTER_ORDER_MARK	0xfffe
+#define UNICODE_ILLEGAL			0xffff
+
+#define UNICODE_IS_SURROGATE(c)		((c) >= UNICODE_SURROGATE_FIRST && \
+					 (c) <= UNICODE_SURROGATE_LAST)
+#define UNICODE_IS_REPLACEMENT(c)	((c) == UNICODE_REPLACMENT)
+#define UNICODE_IS_BYTE_ORDER_MARK(c)	((c) == UNICODE_BYTER_ORDER_MARK)
+#define UNICODE_IS_ILLEGAL(c)		((c) == UNICODE_ILLEGAL)
+
 #define UTF8SKIP(s) PL_utf8skip[*(U8*)s]
 
 #define UTF8_QUAD_MAX	UINT64_C(0x1000000000)
 
+#define UTF8_IS_ASCII(c) 		((c) <  0x80)
+#define UTF8_IS_START(c)		((c) >= 0xc0 && ((c) <= 0xfd))
+#define UTF8_IS_CONTINUATION(c)		((c) >= 0x80 && ((c) <= 0xbf))
+
 #ifdef HAS_QUAD
 #define UNISKIP(uv) ( (uv) < 0x80           ? 1 : \
 		      (uv) < 0x800          ? 2 : \
@@ -68,7 +84,6 @@ END_EXTERN_C
 		      (uv) < 0x80000000     ? 6 : 7 )
 #endif
 
-#define UNICODE_REPLACEMENT_CHARACTER	0xfffd
 
 /*
  * Note: we try to be careful never to call the isXXX_utf8() functions