23 files changed, 420 insertions, 120 deletions
diff --git a/Changes b/Changes
index 575e09f0ce..3cdab5877b 100644
--- a/Changes
+++ b/Changes
@@ -32,6 +32,190 @@ Version v5.7.1		Development release working toward v5.8
 --------------
 
 ____________________________________________________________________________
+[  8042] By: jhi                                   on 2000/12/08  16:33:39
+        Log: Do not return the Unicode replacement character if UTF-8
+             decoding goes awry, it should be up to the caller to decide.
+     Branch: perl
+	   ! pod/perlapi.pod utf8.c
+____________________________________________________________________________
+[  8041] By: jhi                                   on 2000/12/08  16:22:28
+        Log: metaconfig maintenance.
+     Branch: metaconfig
+	   ! U/modified/stdchar.U
+     Branch: metaconfig/U/perl
+	   + testsyml.U
+____________________________________________________________________________
+[  8040] By: jhi                                   on 2000/12/08  16:03:08
+        Log: Subject: [ID 20001207.009] Not OK: perl v5.7.0 +DEVEL8030 on os2-64int-ld 2.30
+             From: sthoenna@efn.org
+             Date: Thu, 7 Dec 2000 21:32:43 -0800 (PST)
+             Message-Id: <200012080532.eB85Wh729109@garcia.efn.org>
+     Branch: perl
+	   ! Changes lib/ExtUtils/MM_OS2.pm
+____________________________________________________________________________
+[  8039] By: jhi                                   on 2000/12/08  15:57:11
+        Log: Subject: [PATCH] Re: ebcdic <-> ascii tables interjected in uv <-> utf8 considered harmful
+             From: Simon Cozens <simon@cozens.net>
+             Date: Fri, 8 Dec 2000 13:33:31 +0000
+             Message-ID: <20001208133331.A11535@deep-dark-truthful-mirror.perlhacker.org>
+             
+             (The pp_hot part needed a rewrite.)
+     Branch: perl
+	   ! doop.c pp_hot.c utf8.c
+____________________________________________________________________________
+[  8038] By: jhi                                   on 2000/12/08  15:25:08
+        Log: Subject: djSP
+             From: Michael Stevens <michael@etla.org>
+             Date: Wed, 6 Dec 2000 23:24:01 +0000
+             Message-ID: <20001206232400.A21381@firedrake.org>
+             
+             Plus a note from Nick Ing-Simmons.
+     Branch: perl
+	   ! pp.h
+____________________________________________________________________________
+[  8037] By: jhi                                   on 2000/12/08  15:18:35
+        Log: Filetype is text.
+     Branch: perl
+	   ! t/op/utf8decode.t
+____________________________________________________________________________
+[  8036] By: jhi                                   on 2000/12/08  03:31:27
+        Log: Subject: [ID 20001207.004] [PATCH 5.6.0 and 5.7.x] add NCR MP-RAS support
+             From: Andy Dougherty <doughera@lafayette.edu>
+             Date: Thu, 7 Dec 2000 12:36:45 -0500 (EST)
+             Message-Id: <Pine.SOL.4.10.10012071235400.13162-100000@maxwell.phys.lafayette.edu>
+             
+             Subject: Re: [ID 20001207.004] [PATCH 5.6.0 and 5.7.x] add NCR MP-RAS support
+             From: Andy Dougherty <doughera@lafayette.edu>
+             Date: Thu, 7 Dec 2000 13:56:10 -0500 (EST)
+             Message-ID: <Pine.SOL.4.10.10012071354420.6665-100000@maxwell.phys.lafayette.edu>
+     Branch: perl
+	   + ext/POSIX/hints/svr4.pl
+	   ! MANIFEST ext/POSIX/Makefile.PL hints/svr4.sh t/lib/bigfltpm.t
+____________________________________________________________________________
+[  8035] By: jhi                                   on 2000/12/08  03:26:01
+        Log: Subject: [ID 20001207.003] [PATCH] t/base/term.t causes abort if Config.pm not built
+             From: Andy Dougherty <doughera@lafayette.edu>
+             Date: Thu, 7 Dec 2000 10:50:47 -0500 (EST)
+             Message-Id: <Pine.SOL.4.10.10012071049400.7566-100000@maxwell.phys.lafayette.edu>
+     Branch: perl
+	   ! t/README t/base/term.t
+____________________________________________________________________________
+[  8034] By: jhi                                   on 2000/12/08  03:21:59
+        Log: Subject: DOC PATCH 5.6.0: -s return value incompletely documented
+             From: mjd@plover.com  
+             Date: 7 Dec 2000 21:04:20 -0000
+             Message-ID: <20001207210420.22282.qmail@plover.com>
+             
+             Plus -z doc.
+     Branch: perl
+	   ! pod/perlfunc.pod
+____________________________________________________________________________
+[  8033] By: jhi                                   on 2000/12/08  03:19:03
+        Log: Use the UTF8 macros a bit.  They can't be used with abandon
+             everywhere because we do generate illegal UTF-8 in some situations.
+             This is of course naughty.
+     Branch: perl
+	   ! pod/perlapi.pod utf8.c utf8.h
+____________________________________________________________________________
+[  8032] By: jhi                                   on 2000/12/08  03:00:09
+        Log: Out of sync?
+     Branch: perl
+	   ! t/pragma/warn/utf8
+____________________________________________________________________________
+[  8031] By: jhi                                   on 2000/12/08  02:22:39
+        Log: Cnt spl.
+     Branch: perl
+	   ! MANIFEST
+____________________________________________________________________________
+[  8030] By: jhi                                   on 2000/12/08  01:23:54
+        Log: Add test for reverse().
+     Branch: perl
+	   + t/op/reverse.t
+	   ! MANIFEST
+____________________________________________________________________________
+[  8029] By: jhi                                   on 2000/12/08  01:21:47
+        Log: Integrate perlio.
+     Branch: perl
+	  !> ext/IO/IO.xs fakesdio.h global.sym lib/warnings.pm perlapi.c
+	  !> perlio.c perlio.h perlsdio.h warnings.pl
+____________________________________________________________________________
+[  8028] By: jhi                                   on 2000/12/08  01:19:08
+        Log: Introduce macros for UTF8 decoding.
+     Branch: perl
+	   ! t/op/utf8decode.t t/pragma/warn/utf8 utf8.c utf8.h
+____________________________________________________________________________
+[  8027] By: nick                                  on 2000/12/07  22:18:19
+        Log: Integrate mainline
+     Branch: perlio
+	  !> embed.pl pod/perlapi.pod utf8.c
+____________________________________________________________________________
+[  8026] By: nick                                  on 2000/12/07  21:45:08
+        Log: Various oddities p4 diff -se showed up
+             Remove 'our' from warnings.pl
+     Branch: perlio
+	   ! global.sym lib/warnings.pm pod/perlapi.pod warnings.pl
+____________________________________________________________________________
+[  8025] By: nick                                  on 2000/12/07  21:43:32
+        Log: Change PerlIO_(get|set)pos to take SV *
+             Should fix, OS/2, VMS, (sfio??)
+     Branch: perlio
+	   ! ext/IO/IO.xs fakesdio.h perlapi.c perlio.c perlio.h perlsdio.h
+____________________________________________________________________________
+[  8024] By: jhi                                   on 2000/12/07  19:05:32
+        Log: Document utf8_to_uv() better.
+     Branch: perl
+	   ! pod/perlapi.pod utf8.c
+____________________________________________________________________________
+[  8023] By: jhi                                   on 2000/12/07  18:23:47
+        Log: Document utf8_length(), utf8_distance(), and utf8_hop().
+     Branch: perl
+	   ! embed.pl pod/perlapi.pod utf8.c
+____________________________________________________________________________
+[  8022] By: jhi                                   on 2000/12/07  04:13:51
+        Log: Integrate perlio.
+     Branch: perl
+	  !> perlio.c t/io/dup.t win32/config.vc win32/config_H.bc
+	  !> win32/config_H.gc win32/config_H.vc win32/config_h.PL
+	  !> win32/makefile.mk
+____________________________________________________________________________
+[  8021] By: nick                                  on 2000/12/07  00:28:14
+        Log: Various attempts at MSVC debug - not sure what has
+             changed but works now. 
+             Seems atexit() _may_ work for DLLs built with MSVC so don't
+             call cleanup that way.
+     Branch: perlio
+	   ! perlio.c win32/makefile.mk
+____________________________________________________________________________
+[  8020] By: nick                                  on 2000/12/06  19:57:20
+        Log: Integrate mainline
+     Branch: perlio
+	  +> lib/ExtUtils/MANIFEST.SKIP t/op/concat.t
+	  !> (integrate 75 files)
+____________________________________________________________________________
+[  8019] By: nick                                  on 2000/12/06  19:28:21
+        Log: Add useperlio to config.vc
+             Turn off binmode in config_H.PL
+             Regen all the config_H.xx
+             Attempt to get debugging build with MSVC.
+     Branch: perlio
+	   ! win32/config.vc win32/config_H.bc win32/config_H.gc
+	   ! win32/config_H.vc win32/config_h.PL win32/makefile.mk
+____________________________________________________________________________
+[  8018] By: nick                                  on 2000/12/06  19:21:57
+        Log: Test various dup/external program options on all platforms.
+     Branch: perlio
+	   ! t/io/dup.t
+____________________________________________________________________________
+[  8017] By: nick                                  on 2000/12/06  19:20:47
+        Log: Fix harness to be less picky
+     Branch: perlio
+	   ! lib/Test/Harness.pm
+____________________________________________________________________________
+[  8016] By: jhi                                   on 2000/12/06  16:45:12
+        Log: Update Changes.
+     Branch: perl
+	   ! Changes patchlevel.h
+____________________________________________________________________________
 [  8015] By: jhi                                   on 2000/12/06  16:41:03
         Log: Test \x{...} with ord().
      Branch: perl
@@ -269,7 +453,7 @@ ____________________________________________________________________________
              Date: Mon, 04 Dec 2000 22:21:59 -0800
              Message-ID: <HmIL6gzkgepS092yn@efn.org>
              
-             Harness shouldn't ignore responses like "ok 3\r\n\n".
+             Harness shouldn't ignore responses like "ok 3\r\r\n".
      Branch: perl
 	   ! lib/Test/Harness.pm
 ____________________________________________________________________________
diff --git a/MANIFEST b/MANIFEST
index 28454065a4..2f334275be 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -247,6 +247,7 @@ ext/Encode/Encode/iso8859-10.enc	Encoding tables
 ext/Encode/Encode/iso8859-13.enc	Encoding tables
 ext/Encode/Encode/iso8859-14.enc	Encoding tables
 ext/Encode/Encode/iso8859-15.enc	Encoding tables
+ext/Encode/Encode/iso8859-16.enc	Encoding tables
 ext/Encode/Encode/iso8859-2.enc		Encoding tables
 ext/Encode/Encode/iso8859-3.enc		Encoding tables
 ext/Encode/Encode/iso8859-4.enc		Encoding tables
@@ -364,6 +365,7 @@ ext/POSIX/hints/netbsd.pl       Hint for POSIX for named architecture
 ext/POSIX/hints/next_3.pl       Hint for POSIX for named architecture
 ext/POSIX/hints/openbsd.pl	Hint for POSIX for named architecture
 ext/POSIX/hints/sunos_4.pl	Hint for POSIX for named architecture
+ext/POSIX/hints/svr4.pl		Hint for POSIX for named architecture
 ext/POSIX/typemap		POSIX extension interface types
 ext/SDBM_File/Makefile.PL	SDBM extension makefile writer
 ext/SDBM_File/SDBM_File.pm	SDBM extension Perl module
@@ -1557,6 +1559,7 @@ t/op/regexp.t		See if regular expressions work
 t/op/regexp_noamp.t	See if regular expressions work with optimizations
 t/op/regmesg.t		See if one can get regular expression errors
 t/op/repeat.t		See if x operator works
+t/op/reverse.t		See if reverse operator works
 t/op/runlevel.t		See if die() works from perl_call_*()
 t/op/sleep.t		See if sleep works
 t/op/sort.t		See if sort works
diff --git a/doop.c b/doop.c
index 9dbee678ef..7acad602c4 100644
--- a/doop.c
+++ b/doop.c
@@ -79,10 +79,7 @@ S_do_trans_simple(pTHX_ SV *sv)
 	c = utf8_to_uv(s, send - s, &ulen, 0);
         if (c < 0x100 && (ch = tbl[(short)c]) >= 0) {
             matches++;
-            if (ch < 0x80)
-                *d++ = ch;
-            else
-                d = uv_to_utf8(d,ch);
+            d = uv_to_utf8(d,ch);
             s += ulen;
         }
 	else { /* No match -> copy */
@@ -192,12 +189,9 @@ S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */
                matches--;
            }
 
-           if (ch >= 0) {
-               if (hasutf)
-                 d = uv_to_utf8(d, ch);
-               else 
-                 *d++ = ch;
-           }
+           if (ch >= 0)
+               d = uv_to_utf8(d, ch);
+           
            matches++;
 
            s += hasutf && *s & 0x80 ? UNISKIP(*s) : 1;
diff --git a/ext/Encode/Encode/iso8859-16.enc b/ext/Encode/Encode/iso8859-16.enc
new file mode 100644
index 0000000000..1936b97b6f
--- /dev/null
+++ b/ext/Encode/Encode/iso8859-16.enc
@@ -0,0 +1,20 @@
+# Encoding file: iso8859-16, single-byte
+S
+003F 0 1
+00
+0000000100020003000400050006000700080009000A000B000C000D000E000F
+0010001100120013001400150016001700180019001A001B001C001D001E001F
+0020002100220023002400250026002700280029002A002B002C002D002E002F
+0030003100320033003400350036003700380039003A003B003C003D003E003F
+0040004100420043004400450046004700480049004A004B004C004D004E004F
+0050005100520053005400550056005700580059005A005B005C005D005E005F
+0060006100620063006400650066006700680069006A006B006C006D006E006F
+0070007100720073007400750076007700780079007A007B007C007D007E007F
+0080008100820083008400850086008700880089008A008B008C008D008E008F
+0090009100920093009400950096009700980099009A009B009C009D009E009F
+00A001040105014120AC00AB016000A7016100A90218201E017900AD017A017B
+00B000B1010C0142017D201D00B600B7017E010D021900BB015201530178017C
+00C000C100C2010200C4010600C600C700C800C900CA00CB00CC00CD00CE00CF
+0110014300D200D300D4015000D6015A017000D900DA00DB00DC0118021A00DF
+00E000E100E2010300E4010700E600E700E800E900EA00EB00EC00ED00EE00EF
+0111014400F200F300F4015100F6015B017100F900FA00FB00FC0119021B00FF
diff --git a/ext/POSIX/Makefile.PL b/ext/POSIX/Makefile.PL
index 55c5c1fbf3..73bb02dddb 100644
--- a/ext/POSIX/Makefile.PL
+++ b/ext/POSIX/Makefile.PL
@@ -2,12 +2,7 @@ use ExtUtils::MakeMaker;
 use Config;
 my @libs;
 if ($^O ne 'MSWin32') {
-    if ($Config{archname} =~ /RM\d\d\d-svr4/) {
-	@libs = ('LIBS' => ["-lm -lc -lposix -lcposix"]);
-    }
-    else {
-	@libs = ('LIBS' => ["-lm -lposix -lcposix"]);
-    }
+    @libs = ('LIBS' => ["-lm -lposix -lcposix"]);
 }
 WriteMakefile(
     NAME	=> 'POSIX',
diff --git a/ext/POSIX/hints/svr4.pl b/ext/POSIX/hints/svr4.pl
new file mode 100644
index 0000000000..07f2cb0412
--- /dev/null
+++ b/ext/POSIX/hints/svr4.pl
@@ -0,0 +1,12 @@
+# NCR MP-RAS.  Thanks to Doug Hendricks for this info.
+# Configure sets osname=svr4.0, osvers=3.0, archname='3441-svr4.0'
+# This system needs to explicitly link against -lmw to pull in some
+# symbols such as _mwoflocheckl and possibly others.
+#  A. Dougherty  Thu Dec  7 11:55:28 EST 2000
+if ($Config{'archname'} =~ /3441-svr4/) {
+    $self->{LIBS} = ['-lm -posix -lcposix -lmw'];
+}
+# Not sure what OS this one is.
+elsif ($Config{archname} =~ /RM\d\d\d-svr4/) {
+    $self->{LIBS} = ['-lm -lc -lposix -lcposix'];
+}
diff --git a/hints/svr4.sh b/hints/svr4.sh
index 8109b39752..69af6fda2f 100644
--- a/hints/svr4.sh
+++ b/hints/svr4.sh
@@ -135,6 +135,22 @@ case "`uname -sm`" in
     ;;
 esac
 
+# NCR MP-RAS.  Thanks to Doug Hendricks for this info.
+# The output of uname -a looks like this
+#	foo foo 4.0 3.0 3441 Pentium III(TM)-ISA/PCI
+# Configure sets osname=svr4.0, osvers=3.0, archname='3441-svr4.0'
+case "$myuname" in
+*3441*)
+    # With the NCR High Performance C Compiler R3.0c, miniperl fails 
+    # t/op/regexp.t test 461 unless we compile with optimizie=-g.
+    # The whole O/S is being phased out, so more detailed probing
+    # is probably not warranted.
+    case "$optimize" in 
+    '') optimize='-g' ;;
+    esac
+    ;;
+esac
+
 # Configure may fail to find lstat() since it's a static/inline function
 # in <sys/stat.h> on Unisys U6000 SVR4, UnixWare 2.x, and possibly other
 # SVR4 derivatives.  (Though UnixWare has it in /usr/ccs/lib/libc.so.)
diff --git a/lib/ExtUtils/MM_OS2.pm b/lib/ExtUtils/MM_OS2.pm
index 501832bfec..c0c52402f6 100644
--- a/lib/ExtUtils/MM_OS2.pm
+++ b/lib/ExtUtils/MM_OS2.pm
@@ -38,7 +38,7 @@ $self->{BASEEXT}.def: Makefile.PL
      ', "DL_VARS" => ', neatvalue($vars), ');\'
 ');
     }
-    if (%{$self->{IMPORTS}}) {
+    if ($self->{IMPORTS} && %{$self->{IMPORTS}}) {
 	# Make import files (needed for static build)
 	-d 'tmp_imp' or mkdir 'tmp_imp', 0777 or die "Can't mkdir tmp_imp";
 	open IMP, '>tmpimp.imp' or die "Can't open tmpimp.imp";
@@ -61,7 +61,7 @@ $self->{BASEEXT}.def: Makefile.PL
 sub static_lib {
     my($self) = @_;
     my $old = $self->ExtUtils::MM_Unix::static_lib();
-    return $old unless %{$self->{IMPORTS}};
+    return $old unless $self->{IMPORTS} && %{$self->{IMPORTS}};
     
     my @chunks = split /\n{2,}/, $old;
     shift @chunks unless length $chunks[0]; # Empty lines at the start
diff --git a/lib/ExtUtils/MM_VMS.pm b/lib/ExtUtils/MM_VMS.pm
index 3485786bf8..cc63d2f67a 100644
--- a/lib/ExtUtils/MM_VMS.pm
+++ b/lib/ExtUtils/MM_VMS.pm
@@ -126,7 +126,7 @@ sub ExtUtils::MM_VMS::makeaperl;
 sub ExtUtils::MM_VMS::ext;
 sub ExtUtils::MM_VMS::nicetext;
 
-#use SelfLoader;
+our $AUTOLOAD;
 sub AUTOLOAD {
     my $code;
     if (defined fileno(DATA)) {
@@ -562,21 +562,21 @@ MM_VMS_REVISION = $ExtUtils::MM_VMS::Revision
 # DLBASE  = Basename part of dynamic library. May be just equal BASEEXT.
 ];
 
-    for $tmp (qw/
+    for my $tmp (qw/
 	      FULLEXT VERSION_FROM OBJECT LDFROM
 	      /	) {
 	next unless defined $self->{$tmp};
 	push @m, "$tmp = ",$self->fixpath($self->{$tmp},0),"\n";
     }
 
-    for $tmp (qw/
+    for my $tmp (qw/
 	      BASEEXT PARENT_NAME DLBASE INC DEFINE LINKTYPE
 	      /	) {
 	next unless defined $self->{$tmp};
 	push @m, "$tmp = $self->{$tmp}\n";
     }
 
-    for $tmp (qw/ XS MAN1PODS MAN3PODS PM /) {
+    for my $tmp (qw/ XS MAN1PODS MAN3PODS PM /) {
 	next unless defined $self->{$tmp};
 	my(%tmp,$key);
 	for $key (keys %{$self->{$tmp}}) {
@@ -585,7 +585,7 @@ MM_VMS_REVISION = $ExtUtils::MM_VMS::Revision
 	$self->{$tmp} = \%tmp;
     }
 
-    for $tmp (qw/ C O_FILES H /) {
+    for my $tmp (qw/ C O_FILES H /) {
 	next unless defined $self->{$tmp};
 	my(@tmp,$val);
 	for $val (@{$self->{$tmp}}) {
@@ -606,7 +606,7 @@ MAN3PODS = ',$self->wraplist(sort keys %{$self->{MAN3PODS}}),'
 
 ';
 
-    for $tmp (qw/
+    for my $tmp (qw/
 	      INST_MAN1DIR INSTALLMAN1DIR MAN1EXT INST_MAN3DIR INSTALLMAN3DIR MAN3EXT
 	      /) {
 	next unless defined $self->{$tmp};
@@ -705,7 +705,7 @@ sub cflags {
     # conflate the ones from $Config{'ccflags'} and $self->{DEFINE}
     # ($self->{DEFINE} has already been VMSified in constants() above)
     if ($self->{DEFINE}) { $quals .= $self->{DEFINE}; }
-    for $type (qw(Def Undef)) {
+    for my $type (qw(Def Undef)) {
 	my(@terms);
 	while ($quals =~ m:/${type}i?n?e?=([^/]+):ig) {
 		my $term = $1;
@@ -1419,7 +1419,7 @@ sub processPL {
         my $list = ref($self->{PL_FILES}->{$plfile})
 		? $self->{PL_FILES}->{$plfile}
 		: [$self->{PL_FILES}->{$plfile}];
-	foreach $target (@$list) {
+	foreach my $target (@$list) {
 	    my $vmsplfile = vmsify($plfile);
 	    my $vmsfile = vmsify($target);
 	    push @m, "
@@ -2051,6 +2051,8 @@ Consequently, it hasn't really been tested, and may well be incomplete.
 
 =cut
 
+our %olbs;
+
 sub makeaperl {
     my($self, %attribs) = @_;
     my($makefilename, $searchdirs, $static, $extra, $perlinc, $target, $tmp, $libperl) = 
@@ -2093,7 +2095,7 @@ $(MAP_TARGET) :: $(MAKE_APERL_FILE)
     $linkcmd =~ s/\s+/ /g;
 
     # Which *.olb files could we make use of...
-    local(%olbs);
+    local(%olbs);       # XXX can this be lexical?
     $olbs{$self->{INST_ARCHAUTODIR}} = "$self->{BASEEXT}\$(LIB_EXT)";
     require File::Find;
     File::Find::find(sub {
@@ -2190,6 +2192,7 @@ $(MAP_TARGET) :: $(MAKE_APERL_FILE)
     push @optlibs, @$extra;
 
     $target = "Perl$Config{'exe_ext'}" unless $target;
+    my $shrtarget;
     ($shrtarget,$targdir) = fileparse($target);
     $shrtarget =~ s/^([^.]*)/$1Shr/;
     $shrtarget = $targdir . $shrtarget;
diff --git a/lib/ExtUtils/Manifest.pm b/lib/ExtUtils/Manifest.pm
index 4b98185e93..46ace64e61 100644
--- a/lib/ExtUtils/Manifest.pm
+++ b/lib/ExtUtils/Manifest.pm
@@ -430,6 +430,6 @@ L<ExtUtils::MakeMaker> which has handy targets for most of the functionality.
 
 =head1 AUTHOR
 
-Andreas Koenig <F<koenig@franz.ww.TU-Berlin.DE>>
+Andreas Koenig <F<andreas.koenig@anima.de>>
 
 =cut
diff --git a/patchlevel.h b/patchlevel.h
index acee45547b..ca39c4e892 100644
--- a/patchlevel.h
+++ b/patchlevel.h
@@ -70,7 +70,7 @@
 #if !defined(PERL_PATCHLEVEL_H_IMPLICIT) && !defined(LOCAL_PATCH_COUNT)
 static	char	*local_patches[] = {
         NULL
-	,"DEVEL8015"
+	,"DEVEL8042"
 	,NULL
 };
 
diff --git a/pod/perlapi.pod b/pod/perlapi.pod
index 50ac40c650..fa18e661b6 100644
--- a/pod/perlapi.pod
+++ b/pod/perlapi.pod
@@ -2368,19 +2368,19 @@ false, defined or undefined.  Does not handle 'get' magic.
 =for hackers
 Found in file sv.h
 
-=item svtype
+=item SvTYPE
 
-An enum of flags for Perl types.  These are found in the file B<sv.h> 
-in the C<svtype> enum.  Test these flags with the C<SvTYPE> macro.
+Returns the type of the SV.  See C<svtype>.
+
+	svtype	SvTYPE(SV* sv)
 
 =for hackers
 Found in file sv.h
 
-=item SvTYPE
-
-Returns the type of the SV.  See C<svtype>.
+=item svtype
 
-	svtype	SvTYPE(SV* sv)
+An enum of flags for Perl types.  These are found in the file B<sv.h> 
+in the C<svtype> enum.  Test these flags with the C<SvTYPE> macro.
 
 =for hackers
 Found in file sv.h
@@ -3234,11 +3234,12 @@ Found in file utf8.c
 
 =item utf8_hop
 
-Move the C<s> pointing to UTF-8 data by C<off> characters, either forward
-or backward.
+Return the UTF-8 pointer C<s> displaced by C<off> characters, either
+forward or backward.
 
 WARNING: do not use the following unless you *know* C<off> is within
-the UTF-8 buffer pointed to by C<s>.
+the UTF-8 data pointed to by C<s> *and* that on entry C<s> is aligned
+on the first byte of character or just after the last byte of a character.
 
 	U8*	utf8_hop(U8 *s, I32 off)
 
@@ -3278,11 +3279,13 @@ and the pointer C<s> will be advanced to the end of the character.
 If C<s> does not point to a well-formed UTF8 character, the behaviour
 is dependent on the value of C<flags>: if it contains UTF8_CHECK_ONLY,
 it is assumed that the caller will raise a warning, and this function
-will set C<retlen> to C<-1> and return zero.  If the C<flags> does not
-contain UTF8_CHECK_ONLY, the UNICODE_REPLACEMENT_CHARACTER (0xFFFD)
-will be returned, and C<retlen> will be set to the expected length of
-the UTF-8 character in bytes.  The C<flags> can also contain various
-flags to allow deviations from the strict UTF-8 encoding (see F<utf8.h>).
+will silently just set C<retlen> to C<-1> and return zero.  If the
+C<flags> does not contain UTF8_CHECK_ONLY, warnings about
+malformations will be given, C<retlen> will be set to the expected
+length of the UTF-8 character in bytes, and zero will be returned.
+
+The C<flags> can also contain various flags to allow deviations from
+the strict UTF-8 encoding (see F<utf8.h>).
 
 	U8* s	utf8_to_uv(STRLEN curlen, STRLEN *retlen, U32 flags)
 
diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod
index f2bf51e18a..06d3b1da2d 100644
--- a/pod/perlfunc.pod
+++ b/pod/perlfunc.pod
@@ -274,8 +274,8 @@ X<-S>X<-b>X<-c>X<-t>X<-u>X<-g>X<-k>X<-T>X<-B>X<-M>X<-A>X<-C>
     -O	File is owned by real uid.
 
     -e	File exists.
-    -z	File has zero size.
-    -s	File has nonzero size (returns size).
+    -z	File has zero size (is empty).
+    -s	File has nonzero size (returns size in bytes).
 
     -f	File is a plain file.
     -d	File is a directory.
diff --git a/pp.h b/pp.h
index 5e531dada6..ccd372dafe 100644
--- a/pp.h
+++ b/pp.h
@@ -33,6 +33,13 @@ L<perlcall>.
 Declares a local copy of perl's stack pointer for the XSUB, available via
 the C<SP> macro.  See C<SP>.
 
+=for apidoc ms||djSP
+
+Declare Just C<SP>. This is actually identical to C<dSP>, and declares
+a local copy of perl's stack pointer, available via the C<SP> macro.
+See C<SP>.  (Available for backward source code compatibility with the
+old (Perl 5.005) thread model.)
+
 =for apidoc Ams||dMARK
 Declare a stack marker variable, C<mark>, for the XSUB.  See C<MARK> and
 C<dORIGMARK>.
@@ -46,8 +53,7 @@ The original stack mark for the XSUB.  See C<dORIGMARK>.
 =for apidoc Ams||SPAGAIN
 Refetch the stack pointer.  Used after a callback.  See L<perlcall>.
 
-=cut
-*/
+=cut */
 
 #define SP sp
 #define MARK mark
diff --git a/pp_hot.c b/pp_hot.c
index 830d56ed03..4020f200a2 100644
--- a/pp_hot.c
+++ b/pp_hot.c
@@ -142,19 +142,19 @@ PP(pp_concat)
     dPOPTOPssrl;
     STRLEN len;
     U8 *s;
-    bool left_utf;
-    bool right_utf;
+    bool left_utf8;
+    bool right_utf8;
 
     if (TARG == right && SvGMAGICAL(right))
         mg_get(right);
     if (SvGMAGICAL(left))
         mg_get(left);
 
-    left_utf  = DO_UTF8(left);
-    right_utf = DO_UTF8(right);
+    left_utf8  = DO_UTF8(left);
+    right_utf8 = DO_UTF8(right);
  
-    if (left_utf != right_utf) {
-        if (TARG == right && !right_utf) {
+    if (left_utf8 != right_utf8) {
+        if (TARG == right && !right_utf8) {
             sv_utf8_upgrade(TARG); /* Now straight binary copy */
             SvUTF8_on(TARG);
         }
@@ -163,7 +163,7 @@ PP(pp_concat)
             U8 *l, *c, *olds = NULL;
             STRLEN targlen;
 	    s = (U8*)SvPV(right,len);
-	    right_utf |= DO_UTF8(right);
+	    right_utf8 |= DO_UTF8(right);
             if (TARG == right) {
 		/* Take a copy since we're about to overwrite TARG */
 		olds = s = (U8*)savepvn((char*)s, len);
@@ -175,28 +175,28 @@ PP(pp_concat)
 		    sv_setpv(left, "");	/* Suppress warning. */
 	    }
             l = (U8*)SvPV(left, targlen);
-	    left_utf |= DO_UTF8(left);
+	    left_utf8 |= DO_UTF8(left);
             if (TARG != left)
                 sv_setpvn(TARG, (char*)l, targlen);
-            if (!left_utf)
+            if (!left_utf8)
                 sv_utf8_upgrade(TARG);
             /* Extend TARG to length of right (s) */
             targlen = SvCUR(TARG) + len;
-            if (!right_utf) {
+            if (!right_utf8) {
                 /* plus one for each hi-byte char if we have to upgrade */
                 for (c = s; c < s + len; c++)  {
-                    if (*c & 0x80)
+                    if (UTF8_IS_CONTINUED(*c))
                         targlen++;
                 }
             }
             SvGROW(TARG, targlen+1);
             /* And now copy, maybe upgrading right to UTF8 on the fly */
-            for (c = (U8*)SvEND(TARG); len--; s++) {
-                 if (*s & 0x80 && !right_utf)
-                     c = uv_to_utf8(c, *s);
-                 else
-                     *c++ = *s;
-            }
+	    if (right_utf8)
+		Copy(s, SvEND(TARG), len, U8);
+	    else {
+		for (c = (U8*)SvEND(TARG); len--; s++)
+		    c = uv_to_utf8(c, *s);
+	    }
             SvCUR_set(TARG, targlen);
             *SvEND(TARG) = '\0';
             SvUTF8_on(TARG);
@@ -235,7 +235,7 @@ PP(pp_concat)
     }
     else
 	sv_setpvn(TARG, (char *)s, len);	/* suppress warning */
-    if (left_utf)
+    if (left_utf8)
 	SvUTF8_on(TARG);
     SETTARG;
     RETURN;
diff --git a/t/README b/t/README
index 0953026607..7cff553d5b 100644
--- a/t/README
+++ b/t/README
@@ -1,4 +1,4 @@
-This is the perl test library.  To run all the tests, just type 'TEST'.
+This is the perl test library.  To run all the tests, just type './TEST'.
 
 To add new tests, just look at the current tests and do likewise.
 
@@ -14,3 +14,8 @@ will fail, you may want to use Test::Harness thusly:
 This method pinpoints failed tests automatically.
 
 If you come up with new tests, please send them to perlbug@perl.org.
+
+Tests in the base/ directory ought to be runnable with plain miniperl.
+That is, they should not require Config.pm nor should they require any
+extensions to have been built.  TEST will abort if any tests in the
+base/ directory fail.
diff --git a/t/base/term.t b/t/base/term.t
index e96313dec5..49df11fa31 100755
--- a/t/base/term.t
+++ b/t/base/term.t
@@ -4,19 +4,16 @@
 
 BEGIN {
     chdir 't' if -d 't';
-    @INC = '../lib';
 }
 
-use Config;
-
 print "1..7\n";
 
 # check "" interpretation
 
 $x = "\n";
 # 10 is ASCII/Iso Latin, 21 is EBCDIC.
-if ($x eq chr(10) ||
-    ($Config{ebcdic} eq 'define' && $x eq chr(21))) {print "ok 1\n";}
+if ($x eq chr(10)) { print "ok 1\n";}
+elsif ($x eq chr(21)) { print "ok 1 # EBCDIC\n"; }
 else {print "not ok 1\n";}
 
 # check `` processing
diff --git a/t/lib/bigfltpm.t b/t/lib/bigfltpm.t
index a8fb19209a..b335d13016 100755
--- a/t/lib/bigfltpm.t
+++ b/t/lib/bigfltpm.t
@@ -449,10 +449,10 @@ $Math::BigFloat::div_scale = 20
 $Math::BigFloat::div_scale = 40
 &fsqrt
 +0:0
--1:/^(?i:0|\?|NaNQ?|-n\.an)$
--2:/^(?i:0|\?|NaNQ?|-n\.an)$
--16:/^(?i:0|\?|NaNQ?|-n\.an)$
--123.456:/^(?i:0|\?|NaNQ?|-n\.an)$
+-1:/^(?i:0|\?|-?N\.?aNQ?)$
+-2:/^(?i:0|\?|-?N\.?aNQ?)$
+-16:/^(?i:0|\?|-?N\.?aNQ?)$
+-123.456:/^(?i:0|\?|-?N\.?aNQ?)$
 +1:1.
 +1.44:1.2
 +2:1.41421356237309504880168872420969807857
diff --git a/t/op/reverse.t b/t/op/reverse.t
new file mode 100644
index 0000000000..bb7b9b77fe
--- /dev/null
+++ b/t/op/reverse.t
@@ -0,0 +1,33 @@
+#!./perl
+
+BEGIN {
+    chdir 't' if -d 't';
+    @INC = '../lib';
+}
+
+print "1..4\n";
+
+print "not " unless reverse("abc")    eq "cba";
+print "ok 1\n";
+
+$_ = "foobar";
+print "not " unless reverse()         eq "raboof";
+print "ok 2\n";
+
+{
+    my @a = ("foo", "bar");
+    my @b = reverse @a;
+
+    print "not " unless $b[0] eq $a[1] && $b[1] eq $a[0];
+    print "ok 3\n";
+}
+
+{
+    # Unicode.
+
+    my $a = "\x{263A}\x{263A}x\x{263A}y\x{263A}";
+    my $b = scalar reverse($a);
+    my $c = scalar reverse($b);
+    print "not " unless $a eq $c;
+    print "ok 4\n";
+}
diff --git a/t/op/utf8decode.t b/t/op/utf8decode.t
index c631c0a7a9..ac42b85577 100644
--- a/t/op/utf8decode.t
+++ b/t/op/utf8decode.t
@@ -53,11 +53,11 @@ my @MK = split(/\n/, <<__EOMK__);
 3.1.8 n "�������"	-		7	80:bf:80:bf:80:bf:80	-	unexpected continuation byte 0x80
 3.1.9 n "����������������������������������������������������������������"				-	64	80:81:82:83:84:85:86:87:88:89:8a:8b:8c:8d:8e:8f:90:91:92:93:94:95:96:97:98:99:9a:9b:9c:9d:9e:9f:a0:a1:a2:a3:a4:a5:a6:a7:a8:a9:aa:ab:ac:ad:ae:af:b0:b1:b2:b3:b4:b5:b6:b7:b8:b9:ba:bb:bc:bd:be:bf	-	unexpected continuation byte 0x80
 3.2	Lonely start characters
-3.2.1 n "� � � � � � � � � � � � � � � � � � � � � � � � � � � � � � � � "	-	64 	c0:20:c1:20:c2:20:c3:20:c4:20:c5:20:c6:20:c7:20:c8:20:c9:20:ca:20:cb:20:cc:20:cd:20:ce:20:cf:20:d0:20:d1:20:d2:20:d3:20:d4:20:d5:20:d6:20:d7:20:d8:20:d9:20:da:20:db:20:dc:20:dd:20:de:20:df:20	-	unexpected non-continuation byte 0x20 after byte 0xc0
-3.2.2 n "� � � � � � � � � � � � � � � � "	-	32	e0:20:e1:20:e2:20:e3:20:e4:20:e5:20:e6:20:e7:20:e8:20:e9:20:ea:20:eb:20:ec:20:ed:20:ee:20:ef:20	-	unexpected non-continuation byte 0x20 after byte 0xe0
-3.2.3 n "� � � � � � � � "	-	16	f0:20:f1:20:f2:20:f3:20:f4:20:f5:20:f6:20:f7:20	-	unexpected non-continuation byte 0x20 after byte 0xf0
-3.2.4 n "� � � � "		-	8	f8:20:f9:20:fa:20:fb:20	-	unexpected non-continuation byte 0x20 after byte 0xf8
-3.2.5 n "� � "			-	4	fc:20:fd:20	-	unexpected non-continuation byte 0x20 after byte 0xfc
+3.2.1 n "� � � � � � � � � � � � � � � � � � � � � � � � � � � � � � � � "	-	64 	c0:20:c1:20:c2:20:c3:20:c4:20:c5:20:c6:20:c7:20:c8:20:c9:20:ca:20:cb:20:cc:20:cd:20:ce:20:cf:20:d0:20:d1:20:d2:20:d3:20:d4:20:d5:20:d6:20:d7:20:d8:20:d9:20:da:20:db:20:dc:20:dd:20:de:20:df:20	-	unexpected non-continuation byte 0x20 after start byte 0xc0
+3.2.2 n "� � � � � � � � � � � � � � � � "	-	32	e0:20:e1:20:e2:20:e3:20:e4:20:e5:20:e6:20:e7:20:e8:20:e9:20:ea:20:eb:20:ec:20:ed:20:ee:20:ef:20	-	unexpected non-continuation byte 0x20 after start byte 0xe0
+3.2.3 n "� � � � � � � � "	-	16	f0:20:f1:20:f2:20:f3:20:f4:20:f5:20:f6:20:f7:20	-	unexpected non-continuation byte 0x20 after start byte 0xf0
+3.2.4 n "� � � � "		-	8	f8:20:f9:20:fa:20:fb:20	-	unexpected non-continuation byte 0x20 after start byte 0xf8
+3.2.5 n "� � "			-	4	fc:20:fd:20	-	unexpected non-continuation byte 0x20 after start byte 0xfc
 3.3	Sequences with last continuation byte missing
 3.3.1 n "�"			-	1	c0	-	1 byte, need 2
 3.3.2 n "��"			-	2	e0:80	-	2 bytes, need 3
@@ -70,7 +70,7 @@ my @MK = split(/\n/, <<__EOMK__);
 3.3.9 n "����"			-	4	fb:bf:bf:bf	-	4 bytes, need 5
 3.3.10 n "�����"		-	5	fd:bf:bf:bf:bf	-	5 bytes, need 6
 3.4	Concatenation of incomplete sequences
-3.4.1 n "�����������������������������"	-	30	c0:e0:80:f0:80:80:f8:80:80:80:fc:80:80:80:80:df:ef:bf:f7:bf:bf:fb:bf:bf:bf:fd:bf:bf:bf:bf	-	unexpected continuation byte 0xe0
+3.4.1 n "�����������������������������"	-	30	c0:e0:80:f0:80:80:f8:80:80:80:fc:80:80:80:80:df:ef:bf:f7:bf:bf:fb:bf:bf:bf:fd:bf:bf:bf:bf	-	unexpected non-continuation byte 0xe0 after start byte 0xc0
 3.5	Impossible bytes
 3.5.1 n "�"			-	1	fe	-	byte 0xfe
 3.5.2 n "�"			-	1	ff	-	byte 0xff
@@ -125,7 +125,7 @@ __EOMK__
 
     local $SIG{__WARN__} =
 	sub {
-	    # print "# $id: @_";
+	    print "# $id: @_";
 	    $WARNCNT++;
 	    $WARNMSG = "@_";
 	};
diff --git a/t/pragma/warn/utf8 b/t/pragma/warn/utf8
index adc10c645e..9a7dbafdee 100644
--- a/t/pragma/warn/utf8
+++ b/t/pragma/warn/utf8
@@ -30,6 +30,6 @@ my $a = "sn�storm" ;
     my $a = "sn�storm";
 }
 EXPECT
-Malformed UTF-8 character (unexpected non-continuation byte 0x73 after byte 0xf8) at - line 9.
-Malformed UTF-8 character (unexpected non-continuation byte 0x73 after byte 0xf8) at - line 14.
+Malformed UTF-8 character (unexpected non-continuation byte 0x73 after start byte 0xf8) at - line 9.
+Malformed UTF-8 character (unexpected non-continuation byte 0x73 after start byte 0xf8) at - line 14.
 ########
diff --git a/utf8.c b/utf8.c
index 9ef7ce108b..8d812ab3e5 100644
--- a/utf8.c
+++ b/utf8.c
@@ -137,7 +137,7 @@ Perl_is_utf8_char(pTHX_ U8 *s)
     while (slen--) {
 	if ((*s & 0xc0) != 0x80)
 	    return 0;
-	uv = (uv << 6) | (*s & 0x3f);
+	uv = UTF8_ACCUMULATE(uv, *s);
 	if (uv < ouv)
 	    return 0;
 	ouv = uv;
@@ -189,11 +189,13 @@ and the pointer C<s> will be advanced to the end of the character.
 If C<s> does not point to a well-formed UTF8 character, the behaviour
 is dependent on the value of C<flags>: if it contains UTF8_CHECK_ONLY,
 it is assumed that the caller will raise a warning, and this function
-will set C<retlen> to C<-1> and return zero.  If the C<flags> does not
-contain UTF8_CHECK_ONLY, the UNICODE_REPLACEMENT_CHARACTER (0xFFFD)
-will be returned, and C<retlen> will be set to the expected length of
-the UTF-8 character in bytes.  The C<flags> can also contain various
-flags to allow deviations from the strict UTF-8 encoding (see F<utf8.h>).
+will silently just set C<retlen> to C<-1> and return zero.  If the
+C<flags> does not contain UTF8_CHECK_ONLY, warnings about
+malformations will be given, C<retlen> will be set to the expected
+length of the UTF-8 character in bytes, and zero will be returned.
+
+The C<flags> can also contain various flags to allow deviations from
+the strict UTF-8 encoding (see F<utf8.h>).
 
 =cut */
 
@@ -216,13 +218,13 @@ Perl_utf8_to_uv(pTHX_ U8* s, STRLEN curlen, STRLEN* retlen, U32 flags)
 	goto malformed;
     }
 
-    if (uv <= 0x7f) { /* Pure ASCII. */
+    if (UTF8_IS_ASCII(uv)) {
 	if (retlen)
 	    *retlen = 1;
 	return *s;
     }
 
-    if ((uv >= 0x80 && uv <= 0xbf) &&
+    if (UTF8_IS_CONTINUATION(uv) &&
 	!(flags & UTF8_ALLOW_CONTINUATION)) {
 	if (dowarn)
 	    Perl_warner(aTHX_ WARN_UTF8,
@@ -231,11 +233,11 @@ Perl_utf8_to_uv(pTHX_ U8* s, STRLEN curlen, STRLEN* retlen, U32 flags)
 	goto malformed;
     }
 
-    if ((uv >= 0xc0 && uv <= 0xfd && curlen > 1 && s[1] < 0x80) &&
+    if (UTF8_IS_START(uv) && curlen > 1 && !UTF8_IS_CONTINUATION(s[1]) &&
 	!(flags & UTF8_ALLOW_NON_CONTINUATION)) {
 	if (dowarn)
 	    Perl_warner(aTHX_ WARN_UTF8,
-			"Malformed UTF-8 character (unexpected non-continuation byte 0x%02"UVxf" after byte 0x%02"UVxf")",
+			"Malformed UTF-8 character (unexpected non-continuation byte 0x%02"UVxf" after start byte 0x%02"UVxf")",
 			(UV)s[1], uv);
 	goto malformed;
     }
@@ -276,15 +278,16 @@ Perl_utf8_to_uv(pTHX_ U8* s, STRLEN curlen, STRLEN* retlen, U32 flags)
     ouv = uv;
 
     while (len--) {
-	if ((*s & 0xc0) != 0x80) {
+	if (!UTF8_IS_CONTINUATION(*s) &&
+	    !(flags & UTF8_ALLOW_NON_CONTINUATION)) {
 	    if (dowarn)
 		Perl_warner(aTHX_ WARN_UTF8,
-			    "Malformed UTF-8 character (unexpected continuation byte 0x%02x)",
+			    "Malformed UTF-8 character (unexpected non-continuation byte 0x%02x)",
 			    *s);
 	    goto malformed;
 	}
 	else
-	    uv = (uv << 6) | (*s & 0x3f);
+	    uv = UTF8_ACCUMULATE(uv, *s);
 	if (uv < ouv) {
 	    /* This cannot be allowed. */
 	    if (dowarn)
@@ -297,14 +300,14 @@ Perl_utf8_to_uv(pTHX_ U8* s, STRLEN curlen, STRLEN* retlen, U32 flags)
 	ouv = uv;
     }
 
-    if ((uv >= 0xd800 && uv <= 0xdfff) &&
+    if (UNICODE_IS_SURROGATE(uv) &&
 	!(flags & UTF8_ALLOW_SURROGATE)) {
 	if (dowarn)
 	    Perl_warner(aTHX_ WARN_UTF8,
 			"Malformed UTF-8 character (UTF-16 surrogate 0x%04"UVxf")",
 			uv);
 	goto malformed;
-    } else if ((uv == 0xfffe) &&
+    } else if (UNICODE_IS_BYTE_ORDER_MARK(uv) &&
 	       !(flags & UTF8_ALLOW_BOM)) {
 	if (dowarn)
 	    Perl_warner(aTHX_ WARN_UTF8,
@@ -318,7 +321,7 @@ Perl_utf8_to_uv(pTHX_ U8* s, STRLEN curlen, STRLEN* retlen, U32 flags)
 			"Malformed UTF-8 character (%d byte%s, need %d)",
 			expectlen, expectlen == 1 ? "": "s", UNISKIP(uv));
 	goto malformed;
-    } else if ((uv == 0xffff) &&
+    } else if (UNICODE_IS_ILLEGAL(uv) &&
 	       !(flags & UTF8_ALLOW_FFFF)) {
 	if (dowarn)
 	    Perl_warner(aTHX_ WARN_UTF8,
@@ -338,9 +341,9 @@ malformed:
     }
 
     if (retlen)
-	*retlen = expectlen;
+	*retlen = expectlen ? expectlen : len;
 
-    return UNICODE_REPLACEMENT_CHARACTER;
+    return 0;
 }
 
 /*
@@ -378,6 +381,10 @@ Perl_utf8_length(pTHX_ U8* s, U8* e)
 {
     STRLEN len = 0;
 
+    /* Note: cannot use UTF8_IS_...() too eagerly here since e.g.
+     * the bitops (especially ~) can create illegal UTF-8.
+     * In other words: in Perl UTF-8 is not just for Unicode. */
+
     if (e < s)
 	Perl_croak(aTHX_ "panic: utf8_length: unexpected end");
     while (s < e) {
@@ -408,6 +415,10 @@ Perl_utf8_distance(pTHX_ U8 *a, U8 *b)
 {
     IV off = 0;
 
+    /* Note: cannot use UTF8_IS_...() too eagerly here since  e.g.
+     * the bitops (especially ~) can create illegal UTF-8.
+     * In other words: in Perl UTF-8 is not just for Unicode. */
+
     if (a < b) {
 	while (a < b) {
 	    U8 c = UTF8SKIP(a);
@@ -435,17 +446,22 @@ Perl_utf8_distance(pTHX_ U8 *a, U8 *b)
 /*
 =for apidoc Am|U8*|utf8_hop|U8 *s|I32 off
 
-Move the C<s> pointing to UTF-8 data by C<off> characters, either forward
-or backward.
+Return the UTF-8 pointer C<s> displaced by C<off> characters, either
+forward or backward.
 
 WARNING: do not use the following unless you *know* C<off> is within
-the UTF-8 buffer pointed to by C<s>.
+the UTF-8 data pointed to by C<s> *and* that on entry C<s> is aligned
+on the first byte of character or just after the last byte of a character.
 
 =cut */
 
 U8 *
 Perl_utf8_hop(pTHX_ U8 *s, I32 off)
 {
+    /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
+     * the bitops (especially ~) can create illegal UTF-8.
+     * In other words: in Perl UTF-8 is not just for Unicode. */
+
     if (off >= 0) {
 	while (off--)
 	    s += UTF8SKIP(s);
@@ -453,10 +469,8 @@ Perl_utf8_hop(pTHX_ U8 *s, I32 off)
     else {
 	while (off++) {
 	    s--;
-	    if (*s & 0x80) {
-		while ((*s & 0xc0) == 0x80)
-		    s--;
-	    }
+	    while (UTF8_IS_CONTINUATION(*s))
+		s--;
 	}
     }
     return s;
@@ -494,14 +508,9 @@ Perl_utf8_to_bytes(pTHX_ U8* s, STRLEN *len)
 
     d = s = save;
     while (s < send) {
-        if (*s < 0x80) {
-	    *d++ = *s++;
-	}
-        else {
-            STRLEN ulen;
-            *d++ = (U8)utf8_to_uv_simple(s, &ulen);
-            s += ulen;
-        }
+        STRLEN ulen;
+        *d++ = (U8)utf8_to_uv_simple(s, &ulen);
+        s += ulen;
     }
     *d = '\0';
     *len = d - save;
diff --git a/utf8.h b/utf8.h
index 25ddc14d09..26ef7236ee 100644
--- a/utf8.h
+++ b/utf8.h
@@ -46,10 +46,31 @@ END_EXTERN_C
 #define UTF8_ALLOW_ANY			0x00ff
 #define UTF8_CHECK_ONLY			0x0100
 
+#define UNICODE_SURROGATE_FIRST		0xd800
+#define UNICODE_SURROGATE_LAST		0xdfff
+#define UNICODE_REPLACEMENT		0xfffd
+#define UNICODE_BYTER_ORDER_MARK	0xfffe
+#define UNICODE_ILLEGAL			0xffff
+
+#define UNICODE_IS_SURROGATE(c)		((c) >= UNICODE_SURROGATE_FIRST && \
+					 (c) <= UNICODE_SURROGATE_LAST)
+#define UNICODE_IS_REPLACEMENT(c)	((c) == UNICODE_REPLACMENT)
+#define UNICODE_IS_BYTE_ORDER_MARK(c)	((c) == UNICODE_BYTER_ORDER_MARK)
+#define UNICODE_IS_ILLEGAL(c)		((c) == UNICODE_ILLEGAL)
+
 #define UTF8SKIP(s) PL_utf8skip[*(U8*)s]
 
 #define UTF8_QUAD_MAX	UINT64_C(0x1000000000)
 
+#define UTF8_IS_ASCII(c) 		((c) <  0x80)
+#define UTF8_IS_START(c)		((c) >= 0xc0 && ((c) <= 0xfd))
+#define UTF8_IS_CONTINUATION(c)		((c) >= 0x80 && ((c) <= 0xbf))
+#define UTF8_IS_CONTINUED(c) 		((c) &  0x80)
+
+#define UTF8_CONTINUATION_MASK		0x3f
+#define UTF8_ACCUMULATION_SHIFT		6
+#define UTF8_ACCUMULATE(old, new)	((old) << UTF8_ACCUMULATION_SHIFT | ((new) & UTF8_CONTINUATION_MASK))
+
 #ifdef HAS_QUAD
 #define UNISKIP(uv) ( (uv) < 0x80           ? 1 : \
 		      (uv) < 0x800          ? 2 : \
@@ -68,7 +89,6 @@ END_EXTERN_C
 		      (uv) < 0x80000000     ? 6 : 7 )
 #endif
 
-#define UNICODE_REPLACEMENT_CHARACTER	0xfffd
 
 /*
  * Note: we try to be careful never to call the isXXX_utf8() functions