summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Ing-Simmons <nik@tiuk.ti.com>2000-12-09 12:30:43 +0000
committerNick Ing-Simmons <nik@tiuk.ti.com>2000-12-09 12:30:43 +0000
commit553151e7a76c75a9f0ccbf6e0802e72fde2ec8f2 (patch)
tree8edfed529ab585fc3fffcdea329f73a8f70dc568
parent9f2fcde2fd69814c4c7eeae7dbeadfe25b591bc0 (diff)
parent185419472dd17e55d44dadf1694c95abd0142e43 (diff)
downloadperl-553151e7a76c75a9f0ccbf6e0802e72fde2ec8f2.tar.gz
Integrate mainline.
p4raw-id: //depot/perlio@8048
-rw-r--r--Changes186
-rw-r--r--MANIFEST3
-rw-r--r--doop.c14
-rw-r--r--ext/Encode/Encode/iso8859-16.enc20
-rw-r--r--ext/POSIX/Makefile.PL7
-rw-r--r--ext/POSIX/hints/svr4.pl12
-rw-r--r--hints/svr4.sh16
-rw-r--r--lib/ExtUtils/MM_OS2.pm4
-rw-r--r--lib/ExtUtils/MM_VMS.pm21
-rw-r--r--lib/ExtUtils/Manifest.pm2
-rw-r--r--patchlevel.h2
-rw-r--r--pod/perlapi.pod33
-rw-r--r--pod/perlfunc.pod4
-rw-r--r--pp.h10
-rw-r--r--pp_hot.c36
-rw-r--r--t/README7
-rwxr-xr-xt/base/term.t7
-rwxr-xr-xt/lib/bigfltpm.t8
-rw-r--r--t/op/reverse.t33
-rw-r--r--t/op/utf8decode.t14
-rw-r--r--t/pragma/warn/utf84
-rw-r--r--utf8.c75
-rw-r--r--utf8.h22
23 files changed, 420 insertions, 120 deletions
diff --git a/Changes b/Changes
index 575e09f0ce..3cdab5877b 100644
--- a/Changes
+++ b/Changes
@@ -32,6 +32,190 @@ Version v5.7.1 Development release working toward v5.8
--------------
____________________________________________________________________________
+[ 8042] By: jhi on 2000/12/08 16:33:39
+ Log: Do not return the Unicode replacement character if UTF-8
+ decoding goes awry, it should be up to the caller to decide.
+ Branch: perl
+ ! pod/perlapi.pod utf8.c
+____________________________________________________________________________
+[ 8041] By: jhi on 2000/12/08 16:22:28
+ Log: metaconfig maintenance.
+ Branch: metaconfig
+ ! U/modified/stdchar.U
+ Branch: metaconfig/U/perl
+ + testsyml.U
+____________________________________________________________________________
+[ 8040] By: jhi on 2000/12/08 16:03:08
+ Log: Subject: [ID 20001207.009] Not OK: perl v5.7.0 +DEVEL8030 on os2-64int-ld 2.30
+ From: sthoenna@efn.org
+ Date: Thu, 7 Dec 2000 21:32:43 -0800 (PST)
+ Message-Id: <200012080532.eB85Wh729109@garcia.efn.org>
+ Branch: perl
+ ! Changes lib/ExtUtils/MM_OS2.pm
+____________________________________________________________________________
+[ 8039] By: jhi on 2000/12/08 15:57:11
+ Log: Subject: [PATCH] Re: ebcdic <-> ascii tables interjected in uv <-> utf8 considered harmful
+ From: Simon Cozens <simon@cozens.net>
+ Date: Fri, 8 Dec 2000 13:33:31 +0000
+ Message-ID: <20001208133331.A11535@deep-dark-truthful-mirror.perlhacker.org>
+
+ (The pp_hot part needed a rewrite.)
+ Branch: perl
+ ! doop.c pp_hot.c utf8.c
+____________________________________________________________________________
+[ 8038] By: jhi on 2000/12/08 15:25:08
+ Log: Subject: djSP
+ From: Michael Stevens <michael@etla.org>
+ Date: Wed, 6 Dec 2000 23:24:01 +0000
+ Message-ID: <20001206232400.A21381@firedrake.org>
+
+ Plus a note from Nick Ing-Simmons.
+ Branch: perl
+ ! pp.h
+____________________________________________________________________________
+[ 8037] By: jhi on 2000/12/08 15:18:35
+ Log: Filetype is text.
+ Branch: perl
+ ! t/op/utf8decode.t
+____________________________________________________________________________
+[ 8036] By: jhi on 2000/12/08 03:31:27
+ Log: Subject: [ID 20001207.004] [PATCH 5.6.0 and 5.7.x] add NCR MP-RAS support
+ From: Andy Dougherty <doughera@lafayette.edu>
+ Date: Thu, 7 Dec 2000 12:36:45 -0500 (EST)
+ Message-Id: <Pine.SOL.4.10.10012071235400.13162-100000@maxwell.phys.lafayette.edu>
+
+ Subject: Re: [ID 20001207.004] [PATCH 5.6.0 and 5.7.x] add NCR MP-RAS support
+ From: Andy Dougherty <doughera@lafayette.edu>
+ Date: Thu, 7 Dec 2000 13:56:10 -0500 (EST)
+ Message-ID: <Pine.SOL.4.10.10012071354420.6665-100000@maxwell.phys.lafayette.edu>
+ Branch: perl
+ + ext/POSIX/hints/svr4.pl
+ ! MANIFEST ext/POSIX/Makefile.PL hints/svr4.sh t/lib/bigfltpm.t
+____________________________________________________________________________
+[ 8035] By: jhi on 2000/12/08 03:26:01
+ Log: Subject: [ID 20001207.003] [PATCH] t/base/term.t causes abort if Config.pm not built
+ From: Andy Dougherty <doughera@lafayette.edu>
+ Date: Thu, 7 Dec 2000 10:50:47 -0500 (EST)
+ Message-Id: <Pine.SOL.4.10.10012071049400.7566-100000@maxwell.phys.lafayette.edu>
+ Branch: perl
+ ! t/README t/base/term.t
+____________________________________________________________________________
+[ 8034] By: jhi on 2000/12/08 03:21:59
+ Log: Subject: DOC PATCH 5.6.0: -s return value incompletely documented
+ From: mjd@plover.com
+ Date: 7 Dec 2000 21:04:20 -0000
+ Message-ID: <20001207210420.22282.qmail@plover.com>
+
+ Plus -z doc.
+ Branch: perl
+ ! pod/perlfunc.pod
+____________________________________________________________________________
+[ 8033] By: jhi on 2000/12/08 03:19:03
+ Log: Use the UTF8 macros a bit. They can't be used with abandon
+ everywhere because we do generate illegal UTF-8 in some situations.
+ This is of course naughty.
+ Branch: perl
+ ! pod/perlapi.pod utf8.c utf8.h
+____________________________________________________________________________
+[ 8032] By: jhi on 2000/12/08 03:00:09
+ Log: Out of sync?
+ Branch: perl
+ ! t/pragma/warn/utf8
+____________________________________________________________________________
+[ 8031] By: jhi on 2000/12/08 02:22:39
+ Log: Cnt spl.
+ Branch: perl
+ ! MANIFEST
+____________________________________________________________________________
+[ 8030] By: jhi on 2000/12/08 01:23:54
+ Log: Add test for reverse().
+ Branch: perl
+ + t/op/reverse.t
+ ! MANIFEST
+____________________________________________________________________________
+[ 8029] By: jhi on 2000/12/08 01:21:47
+ Log: Integrate perlio.
+ Branch: perl
+ !> ext/IO/IO.xs fakesdio.h global.sym lib/warnings.pm perlapi.c
+ !> perlio.c perlio.h perlsdio.h warnings.pl
+____________________________________________________________________________
+[ 8028] By: jhi on 2000/12/08 01:19:08
+ Log: Introduce macros for UTF8 decoding.
+ Branch: perl
+ ! t/op/utf8decode.t t/pragma/warn/utf8 utf8.c utf8.h
+____________________________________________________________________________
+[ 8027] By: nick on 2000/12/07 22:18:19
+ Log: Integrate mainline
+ Branch: perlio
+ !> embed.pl pod/perlapi.pod utf8.c
+____________________________________________________________________________
+[ 8026] By: nick on 2000/12/07 21:45:08
+ Log: Various oddities p4 diff -se showed up
+ Remove 'our' from warnings.pl
+ Branch: perlio
+ ! global.sym lib/warnings.pm pod/perlapi.pod warnings.pl
+____________________________________________________________________________
+[ 8025] By: nick on 2000/12/07 21:43:32
+ Log: Change PerlIO_(get|set)pos to take SV *
+ Should fix, OS/2, VMS, (sfio??)
+ Branch: perlio
+ ! ext/IO/IO.xs fakesdio.h perlapi.c perlio.c perlio.h perlsdio.h
+____________________________________________________________________________
+[ 8024] By: jhi on 2000/12/07 19:05:32
+ Log: Document utf8_to_uv() better.
+ Branch: perl
+ ! pod/perlapi.pod utf8.c
+____________________________________________________________________________
+[ 8023] By: jhi on 2000/12/07 18:23:47
+ Log: Document utf8_length(), utf8_distance(), and utf8_hop().
+ Branch: perl
+ ! embed.pl pod/perlapi.pod utf8.c
+____________________________________________________________________________
+[ 8022] By: jhi on 2000/12/07 04:13:51
+ Log: Integrate perlio.
+ Branch: perl
+ !> perlio.c t/io/dup.t win32/config.vc win32/config_H.bc
+ !> win32/config_H.gc win32/config_H.vc win32/config_h.PL
+ !> win32/makefile.mk
+____________________________________________________________________________
+[ 8021] By: nick on 2000/12/07 00:28:14
+ Log: Various attempts at MSVC debug - not sure what has
+ changed but works now.
+ Seems atexit() _may_ work for DLLs built with MSVC so don't
+ call cleanup that way.
+ Branch: perlio
+ ! perlio.c win32/makefile.mk
+____________________________________________________________________________
+[ 8020] By: nick on 2000/12/06 19:57:20
+ Log: Integrate mainline
+ Branch: perlio
+ +> lib/ExtUtils/MANIFEST.SKIP t/op/concat.t
+ !> (integrate 75 files)
+____________________________________________________________________________
+[ 8019] By: nick on 2000/12/06 19:28:21
+ Log: Add useperlio to config.vc
+ Turn off binmode in config_H.PL
+ Regen all the config_H.xx
+ Attempt to get debugging build with MSVC.
+ Branch: perlio
+ ! win32/config.vc win32/config_H.bc win32/config_H.gc
+ ! win32/config_H.vc win32/config_h.PL win32/makefile.mk
+____________________________________________________________________________
+[ 8018] By: nick on 2000/12/06 19:21:57
+ Log: Test various dup/external program options on all platforms.
+ Branch: perlio
+ ! t/io/dup.t
+____________________________________________________________________________
+[ 8017] By: nick on 2000/12/06 19:20:47
+ Log: Fix harness to be less picky
+ Branch: perlio
+ ! lib/Test/Harness.pm
+____________________________________________________________________________
+[ 8016] By: jhi on 2000/12/06 16:45:12
+ Log: Update Changes.
+ Branch: perl
+ ! Changes patchlevel.h
+____________________________________________________________________________
[ 8015] By: jhi on 2000/12/06 16:41:03
Log: Test \x{...} with ord().
Branch: perl
@@ -269,7 +453,7 @@ ____________________________________________________________________________
Date: Mon, 04 Dec 2000 22:21:59 -0800
Message-ID: <HmIL6gzkgepS092yn@efn.org>
- Harness shouldn't ignore responses like "ok 3\r\n\n".
+ Harness shouldn't ignore responses like "ok 3\r\r\n".
Branch: perl
! lib/Test/Harness.pm
____________________________________________________________________________
diff --git a/MANIFEST b/MANIFEST
index 28454065a4..2f334275be 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -247,6 +247,7 @@ ext/Encode/Encode/iso8859-10.enc Encoding tables
ext/Encode/Encode/iso8859-13.enc Encoding tables
ext/Encode/Encode/iso8859-14.enc Encoding tables
ext/Encode/Encode/iso8859-15.enc Encoding tables
+ext/Encode/Encode/iso8859-16.enc Encoding tables
ext/Encode/Encode/iso8859-2.enc Encoding tables
ext/Encode/Encode/iso8859-3.enc Encoding tables
ext/Encode/Encode/iso8859-4.enc Encoding tables
@@ -364,6 +365,7 @@ ext/POSIX/hints/netbsd.pl Hint for POSIX for named architecture
ext/POSIX/hints/next_3.pl Hint for POSIX for named architecture
ext/POSIX/hints/openbsd.pl Hint for POSIX for named architecture
ext/POSIX/hints/sunos_4.pl Hint for POSIX for named architecture
+ext/POSIX/hints/svr4.pl Hint for POSIX for named architecture
ext/POSIX/typemap POSIX extension interface types
ext/SDBM_File/Makefile.PL SDBM extension makefile writer
ext/SDBM_File/SDBM_File.pm SDBM extension Perl module
@@ -1557,6 +1559,7 @@ t/op/regexp.t See if regular expressions work
t/op/regexp_noamp.t See if regular expressions work with optimizations
t/op/regmesg.t See if one can get regular expression errors
t/op/repeat.t See if x operator works
+t/op/reverse.t See if reverse operator works
t/op/runlevel.t See if die() works from perl_call_*()
t/op/sleep.t See if sleep works
t/op/sort.t See if sort works
diff --git a/doop.c b/doop.c
index 9dbee678ef..7acad602c4 100644
--- a/doop.c
+++ b/doop.c
@@ -79,10 +79,7 @@ S_do_trans_simple(pTHX_ SV *sv)
c = utf8_to_uv(s, send - s, &ulen, 0);
if (c < 0x100 && (ch = tbl[(short)c]) >= 0) {
matches++;
- if (ch < 0x80)
- *d++ = ch;
- else
- d = uv_to_utf8(d,ch);
+ d = uv_to_utf8(d,ch);
s += ulen;
}
else { /* No match -> copy */
@@ -192,12 +189,9 @@ S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */
matches--;
}
- if (ch >= 0) {
- if (hasutf)
- d = uv_to_utf8(d, ch);
- else
- *d++ = ch;
- }
+ if (ch >= 0)
+ d = uv_to_utf8(d, ch);
+
matches++;
s += hasutf && *s & 0x80 ? UNISKIP(*s) : 1;
diff --git a/ext/Encode/Encode/iso8859-16.enc b/ext/Encode/Encode/iso8859-16.enc
new file mode 100644
index 0000000000..1936b97b6f
--- /dev/null
+++ b/ext/Encode/Encode/iso8859-16.enc
@@ -0,0 +1,20 @@
+# Encoding file: iso8859-16, single-byte
+S
+003F 0 1
+00
+0000000100020003000400050006000700080009000A000B000C000D000E000F
+0010001100120013001400150016001700180019001A001B001C001D001E001F
+0020002100220023002400250026002700280029002A002B002C002D002E002F
+0030003100320033003400350036003700380039003A003B003C003D003E003F
+0040004100420043004400450046004700480049004A004B004C004D004E004F
+0050005100520053005400550056005700580059005A005B005C005D005E005F
+0060006100620063006400650066006700680069006A006B006C006D006E006F
+0070007100720073007400750076007700780079007A007B007C007D007E007F
+0080008100820083008400850086008700880089008A008B008C008D008E008F
+0090009100920093009400950096009700980099009A009B009C009D009E009F
+00A001040105014120AC00AB016000A7016100A90218201E017900AD017A017B
+00B000B1010C0142017D201D00B600B7017E010D021900BB015201530178017C
+00C000C100C2010200C4010600C600C700C800C900CA00CB00CC00CD00CE00CF
+0110014300D200D300D4015000D6015A017000D900DA00DB00DC0118021A00DF
+00E000E100E2010300E4010700E600E700E800E900EA00EB00EC00ED00EE00EF
+0111014400F200F300F4015100F6015B017100F900FA00FB00FC0119021B00FF
diff --git a/ext/POSIX/Makefile.PL b/ext/POSIX/Makefile.PL
index 55c5c1fbf3..73bb02dddb 100644
--- a/ext/POSIX/Makefile.PL
+++ b/ext/POSIX/Makefile.PL
@@ -2,12 +2,7 @@ use ExtUtils::MakeMaker;
use Config;
my @libs;
if ($^O ne 'MSWin32') {
- if ($Config{archname} =~ /RM\d\d\d-svr4/) {
- @libs = ('LIBS' => ["-lm -lc -lposix -lcposix"]);
- }
- else {
- @libs = ('LIBS' => ["-lm -lposix -lcposix"]);
- }
+ @libs = ('LIBS' => ["-lm -lposix -lcposix"]);
}
WriteMakefile(
NAME => 'POSIX',
diff --git a/ext/POSIX/hints/svr4.pl b/ext/POSIX/hints/svr4.pl
new file mode 100644
index 0000000000..07f2cb0412
--- /dev/null
+++ b/ext/POSIX/hints/svr4.pl
@@ -0,0 +1,12 @@
+# NCR MP-RAS. Thanks to Doug Hendricks for this info.
+# Configure sets osname=svr4.0, osvers=3.0, archname='3441-svr4.0'
+# This system needs to explicitly link against -lmw to pull in some
+# symbols such as _mwoflocheckl and possibly others.
+# A. Dougherty Thu Dec 7 11:55:28 EST 2000
+if ($Config{'archname'} =~ /3441-svr4/) {
+ $self->{LIBS} = ['-lm -posix -lcposix -lmw'];
+}
+# Not sure what OS this one is.
+elsif ($Config{archname} =~ /RM\d\d\d-svr4/) {
+ $self->{LIBS} = ['-lm -lc -lposix -lcposix'];
+}
diff --git a/hints/svr4.sh b/hints/svr4.sh
index 8109b39752..69af6fda2f 100644
--- a/hints/svr4.sh
+++ b/hints/svr4.sh
@@ -135,6 +135,22 @@ case "`uname -sm`" in
;;
esac
+# NCR MP-RAS. Thanks to Doug Hendricks for this info.
+# The output of uname -a looks like this
+# foo foo 4.0 3.0 3441 Pentium III(TM)-ISA/PCI
+# Configure sets osname=svr4.0, osvers=3.0, archname='3441-svr4.0'
+case "$myuname" in
+*3441*)
+ # With the NCR High Performance C Compiler R3.0c, miniperl fails
+ # t/op/regexp.t test 461 unless we compile with optimizie=-g.
+ # The whole O/S is being phased out, so more detailed probing
+ # is probably not warranted.
+ case "$optimize" in
+ '') optimize='-g' ;;
+ esac
+ ;;
+esac
+
# Configure may fail to find lstat() since it's a static/inline function
# in <sys/stat.h> on Unisys U6000 SVR4, UnixWare 2.x, and possibly other
# SVR4 derivatives. (Though UnixWare has it in /usr/ccs/lib/libc.so.)
diff --git a/lib/ExtUtils/MM_OS2.pm b/lib/ExtUtils/MM_OS2.pm
index 501832bfec..c0c52402f6 100644
--- a/lib/ExtUtils/MM_OS2.pm
+++ b/lib/ExtUtils/MM_OS2.pm
@@ -38,7 +38,7 @@ $self->{BASEEXT}.def: Makefile.PL
', "DL_VARS" => ', neatvalue($vars), ');\'
');
}
- if (%{$self->{IMPORTS}}) {
+ if ($self->{IMPORTS} && %{$self->{IMPORTS}}) {
# Make import files (needed for static build)
-d 'tmp_imp' or mkdir 'tmp_imp', 0777 or die "Can't mkdir tmp_imp";
open IMP, '>tmpimp.imp' or die "Can't open tmpimp.imp";
@@ -61,7 +61,7 @@ $self->{BASEEXT}.def: Makefile.PL
sub static_lib {
my($self) = @_;
my $old = $self->ExtUtils::MM_Unix::static_lib();
- return $old unless %{$self->{IMPORTS}};
+ return $old unless $self->{IMPORTS} && %{$self->{IMPORTS}};
my @chunks = split /\n{2,}/, $old;
shift @chunks unless length $chunks[0]; # Empty lines at the start
diff --git a/lib/ExtUtils/MM_VMS.pm b/lib/ExtUtils/MM_VMS.pm
index 3485786bf8..cc63d2f67a 100644
--- a/lib/ExtUtils/MM_VMS.pm
+++ b/lib/ExtUtils/MM_VMS.pm
@@ -126,7 +126,7 @@ sub ExtUtils::MM_VMS::makeaperl;
sub ExtUtils::MM_VMS::ext;
sub ExtUtils::MM_VMS::nicetext;
-#use SelfLoader;
+our $AUTOLOAD;
sub AUTOLOAD {
my $code;
if (defined fileno(DATA)) {
@@ -562,21 +562,21 @@ MM_VMS_REVISION = $ExtUtils::MM_VMS::Revision
# DLBASE = Basename part of dynamic library. May be just equal BASEEXT.
];
- for $tmp (qw/
+ for my $tmp (qw/
FULLEXT VERSION_FROM OBJECT LDFROM
/ ) {
next unless defined $self->{$tmp};
push @m, "$tmp = ",$self->fixpath($self->{$tmp},0),"\n";
}
- for $tmp (qw/
+ for my $tmp (qw/
BASEEXT PARENT_NAME DLBASE INC DEFINE LINKTYPE
/ ) {
next unless defined $self->{$tmp};
push @m, "$tmp = $self->{$tmp}\n";
}
- for $tmp (qw/ XS MAN1PODS MAN3PODS PM /) {
+ for my $tmp (qw/ XS MAN1PODS MAN3PODS PM /) {
next unless defined $self->{$tmp};
my(%tmp,$key);
for $key (keys %{$self->{$tmp}}) {
@@ -585,7 +585,7 @@ MM_VMS_REVISION = $ExtUtils::MM_VMS::Revision
$self->{$tmp} = \%tmp;
}
- for $tmp (qw/ C O_FILES H /) {
+ for my $tmp (qw/ C O_FILES H /) {
next unless defined $self->{$tmp};
my(@tmp,$val);
for $val (@{$self->{$tmp}}) {
@@ -606,7 +606,7 @@ MAN3PODS = ',$self->wraplist(sort keys %{$self->{MAN3PODS}}),'
';
- for $tmp (qw/
+ for my $tmp (qw/
INST_MAN1DIR INSTALLMAN1DIR MAN1EXT INST_MAN3DIR INSTALLMAN3DIR MAN3EXT
/) {
next unless defined $self->{$tmp};
@@ -705,7 +705,7 @@ sub cflags {
# conflate the ones from $Config{'ccflags'} and $self->{DEFINE}
# ($self->{DEFINE} has already been VMSified in constants() above)
if ($self->{DEFINE}) { $quals .= $self->{DEFINE}; }
- for $type (qw(Def Undef)) {
+ for my $type (qw(Def Undef)) {
my(@terms);
while ($quals =~ m:/${type}i?n?e?=([^/]+):ig) {
my $term = $1;
@@ -1419,7 +1419,7 @@ sub processPL {
my $list = ref($self->{PL_FILES}->{$plfile})
? $self->{PL_FILES}->{$plfile}
: [$self->{PL_FILES}->{$plfile}];
- foreach $target (@$list) {
+ foreach my $target (@$list) {
my $vmsplfile = vmsify($plfile);
my $vmsfile = vmsify($target);
push @m, "
@@ -2051,6 +2051,8 @@ Consequently, it hasn't really been tested, and may well be incomplete.
=cut
+our %olbs;
+
sub makeaperl {
my($self, %attribs) = @_;
my($makefilename, $searchdirs, $static, $extra, $perlinc, $target, $tmp, $libperl) =
@@ -2093,7 +2095,7 @@ $(MAP_TARGET) :: $(MAKE_APERL_FILE)
$linkcmd =~ s/\s+/ /g;
# Which *.olb files could we make use of...
- local(%olbs);
+ local(%olbs); # XXX can this be lexical?
$olbs{$self->{INST_ARCHAUTODIR}} = "$self->{BASEEXT}\$(LIB_EXT)";
require File::Find;
File::Find::find(sub {
@@ -2190,6 +2192,7 @@ $(MAP_TARGET) :: $(MAKE_APERL_FILE)
push @optlibs, @$extra;
$target = "Perl$Config{'exe_ext'}" unless $target;
+ my $shrtarget;
($shrtarget,$targdir) = fileparse($target);
$shrtarget =~ s/^([^.]*)/$1Shr/;
$shrtarget = $targdir . $shrtarget;
diff --git a/lib/ExtUtils/Manifest.pm b/lib/ExtUtils/Manifest.pm
index 4b98185e93..46ace64e61 100644
--- a/lib/ExtUtils/Manifest.pm
+++ b/lib/ExtUtils/Manifest.pm
@@ -430,6 +430,6 @@ L<ExtUtils::MakeMaker> which has handy targets for most of the functionality.
=head1 AUTHOR
-Andreas Koenig <F<koenig@franz.ww.TU-Berlin.DE>>
+Andreas Koenig <F<andreas.koenig@anima.de>>
=cut
diff --git a/patchlevel.h b/patchlevel.h
index acee45547b..ca39c4e892 100644
--- a/patchlevel.h
+++ b/patchlevel.h
@@ -70,7 +70,7 @@
#if !defined(PERL_PATCHLEVEL_H_IMPLICIT) && !defined(LOCAL_PATCH_COUNT)
static char *local_patches[] = {
NULL
- ,"DEVEL8015"
+ ,"DEVEL8042"
,NULL
};
diff --git a/pod/perlapi.pod b/pod/perlapi.pod
index 50ac40c650..fa18e661b6 100644
--- a/pod/perlapi.pod
+++ b/pod/perlapi.pod
@@ -2368,19 +2368,19 @@ false, defined or undefined. Does not handle 'get' magic.
=for hackers
Found in file sv.h
-=item svtype
+=item SvTYPE
-An enum of flags for Perl types. These are found in the file B<sv.h>
-in the C<svtype> enum. Test these flags with the C<SvTYPE> macro.
+Returns the type of the SV. See C<svtype>.
+
+ svtype SvTYPE(SV* sv)
=for hackers
Found in file sv.h
-=item SvTYPE
-
-Returns the type of the SV. See C<svtype>.
+=item svtype
- svtype SvTYPE(SV* sv)
+An enum of flags for Perl types. These are found in the file B<sv.h>
+in the C<svtype> enum. Test these flags with the C<SvTYPE> macro.
=for hackers
Found in file sv.h
@@ -3234,11 +3234,12 @@ Found in file utf8.c
=item utf8_hop
-Move the C<s> pointing to UTF-8 data by C<off> characters, either forward
-or backward.
+Return the UTF-8 pointer C<s> displaced by C<off> characters, either
+forward or backward.
WARNING: do not use the following unless you *know* C<off> is within
-the UTF-8 buffer pointed to by C<s>.
+the UTF-8 data pointed to by C<s> *and* that on entry C<s> is aligned
+on the first byte of character or just after the last byte of a character.
U8* utf8_hop(U8 *s, I32 off)
@@ -3278,11 +3279,13 @@ and the pointer C<s> will be advanced to the end of the character.
If C<s> does not point to a well-formed UTF8 character, the behaviour
is dependent on the value of C<flags>: if it contains UTF8_CHECK_ONLY,
it is assumed that the caller will raise a warning, and this function
-will set C<retlen> to C<-1> and return zero. If the C<flags> does not
-contain UTF8_CHECK_ONLY, the UNICODE_REPLACEMENT_CHARACTER (0xFFFD)
-will be returned, and C<retlen> will be set to the expected length of
-the UTF-8 character in bytes. The C<flags> can also contain various
-flags to allow deviations from the strict UTF-8 encoding (see F<utf8.h>).
+will silently just set C<retlen> to C<-1> and return zero. If the
+C<flags> does not contain UTF8_CHECK_ONLY, warnings about
+malformations will be given, C<retlen> will be set to the expected
+length of the UTF-8 character in bytes, and zero will be returned.
+
+The C<flags> can also contain various flags to allow deviations from
+the strict UTF-8 encoding (see F<utf8.h>).
U8* s utf8_to_uv(STRLEN curlen, STRLEN *retlen, U32 flags)
diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod
index f2bf51e18a..06d3b1da2d 100644
--- a/pod/perlfunc.pod
+++ b/pod/perlfunc.pod
@@ -274,8 +274,8 @@ X<-S>X<-b>X<-c>X<-t>X<-u>X<-g>X<-k>X<-T>X<-B>X<-M>X<-A>X<-C>
-O File is owned by real uid.
-e File exists.
- -z File has zero size.
- -s File has nonzero size (returns size).
+ -z File has zero size (is empty).
+ -s File has nonzero size (returns size in bytes).
-f File is a plain file.
-d File is a directory.
diff --git a/pp.h b/pp.h
index 5e531dada6..ccd372dafe 100644
--- a/pp.h
+++ b/pp.h
@@ -33,6 +33,13 @@ L<perlcall>.
Declares a local copy of perl's stack pointer for the XSUB, available via
the C<SP> macro. See C<SP>.
+=for apidoc ms||djSP
+
+Declare Just C<SP>. This is actually identical to C<dSP>, and declares
+a local copy of perl's stack pointer, available via the C<SP> macro.
+See C<SP>. (Available for backward source code compatibility with the
+old (Perl 5.005) thread model.)
+
=for apidoc Ams||dMARK
Declare a stack marker variable, C<mark>, for the XSUB. See C<MARK> and
C<dORIGMARK>.
@@ -46,8 +53,7 @@ The original stack mark for the XSUB. See C<dORIGMARK>.
=for apidoc Ams||SPAGAIN
Refetch the stack pointer. Used after a callback. See L<perlcall>.
-=cut
-*/
+=cut */
#define SP sp
#define MARK mark
diff --git a/pp_hot.c b/pp_hot.c
index 830d56ed03..4020f200a2 100644
--- a/pp_hot.c
+++ b/pp_hot.c
@@ -142,19 +142,19 @@ PP(pp_concat)
dPOPTOPssrl;
STRLEN len;
U8 *s;
- bool left_utf;
- bool right_utf;
+ bool left_utf8;
+ bool right_utf8;
if (TARG == right && SvGMAGICAL(right))
mg_get(right);
if (SvGMAGICAL(left))
mg_get(left);
- left_utf = DO_UTF8(left);
- right_utf = DO_UTF8(right);
+ left_utf8 = DO_UTF8(left);
+ right_utf8 = DO_UTF8(right);
- if (left_utf != right_utf) {
- if (TARG == right && !right_utf) {
+ if (left_utf8 != right_utf8) {
+ if (TARG == right && !right_utf8) {
sv_utf8_upgrade(TARG); /* Now straight binary copy */
SvUTF8_on(TARG);
}
@@ -163,7 +163,7 @@ PP(pp_concat)
U8 *l, *c, *olds = NULL;
STRLEN targlen;
s = (U8*)SvPV(right,len);
- right_utf |= DO_UTF8(right);
+ right_utf8 |= DO_UTF8(right);
if (TARG == right) {
/* Take a copy since we're about to overwrite TARG */
olds = s = (U8*)savepvn((char*)s, len);
@@ -175,28 +175,28 @@ PP(pp_concat)
sv_setpv(left, ""); /* Suppress warning. */
}
l = (U8*)SvPV(left, targlen);
- left_utf |= DO_UTF8(left);
+ left_utf8 |= DO_UTF8(left);
if (TARG != left)
sv_setpvn(TARG, (char*)l, targlen);
- if (!left_utf)
+ if (!left_utf8)
sv_utf8_upgrade(TARG);
/* Extend TARG to length of right (s) */
targlen = SvCUR(TARG) + len;
- if (!right_utf) {
+ if (!right_utf8) {
/* plus one for each hi-byte char if we have to upgrade */
for (c = s; c < s + len; c++) {
- if (*c & 0x80)
+ if (UTF8_IS_CONTINUED(*c))
targlen++;
}
}
SvGROW(TARG, targlen+1);
/* And now copy, maybe upgrading right to UTF8 on the fly */
- for (c = (U8*)SvEND(TARG); len--; s++) {
- if (*s & 0x80 && !right_utf)
- c = uv_to_utf8(c, *s);
- else
- *c++ = *s;
- }
+ if (right_utf8)
+ Copy(s, SvEND(TARG), len, U8);
+ else {
+ for (c = (U8*)SvEND(TARG); len--; s++)
+ c = uv_to_utf8(c, *s);
+ }
SvCUR_set(TARG, targlen);
*SvEND(TARG) = '\0';
SvUTF8_on(TARG);
@@ -235,7 +235,7 @@ PP(pp_concat)
}
else
sv_setpvn(TARG, (char *)s, len); /* suppress warning */
- if (left_utf)
+ if (left_utf8)
SvUTF8_on(TARG);
SETTARG;
RETURN;
diff --git a/t/README b/t/README
index 0953026607..7cff553d5b 100644
--- a/t/README
+++ b/t/README
@@ -1,4 +1,4 @@
-This is the perl test library. To run all the tests, just type 'TEST'.
+This is the perl test library. To run all the tests, just type './TEST'.
To add new tests, just look at the current tests and do likewise.
@@ -14,3 +14,8 @@ will fail, you may want to use Test::Harness thusly:
This method pinpoints failed tests automatically.
If you come up with new tests, please send them to perlbug@perl.org.
+
+Tests in the base/ directory ought to be runnable with plain miniperl.
+That is, they should not require Config.pm nor should they require any
+extensions to have been built. TEST will abort if any tests in the
+base/ directory fail.
diff --git a/t/base/term.t b/t/base/term.t
index e96313dec5..49df11fa31 100755
--- a/t/base/term.t
+++ b/t/base/term.t
@@ -4,19 +4,16 @@
BEGIN {
chdir 't' if -d 't';
- @INC = '../lib';
}
-use Config;
-
print "1..7\n";
# check "" interpretation
$x = "\n";
# 10 is ASCII/Iso Latin, 21 is EBCDIC.
-if ($x eq chr(10) ||
- ($Config{ebcdic} eq 'define' && $x eq chr(21))) {print "ok 1\n";}
+if ($x eq chr(10)) { print "ok 1\n";}
+elsif ($x eq chr(21)) { print "ok 1 # EBCDIC\n"; }
else {print "not ok 1\n";}
# check `` processing
diff --git a/t/lib/bigfltpm.t b/t/lib/bigfltpm.t
index a8fb19209a..b335d13016 100755
--- a/t/lib/bigfltpm.t
+++ b/t/lib/bigfltpm.t
@@ -449,10 +449,10 @@ $Math::BigFloat::div_scale = 20
$Math::BigFloat::div_scale = 40
&fsqrt
+0:0
--1:/^(?i:0|\?|NaNQ?|-n\.an)$
--2:/^(?i:0|\?|NaNQ?|-n\.an)$
--16:/^(?i:0|\?|NaNQ?|-n\.an)$
--123.456:/^(?i:0|\?|NaNQ?|-n\.an)$
+-1:/^(?i:0|\?|-?N\.?aNQ?)$
+-2:/^(?i:0|\?|-?N\.?aNQ?)$
+-16:/^(?i:0|\?|-?N\.?aNQ?)$
+-123.456:/^(?i:0|\?|-?N\.?aNQ?)$
+1:1.
+1.44:1.2
+2:1.41421356237309504880168872420969807857
diff --git a/t/op/reverse.t b/t/op/reverse.t
new file mode 100644
index 0000000000..bb7b9b77fe
--- /dev/null
+++ b/t/op/reverse.t
@@ -0,0 +1,33 @@
+#!./perl
+
+BEGIN {
+ chdir 't' if -d 't';
+ @INC = '../lib';
+}
+
+print "1..4\n";
+
+print "not " unless reverse("abc") eq "cba";
+print "ok 1\n";
+
+$_ = "foobar";
+print "not " unless reverse() eq "raboof";
+print "ok 2\n";
+
+{
+ my @a = ("foo", "bar");
+ my @b = reverse @a;
+
+ print "not " unless $b[0] eq $a[1] && $b[1] eq $a[0];
+ print "ok 3\n";
+}
+
+{
+ # Unicode.
+
+ my $a = "\x{263A}\x{263A}x\x{263A}y\x{263A}";
+ my $b = scalar reverse($a);
+ my $c = scalar reverse($b);
+ print "not " unless $a eq $c;
+ print "ok 4\n";
+}
diff --git a/t/op/utf8decode.t b/t/op/utf8decode.t
index c631c0a7a9..ac42b85577 100644
--- a/t/op/utf8decode.t
+++ b/t/op/utf8decode.t
@@ -53,11 +53,11 @@ my @MK = split(/\n/, <<__EOMK__);
3.1.8 n "" - 7 80:bf:80:bf:80:bf:80 - unexpected continuation byte 0x80
3.1.9 n "" - 64 80:81:82:83:84:85:86:87:88:89:8a:8b:8c:8d:8e:8f:90:91:92:93:94:95:96:97:98:99:9a:9b:9c:9d:9e:9f:a0:a1:a2:a3:a4:a5:a6:a7:a8:a9:aa:ab:ac:ad:ae:af:b0:b1:b2:b3:b4:b5:b6:b7:b8:b9:ba:bb:bc:bd:be:bf - unexpected continuation byte 0x80
3.2 Lonely start characters
-3.2.1 n " " - 64 c0:20:c1:20:c2:20:c3:20:c4:20:c5:20:c6:20:c7:20:c8:20:c9:20:ca:20:cb:20:cc:20:cd:20:ce:20:cf:20:d0:20:d1:20:d2:20:d3:20:d4:20:d5:20:d6:20:d7:20:d8:20:d9:20:da:20:db:20:dc:20:dd:20:de:20:df:20 - unexpected non-continuation byte 0x20 after byte 0xc0
-3.2.2 n " " - 32 e0:20:e1:20:e2:20:e3:20:e4:20:e5:20:e6:20:e7:20:e8:20:e9:20:ea:20:eb:20:ec:20:ed:20:ee:20:ef:20 - unexpected non-continuation byte 0x20 after byte 0xe0
-3.2.3 n " " - 16 f0:20:f1:20:f2:20:f3:20:f4:20:f5:20:f6:20:f7:20 - unexpected non-continuation byte 0x20 after byte 0xf0
-3.2.4 n " " - 8 f8:20:f9:20:fa:20:fb:20 - unexpected non-continuation byte 0x20 after byte 0xf8
-3.2.5 n " " - 4 fc:20:fd:20 - unexpected non-continuation byte 0x20 after byte 0xfc
+3.2.1 n " " - 64 c0:20:c1:20:c2:20:c3:20:c4:20:c5:20:c6:20:c7:20:c8:20:c9:20:ca:20:cb:20:cc:20:cd:20:ce:20:cf:20:d0:20:d1:20:d2:20:d3:20:d4:20:d5:20:d6:20:d7:20:d8:20:d9:20:da:20:db:20:dc:20:dd:20:de:20:df:20 - unexpected non-continuation byte 0x20 after start byte 0xc0
+3.2.2 n " " - 32 e0:20:e1:20:e2:20:e3:20:e4:20:e5:20:e6:20:e7:20:e8:20:e9:20:ea:20:eb:20:ec:20:ed:20:ee:20:ef:20 - unexpected non-continuation byte 0x20 after start byte 0xe0
+3.2.3 n " " - 16 f0:20:f1:20:f2:20:f3:20:f4:20:f5:20:f6:20:f7:20 - unexpected non-continuation byte 0x20 after start byte 0xf0
+3.2.4 n " " - 8 f8:20:f9:20:fa:20:fb:20 - unexpected non-continuation byte 0x20 after start byte 0xf8
+3.2.5 n " " - 4 fc:20:fd:20 - unexpected non-continuation byte 0x20 after start byte 0xfc
3.3 Sequences with last continuation byte missing
3.3.1 n "" - 1 c0 - 1 byte, need 2
3.3.2 n "" - 2 e0:80 - 2 bytes, need 3
@@ -70,7 +70,7 @@ my @MK = split(/\n/, <<__EOMK__);
3.3.9 n "" - 4 fb:bf:bf:bf - 4 bytes, need 5
3.3.10 n "" - 5 fd:bf:bf:bf:bf - 5 bytes, need 6
3.4 Concatenation of incomplete sequences
-3.4.1 n "" - 30 c0:e0:80:f0:80:80:f8:80:80:80:fc:80:80:80:80:df:ef:bf:f7:bf:bf:fb:bf:bf:bf:fd:bf:bf:bf:bf - unexpected continuation byte 0xe0
+3.4.1 n "" - 30 c0:e0:80:f0:80:80:f8:80:80:80:fc:80:80:80:80:df:ef:bf:f7:bf:bf:fb:bf:bf:bf:fd:bf:bf:bf:bf - unexpected non-continuation byte 0xe0 after start byte 0xc0
3.5 Impossible bytes
3.5.1 n "" - 1 fe - byte 0xfe
3.5.2 n "" - 1 ff - byte 0xff
@@ -125,7 +125,7 @@ __EOMK__
local $SIG{__WARN__} =
sub {
- # print "# $id: @_";
+ print "# $id: @_";
$WARNCNT++;
$WARNMSG = "@_";
};
diff --git a/t/pragma/warn/utf8 b/t/pragma/warn/utf8
index adc10c645e..9a7dbafdee 100644
--- a/t/pragma/warn/utf8
+++ b/t/pragma/warn/utf8
@@ -30,6 +30,6 @@ my $a = "snstorm" ;
my $a = "snstorm";
}
EXPECT
-Malformed UTF-8 character (unexpected non-continuation byte 0x73 after byte 0xf8) at - line 9.
-Malformed UTF-8 character (unexpected non-continuation byte 0x73 after byte 0xf8) at - line 14.
+Malformed UTF-8 character (unexpected non-continuation byte 0x73 after start byte 0xf8) at - line 9.
+Malformed UTF-8 character (unexpected non-continuation byte 0x73 after start byte 0xf8) at - line 14.
########
diff --git a/utf8.c b/utf8.c
index 9ef7ce108b..8d812ab3e5 100644
--- a/utf8.c
+++ b/utf8.c
@@ -137,7 +137,7 @@ Perl_is_utf8_char(pTHX_ U8 *s)
while (slen--) {
if ((*s & 0xc0) != 0x80)
return 0;
- uv = (uv << 6) | (*s & 0x3f);
+ uv = UTF8_ACCUMULATE(uv, *s);
if (uv < ouv)
return 0;
ouv = uv;
@@ -189,11 +189,13 @@ and the pointer C<s> will be advanced to the end of the character.
If C<s> does not point to a well-formed UTF8 character, the behaviour
is dependent on the value of C<flags>: if it contains UTF8_CHECK_ONLY,
it is assumed that the caller will raise a warning, and this function
-will set C<retlen> to C<-1> and return zero. If the C<flags> does not
-contain UTF8_CHECK_ONLY, the UNICODE_REPLACEMENT_CHARACTER (0xFFFD)
-will be returned, and C<retlen> will be set to the expected length of
-the UTF-8 character in bytes. The C<flags> can also contain various
-flags to allow deviations from the strict UTF-8 encoding (see F<utf8.h>).
+will silently just set C<retlen> to C<-1> and return zero. If the
+C<flags> does not contain UTF8_CHECK_ONLY, warnings about
+malformations will be given, C<retlen> will be set to the expected
+length of the UTF-8 character in bytes, and zero will be returned.
+
+The C<flags> can also contain various flags to allow deviations from
+the strict UTF-8 encoding (see F<utf8.h>).
=cut */
@@ -216,13 +218,13 @@ Perl_utf8_to_uv(pTHX_ U8* s, STRLEN curlen, STRLEN* retlen, U32 flags)
goto malformed;
}
- if (uv <= 0x7f) { /* Pure ASCII. */
+ if (UTF8_IS_ASCII(uv)) {
if (retlen)
*retlen = 1;
return *s;
}
- if ((uv >= 0x80 && uv <= 0xbf) &&
+ if (UTF8_IS_CONTINUATION(uv) &&
!(flags & UTF8_ALLOW_CONTINUATION)) {
if (dowarn)
Perl_warner(aTHX_ WARN_UTF8,
@@ -231,11 +233,11 @@ Perl_utf8_to_uv(pTHX_ U8* s, STRLEN curlen, STRLEN* retlen, U32 flags)
goto malformed;
}
- if ((uv >= 0xc0 && uv <= 0xfd && curlen > 1 && s[1] < 0x80) &&
+ if (UTF8_IS_START(uv) && curlen > 1 && !UTF8_IS_CONTINUATION(s[1]) &&
!(flags & UTF8_ALLOW_NON_CONTINUATION)) {
if (dowarn)
Perl_warner(aTHX_ WARN_UTF8,
- "Malformed UTF-8 character (unexpected non-continuation byte 0x%02"UVxf" after byte 0x%02"UVxf")",
+ "Malformed UTF-8 character (unexpected non-continuation byte 0x%02"UVxf" after start byte 0x%02"UVxf")",
(UV)s[1], uv);
goto malformed;
}
@@ -276,15 +278,16 @@ Perl_utf8_to_uv(pTHX_ U8* s, STRLEN curlen, STRLEN* retlen, U32 flags)
ouv = uv;
while (len--) {
- if ((*s & 0xc0) != 0x80) {
+ if (!UTF8_IS_CONTINUATION(*s) &&
+ !(flags & UTF8_ALLOW_NON_CONTINUATION)) {
if (dowarn)
Perl_warner(aTHX_ WARN_UTF8,
- "Malformed UTF-8 character (unexpected continuation byte 0x%02x)",
+ "Malformed UTF-8 character (unexpected non-continuation byte 0x%02x)",
*s);
goto malformed;
}
else
- uv = (uv << 6) | (*s & 0x3f);
+ uv = UTF8_ACCUMULATE(uv, *s);
if (uv < ouv) {
/* This cannot be allowed. */
if (dowarn)
@@ -297,14 +300,14 @@ Perl_utf8_to_uv(pTHX_ U8* s, STRLEN curlen, STRLEN* retlen, U32 flags)
ouv = uv;
}
- if ((uv >= 0xd800 && uv <= 0xdfff) &&
+ if (UNICODE_IS_SURROGATE(uv) &&
!(flags & UTF8_ALLOW_SURROGATE)) {
if (dowarn)
Perl_warner(aTHX_ WARN_UTF8,
"Malformed UTF-8 character (UTF-16 surrogate 0x%04"UVxf")",
uv);
goto malformed;
- } else if ((uv == 0xfffe) &&
+ } else if (UNICODE_IS_BYTE_ORDER_MARK(uv) &&
!(flags & UTF8_ALLOW_BOM)) {
if (dowarn)
Perl_warner(aTHX_ WARN_UTF8,
@@ -318,7 +321,7 @@ Perl_utf8_to_uv(pTHX_ U8* s, STRLEN curlen, STRLEN* retlen, U32 flags)
"Malformed UTF-8 character (%d byte%s, need %d)",
expectlen, expectlen == 1 ? "": "s", UNISKIP(uv));
goto malformed;
- } else if ((uv == 0xffff) &&
+ } else if (UNICODE_IS_ILLEGAL(uv) &&
!(flags & UTF8_ALLOW_FFFF)) {
if (dowarn)
Perl_warner(aTHX_ WARN_UTF8,
@@ -338,9 +341,9 @@ malformed:
}
if (retlen)
- *retlen = expectlen;
+ *retlen = expectlen ? expectlen : len;
- return UNICODE_REPLACEMENT_CHARACTER;
+ return 0;
}
/*
@@ -378,6 +381,10 @@ Perl_utf8_length(pTHX_ U8* s, U8* e)
{
STRLEN len = 0;
+ /* Note: cannot use UTF8_IS_...() too eagerly here since e.g.
+ * the bitops (especially ~) can create illegal UTF-8.
+ * In other words: in Perl UTF-8 is not just for Unicode. */
+
if (e < s)
Perl_croak(aTHX_ "panic: utf8_length: unexpected end");
while (s < e) {
@@ -408,6 +415,10 @@ Perl_utf8_distance(pTHX_ U8 *a, U8 *b)
{
IV off = 0;
+ /* Note: cannot use UTF8_IS_...() too eagerly here since e.g.
+ * the bitops (especially ~) can create illegal UTF-8.
+ * In other words: in Perl UTF-8 is not just for Unicode. */
+
if (a < b) {
while (a < b) {
U8 c = UTF8SKIP(a);
@@ -435,17 +446,22 @@ Perl_utf8_distance(pTHX_ U8 *a, U8 *b)
/*
=for apidoc Am|U8*|utf8_hop|U8 *s|I32 off
-Move the C<s> pointing to UTF-8 data by C<off> characters, either forward
-or backward.
+Return the UTF-8 pointer C<s> displaced by C<off> characters, either
+forward or backward.
WARNING: do not use the following unless you *know* C<off> is within
-the UTF-8 buffer pointed to by C<s>.
+the UTF-8 data pointed to by C<s> *and* that on entry C<s> is aligned
+on the first byte of character or just after the last byte of a character.
=cut */
U8 *
Perl_utf8_hop(pTHX_ U8 *s, I32 off)
{
+ /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
+ * the bitops (especially ~) can create illegal UTF-8.
+ * In other words: in Perl UTF-8 is not just for Unicode. */
+
if (off >= 0) {
while (off--)
s += UTF8SKIP(s);
@@ -453,10 +469,8 @@ Perl_utf8_hop(pTHX_ U8 *s, I32 off)
else {
while (off++) {
s--;
- if (*s & 0x80) {
- while ((*s & 0xc0) == 0x80)
- s--;
- }
+ while (UTF8_IS_CONTINUATION(*s))
+ s--;
}
}
return s;
@@ -494,14 +508,9 @@ Perl_utf8_to_bytes(pTHX_ U8* s, STRLEN *len)
d = s = save;
while (s < send) {
- if (*s < 0x80) {
- *d++ = *s++;
- }
- else {
- STRLEN ulen;
- *d++ = (U8)utf8_to_uv_simple(s, &ulen);
- s += ulen;
- }
+ STRLEN ulen;
+ *d++ = (U8)utf8_to_uv_simple(s, &ulen);
+ s += ulen;
}
*d = '\0';
*len = d - save;
diff --git a/utf8.h b/utf8.h
index 25ddc14d09..26ef7236ee 100644
--- a/utf8.h
+++ b/utf8.h
@@ -46,10 +46,31 @@ END_EXTERN_C
#define UTF8_ALLOW_ANY 0x00ff
#define UTF8_CHECK_ONLY 0x0100
+#define UNICODE_SURROGATE_FIRST 0xd800
+#define UNICODE_SURROGATE_LAST 0xdfff
+#define UNICODE_REPLACEMENT 0xfffd
+#define UNICODE_BYTER_ORDER_MARK 0xfffe
+#define UNICODE_ILLEGAL 0xffff
+
+#define UNICODE_IS_SURROGATE(c) ((c) >= UNICODE_SURROGATE_FIRST && \
+ (c) <= UNICODE_SURROGATE_LAST)
+#define UNICODE_IS_REPLACEMENT(c) ((c) == UNICODE_REPLACMENT)
+#define UNICODE_IS_BYTE_ORDER_MARK(c) ((c) == UNICODE_BYTER_ORDER_MARK)
+#define UNICODE_IS_ILLEGAL(c) ((c) == UNICODE_ILLEGAL)
+
#define UTF8SKIP(s) PL_utf8skip[*(U8*)s]
#define UTF8_QUAD_MAX UINT64_C(0x1000000000)
+#define UTF8_IS_ASCII(c) ((c) < 0x80)
+#define UTF8_IS_START(c) ((c) >= 0xc0 && ((c) <= 0xfd))
+#define UTF8_IS_CONTINUATION(c) ((c) >= 0x80 && ((c) <= 0xbf))
+#define UTF8_IS_CONTINUED(c) ((c) & 0x80)
+
+#define UTF8_CONTINUATION_MASK 0x3f
+#define UTF8_ACCUMULATION_SHIFT 6
+#define UTF8_ACCUMULATE(old, new) ((old) << UTF8_ACCUMULATION_SHIFT | ((new) & UTF8_CONTINUATION_MASK))
+
#ifdef HAS_QUAD
#define UNISKIP(uv) ( (uv) < 0x80 ? 1 : \
(uv) < 0x800 ? 2 : \
@@ -68,7 +89,6 @@ END_EXTERN_C
(uv) < 0x80000000 ? 6 : 7 )
#endif
-#define UNICODE_REPLACEMENT_CHARACTER 0xfffd
/*
* Note: we try to be careful never to call the isXXX_utf8() functions